1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
51 #include "elf/dwarf2.h"
53 #include "tm-constrs.h"
/* Forward declarations for static helpers defined later in this file
   (bodies not visible in this chunk).  */
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Default stack-probe limit: -1 means no compile-time limit is known.
   Targets may pre-define CHECK_STACK_LIMIT to override this.
   Fix: the listing had dropped the "#endif" that closes this guard,
   leaving an unterminated conditional; restored here.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, anything else -> 4
   (the "other" column of the five-entry cost arrays below).
   Fix: the listing had dropped the final ": 4)" fallback line, leaving
   the conditional chain and parentheses unbalanced; restored here.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
72 /* Processor costs (relative to an add) */
73 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
/* Scale N instructions to a byte-based cost for size tuning (2 bytes/add).  */
74 #define COSTS_N_BYTES(N) ((N) * 2)
/* Filler stringop strategy for table slots that are not used on a given
   target (presumably the 64-bit column on 32-bit-only CPUs -- verify).  */
76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* NOTE(review): this listing has dropped several lines of this table
   (e.g. the '"large" insn' / MOVE_RATIO fields, the branch-cost field and
   the closing "};") -- restore from upstream before compiling.  */
79 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
/* memcpy then memset stringop strategy tables ({32-bit, 64-bit} each).  */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
/* NOTE(review): dropped lines in this listing (MOVE_RATIO, branch cost,
   closing "};") -- verify against upstream before relying on field
   alignment.  */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): dropped lines in this listing (MOVE_RATIO, branch cost,
   closing "};") -- verify against upstream before relying on field
   alignment.  */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): dropped lines in this listing (MOVE_RATIO, branch cost,
   closing "};") -- verify against upstream before relying on field
   alignment.  */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): dropped lines in this listing (MOVE_RATIO, branch cost,
   closing "};", and the terminator of the rep-prefix comment below, which
   has been restored) -- verify against upstream.  */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise. */
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): dropped lines in this listing (MOVE_RATIO, branch cost,
   closing "};") -- verify against upstream before relying on field
   alignment.  */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): dropped lines in this listing (MOVE_RATIO, branch cost,
   closing "};") -- verify against upstream before relying on field
   alignment.  */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): dropped lines in this listing (MOVE_RATIO, branch cost,
   closing "};") -- verify against upstream before relying on field
   alignment.  */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): dropped lines in this listing (MOVE_RATIO, branch cost,
   closing "};", and the terminator of the prefetch comment below, which
   has been restored) -- verify against upstream.  */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
time).  */
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
/* NOTE(review): dropped lines in this listing (MOVE_RATIO, branch cost,
   closing "};", the opening/closing of the MOVD timing comment and the
   terminator of the prefetch comment -- both repaired below) -- verify
   against upstream.  */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
/* NOTE(review): the next four lines are the body of a truncated comment
   (its opening "On K8 ... On AMDFAM10" framing lines were dropped);
   comment delimiters restored so the initializer stays well-formed.  */
/* 776 MOVD reg64, xmmreg Double FSTORE 4 */
/* 777 MOVD reg32, xmmreg Double FSTORE 4 */
/* 779 MOVD reg64, xmmreg Double FADD 3 */
/* 781 MOVD reg32, xmmreg Double FADD 3 */
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
time).  */
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): dropped lines in this listing (MOVE_RATIO, branch cost,
   closing "};", and one line of the memset stringop entry -- see the note
   below) -- verify against upstream.  */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
/* NOTE(review): the memset entry below is missing its final
   "{-1, libcall}}}," line (dropped in this listing); restore it from
   upstream before compiling.  */
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
995 in SFmode, DFmode and XFmode */
996 2, /* cost of moving MMX register */
997 {6, 6}, /* cost of loading MMX registers
998 in SImode and DImode */
999 {4, 4}, /* cost of storing MMX registers
1000 in SImode and DImode */
1001 2, /* cost of moving SSE register */
1002 {6, 6, 6}, /* cost of loading SSE registers
1003 in SImode, DImode and TImode */
1004 {4, 4, 4}, /* cost of storing SSE registers
1005 in SImode, DImode and TImode */
1006 2, /* MMX or SSE register to integer */
1007 32, /* size of l1 cache. */
1008 2048, /* size of l2 cache. */
1009 128, /* size of prefetch block */
1010 8, /* number of parallel prefetches */
1011 3, /* Branch cost */
1012 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1013 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1014 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1015 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1016 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1017 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1018 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1019 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 {{libcall, {{8, loop}, {15, unrolled_loop},
1022 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1023 {libcall, {{24, loop}, {32, unrolled_loop},
1024 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1025 1, /* scalar_stmt_cost. */
1026 1, /* scalar load_cost. */
1027 1, /* scalar_store_cost. */
1028 1, /* vec_stmt_cost. */
1029 1, /* vec_to_scalar_cost. */
1030 1, /* scalar_to_vec_cost. */
1031 1, /* vec_align_load_cost. */
1032 2, /* vec_unalign_load_cost. */
1033 1, /* vec_store_cost. */
1034 3, /* cond_taken_branch_cost. */
1035 1, /* cond_not_taken_branch_cost. */
1039 struct processor_costs atom_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1042 COSTS_N_INSNS (1), /* variable shift costs */
1043 COSTS_N_INSNS (1), /* constant shift costs */
1044 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1045 COSTS_N_INSNS (4), /* HI */
1046 COSTS_N_INSNS (3), /* SI */
1047 COSTS_N_INSNS (4), /* DI */
1048 COSTS_N_INSNS (2)}, /* other */
1049 0, /* cost of multiply per each bit set */
1050 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1051 COSTS_N_INSNS (26), /* HI */
1052 COSTS_N_INSNS (42), /* SI */
1053 COSTS_N_INSNS (74), /* DI */
1054 COSTS_N_INSNS (74)}, /* other */
1055 COSTS_N_INSNS (1), /* cost of movsx */
1056 COSTS_N_INSNS (1), /* cost of movzx */
1057 8, /* "large" insn */
1058 17, /* MOVE_RATIO */
1059 2, /* cost for loading QImode using movzbl */
1060 {4, 4, 4}, /* cost of loading integer registers
1061 in QImode, HImode and SImode.
1062 Relative to reg-reg move (2). */
1063 {4, 4, 4}, /* cost of storing integer registers */
1064 4, /* cost of reg,reg fld/fst */
1065 {12, 12, 12}, /* cost of loading fp registers
1066 in SFmode, DFmode and XFmode */
1067 {6, 6, 8}, /* cost of storing fp registers
1068 in SFmode, DFmode and XFmode */
1069 2, /* cost of moving MMX register */
1070 {8, 8}, /* cost of loading MMX registers
1071 in SImode and DImode */
1072 {8, 8}, /* cost of storing MMX registers
1073 in SImode and DImode */
1074 2, /* cost of moving SSE register */
1075 {8, 8, 8}, /* cost of loading SSE registers
1076 in SImode, DImode and TImode */
1077 {8, 8, 8}, /* cost of storing SSE registers
1078 in SImode, DImode and TImode */
1079 5, /* MMX or SSE register to integer */
1080 32, /* size of l1 cache. */
1081 256, /* size of l2 cache. */
1082 64, /* size of prefetch block */
1083 6, /* number of parallel prefetches */
1084 3, /* Branch cost */
1085 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1086 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1087 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1088 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1089 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1090 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1091 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1092 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1093 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1094 {{libcall, {{8, loop}, {15, unrolled_loop},
1095 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1096 {libcall, {{24, loop}, {32, unrolled_loop},
1097 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1111 /* Generic64 should produce code tuned for Nocona and K8. */
1113 struct processor_costs generic64_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 /* On all chips taken into consideration lea is 2 cycles and more. With
1116 this cost however our current implementation of synth_mult results in
1117 use of unnecessary temporary registers causing regression on several
1118 SPECfp benchmarks. */
1119 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1120 COSTS_N_INSNS (1), /* variable shift costs */
1121 COSTS_N_INSNS (1), /* constant shift costs */
1122 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1123 COSTS_N_INSNS (4), /* HI */
1124 COSTS_N_INSNS (3), /* SI */
1125 COSTS_N_INSNS (4), /* DI */
1126 COSTS_N_INSNS (2)}, /* other */
1127 0, /* cost of multiply per each bit set */
1128 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1129 COSTS_N_INSNS (26), /* HI */
1130 COSTS_N_INSNS (42), /* SI */
1131 COSTS_N_INSNS (74), /* DI */
1132 COSTS_N_INSNS (74)}, /* other */
1133 COSTS_N_INSNS (1), /* cost of movsx */
1134 COSTS_N_INSNS (1), /* cost of movzx */
1135 8, /* "large" insn */
1136 17, /* MOVE_RATIO */
1137 4, /* cost for loading QImode using movzbl */
1138 {4, 4, 4}, /* cost of loading integer registers
1139 in QImode, HImode and SImode.
1140 Relative to reg-reg move (2). */
1141 {4, 4, 4}, /* cost of storing integer registers */
1142 4, /* cost of reg,reg fld/fst */
1143 {12, 12, 12}, /* cost of loading fp registers
1144 in SFmode, DFmode and XFmode */
1145 {6, 6, 8}, /* cost of storing fp registers
1146 in SFmode, DFmode and XFmode */
1147 2, /* cost of moving MMX register */
1148 {8, 8}, /* cost of loading MMX registers
1149 in SImode and DImode */
1150 {8, 8}, /* cost of storing MMX registers
1151 in SImode and DImode */
1152 2, /* cost of moving SSE register */
1153 {8, 8, 8}, /* cost of loading SSE registers
1154 in SImode, DImode and TImode */
1155 {8, 8, 8}, /* cost of storing SSE registers
1156 in SImode, DImode and TImode */
1157 5, /* MMX or SSE register to integer */
1158 32, /* size of l1 cache. */
1159 512, /* size of l2 cache. */
1160 64, /* size of prefetch block */
1161 6, /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
1164 3, /* Branch cost */
1165 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1166 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1167 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1168 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1169 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1170 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1171 {DUMMY_STRINGOP_ALGS,
1172 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1173 {DUMMY_STRINGOP_ALGS,
1174 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1175 1, /* scalar_stmt_cost. */
1176 1, /* scalar load_cost. */
1177 1, /* scalar_store_cost. */
1178 1, /* vec_stmt_cost. */
1179 1, /* vec_to_scalar_cost. */
1180 1, /* scalar_to_vec_cost. */
1181 1, /* vec_align_load_cost. */
1182 2, /* vec_unalign_load_cost. */
1183 1, /* vec_store_cost. */
1184 3, /* cond_taken_branch_cost. */
1185 1, /* cond_not_taken_branch_cost. */
1188 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1190 struct processor_costs generic32_cost = {
1191 COSTS_N_INSNS (1), /* cost of an add instruction */
1192 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1193 COSTS_N_INSNS (1), /* variable shift costs */
1194 COSTS_N_INSNS (1), /* constant shift costs */
1195 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1196 COSTS_N_INSNS (4), /* HI */
1197 COSTS_N_INSNS (3), /* SI */
1198 COSTS_N_INSNS (4), /* DI */
1199 COSTS_N_INSNS (2)}, /* other */
1200 0, /* cost of multiply per each bit set */
1201 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1202 COSTS_N_INSNS (26), /* HI */
1203 COSTS_N_INSNS (42), /* SI */
1204 COSTS_N_INSNS (74), /* DI */
1205 COSTS_N_INSNS (74)}, /* other */
1206 COSTS_N_INSNS (1), /* cost of movsx */
1207 COSTS_N_INSNS (1), /* cost of movzx */
1208 8, /* "large" insn */
1209 17, /* MOVE_RATIO */
1210 4, /* cost for loading QImode using movzbl */
1211 {4, 4, 4}, /* cost of loading integer registers
1212 in QImode, HImode and SImode.
1213 Relative to reg-reg move (2). */
1214 {4, 4, 4}, /* cost of storing integer registers */
1215 4, /* cost of reg,reg fld/fst */
1216 {12, 12, 12}, /* cost of loading fp registers
1217 in SFmode, DFmode and XFmode */
1218 {6, 6, 8}, /* cost of storing fp registers
1219 in SFmode, DFmode and XFmode */
1220 2, /* cost of moving MMX register */
1221 {8, 8}, /* cost of loading MMX registers
1222 in SImode and DImode */
1223 {8, 8}, /* cost of storing MMX registers
1224 in SImode and DImode */
1225 2, /* cost of moving SSE register */
1226 {8, 8, 8}, /* cost of loading SSE registers
1227 in SImode, DImode and TImode */
1228 {8, 8, 8}, /* cost of storing SSE registers
1229 in SImode, DImode and TImode */
1230 5, /* MMX or SSE register to integer */
1231 32, /* size of l1 cache. */
1232 256, /* size of l2 cache. */
1233 64, /* size of prefetch block */
1234 6, /* number of parallel prefetches */
1235 3, /* Branch cost */
1236 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1237 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1238 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1239 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1240 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1241 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1242 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1243 DUMMY_STRINGOP_ALGS},
1244 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1245 DUMMY_STRINGOP_ALGS},
1246 1, /* scalar_stmt_cost. */
1247 1, /* scalar load_cost. */
1248 1, /* scalar_store_cost. */
1249 1, /* vec_stmt_cost. */
1250 1, /* vec_to_scalar_cost. */
1251 1, /* scalar_to_vec_cost. */
1252 1, /* vec_align_load_cost. */
1253 2, /* vec_unalign_load_cost. */
1254 1, /* vec_store_cost. */
1255 3, /* cond_taken_branch_cost. */
1256 1, /* cond_not_taken_branch_cost. */
1259 const struct processor_costs *ix86_cost = &pentium_cost;
1261 /* Processor feature/optimization bitmasks. */
1262 #define m_386 (1<<PROCESSOR_I386)
1263 #define m_486 (1<<PROCESSOR_I486)
1264 #define m_PENT (1<<PROCESSOR_PENTIUM)
1265 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1266 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1267 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1268 #define m_CORE2 (1<<PROCESSOR_CORE2)
1269 #define m_ATOM (1<<PROCESSOR_ATOM)
1271 #define m_GEODE (1<<PROCESSOR_GEODE)
1272 #define m_K6 (1<<PROCESSOR_K6)
1273 #define m_K6_GEODE (m_K6 | m_GEODE)
1274 #define m_K8 (1<<PROCESSOR_K8)
1275 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1276 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1277 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1278 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1280 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1281 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1283 /* Generic instruction choice should be common subset of supported CPUs
1284 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1285 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1287 /* Feature tests against the various tunings. */
1288 unsigned char ix86_tune_features[X86_TUNE_LAST];
1290 /* Feature tests against the various tunings used to create ix86_tune_features
1291 based on the processor mask. */
1292 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1293 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1294 negatively, so enabling for Generic64 seems like good code size
1295 tradeoff. We can't enable it for 32bit generic because it does not
1296 work well with PPro base chips. */
1297 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1299 /* X86_TUNE_PUSH_MEMORY */
1300 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1301 | m_NOCONA | m_CORE2 | m_GENERIC,
1303 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1306 /* X86_TUNE_UNROLL_STRLEN */
1307 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1308 | m_CORE2 | m_GENERIC,
1310 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1311 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1313 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1314 on simulation result. But after P4 was made, no performance benefit
1315 was observed with branch hints. It also increases the code size.
1316 As a result, icc never generates branch hints. */
1319 /* X86_TUNE_DOUBLE_WITH_ADD */
1322 /* X86_TUNE_USE_SAHF */
1323 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1324 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1327 partial dependencies. */
1328 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1329 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1331 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1332 register stalls on Generic32 compilation setting as well. However
1333 in current implementation the partial register stalls are not eliminated
1334 very well - they can be introduced via subregs synthesized by combine
1335 and can happen in caller/callee saving sequences. Because this option
1336 pays back little on PPro based chips and is in conflict with partial reg
1337 dependencies used by Athlon/P4 based chips, it is better to leave it off
1338 for generic32 for now. */
1341 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1342 m_CORE2 | m_GENERIC,
1344 /* X86_TUNE_USE_HIMODE_FIOP */
1345 m_386 | m_486 | m_K6_GEODE,
1347 /* X86_TUNE_USE_SIMODE_FIOP */
1348 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1350 /* X86_TUNE_USE_MOV0 */
1353 /* X86_TUNE_USE_CLTD */
1354 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1356 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1359 /* X86_TUNE_SPLIT_LONG_MOVES */
1362 /* X86_TUNE_READ_MODIFY_WRITE */
1365 /* X86_TUNE_READ_MODIFY */
1368 /* X86_TUNE_PROMOTE_QIMODE */
1369 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1370 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1372 /* X86_TUNE_FAST_PREFIX */
1373 ~(m_PENT | m_486 | m_386),
1375 /* X86_TUNE_SINGLE_STRINGOP */
1376 m_386 | m_PENT4 | m_NOCONA,
1378 /* X86_TUNE_QIMODE_MATH */
1381 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1382 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1383 might be considered for Generic32 if our scheme for avoiding partial
1384 stalls was more effective. */
1387 /* X86_TUNE_PROMOTE_QI_REGS */
1390 /* X86_TUNE_PROMOTE_HI_REGS */
1393 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1394 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1395 | m_CORE2 | m_GENERIC,
1397 /* X86_TUNE_ADD_ESP_8 */
1398 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1399 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_SUB_ESP_4 */
1402 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1405 /* X86_TUNE_SUB_ESP_8 */
1406 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1407 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1409 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1410 for DFmode copies */
1411 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1412 | m_GENERIC | m_GEODE),
1414 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1415 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1417 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here between PPro/Pentium4 based chips that treat 128bit
1419 SSE registers as single units versus K8 based chips that divide SSE
1420 registers to two 64bit halves. This knob promotes all store destinations
1421 to be 128bit to allow register renaming on 128bit SSE units, but usually
1422 results in one extra microop on 64bit SSE units. Experimental results
     show that disabling this option on P4 brings over 20% SPECfp regression,
1424 while enabling it on K8 brings roughly 2.4% regression that can be partly
1425 masked by careful scheduling of moves. */
1426 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1429 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1432 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1433 are resolved on SSE register parts instead of whole registers, so we may
1434 maintain just lower part of scalar values in proper format leaving the
1435 upper part undefined. */
1438 /* X86_TUNE_SSE_TYPELESS_STORES */
1441 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1442 m_PPRO | m_PENT4 | m_NOCONA,
1444 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1445 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1447 /* X86_TUNE_PROLOGUE_USING_MOVE */
1448 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1450 /* X86_TUNE_EPILOGUE_USING_MOVE */
1451 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1453 /* X86_TUNE_SHIFT1 */
1456 /* X86_TUNE_USE_FFREEP */
1459 /* X86_TUNE_INTER_UNIT_MOVES */
1460 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1462 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1465 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1466 than 4 branch instructions in the 16 byte window. */
1467 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1470 /* X86_TUNE_SCHEDULE */
1471 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1474 /* X86_TUNE_USE_BT */
1475 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1477 /* X86_TUNE_USE_INCDEC */
1478 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1480 /* X86_TUNE_PAD_RETURNS */
1481 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1483 /* X86_TUNE_EXT_80387_CONSTANTS */
1484 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_SHORTEN_X87_SSE */
1490 /* X86_TUNE_AVOID_VECTOR_DECODE */
1493 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1494 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1497 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1498 vector path on AMD machines. */
1499 m_K8 | m_GENERIC64 | m_AMDFAM10,
1501 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1503 m_K8 | m_GENERIC64 | m_AMDFAM10,
1505 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1509 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1510 but one byte longer. */
1513 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1514 operand that cannot be represented using a modRM byte. The XOR
1515 replacement is long decoded, so this split helps here as well. */
1518 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1520 m_AMDFAM10 | m_GENERIC,
1522 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1523 from integer to FP. */
1526 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1527 with a subsequent conditional jump instruction into a single
1528 compare-and-branch uop. */
1531 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1532 will impact LEA instruction selection. */
1536 /* Feature tests against the various architecture variations. */
1537 unsigned char ix86_arch_features[X86_ARCH_LAST];
1539 /* Feature tests against the various architecture variations, used to create
1540 ix86_arch_features based on the processor mask. */
1541 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1542 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1543 ~(m_386 | m_486 | m_PENT | m_K6),
1545 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1548 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1551 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1554 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1558 static const unsigned int x86_accumulate_outgoing_args
1559 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1562 static const unsigned int x86_arch_always_fancy_math_387
1563 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1564 | m_NOCONA | m_CORE2 | m_GENERIC;
1566 static enum stringop_alg stringop_alg = no_stringop;
1568 /* In case the average insn count for single function invocation is
1569 lower than this constant, emit fast (but longer) prologue and
1571 #define FAST_PROLOGUE_INSN_COUNT 20
1573 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1574 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1575 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1576 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1578 /* Array of the smallest class containing reg number REGNO, indexed by
1579 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1581 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1583 /* ax, dx, cx, bx */
1584 AREG, DREG, CREG, BREG,
1585 /* si, di, bp, sp */
1586 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1588 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1589 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1592 /* flags, fpsr, fpcr, frame */
1593 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1595 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1598 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1601 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 /* SSE REX registers */
1604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1608 /* The "default" register map used in 32bit mode. */
1610 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1612 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1613 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1614 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1615 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1616 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1617 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1621 /* The "default" register map used in 64bit mode. */
1623 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1625 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1626 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1627 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1628 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1629 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1630 8,9,10,11,12,13,14,15, /* extended integer registers */
1631 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1634 /* Define the register numbers to be used in Dwarf debugging information.
1635 The SVR4 reference port C compiler uses the following register numbers
1636 in its Dwarf output code:
1637 0 for %eax (gcc regno = 0)
1638 1 for %ecx (gcc regno = 2)
1639 2 for %edx (gcc regno = 1)
1640 3 for %ebx (gcc regno = 3)
1641 4 for %esp (gcc regno = 7)
1642 5 for %ebp (gcc regno = 6)
1643 6 for %esi (gcc regno = 4)
1644 7 for %edi (gcc regno = 5)
1645 The following three DWARF register numbers are never generated by
1646 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1647 believes these numbers have these meanings.
1648 8 for %eip (no gcc equivalent)
1649 9 for %eflags (gcc regno = 17)
1650 10 for %trapno (no gcc equivalent)
1651 It is not at all clear how we should number the FP stack registers
1652 for the x86 architecture. If the version of SDB on x86/svr4 were
1653 a bit less brain dead with respect to floating-point then we would
1654 have a precedent to follow with respect to DWARF register numbers
1655 for x86 FP registers, but the SDB on x86/svr4 is so completely
1656 broken with respect to FP registers that it is hardly worth thinking
1657 of it as something to strive for compatibility with.
1658 The version of x86/svr4 SDB I have at the moment does (partially)
1659 seem to believe that DWARF register number 11 is associated with
1660 the x86 register %st(0), but that's about all. Higher DWARF
1661 register numbers don't seem to be associated with anything in
1662 particular, and even for DWARF regno 11, SDB only seems to under-
1663 stand that it should say that a variable lives in %st(0) (when
1664 asked via an `=' command) if we said it was in DWARF regno 11,
1665 but SDB still prints garbage when asked for the value of the
1666 variable in question (via a `/' command).
1667 (Also note that the labels SDB prints for various FP stack regs
1668 when doing an `x' command are all wrong.)
1669 Note that these problems generally don't affect the native SVR4
1670 C compiler because it doesn't allow the use of -O with -g and
1671 because when it is *not* optimizing, it allocates a memory
1672 location for each floating-point variable, and the memory
1673 location is what gets described in the DWARF AT_location
1674 attribute for the variable in question.
1675 Regardless of the severe mental illness of the x86/svr4 SDB, we
1676 do something sensible here and we use the following DWARF
1677 register numbers. Note that these are all stack-top-relative
1679 11 for %st(0) (gcc regno = 8)
1680 12 for %st(1) (gcc regno = 9)
1681 13 for %st(2) (gcc regno = 10)
1682 14 for %st(3) (gcc regno = 11)
1683 15 for %st(4) (gcc regno = 12)
1684 16 for %st(5) (gcc regno = 13)
1685 17 for %st(6) (gcc regno = 14)
1686 18 for %st(7) (gcc regno = 15)
/* Map GCC hard-register numbers to SVR4 dbx/DWARF register numbers;
   -1 marks a register with no DWARF number.  The FP rows implement the
   stack-top-relative numbering (DWARF 11..18 for %st(0)..%st(7))
   described in the comment above.  */
1688 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1690 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1691 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1692 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1693 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1694 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1695 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1699 /* Test and compare insns in i386.md store the information needed to
1700 generate branch and scc insns here. */
1702 rtx ix86_compare_op0 = NULL_RTX;
1703 rtx ix86_compare_op1 = NULL_RTX;
1705 /* Define parameter passing and return registers. */
/* SysV AMD64 ABI order: %rdi, %rsi, %rdx, %rcx, %r8, %r9.  */
1707 static int const x86_64_int_parameter_registers[6] =
1709 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
/* Microsoft x64 ABI order: %rcx, %rdx, %r8, %r9.  */
1712 static int const x86_64_ms_abi_int_parameter_registers[4] =
1714 CX_REG, DX_REG, R8_REG, R9_REG
/* Integer value-return registers; %rax/%rdx first.  NOTE(review): the
   role of the DI/SI entries here is not visible in this file -- confirm
   against the function-value code that indexes this table.  */
1717 static int const x86_64_int_return_registers[4] =
1719 AX_REG, DX_REG, DI_REG, SI_REG
1722 /* Define the structure for the machine field in struct function. */
1724 struct GTY(()) stack_local_entry {
1725 unsigned short mode;
1728 struct stack_local_entry *next;
1731 /* Structure describing stack frame layout.
1732 Stack grows downward:
1738 saved frame pointer if frame_pointer_needed
1739 <- HARD_FRAME_POINTER
1748 [va_arg registers] (
1749 > to_allocate <- FRAME_POINTER
1761 HOST_WIDE_INT frame;
1763 int outgoing_arguments_size;
1766 HOST_WIDE_INT to_allocate;
1767 /* The offsets relative to ARG_POINTER. */
1768 HOST_WIDE_INT frame_pointer_offset;
1769 HOST_WIDE_INT hard_frame_pointer_offset;
1770 HOST_WIDE_INT stack_pointer_offset;
1772 /* When save_regs_using_mov is set, emit prologue using
1773 move instead of push instructions. */
1774 bool save_regs_using_mov;
1777 /* Code model option. */
1778 enum cmodel ix86_cmodel;
/* Assembler syntax to emit: AT&T (default) or Intel. */
1780 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS access model: GNU or SUN dialect. */
1782 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1784 /* Which unit we are generating floating point math for. */
1785 enum fpmath_unit ix86_fpmath;
1787 /* Which cpu are we scheduling for. */
1788 enum attr_cpu ix86_schedule;
1790 /* Which cpu are we optimizing for. */
1791 enum processor_type ix86_tune;
1793 /* Which instruction set architecture to use. */
1794 enum processor_type ix86_arch;
1796 /* true if sse prefetch instruction is not NOOP. */
1797 int x86_prefetch_sse;
1799 /* ix86_regparm_string as a number */
1800 static int ix86_regparm;
1802 /* -mstackrealign option */
1803 extern int ix86_force_align_arg_pointer;
1804 static const char ix86_force_align_arg_pointer_string[]
1805 = "force_align_arg_pointer";
/* Insn-generator hooks -- presumably selected between SImode and DImode
   variants once the target word size is known; confirm where they are
   assigned (not visible here).  */
1807 static rtx (*ix86_gen_leave) (void);
1808 static rtx (*ix86_gen_pop1) (rtx);
1809 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1810 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1811 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1812 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1813 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1814 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1816 /* Preferred alignment for stack boundary in bits. */
1817 unsigned int ix86_preferred_stack_boundary;
1819 /* Alignment for incoming stack boundary in bits specified at
1821 static unsigned int ix86_user_incoming_stack_boundary;
1823 /* Default alignment for incoming stack boundary in bits. */
1824 static unsigned int ix86_default_incoming_stack_boundary;
1826 /* Alignment for incoming stack boundary in bits. */
1827 unsigned int ix86_incoming_stack_boundary;
1829 /* The abi used by target. */
1830 enum calling_abi ix86_abi;
1832 /* Values 1-5: see jump.c */
1833 int ix86_branch_cost;
1835 /* Calling abi specific va_list type nodes. */
1836 static GTY(()) tree sysv_va_list_type_node;
1837 static GTY(()) tree ms_va_list_type_node;
1839 /* Variables which are this size or smaller are put in the data/bss
1840 or ldata/lbss sections. */
1842 int ix86_section_threshold = 65536;
1844 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1845 char internal_label_prefix[16];
1846 int internal_label_prefix_len;
1848 /* Fence to use after loop using movnt. */
1851 /* Register class used for passing given 64bit part of the argument.
1852 These represent classes as documented by the PS ABI, with the exception
1853 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1854 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1856 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1857 whenever possible (upper half does contain padding). */
1858 enum x86_64_reg_class
1861 X86_64_INTEGER_CLASS,
1862 X86_64_INTEGERSI_CLASS,
1869 X86_64_COMPLEX_X87_CLASS,
/* Maximum number of 64-bit eightbytes an argument is classified into. */
1873 #define MAX_CLASSES 4
1875 /* Table of constants used by fldpi, fldln2, etc.... */
1876 static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Lazily-initialized guard for the table above. */
1877 static bool ext_80387_constants_init = 0;
/* Forward declarations for static helpers defined later in this file. */
1880 static struct machine_function * ix86_init_machine_status (void);
1881 static rtx ix86_function_value (const_tree, const_tree, bool);
1882 static int ix86_function_regparm (const_tree, const_tree);
1883 static void ix86_compute_frame_layout (struct ix86_frame *);
1884 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1886 static void ix86_add_new_builtins (int);
/* Indices of the string options recorded by the target attribute /
   pragma machinery (arch, tune, fpmath).  */
1888 enum ix86_function_specific_strings
1890 IX86_FUNCTION_SPECIFIC_ARCH,
1891 IX86_FUNCTION_SPECIFIC_TUNE,
1892 IX86_FUNCTION_SPECIFIC_FPMATH,
1893 IX86_FUNCTION_SPECIFIC_MAX
1896 static char *ix86_target_string (int, int, const char *, const char *,
1897 const char *, bool);
1898 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1899 static void ix86_function_specific_save (struct cl_target_option *);
1900 static void ix86_function_specific_restore (struct cl_target_option *);
1901 static void ix86_function_specific_print (FILE *, int,
1902 struct cl_target_option *);
1903 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1904 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1905 static bool ix86_can_inline_p (tree, tree);
1906 static void ix86_set_current_function (tree);
1908 static enum calling_abi ix86_function_abi (const_tree);
1911 /* The svr4 ABI for the i386 says that records and unions are returned
1913 #ifndef DEFAULT_PCC_STRUCT_RETURN
1914 #define DEFAULT_PCC_STRUCT_RETURN 1
1917 /* Whether -mtune= or -march= were specified */
1918 static int ix86_tune_defaulted;
1919 static int ix86_arch_specified;
1921 /* Bit flags that specify the ISA we are compiling for. */
1922 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1924 /* A mask of ix86_isa_flags that includes bit X if X
1925 was set or cleared on the command line. */
1926 static int ix86_isa_flags_explicit;
1928 /* Define a set of ISAs which are available when a given ISA is
1929 enabled. MMX and SSE ISAs are handled separately. */
/* Each _SET mask is the ISA's own bit OR'ed with the _SET masks of
   everything that ISA implies, so enabling e.g. SSE3 transitively
   enables SSE2 and SSE as well.  */
1931 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1932 #define OPTION_MASK_ISA_3DNOW_SET \
1933 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1935 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1936 #define OPTION_MASK_ISA_SSE2_SET \
1937 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1938 #define OPTION_MASK_ISA_SSE3_SET \
1939 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1940 #define OPTION_MASK_ISA_SSSE3_SET \
1941 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1942 #define OPTION_MASK_ISA_SSE4_1_SET \
1943 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1944 #define OPTION_MASK_ISA_SSE4_2_SET \
1945 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1946 #define OPTION_MASK_ISA_AVX_SET \
1947 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1948 #define OPTION_MASK_ISA_FMA_SET \
1949 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1951 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1953 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1955 #define OPTION_MASK_ISA_SSE4A_SET \
1956 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1957 #define OPTION_MASK_ISA_SSE5_SET \
1958 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1960 /* AES and PCLMUL need SSE2 because they use xmm registers */
1961 #define OPTION_MASK_ISA_AES_SET \
1962 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1963 #define OPTION_MASK_ISA_PCLMUL_SET \
1964 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
/* ABM implies POPCNT.  Spell the implication with the _SET mask for
   consistency with the other composite macros above; the expanded
   value is unchanged since OPTION_MASK_ISA_POPCNT_SET is defined as
   OPTION_MASK_ISA_POPCNT.  */
1966 #define OPTION_MASK_ISA_ABM_SET \
1967 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT_SET)
1969 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1970 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1971 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1972 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
1973 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
1975 /* Define a set of ISAs which aren't available when a given ISA is
1976 disabled. MMX and SSE ISAs are handled separately. */
/* The _UNSET masks run the dependency chain in the opposite direction:
   disabling an ISA also clears everything that depends on it.  */
1978 #define OPTION_MASK_ISA_MMX_UNSET \
1979 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1980 #define OPTION_MASK_ISA_3DNOW_UNSET \
1981 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1982 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1984 #define OPTION_MASK_ISA_SSE_UNSET \
1985 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1986 #define OPTION_MASK_ISA_SSE2_UNSET \
1987 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1988 #define OPTION_MASK_ISA_SSE3_UNSET \
1989 (OPTION_MASK_ISA_SSE3 \
1990 | OPTION_MASK_ISA_SSSE3_UNSET \
1991 | OPTION_MASK_ISA_SSE4A_UNSET )
1992 #define OPTION_MASK_ISA_SSSE3_UNSET \
1993 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1994 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1995 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1996 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1997 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1998 #define OPTION_MASK_ISA_AVX_UNSET \
1999 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
2000 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2002 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2004 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2006 #define OPTION_MASK_ISA_SSE4A_UNSET \
2007 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2008 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2009 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2010 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
/* Note: -mno-abm deliberately leaves POPCNT enabled.  */
2011 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2012 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2013 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2014 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2015 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2016 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2018 /* Vectorization library interface and handlers. */
2019 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2020 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2021 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2023 /* Processor target table, indexed by processor number */
2026 const struct processor_costs *cost; /* Processor costs */
2027 const int align_loop; /* Default alignments. */
2028 const int align_loop_max_skip;
2029 const int align_jump;
2030 const int align_jump_max_skip;
2031 const int align_func;
/* NOTE(review): row order must match enum processor_type -- confirm
   against the enum declaration when adding entries.  */
2034 static const struct ptt processor_target_table[PROCESSOR_max] =
2036 {&i386_cost, 4, 3, 4, 3, 4},
2037 {&i486_cost, 16, 15, 16, 15, 16},
2038 {&pentium_cost, 16, 7, 16, 7, 16},
2039 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2040 {&geode_cost, 0, 0, 0, 0, 0},
2041 {&k6_cost, 32, 7, 32, 7, 32},
2042 {&athlon_cost, 16, 7, 16, 7, 16},
2043 {&pentium4_cost, 0, 0, 0, 0, 0},
2044 {&k8_cost, 16, 7, 16, 7, 16},
2045 {&nocona_cost, 0, 0, 0, 0, 0},
2046 {&core2_cost, 16, 10, 16, 10, 16},
2047 {&generic32_cost, 16, 7, 16, 7, 16},
2048 {&generic64_cost, 16, 10, 16, 10, 16},
2049 {&amdfam10_cost, 32, 24, 32, 7, 32},
2050 {&atom_cost, 16, 7, 16, 7, 16}
2053 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2079 /* Implement TARGET_HANDLE_OPTION. */
/* For each ISA option pair below: a nonzero VALUE ORs in the _SET mask
   (the ISA plus everything it implies); a zero VALUE clears the _UNSET
   mask (the ISA plus everything that depends on it).  Either way the
   same mask is recorded in ix86_isa_flags_explicit so later defaulting
   code can tell the user chose explicitly.  */
2082 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2089 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2090 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2094 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2095 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2102 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2103 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2107 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2108 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2118 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2119 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2123 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2124 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2131 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2132 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2136 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2137 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2144 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2145 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2149 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2150 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2157 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2158 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2162 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2163 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2170 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2171 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2175 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2176 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2183 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2184 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2188 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2189 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2196 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2197 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2201 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2202 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2209 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2210 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2214 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2215 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2220 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2221 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2225 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2226 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2232 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2233 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2237 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2238 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2245 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2246 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2250 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2251 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2258 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2259 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2263 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2264 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2271 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2272 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2276 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2277 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2284 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2285 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2289 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2290 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2297 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2298 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2302 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2303 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2310 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2311 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2315 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2316 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2323 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2324 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2328 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2329 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2336 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2337 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2341 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2342 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2349 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2350 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2354 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2355 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2364 /* Return a string that documents the current -m options. The caller is
2365 responsible for freeing the string. */
2368 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2369 const char *fpmath, bool add_nl_p)
2371 struct ix86_target_opts
2373 const char *option; /* option string */
2374 int mask; /* isa mask options */
2377 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2378 preceding options will match those first. */
2379 static struct ix86_target_opts isa_opts[] =
2381 { "-m64", OPTION_MASK_ISA_64BIT },
2382 { "-msse5", OPTION_MASK_ISA_SSE5 },
2383 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2384 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2385 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2386 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2387 { "-msse3", OPTION_MASK_ISA_SSE3 },
2388 { "-msse2", OPTION_MASK_ISA_SSE2 },
2389 { "-msse", OPTION_MASK_ISA_SSE },
2390 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2391 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2392 { "-mmmx", OPTION_MASK_ISA_MMX },
2393 { "-mabm", OPTION_MASK_ISA_ABM },
2394 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2395 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2396 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2397 { "-maes", OPTION_MASK_ISA_AES },
2398 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
/* Flag options (target_flags bits rather than ISA bits).  */
2402 static struct ix86_target_opts flag_opts[] =
2404 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2405 { "-m80387", MASK_80387 },
2406 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2407 { "-malign-double", MASK_ALIGN_DOUBLE },
2408 { "-mcld", MASK_CLD },
2409 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2410 { "-mieee-fp", MASK_IEEE_FP },
2411 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2412 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2413 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2414 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2415 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2416 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2417 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2418 { "-mno-red-zone", MASK_NO_RED_ZONE },
2419 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2420 { "-mrecip", MASK_RECIP },
2421 { "-mrtd", MASK_RTD },
2422 { "-msseregparm", MASK_SSEREGPARM },
2423 { "-mstack-arg-probe", MASK_STACK_PROBE },
2424 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
/* +6: -march=, -mtune=, -mfpmath=, the two "(other ...)" entries, and
   slack -- bounded by the gcc_assert below.  */
2427 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2430 char target_other[40];
2439 memset (opts, '\0', sizeof (opts));
2441 /* Add -march= option. */
2444 opts[num][0] = "-march=";
2445 opts[num++][1] = arch;
2448 /* Add -mtune= option. */
2451 opts[num][0] = "-mtune=";
2452 opts[num++][1] = tune;
2455 /* Pick out the options in isa options. */
2456 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2458 if ((isa & isa_opts[i].mask) != 0)
2460 opts[num++][0] = isa_opts[i].option;
2461 isa &= ~ isa_opts[i].mask;
/* Any ISA bits left over have no table entry; report them raw.  */
2465 if (isa && add_nl_p)
2467 opts[num++][0] = isa_other;
2468 sprintf (isa_other, "(other isa: 0x%x)", isa);
2471 /* Add flag options. */
2472 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2474 if ((flags & flag_opts[i].mask) != 0)
2476 opts[num++][0] = flag_opts[i].option;
2477 flags &= ~ flag_opts[i].mask;
2481 if (flags && add_nl_p)
2483 opts[num++][0] = target_other;
/* Report the leftover target FLAGS bits.  This previously printed
   `isa' here, which at this point holds the leftover ISA bits (already
   reported above), so unhandled flag bits were misreported.  */
2484 sprintf (target_other, "(other flags: 0x%x)", flags);
2487 /* Add -fpmath= option. */
2490 opts[num][0] = "-mfpmath=";
2491 opts[num++][1] = fpmath;
/* Guard against overflowing the fixed-size opts[] array.  */
2498 gcc_assert (num < ARRAY_SIZE (opts));
2500 /* Size the string. */
2502 sep_len = (add_nl_p) ? 3 : 1;
2503 for (i = 0; i < num; i++)
2506 for (j = 0; j < 2; j++)
2508 len += strlen (opts[i][j]);
2511 /* Build the string. */
2512 ret = ptr = (char *) xmalloc (len);
2515 for (i = 0; i < num; i++)
2519 for (j = 0; j < 2; j++)
2520 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
/* Wrap the line when it would exceed ~70 columns (newline mode only). */
2527 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2535 for (j = 0; j < 2; j++)
2538 memcpy (ptr, opts[i][j], len2[j]);
2540 line_len += len2[j];
/* Sanity check: we never wrote past the buffer we sized above.  */
2545 gcc_assert (ret + len >= ptr);
2550 /* Function that is callable from the debugger to print the current
2553 ix86_debug_options (void)
2555 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2556 ix86_arch_string, ix86_tune_string,
2557 ix86_fpmath_string, true);
2561 fprintf (stderr, "%s\n\n", opts);
2565 fprintf (stderr, "<no options>\n\n");
2570 /* Sometimes certain combinations of command options do not make
2571 sense on a particular target machine. You can define a macro
2572 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2573 defined, is executed once just after all the command options have
2576 Don't use this macro to turn on various extra optimizations for
2577 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2580 override_options (bool main_args_p)
2583 unsigned int ix86_arch_mask, ix86_tune_mask;
2588 /* Comes from final.c -- no real reason to change it. */
2589 #define MAX_CODE_ALIGN 16
2597 PTA_PREFETCH_SSE = 1 << 4,
2599 PTA_3DNOW_A = 1 << 6,
2603 PTA_POPCNT = 1 << 10,
2605 PTA_SSE4A = 1 << 12,
2606 PTA_NO_SAHF = 1 << 13,
2607 PTA_SSE4_1 = 1 << 14,
2608 PTA_SSE4_2 = 1 << 15,
2611 PTA_PCLMUL = 1 << 18,
2619 const char *const name; /* processor name or nickname. */
2620 const enum processor_type processor;
2621 const enum attr_cpu schedule;
2622 const unsigned /*enum pta_flags*/ flags;
2624 const processor_alias_table[] =
2626 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2627 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2628 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2629 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2630 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2631 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2632 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2633 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2634 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2635 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2636 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2637 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2638 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2640 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2642 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2643 PTA_MMX | PTA_SSE | PTA_SSE2},
2644 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2645 PTA_MMX |PTA_SSE | PTA_SSE2},
2646 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2647 PTA_MMX | PTA_SSE | PTA_SSE2},
2648 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2649 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2650 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2651 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2652 | PTA_CX16 | PTA_NO_SAHF},
2653 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2654 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2655 | PTA_SSSE3 | PTA_CX16},
2656 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2657 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2658 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2659 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2660 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2661 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2662 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2663 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2664 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2665 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2666 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2667 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2668 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2669 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2670 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2671 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2672 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2673 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2674 {"x86-64", PROCESSOR_K8, CPU_K8,
2675 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2676 {"k8", PROCESSOR_K8, CPU_K8,
2677 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2678 | PTA_SSE2 | PTA_NO_SAHF},
2679 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2680 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2681 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2682 {"opteron", PROCESSOR_K8, CPU_K8,
2683 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2684 | PTA_SSE2 | PTA_NO_SAHF},
2685 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2686 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2687 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2688 {"athlon64", PROCESSOR_K8, CPU_K8,
2689 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2690 | PTA_SSE2 | PTA_NO_SAHF},
2691 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2692 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2693 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2694 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2695 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2696 | PTA_SSE2 | PTA_NO_SAHF},
2697 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2698 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2699 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2700 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2701 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2702 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2703 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2704 0 /* flags are only used for -march switch. */ },
2705 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2706 PTA_64BIT /* flags are only used for -march switch. */ },
2709 int const pta_size = ARRAY_SIZE (processor_alias_table);
2711 /* Set up prefix/suffix so the error messages refer to either the command
2712 line argument, or the attribute(target). */
2721 prefix = "option(\"";
2726 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2727 SUBTARGET_OVERRIDE_OPTIONS;
2730 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2731 SUBSUBTARGET_OVERRIDE_OPTIONS;
2734 /* -fPIC is the default for x86_64. */
2735 if (TARGET_MACHO && TARGET_64BIT)
2738 /* Set the default values for switches whose default depends on TARGET_64BIT
2739 in case they weren't overwritten by command line options. */
2742 /* Mach-O doesn't support omitting the frame pointer for now. */
2743 if (flag_omit_frame_pointer == 2)
2744 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2745 if (flag_asynchronous_unwind_tables == 2)
2746 flag_asynchronous_unwind_tables = 1;
2747 if (flag_pcc_struct_return == 2)
2748 flag_pcc_struct_return = 0;
2752 if (flag_omit_frame_pointer == 2)
2753 flag_omit_frame_pointer = 0;
2754 if (flag_asynchronous_unwind_tables == 2)
2755 flag_asynchronous_unwind_tables = 0;
2756 if (flag_pcc_struct_return == 2)
2757 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2760 /* Need to check -mtune=generic first. */
2761 if (ix86_tune_string)
2763 if (!strcmp (ix86_tune_string, "generic")
2764 || !strcmp (ix86_tune_string, "i686")
2765 /* As special support for cross compilers we read -mtune=native
2766 as -mtune=generic. With native compilers we won't see the
2767 -mtune=native, as it was changed by the driver. */
2768 || !strcmp (ix86_tune_string, "native"))
2771 ix86_tune_string = "generic64";
2773 ix86_tune_string = "generic32";
2775 /* If this call is for setting the option attribute, allow the
2776 generic32/generic64 that was previously set. */
2777 else if (!main_args_p
2778 && (!strcmp (ix86_tune_string, "generic32")
2779 || !strcmp (ix86_tune_string, "generic64")))
2781 else if (!strncmp (ix86_tune_string, "generic", 7))
2782 error ("bad value (%s) for %stune=%s %s",
2783 ix86_tune_string, prefix, suffix, sw);
2787 if (ix86_arch_string)
2788 ix86_tune_string = ix86_arch_string;
2789 if (!ix86_tune_string)
2791 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2792 ix86_tune_defaulted = 1;
2795 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2796 need to use a sensible tune option. */
2797 if (!strcmp (ix86_tune_string, "generic")
2798 || !strcmp (ix86_tune_string, "x86-64")
2799 || !strcmp (ix86_tune_string, "i686"))
2802 ix86_tune_string = "generic64";
2804 ix86_tune_string = "generic32";
2807 if (ix86_stringop_string)
2809 if (!strcmp (ix86_stringop_string, "rep_byte"))
2810 stringop_alg = rep_prefix_1_byte;
2811 else if (!strcmp (ix86_stringop_string, "libcall"))
2812 stringop_alg = libcall;
2813 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2814 stringop_alg = rep_prefix_4_byte;
2815 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2817 /* rep; movq isn't available in 32-bit code. */
2818 stringop_alg = rep_prefix_8_byte;
2819 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2820 stringop_alg = loop_1_byte;
2821 else if (!strcmp (ix86_stringop_string, "loop"))
2822 stringop_alg = loop;
2823 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2824 stringop_alg = unrolled_loop;
2826 error ("bad value (%s) for %sstringop-strategy=%s %s",
2827 ix86_stringop_string, prefix, suffix, sw);
2829 if (!strcmp (ix86_tune_string, "x86-64"))
2830 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2831 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2832 prefix, suffix, prefix, suffix, prefix, suffix);
2834 if (!ix86_arch_string)
2835 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2837 ix86_arch_specified = 1;
2839 if (!strcmp (ix86_arch_string, "generic"))
2840 error ("generic CPU can be used only for %stune=%s %s",
2841 prefix, suffix, sw);
2842 if (!strncmp (ix86_arch_string, "generic", 7))
2843 error ("bad value (%s) for %sarch=%s %s",
2844 ix86_arch_string, prefix, suffix, sw);
2846 /* Validate -mabi= value. */
2847 if (ix86_abi_string)
2849 if (strcmp (ix86_abi_string, "sysv") == 0)
2850 ix86_abi = SYSV_ABI;
2851 else if (strcmp (ix86_abi_string, "ms") == 0)
2854 error ("unknown ABI (%s) for %sabi=%s %s",
2855 ix86_abi_string, prefix, suffix, sw);
2858 ix86_abi = DEFAULT_ABI;
2860 if (ix86_cmodel_string != 0)
2862 if (!strcmp (ix86_cmodel_string, "small"))
2863 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2864 else if (!strcmp (ix86_cmodel_string, "medium"))
2865 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2866 else if (!strcmp (ix86_cmodel_string, "large"))
2867 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2869 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2870 else if (!strcmp (ix86_cmodel_string, "32"))
2871 ix86_cmodel = CM_32;
2872 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2873 ix86_cmodel = CM_KERNEL;
2875 error ("bad value (%s) for %scmodel=%s %s",
2876 ix86_cmodel_string, prefix, suffix, sw);
2880 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2881 use of rip-relative addressing. This eliminates fixups that
2882 would otherwise be needed if this object is to be placed in a
2883 DLL, and is essentially just as efficient as direct addressing. */
2884 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2885 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2886 else if (TARGET_64BIT)
2887 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2889 ix86_cmodel = CM_32;
2891 if (ix86_asm_string != 0)
2894 && !strcmp (ix86_asm_string, "intel"))
2895 ix86_asm_dialect = ASM_INTEL;
2896 else if (!strcmp (ix86_asm_string, "att"))
2897 ix86_asm_dialect = ASM_ATT;
2899 error ("bad value (%s) for %sasm=%s %s",
2900 ix86_asm_string, prefix, suffix, sw);
2902 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2903 error ("code model %qs not supported in the %s bit mode",
2904 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2905 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2906 sorry ("%i-bit mode not compiled in",
2907 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2909 for (i = 0; i < pta_size; i++)
2910 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2912 ix86_schedule = processor_alias_table[i].schedule;
2913 ix86_arch = processor_alias_table[i].processor;
2914 /* Default cpu tuning to the architecture. */
2915 ix86_tune = ix86_arch;
2917 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2918 error ("CPU you selected does not support x86-64 "
2921 if (processor_alias_table[i].flags & PTA_MMX
2922 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2923 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2924 if (processor_alias_table[i].flags & PTA_3DNOW
2925 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2926 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2927 if (processor_alias_table[i].flags & PTA_3DNOW_A
2928 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2929 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2930 if (processor_alias_table[i].flags & PTA_SSE
2931 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2932 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2933 if (processor_alias_table[i].flags & PTA_SSE2
2934 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2935 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2936 if (processor_alias_table[i].flags & PTA_SSE3
2937 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2938 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2939 if (processor_alias_table[i].flags & PTA_SSSE3
2940 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2941 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2942 if (processor_alias_table[i].flags & PTA_SSE4_1
2943 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2944 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2945 if (processor_alias_table[i].flags & PTA_SSE4_2
2946 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2947 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2948 if (processor_alias_table[i].flags & PTA_AVX
2949 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2950 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2951 if (processor_alias_table[i].flags & PTA_FMA
2952 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2953 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2954 if (processor_alias_table[i].flags & PTA_SSE4A
2955 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2956 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2957 if (processor_alias_table[i].flags & PTA_SSE5
2958 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2959 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2960 if (processor_alias_table[i].flags & PTA_ABM
2961 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2962 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2963 if (processor_alias_table[i].flags & PTA_CX16
2964 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2965 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2966 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2967 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2968 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2969 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2970 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2971 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2972 if (processor_alias_table[i].flags & PTA_MOVBE
2973 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
2974 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
2975 if (processor_alias_table[i].flags & PTA_AES
2976 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2977 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2978 if (processor_alias_table[i].flags & PTA_PCLMUL
2979 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2980 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2981 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2982 x86_prefetch_sse = true;
2988 error ("bad value (%s) for %sarch=%s %s",
2989 ix86_arch_string, prefix, suffix, sw);
2991 ix86_arch_mask = 1u << ix86_arch;
2992 for (i = 0; i < X86_ARCH_LAST; ++i)
2993 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2995 for (i = 0; i < pta_size; i++)
2996 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2998 ix86_schedule = processor_alias_table[i].schedule;
2999 ix86_tune = processor_alias_table[i].processor;
3000 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3002 if (ix86_tune_defaulted)
3004 ix86_tune_string = "x86-64";
3005 for (i = 0; i < pta_size; i++)
3006 if (! strcmp (ix86_tune_string,
3007 processor_alias_table[i].name))
3009 ix86_schedule = processor_alias_table[i].schedule;
3010 ix86_tune = processor_alias_table[i].processor;
3013 error ("CPU you selected does not support x86-64 "
3016 /* Intel CPUs have always interpreted SSE prefetch instructions as
3017 NOPs; so, we can enable SSE prefetch instructions even when
3018 -mtune (rather than -march) points us to a processor that has them.
3019 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3020 higher processors. */
3022 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3023 x86_prefetch_sse = true;
3027 error ("bad value (%s) for %stune=%s %s",
3028 ix86_tune_string, prefix, suffix, sw);
3030 ix86_tune_mask = 1u << ix86_tune;
3031 for (i = 0; i < X86_TUNE_LAST; ++i)
3032 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3035 ix86_cost = &ix86_size_cost;
3037 ix86_cost = processor_target_table[ix86_tune].cost;
3039 /* Arrange to set up i386_stack_locals for all functions. */
3040 init_machine_status = ix86_init_machine_status;
3042 /* Validate -mregparm= value. */
3043 if (ix86_regparm_string)
3046 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3047 i = atoi (ix86_regparm_string);
3048 if (i < 0 || i > REGPARM_MAX)
3049 error ("%sregparm=%d%s is not between 0 and %d",
3050 prefix, i, suffix, REGPARM_MAX);
3055 ix86_regparm = REGPARM_MAX;
3057 /* If the user has provided any of the -malign-* options,
3058 warn and use that value only if -falign-* is not set.
3059 Remove this code in GCC 3.2 or later. */
3060 if (ix86_align_loops_string)
3062 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3063 prefix, suffix, suffix);
3064 if (align_loops == 0)
3066 i = atoi (ix86_align_loops_string);
3067 if (i < 0 || i > MAX_CODE_ALIGN)
3068 error ("%salign-loops=%d%s is not between 0 and %d",
3069 prefix, i, suffix, MAX_CODE_ALIGN);
3071 align_loops = 1 << i;
3075 if (ix86_align_jumps_string)
3077 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3078 prefix, suffix, suffix);
3079 if (align_jumps == 0)
3081 i = atoi (ix86_align_jumps_string);
3082 if (i < 0 || i > MAX_CODE_ALIGN)
3083 error ("%salign-loops=%d%s is not between 0 and %d",
3084 prefix, i, suffix, MAX_CODE_ALIGN);
3086 align_jumps = 1 << i;
3090 if (ix86_align_funcs_string)
3092 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3093 prefix, suffix, suffix);
3094 if (align_functions == 0)
3096 i = atoi (ix86_align_funcs_string);
3097 if (i < 0 || i > MAX_CODE_ALIGN)
3098 error ("%salign-loops=%d%s is not between 0 and %d",
3099 prefix, i, suffix, MAX_CODE_ALIGN);
3101 align_functions = 1 << i;
3105 /* Default align_* from the processor table. */
3106 if (align_loops == 0)
3108 align_loops = processor_target_table[ix86_tune].align_loop;
3109 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3111 if (align_jumps == 0)
3113 align_jumps = processor_target_table[ix86_tune].align_jump;
3114 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3116 if (align_functions == 0)
3118 align_functions = processor_target_table[ix86_tune].align_func;
3121 /* Validate -mbranch-cost= value, or provide default. */
3122 ix86_branch_cost = ix86_cost->branch_cost;
3123 if (ix86_branch_cost_string)
3125 i = atoi (ix86_branch_cost_string);
3127 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3129 ix86_branch_cost = i;
3131 if (ix86_section_threshold_string)
3133 i = atoi (ix86_section_threshold_string);
3135 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3137 ix86_section_threshold = i;
3140 if (ix86_tls_dialect_string)
3142 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3143 ix86_tls_dialect = TLS_DIALECT_GNU;
3144 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3145 ix86_tls_dialect = TLS_DIALECT_GNU2;
3146 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3147 ix86_tls_dialect = TLS_DIALECT_SUN;
3149 error ("bad value (%s) for %stls-dialect=%s %s",
3150 ix86_tls_dialect_string, prefix, suffix, sw);
3153 if (ix87_precision_string)
3155 i = atoi (ix87_precision_string);
3156 if (i != 32 && i != 64 && i != 80)
3157 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3162 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3164 /* Enable by default the SSE and MMX builtins. Do allow the user to
3165 explicitly disable any of these. In particular, disabling SSE and
3166 MMX for kernel code is extremely useful. */
3167 if (!ix86_arch_specified)
3169 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3170 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3173 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3177 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3179 if (!ix86_arch_specified)
3181 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3183 /* i386 ABI does not specify red zone. It still makes sense to use it
3184 when programmer takes care to stack from being destroyed. */
3185 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3186 target_flags |= MASK_NO_RED_ZONE;
3189 /* Keep nonleaf frame pointers. */
3190 if (flag_omit_frame_pointer)
3191 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3192 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3193 flag_omit_frame_pointer = 1;
3195 /* If we're doing fast math, we don't care about comparison order
3196 wrt NaNs. This lets us use a shorter comparison sequence. */
3197 if (flag_finite_math_only)
3198 target_flags &= ~MASK_IEEE_FP;
3200 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3201 since the insns won't need emulation. */
3202 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3203 target_flags &= ~MASK_NO_FANCY_MATH_387;
3205 /* Likewise, if the target doesn't have a 387, or we've specified
3206 software floating point, don't use 387 inline intrinsics. */
3208 target_flags |= MASK_NO_FANCY_MATH_387;
3210 /* Turn on MMX builtins for -msse. */
3213 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3214 x86_prefetch_sse = true;
3217 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3218 if (TARGET_SSE4_2 || TARGET_ABM)
3219 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3221 /* Validate -mpreferred-stack-boundary= value or default it to
3222 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3223 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3224 if (ix86_preferred_stack_boundary_string)
3226 i = atoi (ix86_preferred_stack_boundary_string);
3227 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3228 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3229 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3231 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3234 /* Set the default value for -mstackrealign. */
3235 if (ix86_force_align_arg_pointer == -1)
3236 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3238 /* Validate -mincoming-stack-boundary= value or default it to
3239 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3240 if (ix86_force_align_arg_pointer)
3241 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3243 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3244 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3245 if (ix86_incoming_stack_boundary_string)
3247 i = atoi (ix86_incoming_stack_boundary_string);
3248 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3249 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3250 i, TARGET_64BIT ? 4 : 2);
3253 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3254 ix86_incoming_stack_boundary
3255 = ix86_user_incoming_stack_boundary;
3259 /* Accept -msseregparm only if at least SSE support is enabled. */
3260 if (TARGET_SSEREGPARM
3262 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3264 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3265 if (ix86_fpmath_string != 0)
3267 if (! strcmp (ix86_fpmath_string, "387"))
3268 ix86_fpmath = FPMATH_387;
3269 else if (! strcmp (ix86_fpmath_string, "sse"))
3273 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3274 ix86_fpmath = FPMATH_387;
3277 ix86_fpmath = FPMATH_SSE;
3279 else if (! strcmp (ix86_fpmath_string, "387,sse")
3280 || ! strcmp (ix86_fpmath_string, "387+sse")
3281 || ! strcmp (ix86_fpmath_string, "sse,387")
3282 || ! strcmp (ix86_fpmath_string, "sse+387")
3283 || ! strcmp (ix86_fpmath_string, "both"))
3287 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3288 ix86_fpmath = FPMATH_387;
3290 else if (!TARGET_80387)
3292 warning (0, "387 instruction set disabled, using SSE arithmetics");
3293 ix86_fpmath = FPMATH_SSE;
3296 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3299 error ("bad value (%s) for %sfpmath=%s %s",
3300 ix86_fpmath_string, prefix, suffix, sw);
3303 /* If the i387 is disabled, then do not return values in it. */
3305 target_flags &= ~MASK_FLOAT_RETURNS;
3307 /* Use external vectorized library in vectorizing intrinsics. */
3308 if (ix86_veclibabi_string)
3310 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3311 ix86_veclib_handler = ix86_veclibabi_svml;
3312 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3313 ix86_veclib_handler = ix86_veclibabi_acml;
3315 error ("unknown vectorization library ABI type (%s) for "
3316 "%sveclibabi=%s %s", ix86_veclibabi_string,
3317 prefix, suffix, sw);
3320 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3321 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3323 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3325 /* ??? Unwind info is not correct around the CFG unless either a frame
3326 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3327 unwind info generation to be aware of the CFG and propagating states
3329 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3330 || flag_exceptions || flag_non_call_exceptions)
3331 && flag_omit_frame_pointer
3332 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3334 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3335 warning (0, "unwind tables currently require either a frame pointer "
3336 "or %saccumulate-outgoing-args%s for correctness",
3338 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3341 /* If stack probes are required, the space used for large function
3342 arguments on the stack must also be probed, so enable
3343 -maccumulate-outgoing-args so this happens in the prologue. */
3344 if (TARGET_STACK_PROBE
3345 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3347 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3348 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3349 "for correctness", prefix, suffix);
3350 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3353 /* For sane SSE instruction set generation we need fcomi instruction.
3354 It is safe to enable all CMOVE instructions. */
3358 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3361 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3362 p = strchr (internal_label_prefix, 'X');
3363 internal_label_prefix_len = p - internal_label_prefix;
3367 /* When scheduling description is not available, disable scheduler pass
3368 so it won't slow down the compilation and make x87 code slower. */
3369 if (!TARGET_SCHEDULE)
3370 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3372 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3373 set_param_value ("simultaneous-prefetches",
3374 ix86_cost->simultaneous_prefetches);
3375 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3376 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3377 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3378 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3379 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3380 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3382 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3383 can be optimized to ap = __builtin_next_arg (0). */
3385 targetm.expand_builtin_va_start = NULL;
3389 ix86_gen_leave = gen_leave_rex64;
3390 ix86_gen_pop1 = gen_popdi1;
3391 ix86_gen_add3 = gen_adddi3;
3392 ix86_gen_sub3 = gen_subdi3;
3393 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3394 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3395 ix86_gen_monitor = gen_sse3_monitor64;
3396 ix86_gen_andsp = gen_anddi3;
3400 ix86_gen_leave = gen_leave;
3401 ix86_gen_pop1 = gen_popsi1;
3402 ix86_gen_add3 = gen_addsi3;
3403 ix86_gen_sub3 = gen_subsi3;
3404 ix86_gen_sub3_carry = gen_subsi3_carry;
3405 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3406 ix86_gen_monitor = gen_sse3_monitor;
3407 ix86_gen_andsp = gen_andsi3;
3411 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3413 target_flags |= MASK_CLD & ~target_flags_explicit;
3416 /* Save the initial options in case the user does function specific options */
3418 target_option_default_node = target_option_current_node
3419 = build_target_option_node ();
3422 /* Update register usage after having seen the compiler flags. */
3425 ix86_conditional_register_usage (void)
/* NOTE(review): this listing has dropped lines (braces, declarations);
   comments below describe only what is visible here.  */
/* Entries greater than 1 in fixed_regs/call_used_regs encode a
   mode-conditional setting (compared against 3 in 64-bit mode, 2 in
   32-bit mode); reduce them to plain 0/1 booleans for this compilation.  */
3430 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3432 if (fixed_regs[i] > 1)
3433 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3434 if (call_used_regs[i] > 1)
3435 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3438 /* The PIC register, if it exists, is fixed. */
3439 j = PIC_OFFSET_TABLE_REGNUM;
3440 if (j != INVALID_REGNUM)
3441 fixed_regs[j] = call_used_regs[j] = 1;
3443 /* The MS_ABI changes the set of call-used registers. */
/* rsi/rdi and xmm6 upward are marked not call-used (i.e. preserved
   across calls) under the 64-bit Microsoft ABI.  */
3444 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3446 call_used_regs[SI_REG] = 0;
3447 call_used_regs[DI_REG] = 0;
3448 call_used_regs[XMM6_REG] = 0;
3449 call_used_regs[XMM7_REG] = 0;
3450 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3451 call_used_regs[i] = 0;
3454 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3455 other call-clobbered regs for 64-bit. */
/* Rebuild CLOBBERED_REGS from scratch as the intersection of
   GENERAL_REGS with the call-used set computed above.  */
3458 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3460 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3461 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3462 && call_used_regs[i])
3463 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3466 /* If MMX is disabled, squash the registers. */
/* "Squash" = mark fixed and call-used and erase the register name so
   the register allocator and assembler output never touch it.  */
3468 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3469 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3470 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3472 /* If SSE is disabled, squash the registers. */
3474 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3475 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3476 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3478 /* If the FPU is disabled, squash the registers. */
3479 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3480 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3481 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3482 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3484 /* If 32-bit, squash the 64-bit registers. */
/* NOTE(review): the loop bodies for these two loops were lost in this
   listing; presumably they squash the REX registers as above — confirm
   against the original source.  */
3487 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3489 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3495 /* Save the current options */
3498 ix86_function_specific_save (struct cl_target_option *ptr)
/* Copy the current global ix86 option state into PTR so it can later be
   restored by ix86_function_specific_restore (used by the
   attribute((target(...))) machinery).  */
3500 ptr->arch = ix86_arch;
3501 ptr->schedule = ix86_schedule;
3502 ptr->tune = ix86_tune;
3503 ptr->fpmath = ix86_fpmath;
3504 ptr->branch_cost = ix86_branch_cost;
3505 ptr->tune_defaulted = ix86_tune_defaulted;
3506 ptr->arch_specified = ix86_arch_specified;
3507 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3508 ptr->target_flags_explicit = target_flags_explicit;
3510 /* The fields are char but the variables are not; make sure the
3511 values fit in the fields. */
/* If any of these assertions fire, the narrow struct fields truncated
   the stored value and save/restore would be lossy.  */
3512 gcc_assert (ptr->arch == ix86_arch);
3513 gcc_assert (ptr->schedule == ix86_schedule);
3514 gcc_assert (ptr->tune == ix86_tune);
3515 gcc_assert (ptr->fpmath == ix86_fpmath);
3516 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3519 /* Restore the current options */
3522 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Restore global ix86 option state from PTR (the inverse of
   ix86_function_specific_save), recomputing the cached per-arch and
   per-tune feature arrays only when the arch or tune actually changed.  */
3524 enum processor_type old_tune = ix86_tune;
3525 enum processor_type old_arch = ix86_arch;
3526 unsigned int ix86_arch_mask, ix86_tune_mask;
/* The struct fields are narrow (char); cast back to the real enum types.  */
3529 ix86_arch = (enum processor_type) ptr->arch;
3530 ix86_schedule = (enum attr_cpu) ptr->schedule;
3531 ix86_tune = (enum processor_type) ptr->tune;
3532 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3533 ix86_branch_cost = ptr->branch_cost;
3534 ix86_tune_defaulted = ptr->tune_defaulted;
3535 ix86_arch_specified = ptr->arch_specified;
3536 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3537 target_flags_explicit = ptr->target_flags_explicit;
3539 /* Recreate the arch feature tests if the arch changed */
3540 if (old_arch != ix86_arch)
3542 ix86_arch_mask = 1u << ix86_arch;
3543 for (i = 0; i < X86_ARCH_LAST; ++i)
3544 ix86_arch_features[i]
3545 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3548 /* Recreate the tune optimization tests */
3549 if (old_tune != ix86_tune)
3551 ix86_tune_mask = 1u << ix86_tune;
3552 for (i = 0; i < X86_TUNE_LAST; ++i)
3553 ix86_tune_features[i]
3554 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3558 /* Print the current options */
3561 ix86_function_specific_print (FILE *file, int indent,
3562 struct cl_target_option *ptr)
/* Dump the saved target options in PTR to FILE for debugging, each line
   left-padded by INDENT spaces (the "%*s" with an empty string).  */
/* NOTE(review): lines are missing from this listing around the
   target_string declaration and the fprintf argument lists.  */
3565 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3566 NULL, NULL, NULL, false);
3568 fprintf (file, "%*sarch = %d (%s)\n",
/* Guard against out-of-range enum values before indexing cpu_names.  */
3571 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3572 ? cpu_names[ptr->arch]
3575 fprintf (file, "%*stune = %d (%s)\n",
3578 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3579 ? cpu_names[ptr->tune]
/* fpmath is a bit-mask; both 387 and sse may be printed.  */
3582 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3583 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3584 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3585 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
/* ix86_target_string returns heap memory; release it after printing.  */
3589 fprintf (file, "%*s%s\n", indent, "", target_string);
3590 free (target_string);
3595 /* Inner function to process the attribute((target(...))), take an argument and
3596 set the current options from the argument. If we have a list, recursively go
3600 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Parse one attribute((target("..."))) argument (or recurse over a
   TREE_LIST of them), applying ISA and mask options immediately and
   recording string options (arch=/tune=/fpmath=) into P_STRINGS.
   Returns false on any unknown or duplicated option (visible from the
   error paths below).  NOTE(review): many lines (braces, declarations,
   returns) were dropped from this listing.  */
/* Table-entry constructors: S is the option name, O its OPT_* enum,
   M the target_flags mask (0 for ISA and string options).  */
3605 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3606 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3607 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3608 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3623 enum ix86_opt_type type;
/* ISA options handed to ix86_handle_option, as on the command line.  */
3628 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3629 IX86_ATTR_ISA ("abm", OPT_mabm),
3630 IX86_ATTR_ISA ("aes", OPT_maes),
3631 IX86_ATTR_ISA ("avx", OPT_mavx),
3632 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3633 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3634 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3635 IX86_ATTR_ISA ("sse", OPT_msse),
3636 IX86_ATTR_ISA ("sse2", OPT_msse2),
3637 IX86_ATTR_ISA ("sse3", OPT_msse3),
3638 IX86_ATTR_ISA ("sse4", OPT_msse4),
3639 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3640 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3641 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3642 IX86_ATTR_ISA ("sse5", OPT_msse5),
3643 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3645 /* string options */
3646 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3647 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3648 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Flag options: YES entries set the mask when enabled, NO entries set
   an inverted (MASK_NO_*) mask.  */
3651 IX86_ATTR_YES ("cld",
3655 IX86_ATTR_NO ("fancy-math-387",
3656 OPT_mfancy_math_387,
3657 MASK_NO_FANCY_MATH_387),
3659 IX86_ATTR_NO ("fused-madd",
3661 MASK_NO_FUSED_MADD),
3663 IX86_ATTR_YES ("ieee-fp",
3667 IX86_ATTR_YES ("inline-all-stringops",
3668 OPT_minline_all_stringops,
3669 MASK_INLINE_ALL_STRINGOPS),
3671 IX86_ATTR_YES ("inline-stringops-dynamically",
3672 OPT_minline_stringops_dynamically,
3673 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3675 IX86_ATTR_NO ("align-stringops",
3676 OPT_mno_align_stringops,
3677 MASK_NO_ALIGN_STRINGOPS),
3679 IX86_ATTR_YES ("recip",
3685 /* If this is a list, recurse to get the options. */
3686 if (TREE_CODE (args) == TREE_LIST)
3690 for (; args; args = TREE_CHAIN (args))
3691 if (TREE_VALUE (args)
3692 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
/* Anything other than a list or a string literal is malformed.  */
3698 else if (TREE_CODE (args) != STRING_CST)
3701 /* Handle multiple arguments separated by commas. */
/* ASTRDUP makes a scratch copy on the alloca/obstack so the loop can
   carve it up in place.  */
3702 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3704 while (next_optstr && *next_optstr != '\0')
3706 char *p = next_optstr;
3708 char *comma = strchr (next_optstr, ',');
3709 const char *opt_string;
3710 size_t len, opt_len;
3715 enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the token before the comma; continue after it next pass.  */
3721 len = comma - next_optstr;
3722 next_optstr = comma + 1;
3730 /* Recognize no-xxx. */
3731 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3740 /* Find the option. */
/* Linear scan of the table; string options ("arch=") match on prefix
   (len > opt_len), all others must match the whole token exactly.  */
3743 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3745 type = attrs[i].type;
3746 opt_len = attrs[i].len;
3747 if (ch == attrs[i].string[0]
3748 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3749 && memcmp (p, attrs[i].string, opt_len) == 0)
3752 mask = attrs[i].mask;
3753 opt_string = attrs[i].string;
3758 /* Process the option. */
3761 error ("attribute(target(\"%s\")) is unknown", orig_p);
/* ISA options reuse the command-line handler so attribute and -m
   spellings stay in sync.  */
3765 else if (type == ix86_opt_isa)
3766 ix86_handle_option (opt, p, opt_set_p);
3768 else if (type == ix86_opt_yes || type == ix86_opt_no)
/* A NO-style entry carries an inverted mask, so flip the sense.  */
3770 if (type == ix86_opt_no)
3771 opt_set_p = !opt_set_p;
3774 target_flags |= mask;
3776 target_flags &= ~mask;
3779 else if (type == ix86_opt_str)
/* String options may appear at most once per attribute.  */
3783 error ("option(\"%s\") was already specified", opt_string);
/* Stash the value part (after "arch=" etc.); caller frees it.  */
3787 p_strings[opt] = xstrdup (p + opt_len);
3797 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3800 ix86_valid_target_attribute_tree (tree args)
/* Build and return a TARGET_OPTION_NODE for the attribute arguments in
   ARGS, or NULL (implied by the failure path) if they do not validate.
   Temporarily swaps the global option strings, reruns override_options,
   snapshots the result, then restores the caller's globals.  */
3802 const char *orig_arch_string = ix86_arch_string;
3803 const char *orig_tune_string = ix86_tune_string;
3804 const char *orig_fpmath_string = ix86_fpmath_string;
3805 int orig_tune_defaulted = ix86_tune_defaulted;
3806 int orig_arch_specified = ix86_arch_specified;
/* Indexed by IX86_FUNCTION_SPECIFIC_{ARCH,TUNE,FPMATH}; filled in by
   ix86_valid_target_attribute_inner_p with xstrdup'd values.  */
3807 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3810 struct cl_target_option *def
3811 = TREE_TARGET_OPTION (target_option_default_node);
3813 /* Process each of the options on the chain. */
3814 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3817 /* If the changed options are different from the default, rerun override_options,
3818 and then save the options away. The string options are attribute options,
3819 and will be undone when we copy the save structure. */
3820 if (ix86_isa_flags != def->ix86_isa_flags
3821 || target_flags != def->target_flags
3822 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3823 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3824 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3826 /* If we are using the default tune= or arch=, undo the string assigned,
3827 and use the default. */
3828 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3829 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3830 else if (!orig_arch_specified)
3831 ix86_arch_string = NULL;
3833 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3834 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3835 else if (orig_tune_defaulted)
3836 ix86_tune_string = NULL;
3838 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3839 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3840 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3841 else if (!TARGET_64BIT && TARGET_SSE)
3842 ix86_fpmath_string = "sse,387";
3844 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3845 override_options (false);
3847 /* Add any builtin functions with the new isa if any. */
3848 ix86_add_new_builtins (ix86_isa_flags);
3850 /* Save the current options unless we are validating options for
3852 t = build_target_option_node ();
/* Put the caller's global option strings back; the snapshot in T has
   already captured the attribute's effect.  */
3854 ix86_arch_string = orig_arch_string;
3855 ix86_tune_string = orig_tune_string;
3856 ix86_fpmath_string = orig_fpmath_string;
3858 /* Free up memory allocated to hold the strings */
3859 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3860 if (option_strings[i])
3861 free (option_strings[i]);
3867 /* Hook to validate attribute((target("string"))). */
3870 ix86_valid_target_attribute_p (tree fndecl,
3871 tree ARG_UNUSED (name),
3873 int ARG_UNUSED (flags))
/* Hook to validate attribute((target("string"))) on FNDECL: builds the
   target-option node for ARGS, attaches it (and any resulting
   optimization node) to FNDECL, then restores the ambient target and
   optimization state so validation has no lasting side effects.  */
3875 struct cl_target_option cur_target;
3877 tree old_optimize = build_optimization_node ();
3878 tree new_target, new_optimize;
3879 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3881 /* If the function changed the optimization levels as well as setting target
3882 options, start with the optimizations specified. */
3883 if (func_optimize && func_optimize != old_optimize)
3884 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3886 /* The target attributes may also change some optimization flags, so update
3887 the optimization options if necessary. */
3888 cl_target_option_save (&cur_target);
3889 new_target = ix86_valid_target_attribute_tree (args);
3890 new_optimize = build_optimization_node ();
3897 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3899 if (old_optimize != new_optimize)
3900 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary switches made above.  */
3903 cl_target_option_restore (&cur_target);
3905 if (old_optimize != new_optimize)
3906 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3912 /* Hook to determine if one function can safely inline another. */
3915 ix86_can_inline_p (tree caller, tree callee)
/* Decide whether CALLEE may be inlined into CALLER when either carries
   function-specific target options.  The elided branches return the
   boolean result; only the comparisons are visible in this listing.  */
3918 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3919 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3921 /* If callee has no option attributes, then it is ok to inline. */
3925 /* If caller has no option attributes, but callee does then it is not ok to
3927 else if (!caller_tree)
3932 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3933 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3935 /* Callee's isa options should be a subset of the caller's, i.e. a SSE5 function
3936 can inline a SSE2 function but a SSE2 function can't inline a SSE5
/* Subset test: ANDing the two flag sets must reproduce the callee's.  */
3938 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3939 != callee_opts->ix86_isa_flags)
3942 /* See if we have the same non-isa options. */
3943 else if (caller_opts->target_flags != callee_opts->target_flags)
3946 /* See if arch, tune, etc. are the same. */
3947 else if (caller_opts->arch != callee_opts->arch)
3950 else if (caller_opts->tune != callee_opts->tune)
3953 else if (caller_opts->fpmath != callee_opts->fpmath)
3956 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3967 /* Remember the last target of ix86_set_current_function. */
3968 static GTY(()) tree ix86_previous_fndecl;
3970 /* Establish appropriate back-end context for processing the function
3971 FNDECL. The argument might be NULL to indicate processing at top
3972 level, outside of any function scope. */
3974 ix86_set_current_function (tree fndecl)
/* Switch the back end's global option state to FNDECL's
   function-specific target options (or back to the current defaults
   when FNDECL has none), caching the last decl to avoid redundant
   restores.  */
3976 /* Only change the context if the function changes. This hook is called
3977 several times in the course of compiling a function, and we don't want to
3978 slow things down too much or call target_reinit when it isn't safe. */
3979 if (fndecl && fndecl != ix86_previous_fndecl)
3981 tree old_tree = (ix86_previous_fndecl
3982 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3985 tree new_tree = (fndecl
3986 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3989 ix86_previous_fndecl = fndecl;
/* Same options as last time: nothing to restore.  */
3990 if (old_tree == new_tree)
3995 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* No per-function options on FNDECL: fall back to the options current
   at the top level.  */
4001 struct cl_target_option *def
4002 = TREE_TARGET_OPTION (target_option_current_node);
4004 cl_target_option_restore (def);
4011 /* Return true if this goes in large data/bss. */
/* Only relevant for the x86-64 medium code model; everything else is
   never "large" data.  NOTE(review): braces/returns are elided here.  */
4014 ix86_in_large_data_p (tree exp)
4016 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4019 /* Functions are never large data. */
4020 if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit section name of .ldata/.lbss forces large-data handling.  */
4023 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4025 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4026 if (strcmp (section, ".ldata") == 0
4027 || strcmp (section, ".lbss") == 0)
4033 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4035 /* If this is an incomplete type with size 0, then we can't put it
4036 in data because it might be too big when completed. */
4037 if (!size || size > ix86_section_threshold)
4044 /* Switch to the appropriate section for output of DECL.
4045 DECL is either a `VAR_DECL' node or a constant of some sort.
4046 RELOC indicates whether forming the initial value of DECL requires
4047 link-time relocations. */
4049 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
/* Large-model data gets routed to the .l* large-data sections; anything
   else falls through to the default ELF selection.  NOTE(review): the
   break statements and some case labels of the switch are elided in
   this listing.  */
4053 x86_64_elf_select_section (tree decl, int reloc,
4054 unsigned HOST_WIDE_INT align)
4056 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4057 && ix86_in_large_data_p (decl))
4059 const char *sname = NULL;
4060 unsigned int flags = SECTION_WRITE;
4061 switch (categorize_decl_for_section (decl, reloc))
4066 case SECCAT_DATA_REL:
4067 sname = ".ldata.rel";
4069 case SECCAT_DATA_REL_LOCAL:
4070 sname = ".ldata.rel.local";
4072 case SECCAT_DATA_REL_RO:
4073 sname = ".ldata.rel.ro";
4075 case SECCAT_DATA_REL_RO_LOCAL:
4076 sname = ".ldata.rel.ro.local";
4080 flags |= SECTION_BSS;
4083 case SECCAT_RODATA_MERGE_STR:
4084 case SECCAT_RODATA_MERGE_STR_INIT:
4085 case SECCAT_RODATA_MERGE_CONST:
4089 case SECCAT_SRODATA:
4096 /* We don't split these for medium model. Place them into
4097 default sections and hope for best. */
4099 case SECCAT_EMUTLS_VAR:
4100 case SECCAT_EMUTLS_TMPL:
4105 /* We might get called with string constants, but get_named_section
4106 doesn't like them as they are not DECLs. Also, we need to set
4107 flags in that case. */
4109 return get_section (sname, flags, NULL);
4110 return get_named_section (decl, sname, reloc);
4113 return default_elf_select_section (decl, reloc, align);
4116 /* Build up a unique section name, expressed as a
4117 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4118 RELOC indicates whether the initial value of EXP requires
4119 link-time relocations. */
/* NOTE(review): break statements and some case labels of the switch
   are elided in this listing.  */
4121 static void ATTRIBUTE_UNUSED
4122 x86_64_elf_unique_section (tree decl, int reloc)
4124 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4125 && ix86_in_large_data_p (decl))
4127 const char *prefix = NULL;
4128 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4129 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4131 switch (categorize_decl_for_section (decl, reloc))
4134 case SECCAT_DATA_REL:
4135 case SECCAT_DATA_REL_LOCAL:
4136 case SECCAT_DATA_REL_RO:
4137 case SECCAT_DATA_REL_RO_LOCAL:
4138 prefix = one_only ? ".ld" : ".ldata";
4141 prefix = one_only ? ".lb" : ".lbss";
4144 case SECCAT_RODATA_MERGE_STR:
4145 case SECCAT_RODATA_MERGE_STR_INIT:
4146 case SECCAT_RODATA_MERGE_CONST:
4147 prefix = one_only ? ".lr" : ".lrodata";
4149 case SECCAT_SRODATA:
4156 /* We don't split these for medium model. Place them into
4157 default sections and hope for best. */
4159 case SECCAT_EMUTLS_VAR:
4160 prefix = targetm.emutls.var_section;
4162 case SECCAT_EMUTLS_TMPL:
4163 prefix = targetm.emutls.tmpl_section;
/* Assemble "<linkonce><prefix>.<stripped decl name>" and record it as
   the decl's section name.  */
4168 const char *name, *linkonce;
4171 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4172 name = targetm.strip_name_encoding (name);
4174 /* If we're using one_only, then there needs to be a .gnu.linkonce
4175 prefix to the section name. */
4176 linkonce = one_only ? ".gnu.linkonce" : "";
4178 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4180 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4184 default_unique_section (decl, reloc);
#ifdef COMMON_ASM_OP
4188 /* This says how to output assembler code to declare an
4189 uninitialized external linkage data object.
4191 For medium model x86-64 we need to use .largecomm opcode for
/* Emits either ".largecomm" (medium model, object above the section
   threshold) or the normal COMMON_ASM_OP, then "name,size,alignment".
   NOTE(review): the return type line and braces are elided here.  */
4194 x86_elf_aligned_common (FILE *file,
4195 const char *name, unsigned HOST_WIDE_INT size,
4198 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4199 && size > (unsigned int)ix86_section_threshold)
4200 fprintf (file, ".largecomm\t");
4202 fprintf (file, "%s", COMMON_ASM_OP);
4203 assemble_name (file, name);
/* Alignment is emitted in bytes, converted from bits.  */
4204 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4205 size, align / BITS_PER_UNIT);
4209 /* Utility function for targets to use in implementing
4210 ASM_OUTPUT_ALIGNED_BSS. */
/* Places large-model objects above the threshold into .lbss rather
   than the regular bss section, then emits alignment, label and the
   skip directive.  NOTE(review): braces elided in this listing.  */
4213 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4214 const char *name, unsigned HOST_WIDE_INT size,
4217 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4218 && size > (unsigned int)ix86_section_threshold)
4219 switch_to_section (get_named_section (decl, ".lbss", 0));
4221 switch_to_section (bss_section);
4222 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
4224 last_assemble_variable_decl = decl;
4225 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4227 /* Standard thing is just output label for the object. */
4228 ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte even for zero-sized objects.  */
4230 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set default values of optimization-sensitive flags for the i386
   port, called once per -O level.  NOTE(review): the return type line,
   braces and some #ifdef guards are elided in this listing.  */
4234 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4236 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4237 make the problem with not enough registers even worse. */
#ifdef INSN_SCHEDULING
4240 flag_schedule_insns = 0;
4244 /* The Darwin libraries never set errno, so we might as well
4245 avoid calling them when that's the only reason we would. */
4246 flag_errno_math = 0;
4248 /* The default values of these switches depend on the TARGET_64BIT
4249 that is not known at this moment. Mark these values with 2 and
4250 let user the to override these. In case there is no command line option
4251 specifying them, we will set the defaults in override_options. */
/* 2 is a sentinel meaning "not set by the user" -- resolved later in
   override_options once TARGET_64BIT is known.  */
4253 flag_omit_frame_pointer = 2;
4254 flag_pcc_struct_return = 2;
4255 flag_asynchronous_unwind_tables = 2;
4256 flag_vect_cost_model = 1;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4258 SUBTARGET_OPTIMIZATION_OPTIONS;
4262 /* Decide whether we can make a sibling call to a function. DECL is the
4263 declaration of the function being targeted by the call and EXP is the
4264 CALL_EXPR representing the call. */
/* NOTE(review): the return type, braces and several return statements
   are elided in this listing; comments describe visible intent only.  */
4267 ix86_function_ok_for_sibcall (tree decl, tree exp)
4269 tree type, decl_or_type;
4272 /* If we are generating position-independent code, we cannot sibcall
4273 optimize any indirect call, or a direct call to a global function,
4274 as the PLT requires %ebx be live. */
4275 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4278 /* If we need to align the outgoing stack, then sibcalling would
4279 unalign the stack, which may break the called function. */
4280 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
/* When a decl is available use it directly; otherwise derive the
   function type from the CALL_EXPR's callee expression.  */
4285 decl_or_type = decl;
4286 type = TREE_TYPE (decl);
4290 /* We're looking at the CALL_EXPR, we need the type of the function. */
4291 type = CALL_EXPR_FN (exp); /* pointer expression */
4292 type = TREE_TYPE (type); /* pointer type */
4293 type = TREE_TYPE (type); /* function type */
4294 decl_or_type = type;
4297 /* Check that the return value locations are the same. Like
4298 if we are returning floats on the 80387 register stack, we cannot
4299 make a sibcall from a function that doesn't return a float to a
4300 function that does or, conversely, from a function that does return
4301 a float to a function that doesn't; the necessary stack adjustment
4302 would not be executed. This is also the place we notice
4303 differences in the return value ABI. Note that it is ok for one
4304 of the functions to have void return type as long as the return
4305 value of the other is passed in a register. */
4306 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4307 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4309 if (STACK_REG_P (a) || STACK_REG_P (b))
4311 if (!rtx_equal_p (a, b))
4314 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4316 else if (!rtx_equal_p (a, b))
4321 /* The SYSV ABI has more call-clobbered registers;
4322 disallow sibcalls from MS to SYSV. */
4323 if (cfun->machine->call_abi == MS_ABI
4324 && ix86_function_type_abi (type) == SYSV_ABI)
4329 /* If this call is indirect, we'll need to be able to use a
4330 call-clobbered register for the address of the target function.
4331 Make sure that all such registers are not used for passing
4332 parameters. Note that DLLIMPORT functions are indirect. */
4334 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4336 if (ix86_function_regparm (type, NULL) >= 3)
4338 /* ??? Need to count the actual number of registers to be used,
4339 not the possible number of registers. Fix later. */
4345 /* Otherwise okay. That also includes certain types of indirect calls. */
4349 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4350 calling convention attributes;
4351 arguments as in struct attribute_spec.handler. */
/* NOTE(review): the return type, braces and the 64-bit handling are
   elided in this listing.  Each branch diagnoses combinations of
   mutually incompatible calling-convention attributes.  */
4354 ix86_handle_cconv_attribute (tree *node, tree name,
4356 int flags ATTRIBUTE_UNUSED,
4359 if (TREE_CODE (*node) != FUNCTION_TYPE
4360 && TREE_CODE (*node) != METHOD_TYPE
4361 && TREE_CODE (*node) != FIELD_DECL
4362 && TREE_CODE (*node) != TYPE_DECL)
4364 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4366 *no_add_attrs = true;
4370 /* Can combine regparm with all attributes but fastcall. */
4371 if (is_attribute_p ("regparm", name))
4375 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4377 error ("fastcall and regparm attributes are not compatible");
/* Validate the regparm argument: it must be an integer constant no
   larger than REGPARM_MAX.  */
4380 cst = TREE_VALUE (args);
4381 if (TREE_CODE (cst) != INTEGER_CST)
4383 warning (OPT_Wattributes,
4384 "%qE attribute requires an integer constant argument",
4386 *no_add_attrs = true;
4388 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4390 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4392 *no_add_attrs = true;
4400 /* Do not warn when emulating the MS ABI. */
4401 if (TREE_CODE (*node) != FUNCTION_TYPE
4402 || ix86_function_type_abi (*node) != MS_ABI)
4403 warning (OPT_Wattributes, "%qE attribute ignored",
4405 *no_add_attrs = true;
4409 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4410 if (is_attribute_p ("fastcall", name))
4412 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4414 error ("fastcall and cdecl attributes are not compatible");
4416 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4418 error ("fastcall and stdcall attributes are not compatible");
4420 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4422 error ("fastcall and regparm attributes are not compatible");
4426 /* Can combine stdcall with fastcall (redundant), regparm and
4428 else if (is_attribute_p ("stdcall", name))
4430 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4432 error ("stdcall and cdecl attributes are not compatible");
4434 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4436 error ("stdcall and fastcall attributes are not compatible");
4440 /* Can combine cdecl with regparm and sseregparm. */
4441 else if (is_attribute_p ("cdecl", name))
4443 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4445 error ("stdcall and cdecl attributes are not compatible");
4447 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4449 error ("fastcall and cdecl attributes are not compatible");
4453 /* Can combine sseregparm with all attributes. */
4458 /* Return 0 if the attributes for two types are incompatible, 1 if they
4459 are compatible, and 2 if they are nearly compatible (which causes a
4460 warning to be generated). */
/* NOTE(review): the return type line, braces and return statements are
   elided in this listing.  */
4463 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4465 /* Check for mismatch of non-default calling convention. */
4466 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4468 if (TREE_CODE (type1) != FUNCTION_TYPE
4469 && TREE_CODE (type1) != METHOD_TYPE)
4472 /* Check for mismatched fastcall/regparm types. */
/* The "!attr != !attr" form compares the mere presence of the
   attribute on the two types, ignoring its value.  */
4473 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4474 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4475 || (ix86_function_regparm (type1, NULL)
4476 != ix86_function_regparm (type2, NULL)))
4479 /* Check for mismatched sseregparm types. */
4480 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4481 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4484 /* Check for mismatched return types (cdecl vs stdcall). */
4485 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4486 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4492 /* Return the regparm value for a function with the indicated TYPE and DECL.
4493 DECL may be NULL when calling function indirectly
4494 or considering a libcall. */
/* NOTE(review): the return type, braces and several statements are
   elided in this listing; comments describe visible intent only.  */
4497 ix86_function_regparm (const_tree type, const_tree decl)
/* error_issued keeps the "nested functions" diagnostic from being
   emitted more than once per compilation.  */
4502 static bool error_issued;
4505 return (ix86_function_type_abi (type) == SYSV_ABI
4506 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4508 regparm = ix86_regparm;
4509 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4513 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4515 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4517 /* We can't use regparm(3) for nested functions because
4518 these pass static chain pointer in %ecx register. */
4519 if (!error_issued && regparm == 3
4520 && decl_function_context (decl)
4521 && !DECL_NO_STATIC_CHAIN (decl))
4523 error ("nested functions are limited to 2 register parameters");
4524 error_issued = true;
4532 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4535 /* Use register calling convention for local functions when possible. */
4537 && TREE_CODE (decl) == FUNCTION_DECL
4541 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4542 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4545 int local_regparm, globals = 0, regno;
4548 /* Make sure no regparm register is taken by a
4549 fixed register variable. */
4550 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4551 if (fixed_regs[local_regparm])
4554 /* We can't use regparm(3) for nested functions as these use
4555 static chain pointer in third argument. */
4556 if (local_regparm == 3
4557 && decl_function_context (decl)
4558 && !DECL_NO_STATIC_CHAIN (decl))
4561 /* If the function realigns its stackpointer, the prologue will
4562 clobber %ecx. If we've already generated code for the callee,
4563 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4564 scanning the attributes for the self-realigning property. */
4565 f = DECL_STRUCT_FUNCTION (decl);
4566 /* Since current internal arg pointer won't conflict with
4567 parameter passing regs, so no need to change stack
4568 realignment and adjust regparm number.
4570 Each fixed register usage increases register pressure,
4571 so less registers should be used for argument passing.
4572 This functionality can be overriden by an explicit
/* Count fixed (globally reserved) registers among the first integer
   argument registers and reduce the local regparm accordingly.  */
4574 for (regno = 0; regno <= DI_REG; regno++)
4575 if (fixed_regs[regno])
4579 = globals < local_regparm ? local_regparm - globals : 0;
4581 if (local_regparm > regparm)
4582 regparm = local_regparm;
4589 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4590 DFmode (2) arguments in SSE registers for a function with the
4591 indicated TYPE and DECL. DECL may be NULL when calling function
4592 indirectly or considering a libcall. Otherwise return 0. */
/* NOTE(review): braces, return statements and the conditions guarding
   the error calls are elided in this listing.  */
4595 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4597 gcc_assert (!TARGET_64BIT);
4599 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4600 by the sseregparm attribute. */
4601 if (TARGET_SSEREGPARM
4602 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* Diagnose sseregparm requested without SSE support enabled.  */
4609 error ("Calling %qD with attribute sseregparm without "
4610 "SSE/SSE2 enabled", decl);
4612 error ("Calling %qT with attribute sseregparm without "
4613 "SSE/SSE2 enabled", type);
4621 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4622 (and DFmode for SSE2) arguments in SSE registers. */
4623 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4625 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4626 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4628 return TARGET_SSE2 ? 2 : 1;
4634 /* Return true if EAX is live at the start of the function. Used by
4635 ix86_expand_prologue to determine if we need special help before
4636 calling allocate_stack_worker. */
/* NOTE(review): the return type line and braces are elided here.  */
4639 ix86_eax_live_at_start_p (void)
4641 /* Cheat. Don't bother working forward from ix86_function_regparm
4642 to the function type to whether an actual argument is located in
4643 eax. Instead just look at cfg info, which is still close enough
4644 to correct at this point. This gives false positives for broken
4645 functions that might use uninitialized data that happens to be
4646 allocated in eax, but who cares? */
/* Register 0 is %eax on this target.  */
4647 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4650 /* Value is the number of bytes of arguments automatically
4651 popped when returning from a subroutine call.
4652 FUNDECL is the declaration node of the function (as a tree),
4653 FUNTYPE is the data type of the function (as a tree),
4654 or for a library call it is an identifier node for the subroutine name.
4655 SIZE is the number of bytes of arguments passed on the stack.
4657 On the 80386, the RTD insn may be used to pop them if the number
4658 of args is fixed, but if the number is variable then the caller
4659 must pop them all. RTD can't be used for library calls now
4660 because the library is compiled with the Unix compiler.
4661 Use of RTD is a selectable option, since it is incompatible with
4662 standard Unix calling sequences. If the option is not selected,
4663 the caller must always pop the args.
4665 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): the return type, braces and several return statements
   are elided in this listing.  */
4668 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4672 /* None of the 64-bit ABIs pop arguments. */
4676 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4678 /* Cdecl functions override -mrtd, and never pop the stack. */
4679 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4681 /* Stdcall and fastcall functions will pop the stack if not
4683 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4684 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* RTD only applies to functions with a fixed argument list.  */
4687 if (rtd && ! stdarg_p (funtype))
4691 /* Lose any fake structure return argument if it is passed on the stack. */
4692 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4693 && !KEEP_AGGREGATE_RETURN_POINTER)
4695 int nregs = ix86_function_regparm (funtype, fundecl);
4697 return GET_MODE_SIZE (Pmode);
4703 /* Argument support functions. */
4705 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): the return type, braces, return statements and the
   conditions distinguishing the 32/64-bit branches are elided in this
   listing.  */
4707 ix86_function_arg_regno_p (int regno)
4710 const int *parm_regs;
4715 return (regno < REGPARM_MAX
4716 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4718 return (regno < REGPARM_MAX
4719 || (TARGET_MMX && MMX_REGNO_P (regno)
4720 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4721 || (TARGET_SSE && SSE_REGNO_P (regno)
4722 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4727 if (SSE_REGNO_P (regno) && TARGET_SSE)
4732 if (TARGET_SSE && SSE_REGNO_P (regno)
4733 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4737 /* TODO: The function should depend on current function ABI but
4738 builtins.c would need updating then. Therefore we use the
4741 /* RAX is used as hidden argument to va_arg functions. */
4742 if (ix86_abi == SYSV_ABI && regno == AX_REG)
/* Pick the parameter-register table for the ABI in effect and scan it
   for REGNO.  */
4745 if (ix86_abi == MS_ABI)
4746 parm_regs = x86_64_ms_abi_int_parameter_registers;
4748 parm_regs = x86_64_int_parameter_registers;
4749 for (i = 0; i < (ix86_abi == MS_ABI
4750 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
4751 if (regno == parm_regs[i])
4756 /* Return if we do not know how to pass TYPE solely in registers. */
/* NOTE(review): braces and the early return are elided here.  */
4759 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4761 if (must_pass_in_stack_var_size_or_pad (mode, type))
4764 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4765 The layout_type routine is crafty and tries to trick us into passing
4766 currently unsupported vector types on the stack by using TImode. */
4767 return (!TARGET_64BIT && mode == TImode
4768 && type && TREE_CODE (type) != VECTOR_TYPE);
4771 /* It returns the size, in bytes, of the area reserved for arguments passed
4772 in registers for the function represented by fndecl dependent to the used
/* NOTE(review): the return type, braces and the return statements
   (including the MS-ABI shadow-space size) are elided here.  */
4775 ix86_reg_parm_stack_space (const_tree fndecl)
4777 enum calling_abi call_abi = SYSV_ABI;
/* FNDECL may actually be a type node rather than a decl; dispatch to
   the appropriate ABI query.  */
4778 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4779 call_abi = ix86_function_abi (fndecl);
4781 call_abi = ix86_function_type_abi (fndecl);
4782 if (call_abi == MS_ABI)
4787 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* The default ABI can be overridden per-type with the ms_abi /
   sysv_abi attributes.  NOTE(review): braces, the abi assignments and
   the return statements are elided in this listing.  */
4790 ix86_function_type_abi (const_tree fntype)
4792 if (TARGET_64BIT && fntype != NULL)
4794 enum calling_abi abi = ix86_abi;
4795 if (abi == SYSV_ABI)
4797 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4800 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* Return the calling ABI of function decl FNDECL by delegating to the
   type-based query.  NOTE(review): the NULL-decl guard is elided in
   this listing.  */
static enum calling_abi
4808 ix86_function_abi (const_tree fndecl)
4812 return ix86_function_type_abi (TREE_TYPE (fndecl));
4815 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* Outside a 64-bit function body there is no per-function ABI; the
   elided branch presumably returns the global default.  */
4818 ix86_cfun_abi (void)
4820 if (! cfun || ! TARGET_64BIT)
4822 return cfun->machine->call_abi;
4826 extern void init_regs (void);
4828 /* Implementation of call abi switching target hook. Specific to FNDECL
4829 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4830 for more details. */
/* Record on CFUN which ABI the function being expanded uses.
   NOTE(review): return type and braces are elided here.  */
4832 ix86_call_abi_override (const_tree fndecl)
4834 if (fndecl == NULL_TREE)
4835 cfun->machine->call_abi = ix86_abi;
4837 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4840 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4841 re-initialization of init_regs each time we switch function context since
4842 this is needed only during RTL expansion. */
/* NOTE(review): return type, braces and the init_regs call are elided;
   the visible condition checks whether %esi's call-used status already
   matches the current function's ABI.  */
4844 ix86_maybe_switch_abi (void)
4847 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4851 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4852 for a call to a function whose data type is FNTYPE.
4853 For a library call, FNTYPE is 0. */
/* NOTE(review): return type, braces and several guarding conditions
   are elided in this listing; comments describe visible intent only.  */
4856 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4857 tree fntype, /* tree ptr for function decl */
4858 rtx libname, /* SYMBOL_REF of library name or 0 */
4861 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4862 memset (cum, 0, sizeof (*cum));
4865 cum->call_abi = ix86_function_abi (fndecl);
4867 cum->call_abi = ix86_function_type_abi (fntype);
4868 /* Set up the number of registers to use for passing arguments. */
4870 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4871 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
4872 "or subtarget optimization implying it")
4873 cum->nregs = ix86_regparm;
/* When the callee's ABI differs from the default, the register counts
   come from the other ABI's limits.  */
4876 if (cum->call_abi != ix86_abi)
4877 cum->nregs = (ix86_abi != SYSV_ABI
4878 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4882 cum->sse_nregs = SSE_REGPARM_MAX;
4885 if (cum->call_abi != ix86_abi)
4886 cum->sse_nregs = (ix86_abi != SYSV_ABI
4887 ? X86_64_SSE_REGPARM_MAX
4888 : X86_64_MS_SSE_REGPARM_MAX);
4892 cum->mmx_nregs = MMX_REGPARM_MAX;
4893 cum->warn_avx = true;
4894 cum->warn_sse = true;
4895 cum->warn_mmx = true;
4897 /* Because type might mismatch in between caller and callee, we need to
4898 use actual type of function for local calls.
4899 FIXME: cgraph_analyze can be told to actually record if function uses
4900 va_start so for local functions maybe_vaarg can be made aggressive
4902 FIXME: once typesytem is fixed, we won't need this code anymore. */
4904 fntype = TREE_TYPE (fndecl);
4905 cum->maybe_vaarg = (fntype
4906 ? (!prototype_p (fntype) || stdarg_p (fntype))
4911 /* If there are variable arguments, then we won't pass anything
4912 in registers in 32-bit mode. */
4913 if (stdarg_p (fntype))
4924 /* Use ecx and edx registers if function has fastcall attribute,
4925 else look for regparm information. */
4928 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4934 cum->nregs = ix86_function_regparm (fntype, fndecl);
4937 /* Set up the number of SSE registers used for passing SFmode
4938 and DFmode arguments. Warn for mismatching ABI. */
4939 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4943 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4944 But in the case of vector types, it is some vector mode.
4946 When we have only some of our vector isa extensions enabled, then there
4947 are some modes for which vector_mode_supported_p is false. For these
4948 modes, the generic vector support in gcc will choose some non-vector mode
4949 in order to implement the type. By computing the natural mode, we'll
4950 select the proper ABI location for the operand and not depend on whatever
4951 the middle-end decides to do with these vector types.
4953 The midde-end can't deal with the vector types > 16 bytes. In this
4954 case, we return the original mode and warn ABI change if CUM isn't
/* NOTE(review): braces and several guarding conditions are elided in
   this listing.  */
4957 static enum machine_mode
4958 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4960 enum machine_mode mode = TYPE_MODE (type);
4962 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4964 HOST_WIDE_INT size = int_size_in_bytes (type);
4965 if ((size == 8 || size == 16 || size == 32)
4966 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4967 && TYPE_VECTOR_SUBPARTS (type) > 1)
4969 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Start the mode search at the first vector mode of the matching
   element class (float vs. integer).  */
4971 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4972 mode = MIN_MODE_VECTOR_FLOAT;
4974 mode = MIN_MODE_VECTOR_INT;
4976 /* Get the mode which has this inner mode and number of units. */
4977 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4978 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4979 && GET_MODE_INNER (mode) == innermode)
4981 if (size == 32 && !TARGET_AVX)
/* warnedavx ensures the AVX ABI-change warning fires only once.  */
4983 static bool warnedavx;
4990 warning (0, "AVX vector argument without AVX "
4991 "enabled changes the ABI");
4993 return TYPE_MODE (type);
5006 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5007 this may not agree with the mode that the type system has chosen for the
5008 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5009 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
/* NOTE(review): return type, braces and the final return are elided in
   this listing.  */
5012 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5017 if (orig_mode != BLKmode)
5018 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the register in a one-element PARALLEL at offset 0.  */
5021 tmp = gen_rtx_REG (mode, regno);
5022 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5023 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5029 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5030 of this code is to classify each 8bytes of incoming argument by the register
5031 class and assign registers accordingly. */
5033 /* Return the union class of CLASS1 and CLASS2.
5034 See the x86-64 PS ABI for details. */
/* Implements the psABI class-merging rules for two eightbyte classes.
   NOTE(review): braces and several return statements are elided in
   this listing.  */
5036 static enum x86_64_reg_class
5037 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5039 /* Rule #1: If both classes are equal, this is the resulting class. */
5040 if (class1 == class2)
5043 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5045 if (class1 == X86_64_NO_CLASS)
5047 if (class2 == X86_64_NO_CLASS)
5050 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5051 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5052 return X86_64_MEMORY_CLASS;
5054 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF merges to INTEGERSI (both halves fit in 32 bits);
   any other INTEGER combination widens to the full INTEGER class.  */
5055 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5056 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5057 return X86_64_INTEGERSI_CLASS;
5058 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5059 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5060 return X86_64_INTEGER_CLASS;
5062 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5064 if (class1 == X86_64_X87_CLASS
5065 || class1 == X86_64_X87UP_CLASS
5066 || class1 == X86_64_COMPLEX_X87_CLASS
5067 || class2 == X86_64_X87_CLASS
5068 || class2 == X86_64_X87UP_CLASS
5069 || class2 == X86_64_COMPLEX_X87_CLASS)
5070 return X86_64_MEMORY_CLASS;
5072 /* Rule #6: Otherwise class SSE is used. */
5073 return X86_64_SSE_CLASS;
5076 /* Classify the argument of type TYPE and mode MODE.
5077 CLASSES will be filled by the register class used to pass each word
5078 of the operand. The number of words is returned. In case the parameter
5079 should be passed in memory, 0 is returned. As a special case for zero
5080 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5082 BIT_OFFSET is used internally for handling records and specifies offset
5083 of the offset in bits modulo 256 to avoid overflow cases.
5085 See the x86-64 PS ABI for details.
5089 classify_argument (enum machine_mode mode, const_tree type,
5090 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5092 HOST_WIDE_INT bytes =
5093 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5094 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5096 /* Variable sized entities are always passed/returned in memory. */
5100 if (mode != VOIDmode
5101 && targetm.calls.must_pass_in_stack (mode, type))
5104 if (type && AGGREGATE_TYPE_P (type))
5108 enum x86_64_reg_class subclasses[MAX_CLASSES];
5110 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5114 for (i = 0; i < words; i++)
5115 classes[i] = X86_64_NO_CLASS;
5117 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5118 signalize memory class, so handle it as special case. */
5121 classes[0] = X86_64_NO_CLASS;
5125 /* Classify each field of record and merge classes. */
5126 switch (TREE_CODE (type))
5129 /* And now merge the fields of structure. */
5130 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5132 if (TREE_CODE (field) == FIELD_DECL)
5136 if (TREE_TYPE (field) == error_mark_node)
5139 /* Bitfields are always classified as integer. Handle them
5140 early, since later code would consider them to be
5141 misaligned integers. */
5142 if (DECL_BIT_FIELD (field))
5144 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5145 i < ((int_bit_position (field) + (bit_offset % 64))
5146 + tree_low_cst (DECL_SIZE (field), 0)
5149 merge_classes (X86_64_INTEGER_CLASS,
5156 type = TREE_TYPE (field);
5158 /* Flexible array member is ignored. */
5159 if (TYPE_MODE (type) == BLKmode
5160 && TREE_CODE (type) == ARRAY_TYPE
5161 && TYPE_SIZE (type) == NULL_TREE
5162 && TYPE_DOMAIN (type) != NULL_TREE
5163 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5168 if (!warned && warn_psabi)
5171 inform (input_location,
5172 "The ABI of passing struct with"
5173 " a flexible array member has"
5174 " changed in GCC 4.4");
5178 num = classify_argument (TYPE_MODE (type), type,
5180 (int_bit_position (field)
5181 + bit_offset) % 256);
5184 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5185 for (i = 0; i < num && (i + pos) < words; i++)
5187 merge_classes (subclasses[i], classes[i + pos]);
5194 /* Arrays are handled as small records. */
5197 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5198 TREE_TYPE (type), subclasses, bit_offset);
5202 /* The partial classes are now full classes. */
5203 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5204 subclasses[0] = X86_64_SSE_CLASS;
5205 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5206 && !((bit_offset % 64) == 0 && bytes == 4))
5207 subclasses[0] = X86_64_INTEGER_CLASS;
5209 for (i = 0; i < words; i++)
5210 classes[i] = subclasses[i % num];
5215 case QUAL_UNION_TYPE:
5216 /* Unions are similar to RECORD_TYPE but offset is always 0.
5218 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5220 if (TREE_CODE (field) == FIELD_DECL)
5224 if (TREE_TYPE (field) == error_mark_node)
5227 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5228 TREE_TYPE (field), subclasses,
5232 for (i = 0; i < num; i++)
5233 classes[i] = merge_classes (subclasses[i], classes[i]);
5244 /* When size > 16 bytes, if the first one isn't
5245 X86_64_SSE_CLASS or any other ones aren't
5246 X86_64_SSEUP_CLASS, everything should be passed in
5248 if (classes[0] != X86_64_SSE_CLASS)
5251 for (i = 1; i < words; i++)
5252 if (classes[i] != X86_64_SSEUP_CLASS)
5256 /* Final merger cleanup. */
5257 for (i = 0; i < words; i++)
5259 /* If one class is MEMORY, everything should be passed in
5261 if (classes[i] == X86_64_MEMORY_CLASS)
5264 /* The X86_64_SSEUP_CLASS should be always preceded by
5265 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5266 if (classes[i] == X86_64_SSEUP_CLASS
5267 && classes[i - 1] != X86_64_SSE_CLASS
5268 && classes[i - 1] != X86_64_SSEUP_CLASS)
5270 /* The first one should never be X86_64_SSEUP_CLASS. */
5271 gcc_assert (i != 0);
5272 classes[i] = X86_64_SSE_CLASS;
5275 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5276 everything should be passed in memory. */
5277 if (classes[i] == X86_64_X87UP_CLASS
5278 && (classes[i - 1] != X86_64_X87_CLASS))
5282 /* The first one should never be X86_64_X87UP_CLASS. */
5283 gcc_assert (i != 0);
5284 if (!warned && warn_psabi)
5287 inform (input_location,
5288 "The ABI of passing union with long double"
5289 " has changed in GCC 4.4");
5297 /* Compute alignment needed. We align all types to natural boundaries with
5298 exception of XFmode that is aligned to 64bits. */
5299 if (mode != VOIDmode && mode != BLKmode)
5301 int mode_alignment = GET_MODE_BITSIZE (mode);
5304 mode_alignment = 128;
5305 else if (mode == XCmode)
5306 mode_alignment = 256;
5307 if (COMPLEX_MODE_P (mode))
5308 mode_alignment /= 2;
5309 /* Misaligned fields are always returned in memory. */
5310 if (bit_offset % mode_alignment)
5314 /* for V1xx modes, just use the base mode */
5315 if (VECTOR_MODE_P (mode) && mode != V1DImode
5316 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5317 mode = GET_MODE_INNER (mode);
5319 /* Classification of atomic types. */
5324 classes[0] = X86_64_SSE_CLASS;
5327 classes[0] = X86_64_SSE_CLASS;
5328 classes[1] = X86_64_SSEUP_CLASS;
5338 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5342 classes[0] = X86_64_INTEGERSI_CLASS;
5345 else if (size <= 64)
5347 classes[0] = X86_64_INTEGER_CLASS;
5350 else if (size <= 64+32)
5352 classes[0] = X86_64_INTEGER_CLASS;
5353 classes[1] = X86_64_INTEGERSI_CLASS;
5356 else if (size <= 64+64)
5358 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5366 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5370 /* OImode shouldn't be used directly. */
5375 if (!(bit_offset % 64))
5376 classes[0] = X86_64_SSESF_CLASS;
5378 classes[0] = X86_64_SSE_CLASS;
5381 classes[0] = X86_64_SSEDF_CLASS;
5384 classes[0] = X86_64_X87_CLASS;
5385 classes[1] = X86_64_X87UP_CLASS;
5388 classes[0] = X86_64_SSE_CLASS;
5389 classes[1] = X86_64_SSEUP_CLASS;
5392 classes[0] = X86_64_SSE_CLASS;
5393 if (!(bit_offset % 64))
5399 if (!warned && warn_psabi)
5402 inform (input_location,
5403 "The ABI of passing structure with complex float"
5404 " member has changed in GCC 4.4");
5406 classes[1] = X86_64_SSESF_CLASS;
5410 classes[0] = X86_64_SSEDF_CLASS;
5411 classes[1] = X86_64_SSEDF_CLASS;
5414 classes[0] = X86_64_COMPLEX_X87_CLASS;
5417 /* This modes is larger than 16 bytes. */
5425 classes[0] = X86_64_SSE_CLASS;
5426 classes[1] = X86_64_SSEUP_CLASS;
5427 classes[2] = X86_64_SSEUP_CLASS;
5428 classes[3] = X86_64_SSEUP_CLASS;
5436 classes[0] = X86_64_SSE_CLASS;
5437 classes[1] = X86_64_SSEUP_CLASS;
5444 classes[0] = X86_64_SSE_CLASS;
5450 gcc_assert (VECTOR_MODE_P (mode));
5455 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5457 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5458 classes[0] = X86_64_INTEGERSI_CLASS;
5460 classes[0] = X86_64_INTEGER_CLASS;
5461 classes[1] = X86_64_INTEGER_CLASS;
5462 return 1 + (bytes > 8);
5466 /* Examine the argument and return set number of register required in each
5467 class. Return 0 iff parameter should be passed in memory. */
/* NOTE(review): elided extract -- interior source lines are missing (the
   embedded original line numbers jump), so the bodies of several cases and
   the accumulation into *int_nregs/*sse_nregs are not visible here.  */
5469 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5470 int *int_nregs, int *sse_nregs)
5472 enum x86_64_reg_class regclass[MAX_CLASSES];
/* classify_argument fills regclass[] per the x86-64 psABI; n is the number
   of eightbyte classes (0 presumably means "pass in memory").  */
5473 int n = classify_argument (mode, type, regclass, 0);
/* Walk the classes and tally register needs per class kind.  */
5479 for (n--; n >= 0; n--)
5480 switch (regclass[n])
5482 case X86_64_INTEGER_CLASS:
5483 case X86_64_INTEGERSI_CLASS:
5486 case X86_64_SSE_CLASS:
5487 case X86_64_SSESF_CLASS:
5488 case X86_64_SSEDF_CLASS:
5491 case X86_64_NO_CLASS:
5492 case X86_64_SSEUP_CLASS:
5494 case X86_64_X87_CLASS:
5495 case X86_64_X87UP_CLASS:
/* complex long double: allowed in a return (x87 stack) but never as a
   register argument.  */
5499 case X86_64_COMPLEX_X87_CLASS:
5500 return in_return ? 2 : 0;
5501 case X86_64_MEMORY_CLASS:
5507 /* Construct container for the argument used by GCC interface. See
5508 FUNCTION_ARG for the detailed description. */
/* NOTE(review): elided extract -- interior lines (declarations of i, n,
   bytes, nexps, ret; several case bodies; memory-return paths) are missing
   from this view.  Returns an rtx: a single REG for simple cases, a
   PARALLEL of EXPR_LISTs otherwise, and presumably NULL/0 for the
   pass-in-memory case -- TODO confirm against full source.  */
5511 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5512 const_tree type, int in_return, int nintregs, int nsseregs,
5513 const int *intreg, int sse_regno)
5515 /* The following variables hold the static issued_error state. */
/* static: each diagnostic below is emitted at most once per compilation.  */
5516 static bool issued_sse_arg_error;
5517 static bool issued_sse_ret_error;
5518 static bool issued_x87_ret_error;
5520 enum machine_mode tmpmode;
5522 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5523 enum x86_64_reg_class regclass[MAX_CLASSES];
5527 int needed_sseregs, needed_intregs;
5528 rtx exp[MAX_CLASSES];
5531 n = classify_argument (mode, type, regclass, 0);
5534 if (!examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of the required kinds: fall back (elided).  */
5537 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5540 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5541 some less clueful developer tries to use floating-point anyway. */
5542 if (needed_sseregs && !TARGET_SSE)
5546 if (!issued_sse_ret_error)
5548 error ("SSE register return with SSE disabled");
5549 issued_sse_ret_error = true;
5552 else if (!issued_sse_arg_error)
5554 error ("SSE register argument with SSE disabled");
5555 issued_sse_arg_error = true;
5560 /* Likewise, error if the ABI requires us to return values in the
5561 x87 registers and the user specified -mno-80387. */
5562 if (!TARGET_80387 && in_return)
5563 for (i = 0; i < n; i++)
5564 if (regclass[i] == X86_64_X87_CLASS
5565 || regclass[i] == X86_64_X87UP_CLASS
5566 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5568 if (!issued_x87_ret_error)
5570 error ("x87 register return with x87 disabled");
5571 issued_x87_ret_error = true;
5576 /* First construct simple cases. Avoid SCmode, since we want to use
5577 single register to pass this type. */
5578 if (n == 1 && mode != SCmode)
5579 switch (regclass[0])
5581 case X86_64_INTEGER_CLASS:
5582 case X86_64_INTEGERSI_CLASS:
5583 return gen_rtx_REG (mode, intreg[0]);
5584 case X86_64_SSE_CLASS:
5585 case X86_64_SSESF_CLASS:
5586 case X86_64_SSEDF_CLASS:
5587 if (mode != BLKmode)
5588 return gen_reg_or_parallel (mode, orig_mode,
5589 SSE_REGNO (sse_regno))
5591 case X86_64_X87_CLASS:
5592 case X86_64_COMPLEX_X87_CLASS:
5593 return gen_rtx_REG (mode, FIRST_STACK_REG);
5594 case X86_64_NO_CLASS:
5595 /* Zero sized array, struct or class. */
/* Two-eightbyte SSE+SSEUP pair lives in a single XMM register.  */
5600 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5601 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5602 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
/* Four-eightbyte SSE+SSEUPx3: one 256-bit (YMM) register (guard elided).  */
5604 && regclass[0] == X86_64_SSE_CLASS
5605 && regclass[1] == X86_64_SSEUP_CLASS
5606 && regclass[2] == X86_64_SSEUP_CLASS
5607 && regclass[3] == X86_64_SSEUP_CLASS
5609 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5612 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5613 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* Adjacent integer register pair can carry a 16-byte value directly.  */
5614 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5615 && regclass[1] == X86_64_INTEGER_CLASS
5616 && (mode == CDImode || mode == TImode || mode == TFmode)
5617 && intreg[0] + 1 == intreg[1])
5618 return gen_rtx_REG (mode, intreg[0]);
5620 /* Otherwise figure out the entries of the PARALLEL. */
5621 for (i = 0; i < n; i++)
5625 switch (regclass[i])
5627 case X86_64_NO_CLASS:
5629 case X86_64_INTEGER_CLASS:
5630 case X86_64_INTEGERSI_CLASS:
5631 /* Merge TImodes on aligned occasions here too. */
5632 if (i * 8 + 8 > bytes)
5633 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5634 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5638 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5639 if (tmpmode == BLKmode)
5641 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5642 gen_rtx_REG (tmpmode, *intreg),
5646 case X86_64_SSESF_CLASS:
5647 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5648 gen_rtx_REG (SFmode,
5649 SSE_REGNO (sse_regno)),
5653 case X86_64_SSEDF_CLASS:
5654 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5655 gen_rtx_REG (DFmode,
5656 SSE_REGNO (sse_regno)),
5660 case X86_64_SSE_CLASS:
5668 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5678 && regclass[1] == X86_64_SSEUP_CLASS
5679 && regclass[2] == X86_64_SSEUP_CLASS
5680 && regclass[3] == X86_64_SSEUP_CLASS);
5687 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5688 gen_rtx_REG (tmpmode,
5689 SSE_REGNO (sse_regno)),
5698 /* Empty aligned struct, union or class. */
/* Assemble the collected EXPR_LISTs into the returned PARALLEL.  */
5702 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5703 for (i = 0; i < nexps; i++)
5704 XVECEXP (ret, 0, i) = exp [i];
5708 /* Update the data in CUM to advance over an argument of mode MODE
5709 and data type TYPE. (TYPE is null for libcalls where that information
5710 may not be available.) */
/* NOTE(review): elided extract -- the switch/dispatch on MODE and the
   register-exhaustion bodies are missing from this view; comments below
   describe only the visible statements.  */
5713 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5714 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-register path: consume WORDS general registers.  */
5730 cum->words += words;
5731 cum->nregs -= words;
5732 cum->regno += words;
5734 if (cum->nregs <= 0)
5742 /* OImode shouldn't be used directly. */
/* float_in_sse gates whether SFmode/DFmode scalars advance SSE counters
   (level 1 = SFmode only, level 2 = SFmode and DFmode) -- bodies elided.  */
5746 if (cum->float_in_sse < 2)
5749 if (cum->float_in_sse < 1)
/* SSE vector path: aggregates are excluded; consume one SSE register.  */
5766 if (!type || !AGGREGATE_TYPE_P (type))
5768 cum->sse_words += words;
5769 cum->sse_nregs -= 1;
5770 cum->sse_regno += 1;
5771 if (cum->sse_nregs <= 0)
/* MMX vector path: same shape as the SSE path, one MMX register.  */
5784 if (!type || !AGGREGATE_TYPE_P (type))
5786 cum->mmx_words += words;
5787 cum->mmx_nregs -= 1;
5788 cum->mmx_regno += 1;
5789 if (cum->mmx_nregs <= 0)
/* Advance CUM past one SysV x86-64 argument: either consume the integer
   and SSE registers the argument needs, or account for it on the stack.
   NOTE(review): elided extract -- some interior lines are missing.  */
5800 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5801 tree type, HOST_WIDE_INT words, int named)
5803 int int_nregs, sse_nregs;
5805 /* Unnamed 256bit vector mode parameters are passed on stack. */
5806 if (!named && VALID_AVX256_REG_MODE (mode))
/* examine_argument returning 0 means "passed in memory": count words.  */
5809 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5810 cum->words += words;
5811 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5813 cum->nregs -= int_nregs;
5814 cum->sse_nregs -= sse_nregs;
5815 cum->regno += int_nregs;
5816 cum->sse_regno += sse_nregs;
/* Otherwise registers are exhausted: the argument goes on the stack.  */
5819 cum->words += words;
/* Advance CUM past one Win64 (MS ABI) argument.  Anything not exactly
   1/2/4/8 bytes is expected to have been forced indirect already
   (see ix86_pass_by_reference).  NOTE(review): elided extract -- the
   register bookkeeping lines are missing from this view.  */
5823 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5824 HOST_WIDE_INT words)
5826 /* Otherwise, this should be passed indirect. */
5827 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5829 cum->words += words;
/* FUNCTION_ARG_ADVANCE dispatcher: compute the argument's size in bytes
   and words, then delegate to the MS-ABI, SysV 64-bit, or 32-bit helper.
   NOTE(review): elided extract -- a guard around the type_natural_mode
   call (presumably "if (type)") is not visible.  */
5838 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5839 tree type, int named)
5841 HOST_WIDE_INT bytes, words;
5843 if (mode == BLKmode)
5844 bytes = int_size_in_bytes (type);
5846 bytes = GET_MODE_SIZE (mode);
5847 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5850 mode = type_natural_mode (type, NULL);
/* cum may be NULL (libcalls); fall back to the global ix86_abi then.  */
5852 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5853 function_arg_advance_ms_64 (cum, bytes, words);
5854 else if (TARGET_64BIT)
5855 function_arg_advance_64 (cum, mode, type, words, named);
5857 function_arg_advance_32 (cum, mode, type, bytes, words);
5860 /* Define where to put the arguments to a function.
5861 Value is zero to push the argument on the stack,
5862 or a hard register in which to store the argument.
5864 MODE is the argument's machine mode.
5865 TYPE is the data type of the argument (as a tree).
5866 This is null for libcalls where that information may
5868 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5869 the preceding args and about the function being called.
5870 NAMED is nonzero if this argument is a named parameter
5871 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): elided extract -- the mode switch, the fastcall test,
   and several case bodies are missing from this view.  */
5874 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5875 enum machine_mode orig_mode, tree type,
5876 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* static: each diagnostic below fires at most once per compilation.  */
5878 static bool warnedsse, warnedmmx;
5880 /* Avoid the AL settings for the Unix64 ABI. */
5881 if (mode == VOIDmode)
/* Integer path: only if the whole argument fits in remaining regs.  */
5897 if (words <= cum->nregs)
5899 int regno = cum->regno;
5901 /* Fastcall allocates the first two DWORD (SImode) or
5902 smaller arguments to ECX and EDX if it isn't an
5908 || (type && AGGREGATE_TYPE_P (type)))
5911 /* ECX not EAX is the first allocated register. */
5912 if (regno == AX_REG)
5915 return gen_rtx_REG (mode, regno);
/* float_in_sse level gates SFmode/DFmode scalars in SSE regs (elided).  */
5920 if (cum->float_in_sse < 2)
5923 if (cum->float_in_sse < 1)
5927 /* In 32bit, we pass TImode in xmm registers. */
5934 if (!type || !AGGREGATE_TYPE_P (type))
5936 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5939 warning (0, "SSE vector argument without SSE enabled "
5943 return gen_reg_or_parallel (mode, orig_mode,
5944 cum->sse_regno + FIRST_SSE_REG);
5949 /* OImode shouldn't be used directly. */
/* 256-bit (AVX) vectors also go in xmm/ymm registers (guard elided).  */
5958 if (!type || !AGGREGATE_TYPE_P (type))
5961 return gen_reg_or_parallel (mode, orig_mode,
5962 cum->sse_regno + FIRST_SSE_REG);
/* MMX vector path, with its own once-only warning.  */
5971 if (!type || !AGGREGATE_TYPE_P (type))
5973 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5976 warning (0, "MMX vector argument without MMX enabled "
5980 return gen_reg_or_parallel (mode, orig_mode,
5981 cum->mmx_regno + FIRST_MMX_REG);
/* Return the register (or PARALLEL) for one SysV x86-64 argument.
   NOTE(review): elided extract -- the type_natural_mode handling and the
   unnamed-AVX-vector guard body are missing from this view.  */
5990 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5991 enum machine_mode orig_mode, tree type, int named)
5993 /* Handle a hidden AL argument containing number of registers
5994 for varargs x86-64 functions. */
5995 if (mode == VOIDmode)
/* The constant encodes how many SSE regs the callee must save; the
   sse_nregs < 0 case picks a per-ABI maximum (middle arm elided).  */
5996 return GEN_INT (cum->maybe_vaarg
5997 ? (cum->sse_nregs < 0
5998 ? (cum->call_abi == ix86_abi
6000 : (ix86_abi != SYSV_ABI
6001 ? X86_64_SSE_REGPARM_MAX
6002 : X86_64_MS_SSE_REGPARM_MAX))
6017 /* Unnamed 256bit vector mode parameters are passed on stack. */
/* Delegate actual placement to the psABI container builder.  */
6023 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6025 &x86_64_int_parameter_registers [cum->regno],
/* Return the register for one Win64 (MS ABI) argument.  Win64 assigns
   each of the first four slots a fixed integer AND SSE register pair.
   NOTE(review): elided extract -- some guards (e.g. around the unnamed-
   float PARALLEL) are missing from this view.  */
6030 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6031 enum machine_mode orig_mode, int named,
6032 HOST_WIDE_INT bytes)
6036 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6037 We use value of -2 to specify that current function call is MSABI. */
6038 if (mode == VOIDmode)
6039 return GEN_INT (-2);
6041 /* If we've run out of registers, it goes on the stack. */
6042 if (cum->nregs == 0)
6045 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6047 /* Only floating point modes are passed in anything but integer regs. */
6048 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
/* Slot index maps directly to the XMM register number on Win64.  */
6051 regno = cum->regno + FIRST_SSE_REG;
6056 /* Unnamed floating parameters are passed in both the
6057 SSE and integer registers. */
6058 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6059 t2 = gen_rtx_REG (mode, regno);
6060 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6061 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6062 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6065 /* Handle aggregated types passed in register. */
6066 if (orig_mode == BLKmode)
/* Small aggregates travel in a plain integer register of matching size.  */
6068 if (bytes > 0 && bytes <= 8)
6069 mode = (bytes > 4 ? DImode : SImode);
6070 if (mode == BLKmode)
6074 return gen_reg_or_parallel (mode, orig_mode, regno);
/* FUNCTION_ARG dispatcher: normalize the mode, then delegate to the
   MS-ABI, SysV 64-bit, or 32-bit helper.
   NOTE(review): elided extract -- some lines may be missing.  */
6078 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6079 tree type, int named)
6081 enum machine_mode mode = omode;
6082 HOST_WIDE_INT bytes, words;
6084 if (mode == BLKmode)
6085 bytes = int_size_in_bytes (type);
6087 bytes = GET_MODE_SIZE (mode);
6088 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6090 /* To simplify the code below, represent vector types with a vector mode
6091 even if MMX/SSE are not active. */
6092 if (type && TREE_CODE (type) == VECTOR_TYPE)
6093 mode = type_natural_mode (type, cum);
/* cum may be NULL (libcalls); fall back to the global ix86_abi then.  */
6095 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6096 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6097 else if (TARGET_64BIT)
6098 return function_arg_64 (cum, mode, omode, type, named);
6100 return function_arg_32 (cum, mode, omode, type, bytes, words);
6103 /* A C expression that indicates when an argument must be passed by
6104 reference. If nonzero for an argument, a copy of that argument is
6105 made in memory and a pointer to the argument is passed instead of
6106 the argument itself. The pointer is passed in whatever way is
6107 appropriate for passing a pointer to that type. */
/* NOTE(review): elided extract -- the switch body around the msize cases
   and several return statements are missing from this view.  */
6110 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6111 enum machine_mode mode ATTRIBUTE_UNUSED,
6112 const_tree type, bool named ATTRIBUTE_UNUSED)
6114 /* See Windows x64 Software Convention. */
6115 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6117 int msize = (int) GET_MODE_SIZE (mode);
6120 /* Arrays are passed by reference. */
6121 if (TREE_CODE (type) == ARRAY_TYPE)
6124 if (AGGREGATE_TYPE_P (type))
6126 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6127 are passed by reference. */
6128 msize = int_size_in_bytes (type);
6132 /* __m128 is passed by reference. */
/* These sizes (1/2/4/8 bytes) are the only ones passed by value.  */
6134 case 1: case 2: case 4: case 8:
/* SysV 64-bit: variable-sized types (size == -1) go by reference.  */
6140 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6146 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* NOTE(review): elided extract -- parts of the predicate (the decimal-
   float/TImode arm of the first condition, return statements, the field
   declaration) are missing from this view.  */
6149 contains_aligned_value_p (tree type)
6151 enum machine_mode mode = TYPE_MODE (type);
/* SSE-class modes are 128-bit aligned unless user alignment lowers it.  */
6152 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6156 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6158 if (TYPE_ALIGN (type) < 128)
6161 if (AGGREGATE_TYPE_P (type))
6163 /* Walk the aggregates recursively. */
6164 switch (TREE_CODE (type))
6168 case QUAL_UNION_TYPE:
6172 /* Walk all the structure fields. */
6173 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6175 if (TREE_CODE (field) == FIELD_DECL
6176 && contains_aligned_value_p (TREE_TYPE (field)))
6183 /* Just for use if some languages passes arrays by value. */
6184 if (contains_aligned_value_p (TREE_TYPE (type)))
6195 /* Gives the alignment boundary, in bits, of an argument with the
6196 specified mode and type. */
/* NOTE(review): elided extract -- the declaration of `align`, the
   type==NULL branch structure, and the final return are missing from
   this view.  */
6199 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6204 /* Since canonical type is used for call, we convert it to
6205 canonical type if needed. */
6206 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6207 type = TYPE_CANONICAL (type);
6208 align = TYPE_ALIGN (type);
/* No type (libcall): derive alignment from the machine mode.  */
6211 align = GET_MODE_ALIGNMENT (mode);
6212 if (align < PARM_BOUNDARY)
6213 align = PARM_BOUNDARY;
6214 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6215 natural boundaries. */
6216 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6218 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6219 make an exception for SSE modes since these require 128bit
6222 The handling here differs from field_alignment. ICC aligns MMX
6223 arguments to 4 byte boundaries, while structure fields are aligned
6224 to 8 byte boundaries. */
6227 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6228 align = PARM_BOUNDARY;
6232 if (!contains_aligned_value_p (type))
6233 align = PARM_BOUNDARY;
/* Never demand more alignment than the platform maximum.  */
6236 if (align > BIGGEST_ALIGNMENT)
6237 align = BIGGEST_ALIGNMENT;
6241 /* Return true if N is a possible register number of function value. */
/* NOTE(review): elided extract -- the switch statement, other case
   labels (e.g. AX_REG, SSE regs), and returns are missing from this
   view; only the x87 and one other arm are visible.  */
6244 ix86_function_value_regno_p (int regno)
6251 case FIRST_FLOAT_REG:
6252 /* TODO: The function should depend on current function ABI but
6253 builtins.c would need updating then. Therefore we use the
/* MS ABI never returns values in x87 registers.  */
6255 if (TARGET_64BIT && ix86_abi == MS_ABI)
6257 return TARGET_FLOAT_RETURNS_IN_80387;
6263 if (TARGET_MACHO || TARGET_64BIT)
6271 /* Define how to find the value returned by a function.
6272 VALTYPE is the data type of the value (as a tree).
6273 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6274 otherwise, FUNC is 0. */
/* NOTE(review): elided extract -- the declaration of `regno` and the
   "%eax" default assignment are missing from this view.  */
6277 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6278 const_tree fntype, const_tree fn)
6282 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6283 we normally prevent this case when mmx is not available. However
6284 some ABIs may require the result to be returned like DImode. */
6285 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6286 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6288 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6289 we prevent this case when sse is not available. However some ABIs
6290 may require the result to be returned like integer TImode. */
6291 else if (mode == TImode
6292 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6293 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6295 /* 32-byte vector modes in %ymm0. */
6296 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6297 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6299 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6300 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6301 regno = FIRST_FLOAT_REG;
6303 /* Most things go in %eax. */
6306 /* Override FP return register with %xmm0 for local functions when
6307 SSE math is enabled or for functions with sseregparm attribute. */
6308 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6310 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6311 if ((sse_level >= 1 && mode == SFmode)
6312 || (sse_level == 2 && mode == DFmode))
6313 regno = FIRST_SSE_REG;
6316 /* OImode shouldn't be used directly. */
6317 gcc_assert (mode != OImode);
/* Build the REG in the caller's original mode, not the natural one.  */
6319 return gen_rtx_REG (orig_mode, regno);
/* Find the register(s) holding a SysV x86-64 return value.
   NOTE(review): elided extract -- the libcall switch on MODE (whose arms
   return SSE, x87 or %rax registers) is missing most of its structure.  */
6323 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6328 /* Handle libcalls, which don't provide a type node. */
6329 if (valtype == NULL)
6341 return gen_rtx_REG (mode, FIRST_SSE_REG);
6344 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6348 return gen_rtx_REG (mode, AX_REG);
/* Typed values: classify per the psABI with full register budgets.  */
6352 ret = construct_container (mode, orig_mode, valtype, 1,
6353 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6354 x86_64_int_return_registers, 0);
6356 /* For zero sized structures, construct_container returns NULL, but we
6357 need to keep rest of compiler happy by returning meaningful value. */
6359 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Find the register holding a Win64 (MS ABI) return value: %rax by
   default, %xmm0 for 16-byte non-complex scalar/vector values and for
   SFmode/DFmode.  NOTE(review): elided extract -- the case labels of the
   size switch are missing from this view.  */
6365 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6367 unsigned int regno = AX_REG;
6371 switch (GET_MODE_SIZE (mode))
6374 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6375 && !COMPLEX_MODE_P (mode))
6376 regno = FIRST_SSE_REG;
6380 if (mode == SFmode || mode == DFmode)
6381 regno = FIRST_SSE_REG;
6387 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value: resolve
   FNTYPE_OR_DECL into (fn, fntype) and dispatch on target/ABI.
   NOTE(review): elided extract -- the "fn = NULL" initialization line is
   not visible here.  */
6391 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6392 enum machine_mode orig_mode, enum machine_mode mode)
6394 const_tree fn, fntype;
/* FNTYPE_OR_DECL may be a FUNCTION_DECL or already a type.  */
6397 if (fntype_or_decl && DECL_P (fntype_or_decl))
6398 fn = fntype_or_decl;
6399 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6401 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6402 return function_value_ms_64 (orig_mode, mode);
6403 else if (TARGET_64BIT)
6404 return function_value_64 (orig_mode, mode, valtype);
6406 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: compute both the declared mode and the
   psABI "natural" mode of VALTYPE, then delegate to the common worker.  */
6410 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6411 bool outgoing ATTRIBUTE_UNUSED)
6413 enum machine_mode mode, orig_mode;
6415 orig_mode = TYPE_MODE (valtype);
6416 mode = type_natural_mode (valtype, NULL);
6417 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* LIBCALL_VALUE hook: libcalls carry no type node, so reuse the common
   worker with NULL type and MODE as both original and natural mode.  */
6421 ix86_libcall_value (enum machine_mode mode)
6423 return ix86_function_value_1 (NULL, NULL, mode, mode);
6426 /* Return true iff type is returned in memory. */
/* NOTE(review): elided extract -- the size tests inside the vector branch
   and the scalar-mode tail (e.g. XFmode/DFmode handling) are missing
   from this view.  */
6428 static int ATTRIBUTE_UNUSED
6429 return_in_memory_32 (const_tree type, enum machine_mode mode)
6433 if (mode == BLKmode)
6436 size = int_size_in_bytes (type);
/* Some targets return small aggregates in registers (MS convention).  */
6438 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6441 if (VECTOR_MODE_P (mode) || mode == TImode)
6443 /* User-created vectors small enough to fit in EAX. */
6447 /* MMX/3dNow values are returned in MM0,
6448 except when it doesn't exits. */
6450 return (TARGET_MMX ? 0 : 1);
6452 /* SSE values are returned in XMM0, except when it doesn't exist. */
6454 return (TARGET_SSE ? 0 : 1);
6456 /* AVX values are returned in YMM0, except when it doesn't exist. */
6458 return TARGET_AVX ? 0 : 1;
6467 /* OImode shouldn't be used directly. */
6468 gcc_assert (mode != OImode);
/* SysV x86-64: a value is returned in memory exactly when the psABI
   classification (examine_argument with in_return=1) yields no register
   assignment.  */
6473 static int ATTRIBUTE_UNUSED
6474 return_in_memory_64 (const_tree type, enum machine_mode mode)
6476 int needed_intregs, needed_sseregs;
6477 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Win64 (MS ABI): 16-byte non-complex scalar/vector values come back in
   xmm0; everything else is in memory unless its size is exactly 1, 2, 4,
   or 8 bytes.  */
6480 static int ATTRIBUTE_UNUSED
6481 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6483 HOST_WIDE_INT size = int_size_in_bytes (type);
6485 /* __m128 is returned in xmm0. */
6486 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6487 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6490 /* Otherwise, the size must be exactly in [1248]. */
6491 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: dispatch to the per-ABI helper, unless a
   subtarget overrides the whole decision.
   NOTE(review): elided extract -- the TARGET_64BIT test framing the two
   64-bit branches is not visible here.  */
6495 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6497 #ifdef SUBTARGET_RETURN_IN_MEMORY
6498 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6500 const enum machine_mode mode = type_natural_mode (type, NULL);
6504 if (ix86_function_type_abi (fntype) == MS_ABI)
6505 return return_in_memory_ms_64 (type, mode);
6507 return return_in_memory_64 (type, mode);
6510 return return_in_memory_32 (type, mode);
6514 /* Return false iff TYPE is returned in memory. This version is used
6515 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6516 but differs notably in that when MMX is available, 8-byte vectors
6517 are returned in memory, rather than in MMX registers. */
/* NOTE(review): elided extract -- the size tests inside the vector branch
   and the function's tail (final returns) are missing from this view.  */
6520 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6523 enum machine_mode mode = type_natural_mode (type, NULL);
6526 return return_in_memory_64 (type, mode);
6528 if (mode == BLKmode)
6531 size = int_size_in_bytes (type);
6533 if (VECTOR_MODE_P (mode))
6535 /* Return in memory only if MMX registers *are* available. This
6536 seems backwards, but it is consistent with the existing
6543 else if (mode == TImode)
6545 else if (mode == XFmode)
6551 /* When returning SSE vector types, we have a choice of either
6552 (1) being abi incompatible with a -march switch, or
6553 (2) generating an error.
6554 Given no good solution, I think the safest thing is one warning.
6555 The user won't be able to use -Werror, but....
6557 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6558 called in response to actually generating a caller or callee that
6559 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6560 via aggregate_value_p for general type probing from tree-ssa. */
/* NOTE(review): elided extract -- the final return value of the hook is
   not visible here (presumably NULL_RTX -- TODO confirm).  */
6563 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* static: each warning fires at most once per compilation.  */
6565 static bool warnedsse, warnedmmx;
6567 if (!TARGET_64BIT && type)
6569 /* Look at the return type of the function, not the function type. */
6570 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6572 if (!TARGET_SSE && !warnedsse)
6575 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6578 warning (0, "SSE vector return without SSE enabled "
6583 if (!TARGET_MMX && !warnedmmx)
6585 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6588 warning (0, "MMX vector return without MMX enabled "
6598 /* Create the va_list data type. */
6600 /* Returns the calling convention specific va_list date type.
6601 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
/* NOTE(review): elided extract -- the pointer types of f_ovf/f_sav and
   the TYPE_STUB_DECL/alignment lines are missing from this view.  */
6604 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6606 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6608 /* For i386 we use plain pointer to argument area. */
6609 if (!TARGET_64BIT || abi == MS_ABI)
6610 return build_pointer_type (char_type_node);
/* SysV 64-bit: the four-field __va_list_tag record from the psABI.  */
6612 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6613 type_decl = build_decl (BUILTINS_LOCATION,
6614 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6616 f_gpr = build_decl (BUILTINS_LOCATION,
6617 FIELD_DECL, get_identifier ("gp_offset"),
6618 unsigned_type_node);
6619 f_fpr = build_decl (BUILTINS_LOCATION,
6620 FIELD_DECL, get_identifier ("fp_offset"),
6621 unsigned_type_node);
6622 f_ovf = build_decl (BUILTINS_LOCATION,
6623 FIELD_DECL, get_identifier ("overflow_arg_area"),
6625 f_sav = build_decl (BUILTINS_LOCATION,
6626 FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so va_arg expansion can find them.  */
6629 va_list_gpr_counter_field = f_gpr;
6630 va_list_fpr_counter_field = f_fpr;
6632 DECL_FIELD_CONTEXT (f_gpr) = record;
6633 DECL_FIELD_CONTEXT (f_fpr) = record;
6634 DECL_FIELD_CONTEXT (f_ovf) = record;
6635 DECL_FIELD_CONTEXT (f_sav) = record;
/* Chain the fields into the record and lay it out.  */
6637 TREE_CHAIN (record) = type_decl;
6638 TYPE_NAME (record) = type_decl;
6639 TYPE_FIELDS (record) = f_gpr;
6640 TREE_CHAIN (f_gpr) = f_fpr;
6641 TREE_CHAIN (f_fpr) = f_ovf;
6642 TREE_CHAIN (f_ovf) = f_sav;
6644 layout_type (record);
6646 /* The correct type is an array type of one element. */
6647 return build_array_type (record, build_index_type (size_zero_node));
6650 /* Setup the builtin va_list data type and for 64-bit the additional
6651 calling convention specific va_list data types. */
/* NOTE(review): elided extract -- the declaration of `t`, the TARGET_64BIT
   guard, the else-arms assigning the default type, and the final return
   are missing from this view.  */
6654 ix86_build_builtin_va_list (void)
6656 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6658 /* Initialize abi specific va_list builtin types. */
/* Build the non-default ABI's va_list too; wrap non-RECORD results in a
   variant copy so the two type nodes stay distinct.  */
6662 if (ix86_abi == MS_ABI)
6664 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6665 if (TREE_CODE (t) != RECORD_TYPE)
6666 t = build_variant_type_copy (t);
6667 sysv_va_list_type_node = t;
6672 if (TREE_CODE (t) != RECORD_TYPE)
6673 t = build_variant_type_copy (t);
6674 sysv_va_list_type_node = t;
6676 if (ix86_abi != MS_ABI)
6678 t = ix86_build_builtin_va_list_abi (MS_ABI);
6679 if (TREE_CODE (t) != RECORD_TYPE)
6680 t = build_variant_type_copy (t);
6681 ms_va_list_type_node = t;
6686 if (TREE_CODE (t) != RECORD_TYPE)
6687 t = build_variant_type_copy (t);
6688 ms_va_list_type_node = t;
6695 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* 64-bit SYSV flavor: emit prologue code that dumps the unnamed-argument
   GPRs and SSE registers into the register save area so va_arg can later
   fetch them.  Sets ix86_varargs_gpr_size / ix86_varargs_fpr_size as a
   side effect, which ix86_compute_frame_layout reads.  */
6698 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
/* Adjust register-parameter count when the callee's ABI differs from the
   compilation default.  NOTE(review): regparm is not referenced in the
   lines visible here — confirm its use in the elided loop bound.  */
6707 int regparm = ix86_regparm;
6709 if (cum->call_abi != ix86_abi)
6710 regparm = (ix86_abi != SYSV_ABI
6711 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6713 /* GPR size of varargs save area. */
6714 if (cfun->va_list_gpr_size)
6715 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6717 ix86_varargs_gpr_size = 0;
6719 /* FPR size of varargs save area. We don't need it if we don't pass
6720 anything in SSE registers. */
6721 if (cum->sse_nregs && cfun->va_list_fpr_size)
6722 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6724 ix86_varargs_fpr_size = 0;
/* Nothing to save: return early (body elided here).  */
6726 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6729 save_area = frame_pointer_rtx;
6730 set = get_varargs_alias_set ();
/* Save each integer parameter register that may carry an unnamed
   argument, starting after the named ones (cum->regno).  */
6732 for (i = cum->regno;
6734 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6737 mem = gen_rtx_MEM (Pmode,
6738 plus_constant (save_area, i * UNITS_PER_WORD));
6739 MEM_NOTRAP_P (mem) = 1;
6740 set_mem_alias_set (mem, set);
6741 emit_move_insn (mem, gen_rtx_REG (Pmode,
6742 x86_64_int_parameter_registers[i]));
6745 if (ix86_varargs_fpr_size)
6747 /* Now emit code to save SSE registers. The AX parameter contains number
6748 of SSE parameter registers used to call this function. We use
6749 sse_prologue_save insn template that produces computed jump across
6750 SSE saves. We need some preparation work to get this working. */
6752 label = gen_label_rtx ();
6753 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6755 /* Compute address to jump to :
6756 label - eax*4 + nnamed_sse_arguments*4 Or
6757 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6758 tmp_reg = gen_reg_rtx (Pmode);
6759 nsse_reg = gen_reg_rtx (Pmode);
/* %al holds the count of SSE registers actually used by the caller
   (per the SysV AMD64 calling convention for varargs calls).  */
6760 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6761 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6762 gen_rtx_MULT (Pmode, nsse_reg,
6765 /* vmovaps is one byte longer than movaps. */
6767 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6768 gen_rtx_PLUS (Pmode, tmp_reg,
6774 gen_rtx_CONST (DImode,
6775 gen_rtx_PLUS (DImode,
6777 GEN_INT (cum->sse_regno
6778 * (TARGET_AVX ? 5 : 4)))));
6780 emit_move_insn (nsse_reg, label_ref);
6781 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6783 /* Compute address of memory block we save into. We always use pointer
6784 pointing 127 bytes after first byte to store - this is needed to keep
6785 instruction size limited by 4 bytes (5 bytes for AVX) with one
6786 byte displacement. */
6787 tmp_reg = gen_reg_rtx (Pmode);
6788 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6789 plus_constant (save_area,
6790 ix86_varargs_gpr_size + 127)));
6791 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6792 MEM_NOTRAP_P (mem) = 1;
6793 set_mem_alias_set (mem, set);
6794 set_mem_align (mem, BITS_PER_WORD);
6796 /* And finally do the dirty job! */
6797 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6798 GEN_INT (cum->sse_regno), label));
/* MS-ABI flavor of the TARGET_SETUP_INCOMING_VARARGS worker: spill every
   integer parameter register that may hold an unnamed argument into its
   caller-allocated home slot on the stack (the MS ABI reserves stack space
   for all register parameters, so no separate save area is needed).  */
6803 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6805 alias_set_type set = get_varargs_alias_set ();
/* Start after the named arguments (cum->regno).  */
6808 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
6812 mem = gen_rtx_MEM (Pmode,
6813 plus_constant (virtual_incoming_args_rtx,
6814 i * UNITS_PER_WORD));
6815 MEM_NOTRAP_P (mem) = 1;
6816 set_mem_alias_set (mem, set);
6818 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6819 emit_move_insn (mem, reg);
/* Target hook TARGET_SETUP_INCOMING_VARARGS: skip past the last named
   argument (for stdarg functions) and dispatch to the ABI-specific worker
   that saves the unnamed register arguments.  */
6824 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6825 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6828 CUMULATIVE_ARGS next_cum;
6831 /* This argument doesn't appear to be used anymore. Which is good,
6832 because the old code here didn't suppress rtl generation. */
6833 gcc_assert (!no_rtl);
6838 fntype = TREE_TYPE (current_function_decl);
6840 /* For varargs, we do not want to skip the dummy va_dcl argument.
6841 For stdargs, we do want to skip the last named argument. */
6843 if (stdarg_p (fntype))
6844 function_arg_advance (&next_cum, mode, type, 1);
/* Dispatch on the callee's own calling convention, which may differ
   from the compilation default (ms_abi/sysv_abi attributes).  */
6846 if (cum->call_abi == MS_ABI)
6847 setup_incoming_varargs_ms_64 (&next_cum);
6849 setup_incoming_varargs_64 (&next_cum);
6852 /* Checks if TYPE is of kind va_list char *. */
/* Returns nonzero when TYPE canonicalizes to the simple char-pointer
   va_list (the MS-ABI representation); 32-bit targets always use that
   representation, hence the unconditional-true note below.  */
6855 is_va_list_char_pointer (tree type)
6859 /* For 32-bit it is always true. */
6862 canonic = ix86_canonical_va_list_type (type);
6863 return (canonic == ms_va_list_type_node
6864 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6867 /* Implement va_start. */
/* Initialize the four fields of the SysV 64-bit va_list record:
   gp_offset / fp_offset (consumed register bytes so far), overflow_arg_area
   (first stack-passed argument) and reg_save_area (start of the register
   save area laid down by setup_incoming_varargs_64).  */
6870 ix86_va_start (tree valist, rtx nextarg)
6872 HOST_WIDE_INT words, n_gpr, n_fpr;
6873 tree f_gpr, f_fpr, f_ovf, f_sav;
6874 tree gpr, fpr, ovf, sav, t;
6877 /* Only 64bit target needs something special. */
6878 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6880 std_expand_builtin_va_start (valist, nextarg);
/* Walk the field chain of the SYSV va_list record in declaration order:
   gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
6884 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6885 f_fpr = TREE_CHAIN (f_gpr);
6886 f_ovf = TREE_CHAIN (f_fpr);
6887 f_sav = TREE_CHAIN (f_ovf);
6889 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6890 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6891 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6892 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6893 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6895 /* Count number of gp and fp argument registers used. */
6896 words = crtl->args.info.words;
6897 n_gpr = crtl->args.info.regno;
6898 n_fpr = crtl->args.info.sse_regno;
6900 if (cfun->va_list_gpr_size)
/* gp_offset = bytes of integer registers consumed by named args.  */
6902 type = TREE_TYPE (gpr);
6903 t = build2 (MODIFY_EXPR, type,
6904 gpr, build_int_cst (type, n_gpr * 8));
6905 TREE_SIDE_EFFECTS (t) = 1;
6906 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6909 if (TARGET_SSE && cfun->va_list_fpr_size)
/* fp_offset starts past the whole GPR area (8*X86_64_REGPARM_MAX),
   then 16 bytes per consumed SSE register.  */
6911 type = TREE_TYPE (fpr);
6912 t = build2 (MODIFY_EXPR, type, fpr,
6913 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6914 TREE_SIDE_EFFECTS (t) = 1;
6915 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6918 /* Find the overflow area. */
6919 type = TREE_TYPE (ovf);
6920 t = make_tree (type, crtl->args.internal_arg_pointer);
6922 t = build2 (POINTER_PLUS_EXPR, type, t,
6923 size_int (words * UNITS_PER_WORD));
6924 t = build2 (MODIFY_EXPR, type, ovf, t);
6925 TREE_SIDE_EFFECTS (t) = 1;
6926 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6928 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6930 /* Find the register save area.
6931 Prologue of the function save it right above stack frame. */
6932 type = TREE_TYPE (sav);
6933 t = make_tree (type, frame_pointer_rtx);
/* If no GPRs were saved, bias the pointer back so fp_offset (which
   assumes a full GPR area precedes the SSE slots) still lands on the
   first saved SSE register.  */
6934 if (!ix86_varargs_gpr_size)
6935 t = build2 (POINTER_PLUS_EXPR, type, t,
6936 size_int (-8 * X86_64_REGPARM_MAX));
6937 t = build2 (MODIFY_EXPR, type, sav, t);
6938 TREE_SIDE_EFFECTS (t) = 1;
6939 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6943 /* Implement va_arg. */
/* Gimplify one va_arg access for the SysV 64-bit ABI.  Strategy: classify
   TYPE via construct_container; if it can live in registers, emit a runtime
   test on gp_offset/fp_offset and pull the value out of the register save
   area (copying piecewise through a temporary when the pieces are not
   consecutive), otherwise fall through to the overflow (stack) area.  */
6946 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
/* Identity map: integer argument slot i lives at save-area word i.  */
6949 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6950 tree f_gpr, f_fpr, f_ovf, f_sav;
6951 tree gpr, fpr, ovf, sav, t;
6953 tree lab_false, lab_over = NULL_TREE;
6958 enum machine_mode nat_mode;
6961 /* Only 64bit target needs something special. */
6962 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6963 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6965 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6966 f_fpr = TREE_CHAIN (f_gpr);
6967 f_ovf = TREE_CHAIN (f_fpr);
6968 f_sav = TREE_CHAIN (f_ovf);
6970 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6971 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6972 valist = build_va_arg_indirect_ref (valist);
6973 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6974 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6975 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Types passed by reference are fetched as a pointer and dereferenced
   at the end (see the indirect_p handling after lab_over).  */
6977 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6979 type = build_pointer_type (type);
6980 size = int_size_in_bytes (type);
6981 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6983 nat_mode = type_natural_mode (type, NULL);
6992 /* Unnamed 256bit vector mode parameters are passed on stack. */
6993 if (ix86_cfun_abi () == SYSV_ABI)
/* container is NULL when the argument is passed in memory.  */
7000 container = construct_container (nat_mode, TYPE_MODE (type),
7001 type, 0, X86_64_REGPARM_MAX,
7002 X86_64_SSE_REGPARM_MAX, intreg,
7007 /* Pull the value out of the saved registers. */
7009 addr = create_tmp_var (ptr_type_node, "addr");
7013 int needed_intregs, needed_sseregs;
7015 tree int_addr, sse_addr;
7017 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7018 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7020 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* Over-aligned aggregates cannot be read directly from the save area;
   they must be staged through a properly aligned temporary.  */
7022 need_temp = (!REG_P (container)
7023 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7024 || TYPE_ALIGN (type) > 128));
7026 /* In case we are passing structure, verify that it is consecutive block
7027 on the register save area. If not we need to do moves. */
7028 if (!need_temp && !REG_P (container))
7030 /* Verify that all registers are strictly consecutive */
7031 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
/* SSE pieces: each must be 16 bytes apart starting at FIRST_SSE_REG.  */
7035 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7037 rtx slot = XVECEXP (container, 0, i);
7038 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7039 || INTVAL (XEXP (slot, 1)) != i * 16)
/* Integer pieces: each must be 8 bytes apart starting at register 0.  */
7047 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7049 rtx slot = XVECEXP (container, 0, i);
7050 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7051 || INTVAL (XEXP (slot, 1)) != i * 8)
7063 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7064 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7067 /* First ensure that we fit completely in registers. */
/* gp_offset >= limit means not enough GPRs left: jump to the stack path.  */
7070 t = build_int_cst (TREE_TYPE (gpr),
7071 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7072 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7073 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7074 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7075 gimplify_and_add (t, pre_p);
/* Same check for the SSE portion of the save area.  */
7079 t = build_int_cst (TREE_TYPE (fpr),
7080 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7081 + X86_64_REGPARM_MAX * 8);
7082 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7083 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7084 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7085 gimplify_and_add (t, pre_p);
7088 /* Compute index to start of area used for integer regs. */
7091 /* int_addr = gpr + sav; */
7092 t = fold_convert (sizetype, gpr);
7093 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7094 gimplify_assign (int_addr, t, pre_p);
7098 /* sse_addr = fpr + sav; */
7099 t = fold_convert (sizetype, fpr);
7100 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7101 gimplify_assign (sse_addr, t, pre_p);
/* need_temp path: copy the value piecewise from the save area into an
   aligned temporary and use the temporary's address.  */
7106 tree temp = create_tmp_var (type, "va_arg_tmp");
7109 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7110 gimplify_assign (addr, t, pre_p);
7112 for (i = 0; i < XVECLEN (container, 0); i++)
7114 rtx slot = XVECEXP (container, 0, i);
7115 rtx reg = XEXP (slot, 0);
7116 enum machine_mode mode = GET_MODE (reg);
7117 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
7118 tree addr_type = build_pointer_type (piece_type);
7119 tree daddr_type = build_pointer_type_for_mode (piece_type,
7123 tree dest_addr, dest;
7125 if (SSE_REGNO_P (REGNO (reg)))
7127 src_addr = sse_addr;
7128 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7132 src_addr = int_addr;
7133 src_offset = REGNO (reg) * 8;
7135 src_addr = fold_convert (addr_type, src_addr);
7136 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7137 size_int (src_offset));
7138 src = build_va_arg_indirect_ref (src_addr);
7140 dest_addr = fold_convert (daddr_type, addr);
7141 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7142 size_int (INTVAL (XEXP (slot, 1))));
7143 dest = build_va_arg_indirect_ref (dest_addr);
7145 gimplify_assign (dest, src, pre_p);
/* Bump the consumed-register offsets past what we just read.  */
7151 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7152 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7153 gimplify_assign (gpr, t, pre_p);
7158 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7159 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7160 gimplify_assign (fpr, t, pre_p);
7163 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7165 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7168 /* ... otherwise out of the overflow area. */
7170 /* When we align parameter on stack for caller, if the parameter
7171 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7172 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
7173 here with caller. */
7174 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7175 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7176 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7178 /* Care for on-stack alignment if needed. */
7179 if (arg_boundary <= 64
7180 || integer_zerop (TYPE_SIZE (type)))
/* Round ovf up to the argument's alignment: (ovf + align-1) & -align.  */
7184 HOST_WIDE_INT align = arg_boundary / 8;
7185 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7186 size_int (align - 1));
7187 t = fold_convert (sizetype, t);
7188 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7190 t = fold_convert (TREE_TYPE (ovf), t);
7192 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7193 gimplify_assign (addr, t, pre_p);
/* Advance ovf past the argument (rounded up to whole words).  */
7195 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7196 size_int (rsize * UNITS_PER_WORD));
7197 gimplify_assign (unshare_expr (ovf), t, pre_p);
7200 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7202 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7203 addr = fold_convert (ptrtype, addr);
/* Extra dereference for by-reference arguments (indirect_p path).  */
7206 addr = build_va_arg_indirect_ref (addr);
7207 return build_va_arg_indirect_ref (addr);
7210 /* Return nonzero if OPNUM's MEM should be matched
7211 in movabs* patterns. */
/* Extracts operand OPNUM from INSN's SET (unwrapping a PARALLEL and any
   SUBREGs) and accepts the MEM unless it is volatile while volatile
   memory operands are disallowed (volatile_ok is clear).  */
7214 ix86_check_movabs (rtx insn, int opnum)
7218 set = PATTERN (insn);
7219 if (GET_CODE (set) == PARALLEL)
7220 set = XVECEXP (set, 0, 0);
7221 gcc_assert (GET_CODE (set) == SET);
7222 mem = XEXP (set, opnum);
7223 while (GET_CODE (mem) == SUBREG)
7224 mem = SUBREG_REG (mem);
7225 gcc_assert (MEM_P (mem));
7226 return (volatile_ok || !MEM_VOLATILE_P (mem));
7229 /* Initialize the table of extra 80387 mathematical constants. */
/* Parses the decimal strings for log10(2), ln(2), log2(e), log2(10) and pi
   into ext_80387_constants_table, rounds each to XFmode (80-bit extended)
   precision, and marks the table initialized.  */
7232 init_ext_80387_constants (void)
7234 static const char * cst[5] =
7236 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7237 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7238 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7239 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7240 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7244 for (i = 0; i < 5; i++)
7246 real_from_string (&ext_80387_constants_table[i], cst[i]);
7247 /* Ensure each constant is rounded to XFmode precision. */
7248 real_convert (&ext_80387_constants_table[i],
7249 XFmode, &ext_80387_constants_table[i]);
7252 ext_80387_constants_init = 1;
7255 /* Return true if the constant is something that can be loaded with
7256 a special instruction. */
/* Classifies X: 0.0 and 1.0 are always loadable (fldz/fld1); for XFmode the
   five extended constants in ext_80387_constants_table are matched when
   size-optimizing or the CPU benefits; -0.0 and -1.0 are recognized for
   the fldz;fchs / fld1;fchs split.  NOTE(review): the exact return codes
   are established by elided return statements — see
   standard_80387_constant_opcode/rtx for the consumers.  */
7259 standard_80387_constant_p (rtx x)
7261 enum machine_mode mode = GET_MODE (x);
7265 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7268 if (x == CONST0_RTX (mode))
7270 if (x == CONST1_RTX (mode))
7273 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7275 /* For XFmode constants, try to find a special 80387 instruction when
7276 optimizing for size or on those CPUs that benefit from them. */
7278 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7282 if (! ext_80387_constants_init)
7283 init_ext_80387_constants ();
7285 for (i = 0; i < 5; i++)
7286 if (real_identical (&r, &ext_80387_constants_table[i]))
7290 /* Load of the constant -0.0 or -1.0 will be split as
7291 fldz;fchs or fld1;fchs sequence. */
7292 if (real_isnegzero (&r))
7294 if (real_identical (&r, &dconstm1))
7300 /* Return the opcode of the special instruction to be used to load
/* Maps the classification returned by standard_80387_constant_p to the
   corresponding x87 load mnemonic (case bodies elided in this view).  */
7304 standard_80387_constant_opcode (rtx x)
7306 switch (standard_80387_constant_p (x))
7330 /* Return the CONST_DOUBLE representing the 80387 constant that is
7331 loaded by the specified special instruction. The argument IDX
7332 matches the return value from standard_80387_constant_p. */
7335 standard_80387_constant_rtx (int idx)
/* Lazily populate the constants table before indexing into it.  */
7339 if (! ext_80387_constants_init)
7340 init_ext_80387_constants ();
7356 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7360 /* Return 1 if mode is a valid mode for sse. */
/* Predicate over machine modes; the accepted mode list is in the elided
   switch body.  */
7362 standard_sse_mode_p (enum machine_mode mode)
7379 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
7380 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7381 modes and AVX is enabled. */
/* Negative codes (-2/-3) flag an all-ones constant whose required ISA
   extension (SSE2/AVX) is not enabled.  */
7384 standard_sse_constant_p (rtx x)
7386 enum machine_mode mode = GET_MODE (x);
7388 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7390 if (vector_all_ones_operand (x, mode))
7392 if (standard_sse_mode_p (mode))
7393 return TARGET_SSE2 ? 2 : -2;
7394 else if (VALID_AVX256_REG_MODE (mode))
7395 return TARGET_AVX ? 3 : -3;
7401 /* Return the opcode of the special instruction to be used to load
/* For an all-zeros constant emit an xor of the destination with itself
   (mode-dependent xorps/xorpd/pxor, VEX-encoded under AVX); for all-ones,
   pcmpeqd.  The outer switch keys on standard_sse_constant_p (x).  */
7405 standard_sse_constant_opcode (rtx insn, rtx x)
7407 switch (standard_sse_constant_p (x))
7410 switch (get_attr_mode (insn))
7413 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7415 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7417 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
/* 256-bit cases: zero via a 128-bit xor of the low lane (%x0), which
   zero-extends to the full ymm register under AVX.  */
7419 return "vxorps\t%x0, %x0, %x0";
7421 return "vxorpd\t%x0, %x0, %x0";
7423 return "vpxor\t%x0, %x0, %x0";
7429 switch (get_attr_mode (insn))
7434 return "vpcmpeqd\t%0, %0, %0";
7440 return "pcmpeqd\t%0, %0";
7445 /* Returns 1 if OP contains a symbol reference */
/* Recursively walks OP's rtx structure (including vectors, format 'E')
   and returns 1 as soon as a SYMBOL_REF or LABEL_REF is found.  */
7448 symbolic_reference_mentioned_p (rtx op)
7453 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7456 fmt = GET_RTX_FORMAT (GET_CODE (op));
7457 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7463 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7464 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7468 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7475 /* Return 1 if it is appropriate to emit `ret' instructions in the
7476 body of a function. Do this only if the epilogue is simple, needing a
7477 couple of insns. Prior to reloading, we can't tell how many registers
7478 must be saved, so return 0 then. Return 0 if there is no frame
7479 marker to de-allocate. */
7482 ix86_can_use_return_insn_p (void)
7484 struct ix86_frame frame;
7486 if (! reload_completed || frame_pointer_needed)
7489 /* Don't allow more than 32 pop, since that's all we can do
7490 with one instruction. */
/* `ret imm16' can only pop up to 32767 bytes of arguments.  */
7491 if (crtl->args.pops_args
7492 && crtl->args.size >= 32768)
/* A bare `ret' works only when nothing was allocated or saved.  */
7495 ix86_compute_frame_layout (&frame);
7496 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7499 /* Value should be nonzero if functions must have frame pointers.
7500 Zero means the frame pointer need not be set up (and parms may
7501 be accessed via the stack pointer) in functions that seem suitable. */
7504 ix86_frame_pointer_required (void)
7506 /* If we accessed previous frames, then the generated code expects
7507 to be able to access the saved ebp value in our frame. */
7508 if (cfun->machine->accesses_prev_frame)
7511 /* Several x86 os'es need a frame pointer for other reasons,
7512 usually pertaining to setjmp. */
7513 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7516 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7517 the frame pointer by default. Turn it back on now if we've not
7518 got a leaf function. */
/* TLS descriptor calls count as non-leaf for this purpose.  */
7519 if (TARGET_OMIT_LEAF_FRAME_POINTER
7520 && (!current_function_is_leaf
7521 || ix86_current_function_calls_tls_descriptor))
7530 /* Record that the current function accesses previous call frames. */
/* Flag checked by ix86_frame_pointer_required to force a frame pointer.  */
7533 ix86_setup_frame_addresses (void)
7535 cfun->machine->accesses_prev_frame = 1;
7538 #ifndef USE_HIDDEN_LINKONCE
7539 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7540 # define USE_HIDDEN_LINKONCE 1
7542 # define USE_HIDDEN_LINKONCE 0
7546 static int pic_labels_used;
7548 /* Fills in the label name that should be used for a pc thunk for
7549 the given register. */
/* Uses the well-known "__i686.get_pc_thunk.<reg>" name when hidden
   linkonce sections are available (so duplicate thunks merge across
   objects), otherwise a file-local "LPR<regno>" label.  32-bit only.  */
7552 get_pc_thunk_name (char name[32], unsigned int regno)
7554 gcc_assert (!TARGET_64BIT);
7556 if (USE_HIDDEN_LINKONCE)
7557 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7559 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7563 /* This function generates code for -fpic that loads %ebx with
7564 the return address of the caller and then returns. */
/* TARGET_ASM_FILE_END hook: for each of the eight GPRs whose bit is set
   in pic_labels_used, emit the corresponding get-pc thunk — a function
   that loads the return address from the top of stack into the register
   and returns.  Section/visibility handling differs for Mach-O versus
   hidden-linkonce versus plain text section.  */
7567 ix86_file_end (void)
7572 for (regno = 0; regno < 8; ++regno)
/* Skip registers for which output_set_got never requested a thunk.  */
7576 if (! ((pic_labels_used >> regno) & 1))
7579 get_pc_thunk_name (name, regno);
/* Mach-O path (elided #if): weak, private-extern coalesced text.  */
7584 switch_to_section (darwin_sections[text_coal_section]);
7585 fputs ("\t.weak_definition\t", asm_out_file);
7586 assemble_name (asm_out_file, name);
7587 fputs ("\n\t.private_extern\t", asm_out_file);
7588 assemble_name (asm_out_file, name);
7589 fputs ("\n", asm_out_file);
7590 ASM_OUTPUT_LABEL (asm_out_file, name);
7594 if (USE_HIDDEN_LINKONCE)
/* Build a throwaway FUNCTION_DECL purely so unique_section /
   get_named_section give the thunk its own comdat section.  */
7598 decl = build_decl (BUILTINS_LOCATION,
7599 FUNCTION_DECL, get_identifier (name),
7601 TREE_PUBLIC (decl) = 1;
7602 TREE_STATIC (decl) = 1;
7603 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7605 (*targetm.asm_out.unique_section) (decl, 0);
7606 switch_to_section (get_named_section (decl, NULL, 0));
7608 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7609 fputs ("\t.hidden\t", asm_out_file);
7610 assemble_name (asm_out_file, name);
7611 fputc ('\n', asm_out_file);
7612 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7616 switch_to_section (text_section);
7617 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: reg = [esp] (the return address), then ret.  */
7620 xops[0] = gen_rtx_REG (Pmode, regno);
7621 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7622 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7623 output_asm_insn ("ret", xops);
7626 if (NEED_INDICATE_EXEC_STACK)
7627 file_end_indicate_exec_stack ();
7630 /* Emit code for the SET_GOT patterns. */
/* Emits the assembly that loads the GOT base into DEST: via the VxWorks
   GOTT_BASE/GOTT_INDEX tables under RTP, via an inline call/pop (or
   mov from a label) without deep branch prediction, or via a call to the
   per-register get-pc thunk otherwise, followed by adding
   _GLOBAL_OFFSET_TABLE_.  */
7633 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7639 if (TARGET_VXWORKS_RTP && flag_pic)
7641 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7642 xops[2] = gen_rtx_MEM (Pmode,
7643 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7644 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7646 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7647 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7648 an unadorned address. */
7649 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7650 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7651 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7655 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7657 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
/* call-next-instruction / pop idiom (or direct mov when non-PIC).  */
7659 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7662 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7664 output_asm_insn ("call\t%a2", xops);
7667 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7668 is what will be referenced by the Mach-O PIC subsystem. */
7670 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7673 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7674 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7677 output_asm_insn ("pop%z0\t%0", xops);
/* Deep-branch-prediction path: call a dedicated thunk instead of the
   call/pop pair, which would unbalance the return-address stack.  */
7682 get_pc_thunk_name (name, REGNO (dest));
7683 pic_labels_used |= 1 << REGNO (dest);
7685 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7686 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7687 output_asm_insn ("call\t%X2", xops);
7688 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7689 is what will be referenced by the Mach-O PIC subsystem. */
7692 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7694 targetm.asm_out.internal_label (asm_out_file, "L",
7695 CODE_LABEL_NUMBER (label));
/* Finally add _GLOBAL_OFFSET_TABLE_ (PC-relative form when the thunk
   label is part of the expression).  */
7702 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7703 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7705 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7710 /* Generate an "push" pattern for input ARG. */
/* Also tracks the CFA: a push grows the frame by one word when the CFA
   is still the stack pointer.  */
7715 if (ix86_cfa_state->reg == stack_pointer_rtx)
7716 ix86_cfa_state->offset += UNITS_PER_WORD;
7718 return gen_rtx_SET (VOIDmode,
7720 gen_rtx_PRE_DEC (Pmode,
7721 stack_pointer_rtx)),
7725 /* Return >= 0 if there is an unused call-clobbered register available
7726 for the entire function. */
/* Only valid for leaf, unprofiled functions that make no TLS descriptor
   calls; scans eax/ecx/edx (regnos 2..0) skipping the DRAP register.  */
7729 ix86_select_alt_pic_regnum (void)
7731 if (current_function_is_leaf && !crtl->profile
7732 && !ix86_current_function_calls_tls_descriptor)
7735 /* Can't use the same register for both PIC and DRAP. */
7737 drap = REGNO (crtl->drap_reg);
7740 for (i = 2; i >= 0; --i)
7741 if (i != drap && !df_regs_ever_live_p (i))
7745 return INVALID_REGNUM;
7748 /* Return 1 if we need to save REGNO. */
/* The PIC register is saved when it is live (or needed for EH return /
   constant pool) and no spare call-clobbered register can substitute;
   EH data registers are saved on the eh-return path; the DRAP register is
   always saved; otherwise the usual live-and-not-call-clobbered rule
   applies, excluding the hard frame pointer when one is in use.  */
7750 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7752 if (pic_offset_table_rtx
7753 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7754 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7756 || crtl->calls_eh_return
7757 || crtl->uses_const_pool))
7759 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7764 if (crtl->calls_eh_return && maybe_eh_return)
/* Walk EH_RETURN_DATA_REGNO until the INVALID_REGNUM sentinel.  */
7769 unsigned test = EH_RETURN_DATA_REGNO (i);
7770 if (test == INVALID_REGNUM)
7777 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
7780 return (df_regs_ever_live_p (regno)
7781 && !call_used_regs[regno]
7782 && !fixed_regs[regno]
7783 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7786 /* Return number of saved general prupose registers. */
/* Counts hard registers that ix86_save_reg says must be preserved,
   excluding SSE registers (counted separately below).  */
7789 ix86_nsaved_regs (void)
7794 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7795 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7800 /* Return number of saved SSE registrers. */
/* Only the MS ABI has callee-saved SSE registers; other ABIs return 0
   immediately (early-return line elided).  */
7803 ix86_nsaved_sseregs (void)
7808 if (ix86_cfun_abi () != MS_ABI)
7810 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7811 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7816 /* Given FROM and TO register numbers, say whether this elimination is
7817 allowed. If stack alignment is needed, we can only replace argument
7818 pointer with hard frame pointer, or replace frame pointer with stack
7819 pointer. Otherwise, frame pointer elimination is automatically
7820 handled and all other eliminations are valid. */
7823 ix86_can_eliminate (int from, int to)
7825 if (stack_realign_fp)
7826 return ((from == ARG_POINTER_REGNUM
7827 && to == HARD_FRAME_POINTER_REGNUM)
7828 || (from == FRAME_POINTER_REGNUM
7829 && to == STACK_POINTER_REGNUM));
/* Eliminating to the stack pointer is only legal without a frame
   pointer; eliminations to other registers are always allowed.  */
7831 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7834 /* Return the offset between two registers, one to be eliminated, and the other
7835 its replacement, at the start of a routine. */
/* All offsets are read straight out of the frame layout computed by
   ix86_compute_frame_layout; only the four legal (FROM, TO) pairs are
   accepted (enforced by the gcc_asserts).  */
7838 ix86_initial_elimination_offset (int from, int to)
7840 struct ix86_frame frame;
7841 ix86_compute_frame_layout (&frame);
7843 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7844 return frame.hard_frame_pointer_offset;
7845 else if (from == FRAME_POINTER_REGNUM
7846 && to == HARD_FRAME_POINTER_REGNUM)
7847 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7850 gcc_assert (to == STACK_POINTER_REGNUM);
7852 if (from == ARG_POINTER_REGNUM)
7853 return frame.stack_pointer_offset;
7855 gcc_assert (from == FRAME_POINTER_REGNUM);
7856 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7860 /* In a dynamically-aligned function, we can't know the offset from
7861 stack pointer to frame pointer, so we must ensure that setjmp
7862 eliminates fp against the hard fp (%ebp) rather than trying to
7863 index from %esp up to the top of the frame across a gap that is
7864 of unknown (at compile-time) size. */
7866 ix86_builtin_setjmp_frame_value (void)
7868 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7871 /* Fill structure ix86_frame about frame of currently computed function. */
/* Computes every field of struct ix86_frame for the current function:
   saved-register counts, padding amounts, the frame-pointer /
   hard-frame-pointer / stack-pointer offsets, the allocation size and the
   red-zone adjustment.  Layout order (growing offsets): return address and
   saved %ebp, GPR saves, aligned SSE saves, varargs save area, padding,
   locals, outgoing arguments, final alignment padding.  */
7874 ix86_compute_frame_layout (struct ix86_frame *frame)
7876 HOST_WIDE_INT total_size;
7877 unsigned int stack_alignment_needed;
7878 HOST_WIDE_INT offset;
7879 unsigned int preferred_alignment;
7880 HOST_WIDE_INT size = get_frame_size ();
7882 frame->nregs = ix86_nsaved_regs ();
7883 frame->nsseregs = ix86_nsaved_sseregs ();
7886 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7887 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7889 /* MS ABI seem to require stack alignment to be always 16 except for function
7891 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7893 preferred_alignment = 16;
7894 stack_alignment_needed = 16;
7895 crtl->preferred_stack_boundary = 128;
7896 crtl->stack_alignment_needed = 128;
7899 gcc_assert (!size || stack_alignment_needed);
7900 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7901 gcc_assert (preferred_alignment <= stack_alignment_needed);
7903 /* During reload iteration the amount of registers saved can change.
7904 Recompute the value as needed. Do not recompute when amount of registers
7905 didn't change as reload does multiple calls to the function and does not
7906 expect the decision to change within single iteration. */
7907 if (!optimize_function_for_size_p (cfun)
7908 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7910 int count = frame->nregs;
7912 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7913 /* The fast prologue uses move instead of push to save registers. This
7914 is significantly longer, but also executes faster as modern hardware
7915 can execute the moves in parallel, but can't do that for push/pop.
7917 Be careful about choosing what prologue to emit: When function takes
7918 many instructions to execute we may use slow version as well as in
7919 case function is known to be outside hot spot (this is known with
7920 feedback only). Weight the size of function by number of registers
7921 to save as it is cheap to use one or two push instructions but very
7922 slow to use many of them. */
7924 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7925 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7926 || (flag_branch_probabilities
7927 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7928 cfun->machine->use_fast_prologue_epilogue = false;
7930 cfun->machine->use_fast_prologue_epilogue
7931 = !expensive_function_p (count);
7933 if (TARGET_PROLOGUE_USING_MOVE
7934 && cfun->machine->use_fast_prologue_epilogue)
7935 frame->save_regs_using_mov = true;
7937 frame->save_regs_using_mov = false;
7940 /* Skip return address and saved base pointer. */
7941 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7943 frame->hard_frame_pointer_offset = offset;
7945 /* Set offset to aligned because the realigned frame starts from
7947 if (stack_realign_fp)
7948 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7950 /* Register save area */
7951 offset += frame->nregs * UNITS_PER_WORD;
7953 /* Align SSE reg save area. */
/* SSE saves must be 16-byte aligned; padding0 rounds offset up to 16.  */
7954 if (frame->nsseregs)
7955 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7957 frame->padding0 = 0;
7959 /* SSE register save area. */
7960 offset += frame->padding0 + frame->nsseregs * 16;
/* Varargs register save area size was recorded by
   setup_incoming_varargs_64.  */
7963 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7964 offset += frame->va_arg_size;
7966 /* Align start of frame for local function. */
7967 frame->padding1 = ((offset + stack_alignment_needed - 1)
7968 & -stack_alignment_needed) - offset;
7970 offset += frame->padding1;
7972 /* Frame pointer points here. */
7973 frame->frame_pointer_offset = offset;
7977 /* Add outgoing arguments area. Can be skipped if we eliminated
7978 all the function calls as dead code.
7979 Skipping is however impossible when function calls alloca. Alloca
7980 expander assumes that last crtl->outgoing_args_size
7981 of stack frame are unused. */
7982 if (ACCUMULATE_OUTGOING_ARGS
7983 && (!current_function_is_leaf || cfun->calls_alloca
7984 || ix86_current_function_calls_tls_descriptor))
7986 offset += crtl->outgoing_args_size;
7987 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7990 frame->outgoing_arguments_size = 0;
7992 /* Align stack boundary. Only needed if we're calling another function
7994 if (!current_function_is_leaf || cfun->calls_alloca
7995 || ix86_current_function_calls_tls_descriptor)
7996 frame->padding2 = ((offset + preferred_alignment - 1)
7997 & -preferred_alignment) - offset;
7999 frame->padding2 = 0;
8001 offset += frame->padding2;
8003 /* We've reached end of stack frame. */
8004 frame->stack_pointer_offset = offset;
8006 /* Size prologue needs to allocate. */
8007 frame->to_allocate =
8008 (size + frame->padding1 + frame->padding2
8009 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Moves are pointless for tiny frames and impossible when the 64-bit
   displacement would not fit in a signed 32-bit immediate.  */
8011 if ((!frame->to_allocate && frame->nregs <= 1)
8012 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
8013 frame->save_regs_using_mov = false;
/* Use the red zone (area below %rsp that signal handlers must not
   clobber) only in leaf functions that never move %rsp.  */
8015 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8016 && current_function_sp_is_unchanging
8017 && current_function_is_leaf
8018 && !ix86_current_function_calls_tls_descriptor)
8020 frame->red_zone_size = frame->to_allocate;
8021 if (frame->save_regs_using_mov)
8022 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8023 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8024 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8027 frame->red_zone_size = 0;
8028 frame->to_allocate -= frame->red_zone_size;
8029 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (guarded by an elided #if/if).  */
8031 fprintf (stderr, "\n");
8032 fprintf (stderr, "size: %ld\n", (long)size);
8033 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
8034 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
8035 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
8036 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
8037 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
8038 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
8039 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
8040 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
8041 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
8042 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
8043 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
8044 (long)frame->hard_frame_pointer_offset);
8045 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
8046 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
8047 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
8048 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
8052 /* Emit code to save registers in the prologue. */
/* Walks the hard registers from high regno to low and pushes every
   integer (non-SSE) register that ix86_save_reg reports as needing a
   save; each push is marked RTX_FRAME_RELATED_P so DWARF CFI is
   emitted for it.
   NOTE(review): this extraction is missing interior lines (return
   type, local declarations, braces) -- verify against full source.  */
8055 ix86_emit_save_regs (void)
8060 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8061 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8063 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8064 RTX_FRAME_RELATED_P (insn) = 1;
8068 /* Emit code to save registers using MOV insns.  First register
8069 is saved to POINTER + OFFSET. */
/* Unlike ix86_emit_save_regs (push-based), this stores each live
   integer register with a word-sized move relative to POINTER,
   advancing OFFSET by UNITS_PER_WORD per register.  Each store is
   frame-related for CFI purposes.
   NOTE(review): extraction gaps -- declarations/braces missing.  */
8071 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8076 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8077 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8079 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8081 gen_rtx_REG (Pmode, regno));
8082 RTX_FRAME_RELATED_P (insn) = 1;
8083 offset += UNITS_PER_WORD;
8087 /* Emit code to save SSE registers using MOV insns.  First register
8088 is saved to POINTER + OFFSET. */
/* SSE registers are stored as 16-byte TImode values; set_mem_align
   marks each slot as 128-bit aligned (the frame layout code aligned
   the SSE save area to 16 bytes).
   NOTE(review): extraction gaps -- declarations/braces missing.  */
8090 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8096 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8097 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8099 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8100 set_mem_align (mem, 128);
8101 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8102 RTX_FRAME_RELATED_P (insn) = 1;
/* Chain (EXPR_LIST of REG_CFA_RESTORE notes) queued by
   ix86_add_cfa_restore_note and flushed onto the next stack
   manipulation insn by ix86_add_queued_cfa_restore_notes.
   GTY(()) so the garbage collector roots it.  */
8107 static GTY(()) rtx queued_cfa_restores;
8109 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
8110 manipulation insn.  Don't add it if the previously
8111 saved value will be left untouched within stack red-zone till return,
8112 as unwinders can find the same value in the register and
/* NOTE(review): the condition below is only partially visible in this
   extraction (a leading `if (insn` / red-zone guard line is missing);
   when the guard passes, the note is attached immediately, otherwise
   it is chained onto queued_cfa_restores.  */
8116 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
8119 && !TARGET_64BIT_MS_ABI
8120 && red_offset + RED_ZONE_SIZE >= 0
8121 && crtl->args.pops_args < 65536)
8126 add_reg_note (insn, REG_CFA_RESTORE, reg);
8127 RTX_FRAME_RELATED_P (insn) = 1;
8131 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8134 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
/* Splices the queued_cfa_restores chain in front of INSN's existing
   REG_NOTES, clears the queue, and marks INSN frame-related so the
   restores are associated with this stack adjustment.  */
8137 ix86_add_queued_cfa_restore_notes (rtx insn)
8140 if (!queued_cfa_restores)
/* Find the tail of the queued chain so it can be linked to the
   insn's current note list.  */
8142 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8144 XEXP (last, 1) = REG_NOTES (insn);
8145 REG_NOTES (insn) = queued_cfa_restores;
8146 queued_cfa_restores = NULL_RTX;
8147 RTX_FRAME_RELATED_P (insn) = 1;
8150 /* Expand prologue or epilogue stack adjustment.
8151 The pattern exists to put a dependency on all ebp-based memory accesses.
8152 STYLE should be negative if instructions should be marked as frame related,
8153 zero if %r11 register is live and cannot be freely used and positive
/* NOTE(review): the tail of this header comment and several interior
   lines (mode checks, braces) are missing from this extraction.
   SET_CFA selects whether the CFA state is updated and a
   REG_CFA_ADJUST_CFA note attached.  */
8157 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8158 int style, bool set_cfa)
8163 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
8164 else if (x86_64_immediate_operand (offset, DImode))
8165 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
8169 /* r11 is used by indirect sibcall return as well, set before the
8170 epilogue and used after the epilogue.  ATM indirect sibcall
8171 shouldn't be used together with huge frame sizes in one
8172 function because of the frame_size check in sibcall.c. */
8174 r11 = gen_rtx_REG (DImode, R11_REG);
8175 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8177 RTX_FRAME_RELATED_P (insn) = 1;
8178 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8183 ix86_add_queued_cfa_restore_notes (insn);
/* Track the CFA: adjusting the stack by OFFSET moves the CFA by the
   same amount, and the CFA register follows DEST.  */
8189 gcc_assert (ix86_cfa_state->reg == src);
8190 ix86_cfa_state->offset += INTVAL (offset);
8191 ix86_cfa_state->reg = dest;
8193 r = gen_rtx_PLUS (Pmode, src, offset);
8194 r = gen_rtx_SET (VOIDmode, dest, r);
8195 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8196 RTX_FRAME_RELATED_P (insn) = 1;
8199 RTX_FRAME_RELATED_P (insn) = 1;
8202 /* Find an available register to be used as dynamic realign argument
8203 pointer register.  Such a register will be written in prologue and
8204 used in begin of body, so it must not be
8205 1. parameter passing register.
8207 We reuse static-chain register if it is available.  Otherwise, we
8208 use DI for i386 and R13 for x86-64.  We chose R13 since it has
8211 Return: the regno of chosen register. */
/* NOTE(review): the TARGET_64BIT/else split, the return statements
   (R13_REG, DI_REG, CX_REG) and several comment lines are missing
   from this extraction -- verify against the full source.  */
8214 find_drap_reg (void)
8216 tree decl = cfun->decl;
8220 /* Use R13 for nested function or function need static chain.
8221 Since function with tail call may use any caller-saved
8222 registers in epilogue, DRAP must not use caller-saved
8223 register in such case. */
8224 if ((decl_function_context (decl)
8225 && !DECL_NO_STATIC_CHAIN (decl))
8226 || crtl->tail_call_emit)
8233 /* Use DI for nested function or function need static chain.
8234 Since function with tail call may use any caller-saved
8235 registers in epilogue, DRAP must not use caller-saved
8236 register in such case. */
8237 if ((decl_function_context (decl)
8238 && !DECL_NO_STATIC_CHAIN (decl))
8239 || crtl->tail_call_emit)
8242 /* Reuse static chain register if it isn't used for parameter
/* Regparm <= 2 and no fastcall means ECX is free of parameter
   passing duty and can serve as the DRAP.  */
8244 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8245 && !lookup_attribute ("fastcall",
8246 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8253 /* Update incoming stack boundary and estimated stack alignment. */
/* Computes ix86_incoming_stack_boundary from (in priority order) the
   command-line override, the force_align_arg_pointer attribute, the
   callee's parm_stack_boundary requirement, and the special case of
   main() whose stack the runtime aligns.  Finally bumps the estimated
   stack alignment to 128 bits for 64-bit varargs register saves.  */
8256 ix86_update_stack_boundary (void)
8258 /* Prefer the one specified at command line. */
8259 ix86_incoming_stack_boundary
8260 = (ix86_user_incoming_stack_boundary
8261 ? ix86_user_incoming_stack_boundary
8262 : ix86_default_incoming_stack_boundary);
8264 /* Incoming stack alignment can be changed on individual functions
8265 via force_align_arg_pointer attribute.  We use the smallest
8266 incoming stack boundary. */
8267 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8268 && lookup_attribute (ix86_force_align_arg_pointer_string,
8269 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8270 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8272 /* The incoming stack frame has to be aligned at least at
8273 parm_stack_boundary. */
8274 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8275 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8277 /* Stack at entrance of main is aligned by runtime.  We use the
8278 smallest incoming stack boundary. */
8279 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8280 && DECL_NAME (current_function_decl)
8281 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8282 && DECL_FILE_SCOPE_P (current_function_decl))
8283 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8285 /* x86_64 vararg needs 16byte stack alignment for register save
/* NOTE(review): the guard's first line (TARGET_64BIT && cfun->stdarg,
   presumably) is missing from this extraction -- confirm.  */
8289 && crtl->stack_alignment_estimated < 128)
8290 crtl->stack_alignment_estimated = 128;
8293 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
8294 needed or an rtx for DRAP otherwise. */
/* When stack realignment via DRAP is active, picks a DRAP register,
   copies it into a pseudo (vDRAP) right after function entry, and
   records both in crtl.  NOTE(review): the declarations of seq /
   drap_vreg / insn, the start/end of the insn sequence, and the
   return statements are missing from this extraction.  */
8297 ix86_get_drap_rtx (void)
/* Without accumulated outgoing args the prologue must be able to
   realign via DRAP, so force its availability.  */
8299 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8300 crtl->need_drap = true;
8302 if (stack_realign_drap)
8304 /* Assign DRAP to vDRAP and returns vDRAP */
8305 unsigned int regno = find_drap_reg ();
8310 arg_ptr = gen_rtx_REG (Pmode, regno);
8311 crtl->drap_reg = arg_ptr;
8314 drap_vreg = copy_to_reg (arg_ptr);
8318 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8319 RTX_FRAME_RELATED_P (insn) = 1;
8326 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
/* i386 always uses the virtual incoming-args pointer; DRAP handling
   happens elsewhere (ix86_get_drap_rtx).  */
8329 ix86_internal_arg_pointer (void)
8331 return virtual_incoming_args_rtx;
8334 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8335 to be generated in correct form. */
8337 ix86_finalize_stack_realign_flags (void)
8339 /* Check if stack realign is really needed after reload, and
8340 stores result in cfun */
8341 unsigned int incoming_stack_boundary
8342 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8343 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
/* Leaf functions only need realignment for slots they actually use;
   non-leaf functions must honour the full requested alignment.  */
8344 unsigned int stack_realign = (incoming_stack_boundary
8345 < (current_function_is_leaf
8346 ? crtl->max_used_stack_slot_alignment
8347 : crtl->stack_alignment_needed));
8349 if (crtl->stack_realign_finalized)
8351 /* After stack_realign_needed is finalized, we can no longer
8353 gcc_assert (crtl->stack_realign_needed == stack_realign);
8357 crtl->stack_realign_needed = stack_realign;
8358 crtl->stack_realign_finalized = true;
8362 /* Expand the prologue into a bunch of separate insns. */
/* Order of work: finalize realign flags -> compute frame layout ->
   DRAP setup + stack realignment -> push/establish frame pointer ->
   fp-based realignment -> save integer regs (push or mov) ->
   allocate the frame (direct adjust or probed via
   allocate_stack_worker) -> mov-based reg/SSE saves -> PIC register
   setup -> misc (vDRAP without realign, memory blockage, cld).
   CFA tracking (ix86_cfa_state) is updated at each step for dwarf2out.
   NOTE(review): this extraction is missing many interior lines
   (declarations, braces, else branches, some operands) -- do not
   modify code from this view; verify against the full source.  */
8365 ix86_expand_prologue (void)
8369 struct ix86_frame frame;
8370 HOST_WIDE_INT allocate;
8372 ix86_finalize_stack_realign_flags ();
8374 /* DRAP should not coexist with stack_realign_fp */
8375 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8377 /* Initialize CFA state for before the prologue. */
8378 ix86_cfa_state->reg = stack_pointer_rtx;
8379 ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
8381 ix86_compute_frame_layout (&frame);
8383 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8384 of DRAP is needed and stack realignment is really needed after reload */
8385 if (crtl->drap_reg && crtl->stack_realign_needed)
8388 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8389 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8390 ? 0 : UNITS_PER_WORD);
8392 gcc_assert (stack_realign_drap);
8394 /* Grab the argument pointer. */
8395 x = plus_constant (stack_pointer_rtx,
8396 (UNITS_PER_WORD + param_ptr_offset));
8399 /* Only need to push parameter pointer reg if it is caller
8401 if (!call_used_regs[REGNO (crtl->drap_reg)])
8403 /* Push arg pointer reg */
8404 insn = emit_insn (gen_push (y));
8405 RTX_FRAME_RELATED_P (insn) = 1;
8408 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8409 RTX_FRAME_RELATED_P (insn) = 1;
8410 ix86_cfa_state->reg = crtl->drap_reg;
8412 /* Align the stack. */
8413 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8415 GEN_INT (-align_bytes)));
8416 RTX_FRAME_RELATED_P (insn) = 1;
8418 /* Replicate the return address on the stack so that return
8419 address can be reached via (argp - 1) slot.  This is needed
8420 to implement macro RETURN_ADDR_RTX and intrinsic function
8421 expand_builtin_return_addr etc. */
8423 x = gen_frame_mem (Pmode,
8424 plus_constant (x, -UNITS_PER_WORD));
8425 insn = emit_insn (gen_push (x));
8426 RTX_FRAME_RELATED_P (insn) = 1;
8429 /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8430 slower on all targets.  Also sdb doesn't like it. */
8432 if (frame_pointer_needed)
8434 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8435 RTX_FRAME_RELATED_P (insn) = 1;
8437 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8438 RTX_FRAME_RELATED_P (insn) = 1;
8440 if (ix86_cfa_state->reg == stack_pointer_rtx)
8441 ix86_cfa_state->reg = hard_frame_pointer_rtx;
8444 if (stack_realign_fp)
8446 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8447 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8449 /* Align the stack. */
8450 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8452 GEN_INT (-align_bytes)));
8453 RTX_FRAME_RELATED_P (insn) = 1;
8456 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8458 if (!frame.save_regs_using_mov)
8459 ix86_emit_save_regs ();
8461 allocate += frame.nregs * UNITS_PER_WORD;
8463 /* When using red zone we may start register saving before allocating
8464 the stack frame saving one cycle of the prologue.  However I will
8465 avoid doing this if I am going to have to probe the stack since
8466 at least on x86_64 the stack probe can turn into a call that clobbers
8467 a red zone location */
8468 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8469 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8470 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8471 && !crtl->stack_realign_needed)
8472 ? hard_frame_pointer_rtx
8473 : stack_pointer_rtx,
8474 -frame.nregs * UNITS_PER_WORD);
8478 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8479 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8480 GEN_INT (-allocate), -1,
8481 ix86_cfa_state->reg == stack_pointer_rtx);
8484 /* Only valid for Win32. */
8485 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8489 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8491 if (cfun->machine->call_abi == MS_ABI)
/* EAX carries the allocation size into the stack-probe worker; if it
   is live at function start it must be preserved around the call.  */
8494 eax_live = ix86_eax_live_at_start_p ();
8498 emit_insn (gen_push (eax));
8499 allocate -= UNITS_PER_WORD;
8502 emit_move_insn (eax, GEN_INT (allocate));
8505 insn = gen_allocate_stack_worker_64 (eax, eax);
8507 insn = gen_allocate_stack_worker_32 (eax, eax);
8508 insn = emit_insn (insn);
8510 if (ix86_cfa_state->reg == stack_pointer_rtx)
8512 ix86_cfa_state->offset += allocate;
8513 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8514 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8515 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8516 RTX_FRAME_RELATED_P (insn) = 1;
/* Restore the live EAX value that was pushed before the probe.  */
8521 if (frame_pointer_needed)
8522 t = plus_constant (hard_frame_pointer_rtx,
8525 - frame.nregs * UNITS_PER_WORD);
8527 t = plus_constant (stack_pointer_rtx, allocate);
8528 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8532 if (frame.save_regs_using_mov
8533 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8534 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8536 if (!frame_pointer_needed
8537 || !frame.to_allocate
8538 || crtl->stack_realign_needed)
8539 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8541 + frame.nsseregs * 16 + frame.padding0);
8543 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8544 -frame.nregs * UNITS_PER_WORD);
8546 if (!frame_pointer_needed
8547 || !frame.to_allocate
8548 || crtl->stack_realign_needed)
8549 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8552 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8553 - frame.nregs * UNITS_PER_WORD
8554 - frame.nsseregs * 16
8557 pic_reg_used = false;
8558 if (pic_offset_table_rtx
8559 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8562 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8564 if (alt_pic_reg_used != INVALID_REGNUM)
8565 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8567 pic_reg_used = true;
8574 if (ix86_cmodel == CM_LARGE_PIC)
8576 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8577 rtx label = gen_label_rtx ();
8579 LABEL_PRESERVE_P (label) = 1;
8580 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8581 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8582 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8583 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8584 pic_offset_table_rtx, tmp_reg));
8587 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8590 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8593 /* In the pic_reg_used case, make sure that the got load isn't deleted
8594 when mcount needs it.  Blockage to avoid call movement across mcount
8595 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8597 if (crtl->profile && pic_reg_used)
8598 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8600 if (crtl->drap_reg && !crtl->stack_realign_needed)
8602 /* vDRAP is setup but after reload it turns out stack realign
8603 isn't necessary, here we will emit prologue to setup DRAP
8604 without stack realign adjustment */
8605 int drap_bp_offset = UNITS_PER_WORD * 2;
8606 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8607 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8610 /* Prevent instructions from being scheduled into register save push
8611 sequence when access to the redzone area is done through frame pointer.
8612 The offset between the frame pointer and the stack pointer is calculated
8613 relative to the value of the stack pointer at the end of the function
8614 prologue, and moving instructions that access redzone area via frame
8615 pointer inside push sequence violates this assumption. */
8616 if (frame_pointer_needed && frame.red_zone_size)
8617 emit_insn (gen_memory_blockage ());
8619 /* Emit cld instruction if stringops are used in the function. */
8620 if (TARGET_CLD && ix86_current_function_needs_cld)
8621 emit_insn (gen_cld ());
8624 /* Emit code to restore REG using a POP insn. */
/* Pops REG and keeps the CFA description coherent: popping the DRAP
   register redefines the CFA to that register; popping while the CFA
   is SP shrinks the CFA offset by a word; popping the frame pointer
   while it is the CFA switches the CFA back to SP+word.  RED_OFFSET
   is forwarded to ix86_add_cfa_restore_note for red-zone filtering.  */
8627 ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
8629 rtx insn = emit_insn (ix86_gen_pop1 (reg));
8631 if (ix86_cfa_state->reg == crtl->drap_reg
8632 && REGNO (reg) == REGNO (crtl->drap_reg))
8634 /* Previously we'd represented the CFA as an expression
8635 like *(%ebp - 8).  We've just popped that value from
8636 the stack, which means we need to reset the CFA to
8637 the drap register.  This will remain until we restore
8638 the stack pointer. */
8639 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8640 RTX_FRAME_RELATED_P (insn) = 1;
8644 if (ix86_cfa_state->reg == stack_pointer_rtx)
8646 ix86_cfa_state->offset -= UNITS_PER_WORD;
8647 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8648 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
8649 RTX_FRAME_RELATED_P (insn) = 1;
8652 /* When the frame pointer is the CFA, and we pop it, we are
8653 swapping back to the stack pointer as the CFA.  This happens
8654 for stack frames that don't allocate other data, so we assume
8655 the stack pointer is now pointing at the return address, i.e.
8656 the function entry state, which makes the offset be 1 word. */
8657 else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
8658 && reg == hard_frame_pointer_rtx)
8660 ix86_cfa_state->reg = stack_pointer_rtx;
8661 ix86_cfa_state->offset = UNITS_PER_WORD;
8663 add_reg_note (insn, REG_CFA_DEF_CFA,
8664 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8665 GEN_INT (UNITS_PER_WORD)));
8666 RTX_FRAME_RELATED_P (insn) = 1;
8669 ix86_add_cfa_restore_note (insn, reg, red_offset);
8672 /* Emit code to restore saved registers using POP insns. */
/* Pops the integer registers in ascending regno order (the mirror of
   the descending-order pushes in ix86_emit_save_regs), advancing
   RED_OFFSET by a word per pop for CFA-restore bookkeeping.  */
8675 ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
8679 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8680 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8682 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
8684 red_offset += UNITS_PER_WORD;
8688 /* Emit code and notes for the LEAVE instruction. */
/* LEAVE restores both SP (from the frame pointer) and the saved frame
   pointer in one insn; when the frame pointer is the current CFA the
   insn carries the CFA adjustment and the frame-pointer restore note,
   and any queued CFA restores are flushed onto it.  */
8691 ix86_emit_leave (HOST_WIDE_INT red_offset)
8693 rtx insn = emit_insn (ix86_gen_leave ());
8695 ix86_add_queued_cfa_restore_notes (insn);
8697 if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
8699 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8700 copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
8701 RTX_FRAME_RELATED_P (insn) = 1;
8702 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
8706 /* Emit code to restore saved registers using MOV insns.  First register
8707 is restored from POINTER + OFFSET. */
/* MAYBE_EH_RETURN selects whether eh_return-only saves are included
   (forwarded to ix86_save_reg).  For x86-64, offsets that do not fit
   a 32-bit displacement are materialized through r11.
   NOTE(review): extraction gaps -- declarations/braces missing.  */
8709 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8710 HOST_WIDE_INT red_offset,
8711 int maybe_eh_return)
8714 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8717 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8718 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8720 rtx reg = gen_rtx_REG (Pmode, regno);
8722 /* Ensure that adjust_address won't be forced to produce pointer
8723 out of range allowed by x86-64 instruction set. */
8724 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8728 r11 = gen_rtx_REG (DImode, R11_REG);
8729 emit_move_insn (r11, GEN_INT (offset));
8730 emit_insn (gen_adddi3 (r11, r11, pointer));
8731 base_address = gen_rtx_MEM (Pmode, r11);
8734 insn = emit_move_insn (reg,
8735 adjust_address (base_address, Pmode, offset));
8736 offset += UNITS_PER_WORD;
8738 if (ix86_cfa_state->reg == crtl->drap_reg
8739 && regno == REGNO (crtl->drap_reg))
8741 /* Previously we'd represented the CFA as an expression
8742 like *(%ebp - 8).  We've just popped that value from
8743 the stack, which means we need to reset the CFA to
8744 the drap register.  This will remain until we restore
8745 the stack pointer. */
8746 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8747 RTX_FRAME_RELATED_P (insn) = 1;
8750 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8752 red_offset += UNITS_PER_WORD;
8756 /* Emit code to restore saved registers using MOV insns.  First register
8757 is restored from POINTER + OFFSET. */
/* SSE counterpart of ix86_emit_restore_regs_using_mov: 16-byte TImode
   loads from 128-bit-aligned slots, with the same r11 workaround for
   offsets exceeding a 32-bit displacement on x86-64.
   NOTE(review): extraction gaps -- declarations/braces missing.  */
8759 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8760 HOST_WIDE_INT red_offset,
8761 int maybe_eh_return)
8764 rtx base_address = gen_rtx_MEM (TImode, pointer);
8767 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8768 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8770 rtx reg = gen_rtx_REG (TImode, regno);
8772 /* Ensure that adjust_address won't be forced to produce pointer
8773 out of range allowed by x86-64 instruction set. */
8774 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8778 r11 = gen_rtx_REG (DImode, R11_REG);
8779 emit_move_insn (r11, GEN_INT (offset));
8780 emit_insn (gen_adddi3 (r11, r11, pointer));
8781 base_address = gen_rtx_MEM (TImode, r11);
8784 mem = adjust_address (base_address, TImode, offset);
8785 set_mem_align (mem, 128);
8786 insn = emit_move_insn (reg, mem);
8789 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8795 /* Restore function stack, frame, and registers. */
/* STYLE: 1 = normal epilogue, 0 = sibcall epilogue (no return insn,
   restricted register use), 2 = eh_return path.  Two main strategies:
   mov-based restores (when SP is invalid or moves are profitable,
   ending in LEAVE / an SP adjustment) vs. deallocate-then-pop.  The
   function mirrors the prologue's CFA tracking and finally restores
   ix86_cfa_state from CFA_STATE_SAVE so a later epilogue expansion
   starts from the prologue's state.
   NOTE(review): this extraction is missing many interior lines
   (declarations of sp_valid/using_drap/insn, braces, else arms,
   some call arguments) -- do not modify code from this view.  */
8798 ix86_expand_epilogue (int style)
8801 struct ix86_frame frame;
8802 HOST_WIDE_INT offset, red_offset;
8803 struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
8806 ix86_finalize_stack_realign_flags ();
8808 /* When stack is realigned, SP must be valid. */
8809 sp_valid = (!frame_pointer_needed
8810 || current_function_sp_is_unchanging
8811 || stack_realign_fp);
8813 ix86_compute_frame_layout (&frame);
8815 /* See the comment about red zone and frame
8816 pointer usage in ix86_expand_prologue. */
8817 if (frame_pointer_needed && frame.red_zone_size)
8818 emit_insn (gen_memory_blockage ());
8820 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
8821 gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
8823 /* Calculate start of saved registers relative to ebp.  Special care
8824 must be taken for the normal return case of a function using
8825 eh_return: the eax and edx registers are marked as saved, but not
8826 restored along this path. */
8827 offset = frame.nregs;
8828 if (crtl->calls_eh_return && style != 2)
8830 offset *= -UNITS_PER_WORD;
8831 offset -= frame.nsseregs * 16 + frame.padding0;
8833 /* Calculate start of saved registers relative to esp on entry of the
8834 function.  When realigning stack, this needs to be the most negative
8835 value possible at runtime. */
8836 red_offset = offset;
8838 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8840 else if (stack_realign_fp)
8841 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8843 if (frame_pointer_needed)
8844 red_offset -= UNITS_PER_WORD;
8846 /* If we're only restoring one register and sp is not valid then
8847 using a move instruction to restore the register since it's
8848 less work than reloading sp and popping the register.
8850 The default code result in stack adjustment using add/lea instruction,
8851 while this code results in LEAVE instruction (or discrete equivalent),
8852 so it is profitable in some other cases as well.  Especially when there
8853 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
8854 and there is exactly one register to pop.  This heuristic may need some
8855 tuning in future. */
8856 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8857 || (TARGET_EPILOGUE_USING_MOVE
8858 && cfun->machine->use_fast_prologue_epilogue
8859 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8860 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
8861 && frame.to_allocate)
8862 || (frame_pointer_needed && TARGET_USE_LEAVE
8863 && cfun->machine->use_fast_prologue_epilogue
8864 && (frame.nregs + frame.nsseregs) == 1)
8865 || crtl->calls_eh_return)
8867 /* Restore registers.  We can use ebp or esp to address the memory
8868 locations.  If both are available, default to ebp, since offsets
8869 are known to be small.  Only exception is esp pointing directly
8870 to the end of block of saved registers, where we may simplify
8873 If we are realigning stack with bp and sp, regs restore can't
8874 be addressed by bp.  sp must be used instead. */
8876 if (!frame_pointer_needed
8877 || (sp_valid && !frame.to_allocate)
8878 || stack_realign_fp)
8880 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8881 frame.to_allocate, red_offset,
8883 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8885 + frame.nsseregs * 16
8888 + frame.nsseregs * 16
8889 + frame.padding0, style == 2);
8893 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8896 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8898 + frame.nsseregs * 16
8901 + frame.nsseregs * 16
8902 + frame.padding0, style == 2);
8905 red_offset -= offset;
8907 /* eh_return epilogues need %ecx added to the stack pointer. */
8910 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8912 /* Stack align doesn't work with eh_return. */
8913 gcc_assert (!crtl->stack_realign_needed);
8915 if (frame_pointer_needed)
8917 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8918 tmp = plus_constant (tmp, UNITS_PER_WORD);
8919 tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8921 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8922 tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
8924 /* Note that we use SA as a temporary CFA, as the return
8925 address is at the proper place relative to it.  We
8926 pretend this happens at the FP restore insn because
8927 prior to this insn the FP would be stored at the wrong
8928 offset relative to SA, and after this insn we have no
8929 other reasonable register to use for the CFA.  We don't
8930 bother resetting the CFA to the SP for the duration of
8932 add_reg_note (tmp, REG_CFA_DEF_CFA,
8933 plus_constant (sa, UNITS_PER_WORD));
8934 ix86_add_queued_cfa_restore_notes (tmp);
8935 add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8936 RTX_FRAME_RELATED_P (tmp) = 1;
8937 ix86_cfa_state->reg = sa;
8938 ix86_cfa_state->offset = UNITS_PER_WORD;
8940 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8941 const0_rtx, style, false);
8945 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8946 tmp = plus_constant (tmp, (frame.to_allocate
8947 + frame.nregs * UNITS_PER_WORD
8948 + frame.nsseregs * 16
8950 tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8951 ix86_add_queued_cfa_restore_notes (tmp);
8953 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
8954 if (ix86_cfa_state->offset != UNITS_PER_WORD)
8956 ix86_cfa_state->offset = UNITS_PER_WORD;
8957 add_reg_note (tmp, REG_CFA_DEF_CFA,
8958 plus_constant (stack_pointer_rtx,
8960 RTX_FRAME_RELATED_P (tmp) = 1;
8964 else if (!frame_pointer_needed)
8965 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8966 GEN_INT (frame.to_allocate
8967 + frame.nregs * UNITS_PER_WORD
8968 + frame.nsseregs * 16
8970 style, !using_drap);
8971 /* If not an i386, mov & pop is faster than "leave". */
8972 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8973 || !cfun->machine->use_fast_prologue_epilogue)
8974 ix86_emit_leave (red_offset);
8977 pro_epilogue_adjust_stack (stack_pointer_rtx,
8978 hard_frame_pointer_rtx,
8979 const0_rtx, style, !using_drap);
8981 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
8986 /* First step is to deallocate the stack frame so that we can
8989 If we realign stack with frame pointer, then stack pointer
8990 won't be able to recover via lea $offset(%bp), %sp, because
8991 there is a padding area between bp and sp for realign.
8992 "add $to_allocate, %sp" must be used instead. */
8995 gcc_assert (frame_pointer_needed);
8996 gcc_assert (!stack_realign_fp);
8997 pro_epilogue_adjust_stack (stack_pointer_rtx,
8998 hard_frame_pointer_rtx,
8999 GEN_INT (offset), style, false);
9000 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9001 frame.to_allocate, red_offset,
9003 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9004 GEN_INT (frame.nsseregs * 16),
9007 else if (frame.to_allocate || frame.nsseregs)
9009 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9010 frame.to_allocate, red_offset,
9012 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9013 GEN_INT (frame.to_allocate
9014 + frame.nsseregs * 16
9015 + frame.padding0), style,
9016 !using_drap && !frame_pointer_needed);
9019 ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
9021 red_offset -= offset;
9023 if (frame_pointer_needed)
9025 /* Leave results in shorter dependency chains on CPUs that are
9026 able to grok it fast. */
9027 if (TARGET_USE_LEAVE)
9028 ix86_emit_leave (red_offset);
9031 /* For stack realigned really happens, recover stack
9032 pointer to hard frame pointer is a must, if not using
9034 if (stack_realign_fp)
9035 pro_epilogue_adjust_stack (stack_pointer_rtx,
9036 hard_frame_pointer_rtx,
9037 const0_rtx, style, !using_drap);
9038 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
9046 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
9047 ? 0 : UNITS_PER_WORD);
9050 gcc_assert (stack_realign_drap);
9052 insn = emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
9054 GEN_INT (-(UNITS_PER_WORD
9055 + param_ptr_offset))));
9057 ix86_cfa_state->reg = stack_pointer_rtx;
9058 ix86_cfa_state->offset = UNITS_PER_WORD + param_ptr_offset;
9060 add_reg_note (insn, REG_CFA_DEF_CFA,
9061 gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
9062 GEN_INT (ix86_cfa_state->offset)));
9063 RTX_FRAME_RELATED_P (insn) = 1;
9065 if (param_ptr_offset)
9066 ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
9069 /* Sibcall epilogues don't want a return instruction. */
9072 *ix86_cfa_state = cfa_state_save;
9076 if (crtl->args.pops_args && crtl->args.size)
9078 rtx popc = GEN_INT (crtl->args.pops_args);
9080 /* i386 can only pop 64K bytes.  If asked to pop more, pop return
9081 address, do explicit add, and jump indirectly to the caller. */
9083 if (crtl->args.pops_args >= 65536)
9085 rtx ecx = gen_rtx_REG (SImode, CX_REG);
9088 /* There is no "pascal" calling convention in any 64bit ABI. */
9089 gcc_assert (!TARGET_64BIT);
9091 insn = emit_insn (gen_popsi1 (ecx));
9092 ix86_cfa_state->offset -= UNITS_PER_WORD;
9094 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9095 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
9096 add_reg_note (insn, REG_CFA_REGISTER,
9097 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
9098 RTX_FRAME_RELATED_P (insn) = 1;
9100 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9102 emit_jump_insn (gen_return_indirect_internal (ecx));
9105 emit_jump_insn (gen_return_pop_internal (popc));
9108 emit_jump_insn (gen_return_internal ());
9110 /* Restore the state back to the state from the prologue,
9111 so that it's correct for the next epilogue. */
9112 *ix86_cfa_state = cfa_state_save;
9115 /* Reset from the function's potential modifications. */
9118 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
9119 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
9121 if (pic_offset_table_rtx)
9122 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9124 /* Mach-O doesn't support labels at the end of objects, so if
9125 it looks like we might want one, insert a NOP. */
9127 rtx insn = get_last_insn ();
9130 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9131 insn = PREV_INSN (insn);
9135 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
9136 fputs ("\tnop\n", file);
9142 /* Extract the parts of an RTL expression that is a valid memory address
9143 for an instruction. Return 0 if the structure of the address is
9144 grossly off. Return -1 if the address contains ASHIFT, so it is not
9145 strictly valid, but still used for computing length of lea instruction. */
9148 ix86_decompose_address (rtx addr, struct ix86_address *out)
9150 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9151 rtx base_reg, index_reg;
9152 HOST_WIDE_INT scale = 1;
9153 rtx scale_rtx = NULL_RTX;
9155 enum ix86_address_seg seg = SEG_DEFAULT;
9157 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
9159 else if (GET_CODE (addr) == PLUS)
9169 addends[n++] = XEXP (op, 1);
9172 while (GET_CODE (op) == PLUS);
9177 for (i = n; i >= 0; --i)
9180 switch (GET_CODE (op))
9185 index = XEXP (op, 0);
9186 scale_rtx = XEXP (op, 1);
9190 if (XINT (op, 1) == UNSPEC_TP
9191 && TARGET_TLS_DIRECT_SEG_REFS
9192 && seg == SEG_DEFAULT)
9193 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
9222 else if (GET_CODE (addr) == MULT)
9224 index = XEXP (addr, 0); /* index*scale */
9225 scale_rtx = XEXP (addr, 1);
9227 else if (GET_CODE (addr) == ASHIFT)
9231 /* We're called for lea too, which implements ashift on occasion. */
9232 index = XEXP (addr, 0);
9233 tmp = XEXP (addr, 1);
9234 if (!CONST_INT_P (tmp))
9236 scale = INTVAL (tmp);
9237 if ((unsigned HOST_WIDE_INT) scale > 3)
9243 disp = addr; /* displacement */
9245 /* Extract the integral value of scale. */
9248 if (!CONST_INT_P (scale_rtx))
9250 scale = INTVAL (scale_rtx);
9253 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
9254 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
9256 /* Avoid useless 0 displacement. */
9257 if (disp == const0_rtx && (base || index))
9260 /* Allow arg pointer and stack pointer as index if there is not scaling. */
9261 if (base_reg && index_reg && scale == 1
9262 && (index_reg == arg_pointer_rtx
9263 || index_reg == frame_pointer_rtx
9264 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
9267 tmp = base, base = index, index = tmp;
9268 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
9271 /* Special case: %ebp cannot be encoded as a base without a displacement.
9275 && (base_reg == hard_frame_pointer_rtx
9276 || base_reg == frame_pointer_rtx
9277 || base_reg == arg_pointer_rtx
9278 || (REG_P (base_reg)
9279 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
9280 || REGNO (base_reg) == R13_REG))))
9283 /* Special case: on K6, [%esi] makes the instruction vector decoded.
9284 Avoid this by transforming to [%esi+0].
9285 Reload calls address legitimization without cfun defined, so we need
9286 to test cfun for being non-NULL. */
9287 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9288 && base_reg && !index_reg && !disp
9290 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
9293 /* Special case: encode reg+reg instead of reg*2. */
9294 if (!base && index && scale == 2)
9295 base = index, base_reg = index_reg, scale = 1;
9297 /* Special case: scaling cannot be encoded without base or displacement. */
9298 if (!base && !disp && index && scale != 1)
9310 /* Return cost of the memory address x.
9311 For i386, it is better to use a complex address than let gcc copy
9312 the address into a reg and make a new pseudo. But not if the address
9313 requires two regs - that would mean more pseudos with longer
9316 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
9318 struct ix86_address parts;
9320 int ok = ix86_decompose_address (x, &parts);
9324 if (parts.base && GET_CODE (parts.base) == SUBREG)
9325 parts.base = SUBREG_REG (parts.base);
9326 if (parts.index && GET_CODE (parts.index) == SUBREG)
9327 parts.index = SUBREG_REG (parts.index);
9329 /* Attempt to minimize number of registers in the address. */
9331 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
9333 && (!REG_P (parts.index)
9334 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
9338 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9340 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9341 && parts.base != parts.index)
9344 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
9345 since it's predecode logic can't detect the length of instructions
9346 and it degenerates to vector decoded. Increase cost of such
9347 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
9348 to split such addresses or even refuse such addresses at all.
9350 Following addressing modes are affected:
9355 The first and last case may be avoidable by explicitly coding the zero in
9356 memory address, but I don't have AMD-K6 machine handy to check this
9360 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9361 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9362 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9368 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9369 this is used to form addresses to local data when -fPIC is in
9373 darwin_local_data_pic (rtx disp)
9375 return (GET_CODE (disp) == UNSPEC
9376 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9379 /* Determine if a given RTX is a valid constant. We already know this
9380 satisfies CONSTANT_P. */
9383 legitimate_constant_p (rtx x)
9385 switch (GET_CODE (x))
9390 if (GET_CODE (x) == PLUS)
9392 if (!CONST_INT_P (XEXP (x, 1)))
9397 if (TARGET_MACHO && darwin_local_data_pic (x))
9400 /* Only some unspecs are valid as "constants". */
9401 if (GET_CODE (x) == UNSPEC)
9402 switch (XINT (x, 1))
9407 return TARGET_64BIT;
9410 x = XVECEXP (x, 0, 0);
9411 return (GET_CODE (x) == SYMBOL_REF
9412 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9414 x = XVECEXP (x, 0, 0);
9415 return (GET_CODE (x) == SYMBOL_REF
9416 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9421 /* We must have drilled down to a symbol. */
9422 if (GET_CODE (x) == LABEL_REF)
9424 if (GET_CODE (x) != SYMBOL_REF)
9429 /* TLS symbols are never valid. */
9430 if (SYMBOL_REF_TLS_MODEL (x))
9433 /* DLLIMPORT symbols are never valid. */
9434 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9435 && SYMBOL_REF_DLLIMPORT_P (x))
9440 if (GET_MODE (x) == TImode
9441 && x != CONST0_RTX (TImode)
9447 if (!standard_sse_constant_p (x))
9454 /* Otherwise we handle everything else in the move patterns. */
9458 /* Determine if it's legal to put X into the constant pool. This
9459 is not possible for the address of thread-local symbols, which
9460 is checked above. */
9463 ix86_cannot_force_const_mem (rtx x)
9465 /* We can always put integral constants and vectors in memory. */
9466 switch (GET_CODE (x))
9476 return !legitimate_constant_p (x);
9480 /* Nonzero if the constant value X is a legitimate general operand
9481 when generating PIC code. It is given that flag_pic is on and
9482 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
9485 legitimate_pic_operand_p (rtx x)
9489 switch (GET_CODE (x))
9492 inner = XEXP (x, 0);
9493 if (GET_CODE (inner) == PLUS
9494 && CONST_INT_P (XEXP (inner, 1)))
9495 inner = XEXP (inner, 0);
9497 /* Only some unspecs are valid as "constants". */
9498 if (GET_CODE (inner) == UNSPEC)
9499 switch (XINT (inner, 1))
9504 return TARGET_64BIT;
9506 x = XVECEXP (inner, 0, 0);
9507 return (GET_CODE (x) == SYMBOL_REF
9508 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9509 case UNSPEC_MACHOPIC_OFFSET:
9510 return legitimate_pic_address_disp_p (x);
9518 return legitimate_pic_address_disp_p (x);
9525 /* Determine if a given CONST RTX is a valid memory displacement
9529 legitimate_pic_address_disp_p (rtx disp)
9533 /* In 64bit mode we can allow direct addresses of symbols and labels
9534 when they are not dynamic symbols. */
9537 rtx op0 = disp, op1;
9539 switch (GET_CODE (disp))
9545 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9547 op0 = XEXP (XEXP (disp, 0), 0);
9548 op1 = XEXP (XEXP (disp, 0), 1);
9549 if (!CONST_INT_P (op1)
9550 || INTVAL (op1) >= 16*1024*1024
9551 || INTVAL (op1) < -16*1024*1024)
9553 if (GET_CODE (op0) == LABEL_REF)
9555 if (GET_CODE (op0) != SYMBOL_REF)
9560 /* TLS references should always be enclosed in UNSPEC. */
9561 if (SYMBOL_REF_TLS_MODEL (op0))
9563 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9564 && ix86_cmodel != CM_LARGE_PIC)
9572 if (GET_CODE (disp) != CONST)
9574 disp = XEXP (disp, 0);
9578 /* We are unsafe to allow PLUS expressions. This limit allowed distance
9579 of GOT tables. We should not need these anyway. */
9580 if (GET_CODE (disp) != UNSPEC
9581 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9582 && XINT (disp, 1) != UNSPEC_GOTOFF
9583 && XINT (disp, 1) != UNSPEC_PLTOFF))
9586 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9587 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9593 if (GET_CODE (disp) == PLUS)
9595 if (!CONST_INT_P (XEXP (disp, 1)))
9597 disp = XEXP (disp, 0);
9601 if (TARGET_MACHO && darwin_local_data_pic (disp))
9604 if (GET_CODE (disp) != UNSPEC)
9607 switch (XINT (disp, 1))
9612 /* We need to check for both symbols and labels because VxWorks loads
9613 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9615 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9616 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9618 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9619 While ABI specify also 32bit relocation but we don't produce it in
9620 small PIC model at all. */
9621 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9622 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9624 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9626 case UNSPEC_GOTTPOFF:
9627 case UNSPEC_GOTNTPOFF:
9628 case UNSPEC_INDNTPOFF:
9631 disp = XVECEXP (disp, 0, 0);
9632 return (GET_CODE (disp) == SYMBOL_REF
9633 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9635 disp = XVECEXP (disp, 0, 0);
9636 return (GET_CODE (disp) == SYMBOL_REF
9637 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9639 disp = XVECEXP (disp, 0, 0);
9640 return (GET_CODE (disp) == SYMBOL_REF
9641 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9647 /* Recognizes RTL expressions that are valid memory addresses for an
9648 instruction. The MODE argument is the machine mode for the MEM
9649 expression that wants to use this address.
9651 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9652 convert common non-canonical forms to canonical form so that they will
9656 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9657 rtx addr, bool strict)
9659 struct ix86_address parts;
9660 rtx base, index, disp;
9661 HOST_WIDE_INT scale;
9662 const char *reason = NULL;
9663 rtx reason_rtx = NULL_RTX;
9665 if (ix86_decompose_address (addr, &parts) <= 0)
9667 reason = "decomposition failed";
9672 index = parts.index;
9674 scale = parts.scale;
9676 /* Validate base register.
9678 Don't allow SUBREG's that span more than a word here. It can lead to spill
9679 failures when the base is one word out of a two word structure, which is
9680 represented internally as a DImode int. */
9689 else if (GET_CODE (base) == SUBREG
9690 && REG_P (SUBREG_REG (base))
9691 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9693 reg = SUBREG_REG (base);
9696 reason = "base is not a register";
9700 if (GET_MODE (base) != Pmode)
9702 reason = "base is not in Pmode";
9706 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9707 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9709 reason = "base is not valid";
9714 /* Validate index register.
9716 Don't allow SUBREG's that span more than a word here -- same as above. */
9725 else if (GET_CODE (index) == SUBREG
9726 && REG_P (SUBREG_REG (index))
9727 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9729 reg = SUBREG_REG (index);
9732 reason = "index is not a register";
9736 if (GET_MODE (index) != Pmode)
9738 reason = "index is not in Pmode";
9742 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9743 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9745 reason = "index is not valid";
9750 /* Validate scale factor. */
9753 reason_rtx = GEN_INT (scale);
9756 reason = "scale without index";
9760 if (scale != 2 && scale != 4 && scale != 8)
9762 reason = "scale is not a valid multiplier";
9767 /* Validate displacement. */
9772 if (GET_CODE (disp) == CONST
9773 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9774 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9775 switch (XINT (XEXP (disp, 0), 1))
9777 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9778 used. While ABI specify also 32bit relocations, we don't produce
9779 them at all and use IP relative instead. */
9782 gcc_assert (flag_pic);
9784 goto is_legitimate_pic;
9785 reason = "64bit address unspec";
9788 case UNSPEC_GOTPCREL:
9789 gcc_assert (flag_pic);
9790 goto is_legitimate_pic;
9792 case UNSPEC_GOTTPOFF:
9793 case UNSPEC_GOTNTPOFF:
9794 case UNSPEC_INDNTPOFF:
9800 reason = "invalid address unspec";
9804 else if (SYMBOLIC_CONST (disp)
9808 && MACHOPIC_INDIRECT
9809 && !machopic_operand_p (disp)
9815 if (TARGET_64BIT && (index || base))
9817 /* foo@dtpoff(%rX) is ok. */
9818 if (GET_CODE (disp) != CONST
9819 || GET_CODE (XEXP (disp, 0)) != PLUS
9820 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9821 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9822 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9823 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9825 reason = "non-constant pic memory reference";
9829 else if (! legitimate_pic_address_disp_p (disp))
9831 reason = "displacement is an invalid pic construct";
9835 /* This code used to verify that a symbolic pic displacement
9836 includes the pic_offset_table_rtx register.
9838 While this is good idea, unfortunately these constructs may
9839 be created by "adds using lea" optimization for incorrect
9848 This code is nonsensical, but results in addressing
9849 GOT table with pic_offset_table_rtx base. We can't
9850 just refuse it easily, since it gets matched by
9851 "addsi3" pattern, that later gets split to lea in the
9852 case output register differs from input. While this
9853 can be handled by separate addsi pattern for this case
9854 that never results in lea, this seems to be easier and
9855 correct fix for crash to disable this test. */
9857 else if (GET_CODE (disp) != LABEL_REF
9858 && !CONST_INT_P (disp)
9859 && (GET_CODE (disp) != CONST
9860 || !legitimate_constant_p (disp))
9861 && (GET_CODE (disp) != SYMBOL_REF
9862 || !legitimate_constant_p (disp)))
9864 reason = "displacement is not constant";
9867 else if (TARGET_64BIT
9868 && !x86_64_immediate_operand (disp, VOIDmode))
9870 reason = "displacement is out of range";
9875 /* Everything looks valid. */
9882 /* Determine if a given RTX is a valid constant address. */
9885 constant_address_p (rtx x)
9887 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
9890 /* Return a unique alias set for the GOT. */
9892 static alias_set_type
9893 ix86_GOT_alias_set (void)
9895 static alias_set_type set = -1;
9897 set = new_alias_set ();
9901 /* Return a legitimate reference for ORIG (an address) using the
9902 register REG. If REG is 0, a new pseudo is generated.
9904 There are two types of references that must be handled:
9906 1. Global data references must load the address from the GOT, via
9907 the PIC reg. An insn is emitted to do this load, and the reg is
9910 2. Static data references, constant pool addresses, and code labels
9911 compute the address as an offset from the GOT, whose base is in
9912 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9913 differentiate them from global data objects. The returned
9914 address is the PIC reg + an unspec constant.
9916 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
9917 reg also appears in the address. */
9920 legitimize_pic_address (rtx orig, rtx reg)
9927 if (TARGET_MACHO && !TARGET_64BIT)
9930 reg = gen_reg_rtx (Pmode);
9931 /* Use the generic Mach-O PIC machinery. */
9932 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9936 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9938 else if (TARGET_64BIT
9939 && ix86_cmodel != CM_SMALL_PIC
9940 && gotoff_operand (addr, Pmode))
9943 /* This symbol may be referenced via a displacement from the PIC
9944 base address (@GOTOFF). */
9946 if (reload_in_progress)
9947 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9948 if (GET_CODE (addr) == CONST)
9949 addr = XEXP (addr, 0);
9950 if (GET_CODE (addr) == PLUS)
9952 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9954 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9957 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9958 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9960 tmpreg = gen_reg_rtx (Pmode);
9963 emit_move_insn (tmpreg, new_rtx);
9967 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9968 tmpreg, 1, OPTAB_DIRECT);
9971 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9973 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9975 /* This symbol may be referenced via a displacement from the PIC
9976 base address (@GOTOFF). */
9978 if (reload_in_progress)
9979 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9980 if (GET_CODE (addr) == CONST)
9981 addr = XEXP (addr, 0);
9982 if (GET_CODE (addr) == PLUS)
9984 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9986 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9989 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9990 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9991 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9995 emit_move_insn (reg, new_rtx);
9999 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10000 /* We can't use @GOTOFF for text labels on VxWorks;
10001 see gotoff_operand. */
10002 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10004 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10006 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
10007 return legitimize_dllimport_symbol (addr, true);
10008 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
10009 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
10010 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
10012 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
10013 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
10017 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10019 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
10020 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10021 new_rtx = gen_const_mem (Pmode, new_rtx);
10022 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10025 reg = gen_reg_rtx (Pmode);
10026 /* Use directly gen_movsi, otherwise the address is loaded
10027 into register for CSE. We don't want to CSE this addresses,
10028 instead we CSE addresses from the GOT table, so skip this. */
10029 emit_insn (gen_movsi (reg, new_rtx));
10034 /* This symbol must be referenced via a load from the
10035 Global Offset Table (@GOT). */
10037 if (reload_in_progress)
10038 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10039 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10040 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10042 new_rtx = force_reg (Pmode, new_rtx);
10043 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10044 new_rtx = gen_const_mem (Pmode, new_rtx);
10045 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10048 reg = gen_reg_rtx (Pmode);
10049 emit_move_insn (reg, new_rtx);
10055 if (CONST_INT_P (addr)
10056 && !x86_64_immediate_operand (addr, VOIDmode))
10060 emit_move_insn (reg, addr);
10064 new_rtx = force_reg (Pmode, addr);
10066 else if (GET_CODE (addr) == CONST)
10068 addr = XEXP (addr, 0);
10070 /* We must match stuff we generate before. Assume the only
10071 unspecs that can get here are ours. Not that we could do
10072 anything with them anyway.... */
10073 if (GET_CODE (addr) == UNSPEC
10074 || (GET_CODE (addr) == PLUS
10075 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
10077 gcc_assert (GET_CODE (addr) == PLUS);
10079 if (GET_CODE (addr) == PLUS)
10081 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10083 /* Check first to see if this is a constant offset from a @GOTOFF
10084 symbol reference. */
10085 if (gotoff_operand (op0, Pmode)
10086 && CONST_INT_P (op1))
10090 if (reload_in_progress)
10091 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10092 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10094 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10095 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10096 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10100 emit_move_insn (reg, new_rtx);
10106 if (INTVAL (op1) < -16*1024*1024
10107 || INTVAL (op1) >= 16*1024*1024)
10109 if (!x86_64_immediate_operand (op1, Pmode))
10110 op1 = force_reg (Pmode, op1);
10111 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
10117 base = legitimize_pic_address (XEXP (addr, 0), reg);
10118 new_rtx = legitimize_pic_address (XEXP (addr, 1),
10119 base == reg ? NULL_RTX : reg);
10121 if (CONST_INT_P (new_rtx))
10122 new_rtx = plus_constant (base, INTVAL (new_rtx));
10125 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
10127 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
10128 new_rtx = XEXP (new_rtx, 1);
10130 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
10138 /* Load the thread pointer. If TO_REG is true, force it into a register. */
10141 get_thread_pointer (int to_reg)
10145 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
10149 reg = gen_reg_rtx (Pmode);
10150 insn = gen_rtx_SET (VOIDmode, reg, tp);
10151 insn = emit_insn (insn);
10156 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
10157 false if we expect this to be used for a memory address and true if
10158 we expect to load the address into a register. */
10161 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
10163 rtx dest, base, off, pic, tp;
10168 case TLS_MODEL_GLOBAL_DYNAMIC:
10169 dest = gen_reg_rtx (Pmode);
10170 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10172 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10174 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
10177 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
10178 insns = get_insns ();
10181 RTL_CONST_CALL_P (insns) = 1;
10182 emit_libcall_block (insns, dest, rax, x);
10184 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10185 emit_insn (gen_tls_global_dynamic_64 (dest, x));
10187 emit_insn (gen_tls_global_dynamic_32 (dest, x));
10189 if (TARGET_GNU2_TLS)
10191 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
10193 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10197 case TLS_MODEL_LOCAL_DYNAMIC:
10198 base = gen_reg_rtx (Pmode);
10199 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10201 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10203 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
10206 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
10207 insns = get_insns ();
10210 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
10211 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
10212 RTL_CONST_CALL_P (insns) = 1;
10213 emit_libcall_block (insns, base, rax, note);
10215 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10216 emit_insn (gen_tls_local_dynamic_base_64 (base));
10218 emit_insn (gen_tls_local_dynamic_base_32 (base));
10220 if (TARGET_GNU2_TLS)
10222 rtx x = ix86_tls_module_base ();
10224 set_unique_reg_note (get_last_insn (), REG_EQUIV,
10225 gen_rtx_MINUS (Pmode, x, tp));
10228 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
10229 off = gen_rtx_CONST (Pmode, off);
10231 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
10233 if (TARGET_GNU2_TLS)
10235 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
10237 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10242 case TLS_MODEL_INITIAL_EXEC:
10246 type = UNSPEC_GOTNTPOFF;
10250 if (reload_in_progress)
10251 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10252 pic = pic_offset_table_rtx;
10253 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
10255 else if (!TARGET_ANY_GNU_TLS)
10257 pic = gen_reg_rtx (Pmode);
10258 emit_insn (gen_set_got (pic));
10259 type = UNSPEC_GOTTPOFF;
10264 type = UNSPEC_INDNTPOFF;
10267 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
10268 off = gen_rtx_CONST (Pmode, off);
10270 off = gen_rtx_PLUS (Pmode, pic, off);
10271 off = gen_const_mem (Pmode, off);
10272 set_mem_alias_set (off, ix86_GOT_alias_set ());
10274 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10276 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10277 off = force_reg (Pmode, off);
10278 return gen_rtx_PLUS (Pmode, base, off);
10282 base = get_thread_pointer (true);
10283 dest = gen_reg_rtx (Pmode);
10284 emit_insn (gen_subsi3 (dest, base, off));
10288 case TLS_MODEL_LOCAL_EXEC:
10289 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
10290 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10291 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
10292 off = gen_rtx_CONST (Pmode, off);
10294 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10296 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10297 return gen_rtx_PLUS (Pmode, base, off);
10301 base = get_thread_pointer (true);
10302 dest = gen_reg_rtx (Pmode);
10303 emit_insn (gen_subsi3 (dest, base, off));
10308 gcc_unreachable ();
10314 /* Create or return the unique __imp_DECL dllimport symbol corresponding
10317 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
10318 htab_t dllimport_map;
10321 get_dllimport_decl (tree decl)
10323 struct tree_map *h, in;
10326 const char *prefix;
10327 size_t namelen, prefixlen;
10332 if (!dllimport_map)
10333 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
10335 in.hash = htab_hash_pointer (decl);
10336 in.base.from = decl;
10337 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
10338 h = (struct tree_map *) *loc;
10342 *loc = h = GGC_NEW (struct tree_map);
10344 h->base.from = decl;
10345 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
10346 VAR_DECL, NULL, ptr_type_node);
10347 DECL_ARTIFICIAL (to) = 1;
10348 DECL_IGNORED_P (to) = 1;
10349 DECL_EXTERNAL (to) = 1;
10350 TREE_READONLY (to) = 1;
10352 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
10353 name = targetm.strip_name_encoding (name);
10354 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
10355 ? "*__imp_" : "*__imp__";
10356 namelen = strlen (name);
10357 prefixlen = strlen (prefix);
10358 imp_name = (char *) alloca (namelen + prefixlen + 1);
10359 memcpy (imp_name, prefix, prefixlen);
10360 memcpy (imp_name + prefixlen, name, namelen + 1);
10362 name = ggc_alloc_string (imp_name, namelen + prefixlen);
10363 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10364 SET_SYMBOL_REF_DECL (rtl, to);
10365 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
10367 rtl = gen_const_mem (Pmode, rtl);
10368 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10370 SET_DECL_RTL (to, rtl);
10371 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
10376 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
10377 true if we require the result be a register. */
10380 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
10385 gcc_assert (SYMBOL_REF_DECL (symbol));
10386 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10388 x = DECL_RTL (imp_decl);
10390 x = force_reg (Pmode, x);
10394 /* Try machine-dependent ways of modifying an illegitimate address
10395 to be legitimate. If we find one, return the new, valid address.
10396 This macro is used in only one place: `memory_address' in explow.c.
10398 OLDX is the address as it was before break_out_memory_refs was called.
10399 In some cases it is useful to look at this to decide what needs to be done.
10401 It is always safe for this macro to do nothing. It exists to recognize
10402 opportunities to optimize the output.
10404 For the 80386, we handle X+REG by loading X into a register R and
10405 using R+REG. R will go in a general reg and indexing will be used.
10406 However, if REG is a broken-out memory address or multiplication,
10407 nothing needs to be done because REG can certainly go in a general reg.
10409 When -fpic is used, special handling is needed for symbolic references.
10410 See comments by legitimize_pic_address in i386.c for details. */
10413 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10414 enum machine_mode mode)
10419 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10421 return legitimize_tls_address (x, (enum tls_model) log, false);
10422 if (GET_CODE (x) == CONST
10423 && GET_CODE (XEXP (x, 0)) == PLUS
10424 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10425 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10427 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10428 (enum tls_model) log, false);
10429 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10432 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10434 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10435 return legitimize_dllimport_symbol (x, true);
10436 if (GET_CODE (x) == CONST
10437 && GET_CODE (XEXP (x, 0)) == PLUS
10438 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10439 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10441 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10442 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10446 if (flag_pic && SYMBOLIC_CONST (x))
10447 return legitimize_pic_address (x, 0);
10449 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10450 if (GET_CODE (x) == ASHIFT
10451 && CONST_INT_P (XEXP (x, 1))
10452 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10455 log = INTVAL (XEXP (x, 1));
10456 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10457 GEN_INT (1 << log));
10460 if (GET_CODE (x) == PLUS)
10462 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10464 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10465 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10466 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10469 log = INTVAL (XEXP (XEXP (x, 0), 1));
10470 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10471 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10472 GEN_INT (1 << log));
10475 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10476 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10477 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10480 log = INTVAL (XEXP (XEXP (x, 1), 1));
10481 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10482 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10483 GEN_INT (1 << log));
10486 /* Put multiply first if it isn't already. */
10487 if (GET_CODE (XEXP (x, 1)) == MULT)
10489 rtx tmp = XEXP (x, 0);
10490 XEXP (x, 0) = XEXP (x, 1);
10495 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10496 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10497 created by virtual register instantiation, register elimination, and
10498 similar optimizations. */
10499 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10502 x = gen_rtx_PLUS (Pmode,
10503 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10504 XEXP (XEXP (x, 1), 0)),
10505 XEXP (XEXP (x, 1), 1));
10509 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10510 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10511 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10512 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10513 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10514 && CONSTANT_P (XEXP (x, 1)))
10517 rtx other = NULL_RTX;
10519 if (CONST_INT_P (XEXP (x, 1)))
10521 constant = XEXP (x, 1);
10522 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10524 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10526 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10527 other = XEXP (x, 1);
10535 x = gen_rtx_PLUS (Pmode,
10536 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10537 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10538 plus_constant (other, INTVAL (constant)));
10542 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10545 if (GET_CODE (XEXP (x, 0)) == MULT)
10548 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10551 if (GET_CODE (XEXP (x, 1)) == MULT)
10554 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10558 && REG_P (XEXP (x, 1))
10559 && REG_P (XEXP (x, 0)))
10562 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10565 x = legitimize_pic_address (x, 0);
10568 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10571 if (REG_P (XEXP (x, 0)))
10573 rtx temp = gen_reg_rtx (Pmode);
10574 rtx val = force_operand (XEXP (x, 1), temp);
10576 emit_move_insn (temp, val);
10578 XEXP (x, 1) = temp;
10582 else if (REG_P (XEXP (x, 1)))
10584 rtx temp = gen_reg_rtx (Pmode);
10585 rtx val = force_operand (XEXP (x, 0), temp);
10587 emit_move_insn (temp, val);
10589 XEXP (x, 0) = temp;
10597 /* Print an integer constant expression in assembler syntax. Addition
10598 and subtraction are the only arithmetic that may appear in these
10599 expressions. FILE is the stdio stream to write to, X is the rtx, and
10600 CODE is the operand print code from the output string. */
/* Print the constant expression X (part of a PIC address) in assembler
   syntax to FILE.  CODE is the operand print code; 'P' requests an @PLT
   suffix on non-local symbols.  Only addition and subtraction are
   expected here.  NOTE(review): interior lines of this function are
   elided in this listing; comments describe the visible fragments.  */
10603 output_pic_addr_const (FILE *file, rtx x, int code)
10607   switch (GET_CODE (x))
10610       gcc_assert (flag_pic);
10615       if (! TARGET_MACHO || TARGET_64BIT)
10616 	output_addr_const (file, x);
10619 	  const char *name = XSTR (x, 0);
10621 	  /* Mark the decl as referenced so that cgraph will
10622 	     output the function. */
10623 	  if (SYMBOL_REF_DECL (x))
10624 	    mark_decl_referenced (SYMBOL_REF_DECL (x));
10627 	  if (MACHOPIC_INDIRECT
10628 	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10629 	    name = machopic_indirection_name (x, /*stub_p=*/true);
10631 	  assemble_name (file, name);
	  /* 64-bit MS ABI and Mach-O never use @PLT.  */
10633       if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10634 	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10635 	fputs ("@PLT", file);
10642       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10643       assemble_name (asm_out_file, buf);
10647       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10651       /* This used to output parentheses around the expression,
10652 	 but that does not work on the 386 (either ATT or BSD assembler). */
10653       output_pic_addr_const (file, XEXP (x, 0), code);
10657       if (GET_MODE (x) == VOIDmode)
10659 	  /* We can use %d if the number is <32 bits and positive. */
10660 	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10661 	    fprintf (file, "0x%lx%08lx",
10662 		     (unsigned long) CONST_DOUBLE_HIGH (x),
10663 		     (unsigned long) CONST_DOUBLE_LOW (x));
10665 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10668 	/* We can't handle floating point constants;
10669 	   PRINT_OPERAND must handle them. */
10670 	output_operand_lossage ("floating constant misused");
10674       /* Some assemblers need integer constants to appear first. */
10675       if (CONST_INT_P (XEXP (x, 0)))
10677 	  output_pic_addr_const (file, XEXP (x, 0), code);
10679 	  output_pic_addr_const (file, XEXP (x, 1), code);
10683 	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
10684 	  output_pic_addr_const (file, XEXP (x, 1), code);
10686 	  output_pic_addr_const (file, XEXP (x, 0), code);
	  /* MINUS: Intel dialect wraps the subtrahend in parens, AT&T in
	     brackets.  */
10692       putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10693       output_pic_addr_const (file, XEXP (x, 0), code);
10695       output_pic_addr_const (file, XEXP (x, 1), code);
10697       putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
	  /* UNSPEC: print the wrapped operand, then the relocation
	     suffix selected by the unspec number.  */
10701       gcc_assert (XVECLEN (x, 0) == 1);
10702       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10703       switch (XINT (x, 1))
10706 	  fputs ("@GOT", file);
10708 	case UNSPEC_GOTOFF:
10709 	  fputs ("@GOTOFF", file);
10711 	case UNSPEC_PLTOFF:
10712 	  fputs ("@PLTOFF", file);
10714 	case UNSPEC_GOTPCREL:
10715 	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10716 		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10718 	case UNSPEC_GOTTPOFF:
10719 	  /* FIXME: This might be @TPOFF in Sun ld too. */
10720 	  fputs ("@GOTTPOFF", file);
10723 	  fputs ("@TPOFF", file);
10725 	case UNSPEC_NTPOFF:
10727 	    fputs ("@TPOFF", file);
10729 	    fputs ("@NTPOFF", file);
10731 	case UNSPEC_DTPOFF:
10732 	  fputs ("@DTPOFF", file);
10734 	case UNSPEC_GOTNTPOFF:
10736 	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10737 		   "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10739 	    fputs ("@GOTNTPOFF", file);
10741 	case UNSPEC_INDNTPOFF:
10742 	  fputs ("@INDNTPOFF", file);
10745 	case UNSPEC_MACHOPIC_OFFSET:
10747 	  machopic_output_function_base_name (file);
10751 	  output_operand_lossage ("invalid UNSPEC as operand");
10757       output_operand_lossage ("invalid expression as operand");
10761 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10762    We need to emit DTP-relative relocations. */
/* Emit X as a SIZE-byte DTP-relative value: the address expression
   followed by "@DTPOFF".  NOTE(review): the switch on SIZE is elided
   here; the ", 0" presumably pads a larger size with a zero word —
   confirm against the full source.  */
10764 static void ATTRIBUTE_UNUSED
10765 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10767   fputs (ASM_LONG, file);
10768   output_addr_const (file, x);
10769   fputs ("@DTPOFF", file);
10775       fputs (", 0", file);
10778       gcc_unreachable ();
10782 /* Return true if X is a representation of the PIC register.  This copes
10783    with calls from ix86_find_base_term, where the register might have
10784    been replaced by a cselib value. */
10787 ix86_pic_register_p (rtx x)
	/* A cselib VALUE stands for the PIC register if it is equivalent
	   to pic_offset_table_rtx under cselib's equality.  */
10789   if (GET_CODE (x) == VALUE)
10790     return (pic_offset_table_rtx
10791 	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
	/* Otherwise it must literally be the PIC hard/pseudo register.  */
10793     return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10796 /* In the name of slightly smaller debug output, and to cater to
10797    general assembler lossage, recognize PIC+GOTOFF and turn it back
10798    into a direct symbol reference.
10800    On Darwin, this is necessary to avoid a crash, because Darwin
10801    has a different PIC label for each routine but the DWARF debugging
10802    information is not associated with any particular routine, so it's
10803    necessary to remove references to the PIC label from RTL stored by
10804    the DWARF output code. */
/* Returns the delegitimized address, or (per the elided fallthrough
   paths, not visible here) the original ORIG_X when the pattern is not
   recognized.  */
10807 ix86_delegitimize_address (rtx orig_x)
10810   /* reg_addend is NULL or a multiple of some register. */
10811   rtx reg_addend = NULL_RTX;
10812   /* const_addend is NULL or a const_int. */
10813   rtx const_addend = NULL_RTX;
10814   /* This is the result, or NULL. */
10815   rtx result = NULL_RTX;
	/* 64-bit case: look for a (const (unspec [...] GOTPCREL)) inside a
	   MEM and return the wrapped symbol.  */
10822       if (GET_CODE (x) != CONST
10823 	  || GET_CODE (XEXP (x, 0)) != UNSPEC
10824 	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10825 	  || !MEM_P (orig_x))
10827       return XVECEXP (XEXP (x, 0), 0, 0);
	/* 32-bit case: expect (plus <pic-or-index> (const ...)).  */
10830   if (GET_CODE (x) != PLUS
10831       || GET_CODE (XEXP (x, 1)) != CONST)
10834   if (ix86_pic_register_p (XEXP (x, 0)))
10835     /* %ebx + GOT/GOTOFF */
10837   else if (GET_CODE (XEXP (x, 0)) == PLUS)
10839       /* %ebx + %reg * scale + GOT/GOTOFF */
10840       reg_addend = XEXP (x, 0);
10841       if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10842 	reg_addend = XEXP (reg_addend, 1);
10843       else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10844 	reg_addend = XEXP (reg_addend, 0);
10847       if (!REG_P (reg_addend)
10848 	  && GET_CODE (reg_addend) != MULT
10849 	  && GET_CODE (reg_addend) != ASHIFT)
	/* Strip an optional integer addend off the CONST.  */
10855   x = XEXP (XEXP (x, 1), 0);
10856   if (GET_CODE (x) == PLUS
10857       && CONST_INT_P (XEXP (x, 1)))
10859       const_addend = XEXP (x, 1);
	/* GOT references must come from a MEM; GOTOFF must not.  */
10863   if (GET_CODE (x) == UNSPEC
10864       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10865 	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10866     result = XVECEXP (x, 0, 0);
10868   if (TARGET_MACHO && darwin_local_data_pic (x)
10869       && !MEM_P (orig_x))
10870     result = XVECEXP (x, 0, 0);
	/* Re-attach the constant and register addends that were peeled
	   off above.  */
10876     result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10878     result = gen_rtx_PLUS (Pmode, reg_addend, result);
10882 /* If X is a machine specific address (i.e. a symbol or label being
10883    referenced as a displacement from the GOT implemented using an
10884    UNSPEC), then return the base term.  Otherwise return X. */
10887 ix86_find_base_term (rtx x)
	/* Only the 64-bit GOTPCREL form is handled directly; everything
	   else (per the elided code) falls through below.  */
10893       if (GET_CODE (x) != CONST)
10895       term = XEXP (x, 0);
10896       if (GET_CODE (term) == PLUS
10897 	  && (CONST_INT_P (XEXP (term, 1))
10898 	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10899 	term = XEXP (term, 0);
10900       if (GET_CODE (term) != UNSPEC
10901 	  || XINT (term, 1) != UNSPEC_GOTPCREL)
10904       return XVECEXP (term, 0, 0);
	/* Fallback: reuse the delegitimizer for the general case.  */
10907   return ix86_delegitimize_address (x);
/* Write the x86 condition-code suffix for CODE under CC mode MODE to
   FILE.  REVERSE inverts the condition; FP selects the fcmov-style
   spellings (e.g. "nbe" instead of "a") to work around assembler
   lossage.  NOTE(review): most case labels are elided in this listing.  */
10911 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10912 		    int fp, FILE *file)
10914   const char *suffix;
10916   if (mode == CCFPmode || mode == CCFPUmode)
	/* FP compares are first mapped onto the equivalent integer
	   condition.  */
10918       code = ix86_fp_compare_code_to_integer (code);
10922     code = reverse_condition (code);
10973       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10977       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10978 	 Those same assemblers have the same but opposite lossage on cmov. */
10979       gcc_assert (mode == CCmode);
10980       suffix = fp ? "nbe" : "a";
10981     else if (mode == CCCmode)
10984       gcc_unreachable ();
11000 	  gcc_unreachable ();
11004       gcc_assert (mode == CCmode || mode == CCCmode);
11021 	  gcc_unreachable ();
11025       /* ??? As above. */
11026       gcc_assert (mode == CCmode || mode == CCCmode);
11027       suffix = fp ? "nb" : "ae";
11030       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
11034       /* ??? As above. */
11035       if (mode == CCmode)
11037       else if (mode == CCCmode)
11038 	suffix = fp ? "nb" : "ae";
11040 	gcc_unreachable ();
11043       suffix = fp ? "u" : "p";
11046       suffix = fp ? "nu" : "np";
11049       gcc_unreachable ();
11051   fputs (suffix, file);
11054 /* Print the name of register X to FILE based on its machine mode and number.
11055    If CODE is 'w', pretend the mode is HImode.
11056    If CODE is 'b', pretend the mode is QImode.
11057    If CODE is 'k', pretend the mode is SImode.
11058    If CODE is 'q', pretend the mode is DImode.
11059    If CODE is 'x', pretend the mode is V4SFmode.
11060    If CODE is 't', pretend the mode is V8SFmode.
11061    If CODE is 'h', pretend the reg is the 'high' byte register.
11062    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
11063    If CODE is 'd', duplicate the operand for AVX instruction.
*/
11067 print_reg (rtx x, int code, FILE *file)
11070   bool duplicated = code == 'd' && TARGET_AVX;
	/* Internal-only registers must never reach the assembler output.  */
11072   gcc_assert (x == pc_rtx
11073 	      || (REGNO (x) != ARG_POINTER_REGNUM
11074 		  && REGNO (x) != FRAME_POINTER_REGNUM
11075 		  && REGNO (x) != FLAGS_REG
11076 		  && REGNO (x) != FPSR_REG
11077 		  && REGNO (x) != FPCR_REG));
11079   if (ASSEMBLER_DIALECT == ASM_ATT)
11084       gcc_assert (TARGET_64BIT);
11085       fputs ("rip", file);
	/* Map the print code (or the operand's own mode size) to a byte
	   width used to pick the register name table below.  */
11089   if (code == 'w' || MMX_REG_P (x))
11091   else if (code == 'b')
11093   else if (code == 'k')
11095   else if (code == 'q')
11097   else if (code == 'y')
11099   else if (code == 'h')
11101   else if (code == 'x')
11103   else if (code == 't')
11106     code = GET_MODE_SIZE (GET_MODE (x));
11108   /* Irritatingly, AMD extended registers use different naming convention
11109      from the normal registers. */
11110   if (REX_INT_REG_P (x))
11112       gcc_assert (TARGET_64BIT);
11116 	  error ("extended registers have no high halves");
11119 	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
11122 	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
11125 	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
11128 	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
11131 	  error ("unsupported operand size for extended register");
11141       if (STACK_TOP_P (x))
	/* Non-FP registers get an 'r' (64-bit) or 'e' (32-bit) prefix.  */
11150       if (! ANY_FP_REG_P (x))
11151 	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
11156       reg = hi_reg_name[REGNO (x)];
11159       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
11161       reg = qi_reg_name[REGNO (x)];
11164       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
11166       reg = qi_high_reg_name[REGNO (x)];
11171 	  gcc_assert (!duplicated);
11173 	  fputs (hi_reg_name[REGNO (x)] + 1, file);
11178       gcc_unreachable ();
	/* AVX 'd' code: emit the register twice, comma separated.  */
11184       if (ASSEMBLER_DIALECT == ASM_ATT)
11185 	fprintf (file, ", %%%s", reg);
11187 	fprintf (file, ", %s", reg);
11191 /* Locate some local-dynamic symbol still in use by this function
11192    so that we can print its name in some tls_local_dynamic_base
*/
/* for_each_rtx callback: records the first local-dynamic TLS
   SYMBOL_REF's name in cfun->machine->some_ld_name.  */
11196 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11200   if (GET_CODE (x) == SYMBOL_REF
11201       && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11203       cfun->machine->some_ld_name = XSTR (x, 0);
/* Return the cached local-dynamic TLS symbol name for the current
   function, scanning the insn stream on first use.  Asserts (via
   gcc_unreachable) that at least one such symbol exists, since callers
   only ask when one is known to be referenced.  */
11210 static const char *
11211 get_some_local_dynamic_name (void)
11215   if (cfun->machine->some_ld_name)
11216     return cfun->machine->some_ld_name;
11218   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11220 	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11221       return cfun->machine->some_ld_name;
11223   gcc_unreachable ();
11226 /* Meaning of CODE:
11227    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
11228    C -- print opcode suffix for set/cmov insn.
11229    c -- like C, but print reversed condition
11230    E,e -- likewise, but for compare-and-branch fused insn.
11231    F,f -- likewise, but for floating-point.
11232    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
11234    R -- print the prefix for register names.
11235    z -- print the opcode suffix for the size of the current operand.
11236    Z -- likewise, with special suffixes for x87 instructions.
11237    * -- print a star (in certain assembler syntax)
11238    A -- print an absolute memory reference.
11239    w -- print the operand as if it's a "word" (HImode) even if it isn't.
11240    s -- print a shift double count, followed by the assemblers argument
11242    b -- print the QImode name of the register for the indicated operand.
11243    %b0 would print %al if operands[0] is reg 0.
11244    w -- likewise, print the HImode name of the register.
11245    k -- likewise, print the SImode name of the register.
11246    q -- likewise, print the DImode name of the register.
11247    x -- likewise, print the V4SFmode name of the register.
11248    t -- likewise, print the V8SFmode name of the register.
11249    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
11250    y -- print "st(0)" instead of "st" as a register.
11251    d -- print duplicated register operand for AVX instruction.
11252    D -- print condition for SSE cmp instruction.
11253    P -- if PIC, print an @PLT suffix.
11254    X -- don't print any sort of PIC '@' suffix for a symbol.
11255    & -- print some in-use local-dynamic symbol name.
11256    H -- print a memory address offset by 8; used for sse high-parts
11257    Y -- print condition for SSE5 com* instruction.
11258    + -- print a branch hint as 'cs' or 'ds' prefix
11259    ; -- print a semicolon (after prefixes due to bug in older gas).
*/
/* Main operand-printing entry point (PRINT_OPERAND target hook body).
   NOTE(review): the dispatch structure (case labels, braces) is heavily
   elided in this listing; comments annotate visible fragments only.  */
11263 print_operand (FILE *file, rtx x, int code)
11270 	  if (ASSEMBLER_DIALECT == ASM_ATT)
	  /* '&': emit an in-use local-dynamic TLS symbol name.  */
11275 	  assemble_name (file, get_some_local_dynamic_name ());
11279 	  switch (ASSEMBLER_DIALECT)
11286 	      /* Intel syntax. For absolute addresses, registers should not
11287 		 be surrounded by braces.  */
11291 		  PRINT_OPERAND (file, x, 0);
11298 	      gcc_unreachable ();
11301 	  PRINT_OPERAND (file, x, 0);
	  /* Explicit size-suffix codes are AT&T-only.  */
11306 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11311 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11316 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11321 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11326 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11331 	  if (ASSEMBLER_DIALECT == ASM_ATT)
	  /* 'z': suffix derived from the operand's own mode size.  */
11336 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11338 	      /* Opcodes don't get size suffixes if using Intel opcodes.  */
11339 	      if (ASSEMBLER_DIALECT == ASM_INTEL)
11342 	      switch (GET_MODE_SIZE (GET_MODE (x)))
11361 		  output_operand_lossage
11362 		    ("invalid operand size for operand code '%c'", code);
11367 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11369 	      (0, "non-integer operand used with operand code '%c'", code);
11373 	  /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
11374 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
11377 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11379 	      switch (GET_MODE_SIZE (GET_MODE (x)))
11382 #ifdef HAVE_AS_IX86_FILDS
11392 #ifdef HAVE_AS_IX86_FILDQ
11395 		  fputs ("ll", file);
11403 	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11405 	      /* 387 opcodes don't get size suffixes
11406 		 if the operands are registers.  */
11407 	      if (STACK_REG_P (x))
11410 	      switch (GET_MODE_SIZE (GET_MODE (x)))
11431 	      output_operand_lossage
11432 		("invalid operand type used with operand code '%c'", code);
11436 	  output_operand_lossage
11437 	    ("invalid operand size for operand code '%c'", code);
	  /* 's': shift-double count followed by comma, unless the
	     assembler omits the count.  */
11454 	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11456 	      PRINT_OPERAND (file, x, 0);
11457 	      fputs (", ", file);
11462 	  /* Little bit of braindamage here.  The SSE compare instructions
11463 	     does use completely different names for the comparisons that the
11464 	     fp conditional moves.  */
11467 	    switch (GET_CODE (x))
11470 		fputs ("eq", file);
11473 		fputs ("eq_us", file);
11476 		fputs ("lt", file);
11479 		fputs ("nge", file);
11482 		fputs ("le", file);
11485 		fputs ("ngt", file);
11488 		fputs ("unord", file);
11491 		fputs ("neq", file);
11494 		fputs ("neq_oq", file);
11497 		fputs ("ge", file);
11500 		fputs ("nlt", file);
11503 		fputs ("gt", file);
11506 		fputs ("nle", file);
11509 		fputs ("ord", file);
11512 		output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11518 	    switch (GET_CODE (x))
11522 		fputs ("eq", file);
11526 		fputs ("lt", file);
11530 		fputs ("le", file);
11533 		fputs ("unord", file);
11537 		fputs ("neq", file);
11541 		fputs ("nlt", file);
11545 		fputs ("nle", file);
11548 		fputs ("ord", file);
11551 		output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
	  /* 'O': Sun assembler cmov size letter.  */
11557 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11558 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11560 	      switch (GET_MODE (x))
11562 		case HImode: putc ('w', file); break;
11564 		case SFmode: putc ('l', file); break;
11566 		case DFmode: putc ('q', file); break;
11567 		default: gcc_unreachable ();
11574 	  if (!COMPARISON_P (x))
11576 	      output_operand_lossage ("operand is neither a constant nor a "
11577 				      "condition code, invalid operand code "
11581 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11584 	  if (!COMPARISON_P (x))
11586 	      output_operand_lossage ("operand is neither a constant nor a "
11587 				      "condition code, invalid operand code "
11591 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11592 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11595 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11598 	  /* Like above, but reverse condition */
11600 	  /* Check to see if argument to %c is really a constant
11601 	     and not a condition code which needs to be reversed.  */
11602 	  if (!COMPARISON_P (x))
11604 	      output_operand_lossage ("operand is neither a constant nor a "
11605 				      "condition code, invalid operand "
11609 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11612 	  if (!COMPARISON_P (x))
11614 	      output_operand_lossage ("operand is neither a constant nor a "
11615 				      "condition code, invalid operand "
11619 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11620 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11623 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
11627 	  put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11631 	  put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
11635 	  /* It doesn't actually matter what mode we use here, as we're
11636 	     only going to use this for printing.  */
11637 	  x = adjust_address_nv (x, DImode, 8);
	  /* '+': branch hint prefixes, only when hints are enabled.  */
11645 	      || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11648 	  x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11651 	      int pred_val = INTVAL (XEXP (x, 0));
11653 	      if (pred_val < REG_BR_PROB_BASE * 45 / 100
11654 		  || pred_val > REG_BR_PROB_BASE * 55 / 100)
11656 		  int taken = pred_val > REG_BR_PROB_BASE / 2;
11657 		  int cputaken = final_forward_branch_p (current_output_insn) == 0;
11659 		  /* Emit hints only in the case default branch prediction
11660 		     heuristics would fail.  */
11661 		  if (taken != cputaken)
11663 		      /* We use 3e (DS) prefix for taken branches and
11664 			 2e (CS) prefix for not taken branches.  */
11666 			fputs ("ds ; ", file);
11668 			fputs ("cs ; ", file);
11676 	  switch (GET_CODE (x))
11679 	      fputs ("neq", file);
11682 	      fputs ("eq", file);
11686 	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11690 	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11694 	      fputs ("le", file);
11698 	      fputs ("lt", file);
11701 	      fputs ("unord", file);
11704 	      fputs ("ord", file);
11707 	      fputs ("ueq", file);
11710 	      fputs ("nlt", file);
11713 	      fputs ("nle", file);
11716 	      fputs ("ule", file);
11719 	      fputs ("ult", file);
11722 	      fputs ("une", file);
11725 	      output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11732 	  fputs (" ; ", file);
11739 	  output_operand_lossage ("invalid operand code '%c'", code);
	/* After code handling: plain operands.  */
11744     print_reg (x, code, file);
11746   else if (MEM_P (x))
11748       /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
11749       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11750 	  && GET_MODE (x) != BLKmode)
11753 	  switch (GET_MODE_SIZE (GET_MODE (x)))
11755 	    case 1: size = "BYTE"; break;
11756 	    case 2: size = "WORD"; break;
11757 	    case 4: size = "DWORD"; break;
11758 	    case 8: size = "QWORD"; break;
11759 	    case 12: size = "XWORD"; break;
11761 	      if (GET_MODE (x) == XFmode)
11767 	      gcc_unreachable ();
11770 	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
11773 	  else if (code == 'w')
11775 	  else if (code == 'k')
11778 	  fputs (size, file);
11779 	  fputs (" PTR ", file);
11783       /* Avoid (%rip) for call operands.  */
11784       if (CONSTANT_ADDRESS_P (x) && code == 'P'
11785 	  && !CONST_INT_P (x))
11786 	output_addr_const (file, x);
11787       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11788 	output_operand_lossage ("invalid constraints for operand");
11790 	output_address (x);
11793   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
	/* SFmode immediate: print the raw 32-bit image in hex.  */
11798       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11799       REAL_VALUE_TO_TARGET_SINGLE (r, l);
11801       if (ASSEMBLER_DIALECT == ASM_ATT)
11803       fprintf (file, "0x%08lx", (long unsigned int) l);
11806   /* These float cases don't actually occur as immediate operands.  */
11807   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11811       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11812       fprintf (file, "%s", dstr);
11815   else if (GET_CODE (x) == CONST_DOUBLE
11816 	   && GET_MODE (x) == XFmode)
11820       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11821       fprintf (file, "%s", dstr);
11826       /* We have patterns that allow zero sets of memory, for instance.
11827 	 In 64-bit mode, we should probably support all 8-byte vectors,
11828 	 since we can in fact encode that into an immediate.  */
11829       if (GET_CODE (x) == CONST_VECTOR)
11831 	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11837       if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11839 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11842       else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11843 	       || GET_CODE (x) == LABEL_REF)
11845 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11848 	    fputs ("OFFSET FLAT:", file);
11851       if (CONST_INT_P (x))
11852 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11854 	output_pic_addr_const (file, x, code);
11856 	output_addr_const (file, x);
11860 /* Print a memory operand whose address is ADDR.  */
11863 print_operand_address (FILE *file, rtx addr)
11865   struct ix86_address parts;
11866   rtx base, index, disp;
11868   int ok = ix86_decompose_address (addr, &parts);
11873   index = parts.index;
11875   scale = parts.scale;
	/* Segment override prefix, if any.  */
11883       if (ASSEMBLER_DIALECT == ASM_ATT)
11885       fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11888       gcc_unreachable ();
11891   /* Use one byte shorter RIP relative addressing for 64bit mode.  */
11892   if (TARGET_64BIT && !base && !index)
	/* Strip a constant addend to look at the underlying symbol.  */
11896       if (GET_CODE (disp) == CONST
11897 	  && GET_CODE (XEXP (disp, 0)) == PLUS
11898 	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11899 	symbol = XEXP (XEXP (disp, 0), 0);
11901       if (GET_CODE (symbol) == LABEL_REF
11902 	  || (GET_CODE (symbol) == SYMBOL_REF
11903 	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11906   if (!base && !index)
11908       /* Displacement only requires special attention.  */
11910       if (CONST_INT_P (disp))
11912 	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11913 	    fputs ("ds:", file);
11914 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11917 	output_pic_addr_const (file, disp, 0);
11919 	output_addr_const (file, disp);
	/* AT&T syntax: disp(base,index,scale).  */
11923       if (ASSEMBLER_DIALECT == ASM_ATT)
11928 	      output_pic_addr_const (file, disp, 0);
11929 	    else if (GET_CODE (disp) == LABEL_REF)
11930 	      output_asm_label (disp);
11932 	      output_addr_const (file, disp);
11937 	    print_reg (base, 0, file);
11941 	      print_reg (index, 0, file);
11943 		fprintf (file, ",%d", scale);
	  /* Intel syntax: [base+index*scale+disp].  */
11949 	  rtx offset = NULL_RTX;
11953 	      /* Pull out the offset of a symbol; print any symbol itself.  */
11954 	      if (GET_CODE (disp) == CONST
11955 		  && GET_CODE (XEXP (disp, 0)) == PLUS
11956 		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11958 		  offset = XEXP (XEXP (disp, 0), 1);
11959 		  disp = gen_rtx_CONST (VOIDmode,
11960 					XEXP (XEXP (disp, 0), 0));
11964 		output_pic_addr_const (file, disp, 0);
11965 	      else if (GET_CODE (disp) == LABEL_REF)
11966 		output_asm_label (disp);
11967 	      else if (CONST_INT_P (disp))
11970 		output_addr_const (file, disp);
11976 	      print_reg (base, 0, file);
11979 		  if (INTVAL (offset) >= 0)
11981 		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11985 		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11992 	      print_reg (index, 0, file);
11994 		fprintf (file, "*%d", scale);
/* TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA hook body: print TLS-related
   UNSPEC address constants with the proper relocation suffix.
   Non-UNSPEC rtx (and, per elided code, unknown unspec numbers) are
   rejected.  */
12002 output_addr_const_extra (FILE *file, rtx x)
12006   if (GET_CODE (x) != UNSPEC)
12009   op = XVECEXP (x, 0, 0);
12010   switch (XINT (x, 1))
12012     case UNSPEC_GOTTPOFF:
12013       output_addr_const (file, op);
12014       /* FIXME: This might be @TPOFF in Sun ld.  */
12015       fputs ("@GOTTPOFF", file);
12018       output_addr_const (file, op);
12019       fputs ("@TPOFF", file);
12021     case UNSPEC_NTPOFF:
12022       output_addr_const (file, op);
12024 	fputs ("@TPOFF", file);
12026 	fputs ("@NTPOFF", file);
12028     case UNSPEC_DTPOFF:
12029       output_addr_const (file, op);
12030       fputs ("@DTPOFF", file);
12032     case UNSPEC_GOTNTPOFF:
12033       output_addr_const (file, op);
12035 	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12036 	       "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
12038 	fputs ("@GOTNTPOFF", file);
12040     case UNSPEC_INDNTPOFF:
12041       output_addr_const (file, op);
12042       fputs ("@INDNTPOFF", file);
12045     case UNSPEC_MACHOPIC_OFFSET:
12046       output_addr_const (file, op);
12048       machopic_output_function_base_name (file);
12059 /* Split one or more DImode RTL references into pairs of SImode
12060    references.  The RTL can be REG, offsettable MEM, integer constant, or
12061    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
12062    split and "num" is its length.  lo_half and hi_half are output arrays
12063    that parallel "operands".  */
12066 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
12070       rtx op = operands[num];
12072       /* simplify_subreg refuse to split volatile memory addresses,
12073 	 but we still have to handle it.  */
	  /* MEM case: low word at offset 0, high word at offset 4.  */
12076 	  lo_half[num] = adjust_address (op, SImode, 0);
12077 	  hi_half[num] = adjust_address (op, SImode, 4);
	  /* Non-MEM case: use subregs; VOIDmode constants are treated
	     as DImode.  */
12081 	  lo_half[num] = simplify_gen_subreg (SImode, op,
12082 					      GET_MODE (op) == VOIDmode
12083 					      ? DImode : GET_MODE (op), 0);
12084 	  hi_half[num] = simplify_gen_subreg (SImode, op,
12085 					      GET_MODE (op) == VOIDmode
12086 					      ? DImode : GET_MODE (op), 4);
12090 /* Split one or more TImode RTL references into pairs of DImode
12091    references.  The RTL can be REG, offsettable MEM, integer constant, or
12092    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
12093    split and "num" is its length.  lo_half and hi_half are output arrays
12094    that parallel "operands".  */
12097 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
12101       rtx op = operands[num];
12103       /* simplify_subreg refuse to split volatile memory addresses, but we
12104 	 still have to handle it.  */
	  /* MEM case: halves at byte offsets 0 and 8.  */
12107 	  lo_half[num] = adjust_address (op, DImode, 0);
12108 	  hi_half[num] = adjust_address (op, DImode, 8);
12112 	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
12113 	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
12118 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
12119    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
12120    is the expression of the binary operation.  The output may either be
12121    emitted here, or returned to the caller, like all output_* functions.
12123    There is no guarantee that the operands are the same mode, as they
12124    might be within FLOAT or FLOAT_EXTEND expressions.  */
12126 #ifndef SYSV386_COMPAT
12127 /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
12128    wants to fix the assemblers because that causes incompatibility
12129    with gcc.  No-one wants to fix gcc because that causes
12130    incompatibility with assemblers...  You can use the option of
12131    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
12132 #define SYSV386_COMPAT 1
/* NOTE(review): interior lines elided in this listing; comments below
   annotate the visible fragments only.  */
12136 output_387_binary_op (rtx insn, rtx *operands)
12138   static char buf[40];
12141   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
12143 #ifdef ENABLE_CHECKING
12144   /* Even if we do not want to check the inputs, this documents input
12145      constraints.  Which helps in understanding the following code.  */
12146   if (STACK_REG_P (operands[0])
12147       && ((REG_P (operands[1])
12148 	   && REGNO (operands[0]) == REGNO (operands[1])
12149 	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
12150 	  || (REG_P (operands[2])
12151 	      && REGNO (operands[0]) == REGNO (operands[2])
12152 	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
12153       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
12156     gcc_assert (is_sse);
	/* Pick the mnemonic stem based on the operation; integer-mode
	   inputs select the fi* (integer-operand) forms.  */
12159   switch (GET_CODE (operands[3]))
12162       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12163 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12171       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12172 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12180       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12181 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12189       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12190 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12198       gcc_unreachable ();
	/* SSE path: 3-operand AVX form first, then legacy 2-operand.  */
12205       strcpy (buf, ssep);
12206       if (GET_MODE (operands[0]) == SFmode)
12207 	strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
12209 	strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
12213       strcpy (buf, ssep + 1);
12214       if (GET_MODE (operands[0]) == SFmode)
12215 	strcat (buf, "ss\t{%2, %0|%0, %2}");
12217 	strcat (buf, "sd\t{%2, %0|%0, %2}");
	/* x87 path: choose the operand/pop template.  */
12223   switch (GET_CODE (operands[3]))
	/* Commutative ops: canonicalize so operands[0]==operands[1].  */
12227       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
12229 	  rtx temp = operands[2];
12230 	  operands[2] = operands[1];
12231 	  operands[1] = temp;
12234       /* know operands[0] == operands[1].  */
12236       if (MEM_P (operands[2]))
12242       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12244 	  if (STACK_TOP_P (operands[0]))
12245 	    /* How is it that we are storing to a dead operand[2]?
12246 	       Well, presumably operands[1] is dead too.  We can't
12247 	       store the result to st(0) as st(0) gets popped on this
12248 	       instruction.  Instead store to operands[2] (which I
12249 	       think has to be st(1)).  st(1) will be popped later.
12250 	       gcc <= 2.8.1 didn't have this check and generated
12251 	       assembly code that the Unixware assembler rejected.  */
12252 	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
12254 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
12258       if (STACK_TOP_P (operands[0]))
12259 	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
12261 	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
	/* Non-commutative ops (MINUS/DIV): operand order matters.  */
12266       if (MEM_P (operands[1]))
12272       if (MEM_P (operands[2]))
12278       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12281 	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
12282 	     derived assemblers, confusingly reverse the direction of
12283 	     the operation for fsub{r} and fdiv{r} when the
12284 	     destination register is not st(0).  The Intel assembler
12285 	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
12286 	     figure out what the hardware really does.  */
12287 	  if (STACK_TOP_P (operands[0]))
12288 	    p = "{p\t%0, %2|rp\t%2, %0}";
12290 	    p = "{rp\t%2, %0|p\t%0, %2}";
12292 	  if (STACK_TOP_P (operands[0]))
12293 	    /* As above for fmul/fadd, we can't store to st(0).  */
12294 	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
12296 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
12301       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
12304 	  if (STACK_TOP_P (operands[0]))
12305 	    p = "{rp\t%0, %1|p\t%1, %0}";
12307 	    p = "{p\t%1, %0|rp\t%0, %1}";
12309 	  if (STACK_TOP_P (operands[0]))
12310 	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
12312 	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
12317       if (STACK_TOP_P (operands[0]))
12319 	  if (STACK_TOP_P (operands[1]))
12320 	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
12322 	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
12325       else if (STACK_TOP_P (operands[1]))
12328 	    p = "{\t%1, %0|r\t%0, %1}";
12330 	    p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
12336 	    p = "{r\t%2, %0|\t%0, %2}";
12338 	    p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
12344       gcc_unreachable ();
12351 /* Return needed mode for entity in optimize_mode_switching pass. */
12354 ix86_mode_needed (int entity, rtx insn)
12356 enum attr_i387_cw mode;
12358 /* The mode UNINITIALIZED is used to store control word after a
12359 function call or ASM pattern. The mode ANY specify that function
12360 has no requirements on the control word and make no changes in the
12361 bits we are interested in. */
/* NOTE(review): the first arm of this condition (presumably a CALL_P test)
   is elided from this excerpt; only the asm-insn arm is visible.  */
12364 || (NONJUMP_INSN_P (insn)
12365 && (asm_noperands (PATTERN (insn)) >= 0
12366 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12367 return I387_CW_UNINITIALIZED;
/* Unrecognizable insns place no requirement on the control word.  */
12369 if (recog_memoized (insn) < 0)
12370 return I387_CW_ANY;
12372 mode = get_attr_i387_cw (insn);
/* NOTE(review): the bodies of the four mode tests below (and the switch
   they likely belong to) are elided; only the conditions survive.  */
12377 if (mode == I387_CW_TRUNC)
12382 if (mode == I387_CW_FLOOR)
12387 if (mode == I387_CW_CEIL)
12392 if (mode == I387_CW_MASK_PM)
12397 gcc_unreachable ();
12400 return I387_CW_ANY;
12403 /* Output code to initialize control word copies used by trunc?f?i and
12404 rounding patterns. CURRENT_MODE is set to current control word,
12405 while NEW_MODE is set to new control word. */
12408 emit_i387_cw_initialization (int mode)
12410 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12413 enum ix86_stack_slot slot;
12415 rtx reg = gen_reg_rtx (HImode);
/* Store the current x87 control word and copy it into a pseudo so it
   can be modified below.  */
12417 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12418 emit_move_insn (reg, copy_rtx (stored_mode));
/* First variant: full 16-bit and/or arithmetic on the control word.
   Used when insv on SImode is unattractive (64-bit, partial-reg-stall
   targets, or when optimizing for size).  */
12420 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12421 || optimize_function_for_size_p (cfun))
12425 case I387_CW_TRUNC:
12426 /* round toward zero (truncate) */
12427 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12428 slot = SLOT_CW_TRUNC;
12431 case I387_CW_FLOOR:
12432 /* round down toward -oo */
12433 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12434 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12435 slot = SLOT_CW_FLOOR;
/* NOTE(review): the `case I387_CW_CEIL:` label is elided here.  */
12439 /* round up toward +oo */
12440 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12441 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12442 slot = SLOT_CW_CEIL;
12445 case I387_CW_MASK_PM:
12446 /* mask precision exception for nearbyint() */
12447 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12448 slot = SLOT_CW_MASK_PM;
12452 gcc_unreachable ();
/* Second variant: write the rounding-control bits 10-11 directly with
   movsi_insv_1 instead of and/or pairs.  */
12459 case I387_CW_TRUNC:
12460 /* round toward zero (truncate) */
12461 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12462 slot = SLOT_CW_TRUNC;
12465 case I387_CW_FLOOR:
12466 /* round down toward -oo */
12467 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12468 slot = SLOT_CW_FLOOR;
12472 /* round up toward +oo */
12473 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12474 slot = SLOT_CW_CEIL;
12477 case I387_CW_MASK_PM:
12478 /* mask precision exception for nearbyint() */
12479 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12480 slot = SLOT_CW_MASK_PM;
12484 gcc_unreachable ();
12488 gcc_assert (slot < MAX_386_STACK_LOCALS);
/* Spill the modified control word to its dedicated stack slot so
   fldcw can reload it later.  */
12490 new_mode = assign_386_stack_local (HImode, slot);
12491 emit_move_insn (new_mode, reg);
12494 /* Output code for INSN to convert a float to a signed int. OPERANDS
12495 are the insn operands. The output may be [HSD]Imode and the input
12496 operand may be [SDX]Fmode. */
12499 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12501 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12502 int dimode_p = GET_MODE (operands[0]) == DImode;
12503 int round_mode = get_attr_i387_cw (insn);
12505 /* Jump through a hoop or two for DImode, since the hardware has no
12506 non-popping instruction. We used to do this a different way, but
12507 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate st(0) so the mandatory pop below does not lose the value.  */
12508 if ((dimode_p || fisttp) && !stack_top_dies)
12509 output_asm_insn ("fld\t%y1", operands);
12511 gcc_assert (STACK_TOP_P (operands[1]));
12512 gcc_assert (MEM_P (operands[0]));
12513 gcc_assert (GET_MODE (operands[1]) != TFmode);
/* NOTE(review): the `if (fisttp)` / `else` framing around the two
   emission paths below is elided from this excerpt.  */
12516 output_asm_insn ("fisttp%Z0\t%0", operands);
/* Non-fisttp path: temporarily switch the control word to the desired
   rounding mode (operand 3), store, then restore (operand 2).  */
12519 if (round_mode != I387_CW_ANY)
12520 output_asm_insn ("fldcw\t%3", operands);
12521 if (stack_top_dies || dimode_p)
12522 output_asm_insn ("fistp%Z0\t%0", operands);
12524 output_asm_insn ("fist%Z0\t%0", operands);
12525 if (round_mode != I387_CW_ANY)
12526 output_asm_insn ("fldcw\t%2", operands);
12532 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12533 have the values zero or one, indicates the ffreep insn's operand
12534 from the OPERANDS array. */
12536 static const char *
12537 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12539 if (TARGET_USE_FFREEP)
12540 #if HAVE_AS_IX86_FFREEP
12541 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit the raw opcode bytes.  The '_' below is
   patched in place with the stack-register digit.
   NOTE(review): the #else/#endif lines of this conditional are elided.  */
12544 static char retval[] = ".word\t0xc_df";
12545 int regno = REGNO (operands[opno]);
12547 gcc_assert (FP_REGNO_P (regno));
/* Index 9 is the '_' placeholder inside ".word\t0xc_df".  */
12549 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not used at all: pop via fstp.  */
12554 return opno ? "fstp\t%y1" : "fstp\t%y0";
12558 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12559 should be used. UNORDERED_P is true when fucom should be used. */
12562 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12564 int stack_top_dies;
12565 rtx cmp_op0, cmp_op1;
12566 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* NOTE(review): the condition choosing between these two operand
   assignments is elided from this excerpt.  */
12570 cmp_op0 = operands[0];
12571 cmp_op1 = operands[1];
12575 cmp_op0 = operands[1];
12576 cmp_op1 = operands[2];
/* SSE path: the templates carry a leading 'v'; indexing with
   [TARGET_AVX ? 0 : 1] drops it for non-AVX targets.  */
12581 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12582 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12583 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12584 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12586 if (GET_MODE (operands[0]) == SFmode)
12588 return &ucomiss[TARGET_AVX ? 0 : 1];
12590 return &comiss[TARGET_AVX ? 0 : 1];
12593 return &ucomisd[TARGET_AVX ? 0 : 1];
12595 return &comisd[TARGET_AVX ? 0 : 1];
/* x87 path: first operand must be on top of the FP stack.  */
12598 gcc_assert (STACK_TOP_P (cmp_op0));
12600 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: use ftst.  */
12602 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12604 if (stack_top_dies)
12606 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12607 return output_387_ffreep (operands, 1);
12610 return "ftst\n\tfnstsw\t%0";
12613 if (STACK_REG_P (cmp_op1)
12615 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12616 && REGNO (cmp_op1) != FIRST_STACK_REG)
12618 /* If both the top of the 387 stack dies, and the other operand
12619 is also a stack register that dies, then this must be a
12620 `fcompp' float compare */
12624 /* There is no double popping fcomi variant. Fortunately,
12625 eflags is immune from the fstp's cc clobbering. */
12627 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12629 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12630 return output_387_ffreep (operands, 0);
12635 return "fucompp\n\tfnstsw\t%0";
12637 return "fcompp\n\tfnstsw\t%0";
12642 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
/* NOTE(review): several alt[] entries (and their index positions) are
   elided from this excerpt; the table is indexed by the 4-bit mask
   built below.  */
12644 static const char * const alt[16] =
12646 "fcom%Z2\t%y2\n\tfnstsw\t%0",
12647 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12648 "fucom%Z2\t%y2\n\tfnstsw\t%0",
12649 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12651 "ficom%Z2\t%y2\n\tfnstsw\t%0",
12652 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12656 "fcomi\t{%y1, %0|%0, %y1}",
12657 "fcomip\t{%y1, %0|%0, %y1}",
12658 "fucomi\t{%y1, %0|%0, %y1}",
12659 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the table index from the four selector bits.  */
12670 mask = eflags_p << 3;
12671 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12672 mask |= unordered_p << 1;
12673 mask |= stack_top_dies;
12675 gcc_assert (mask < 16);
/* Emit one element of a jump-table address vector: an absolute label
   reference, ASM_QUAD for 64-bit case vectors, ASM_LONG otherwise.  */
12684 ix86_output_addr_vec_elt (FILE *file, int value)
12686 const char *directive = ASM_LONG;
/* NOTE(review): the TARGET_64BIT/CASE_VECTOR_MODE test guarding these
   two branches is elided (compare ix86_output_addr_diff_elt below).  */
12690 directive = ASM_QUAD;
12692 gcc_assert (!TARGET_64BIT);
12695 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a PIC jump table as a difference of labels
   (VALUE relative to REL), choosing the addressing form the target's
   assembler and ABI support.  */
12699 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12701 const char *directive = ASM_LONG;
12704 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12705 directive = ASM_QUAD;
12707 gcc_assert (!TARGET_64BIT);
12709 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12710 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12711 fprintf (file, "%s%s%d-%s%d\n",
12712 directive, LPREFIX, value, LPREFIX, rel);
12713 else if (HAVE_AS_GOTOFF_IN_DATA)
12714 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
/* Darwin: label minus the picbase symbol.  */
12716 else if (TARGET_MACHO)
12718 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12719 machopic_output_function_base_name (file);
12720 fprintf(file, "\n");
/* Fallback: express the entry relative to the GOT symbol.  */
12724 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12725 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12728 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* NOTE(review): the tail of this header comment is elided.  */
12732 ix86_expand_clear (rtx dest)
12736 /* We play register width games, which are only valid after reload. */
12737 gcc_assert (reload_completed);
12739 /* Avoid HImode and its attendant prefix byte. */
12740 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12741 dest = gen_rtx_REG (SImode, REGNO (dest));
12742 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12744 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
/* The xor form clobbers the flags, so wrap the set in a PARALLEL with
   an explicit FLAGS_REG clobber.  */
12745 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12747 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12748 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12754 /* X is an unchanging MEM. If it is a constant pool reference, return
12755 the constant pool rtx, else NULL. */
12758 maybe_get_pool_constant (rtx x)
/* Strip PIC/GOT wrapping from the address before testing it.  */
12760 x = ix86_delegitimize_address (XEXP (x, 0));
12762 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12763 return get_pool_constant (x);
/* Expand a scalar move of MODE from operands[1] to operands[0],
   legitimizing TLS, dllimport, PIC and large-constant sources along
   the way.  */
12769 ix86_expand_move (enum machine_mode mode, rtx operands[])
12772 enum tls_model model;
/* NOTE(review): the declarations/initialization of op0/op1 are elided
   from this excerpt.  */
12777 if (GET_CODE (op1) == SYMBOL_REF)
12779 model = SYMBOL_REF_TLS_MODEL (op1);
12782 op1 = legitimize_tls_address (op1, model, true);
12783 op1 = force_operand (op1, op0);
12787 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12788 && SYMBOL_REF_DLLIMPORT_P (op1))
12789 op1 = legitimize_dllimport_symbol (op1, false);
/* Handle (const (plus (symbol_ref ...) (const_int ...))) sources:
   legitimize the symbol, then re-add the addend.  */
12791 else if (GET_CODE (op1) == CONST
12792 && GET_CODE (XEXP (op1, 0)) == PLUS
12793 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12795 rtx addend = XEXP (XEXP (op1, 0), 1);
12796 rtx symbol = XEXP (XEXP (op1, 0), 0);
12799 model = SYMBOL_REF_TLS_MODEL (symbol);
12801 tmp = legitimize_tls_address (symbol, model, true);
12802 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12803 && SYMBOL_REF_DLLIMPORT_P (symbol))
12804 tmp = legitimize_dllimport_symbol (symbol, true);
12808 tmp = force_operand (tmp, NULL);
12809 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12810 op0, 1, OPTAB_DIRECT);
/* PIC: symbolic operands need legitimization through the GOT.  */
12816 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12818 if (TARGET_MACHO && !TARGET_64BIT)
12823 rtx temp = ((reload_in_progress
12824 || ((op0 && REG_P (op0))
12826 ? op0 : gen_reg_rtx (Pmode));
12827 op1 = machopic_indirect_data_reference (op1, temp);
12828 op1 = machopic_legitimize_pic_address (op1, mode,
12829 temp == op1 ? 0 : temp);
12831 else if (MACHOPIC_INDIRECT)
12832 op1 = machopic_indirect_data_reference (op1, 0);
12840 op1 = force_reg (Pmode, op1);
12841 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12843 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
12844 op1 = legitimize_pic_address (op1, reg);
/* NOTE(review): the condition opening this non-PIC branch is elided.  */
12853 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12854 || !push_operand (op0, mode))
12856 op1 = force_reg (mode, op1);
12858 if (push_operand (op0, mode)
12859 && ! general_no_elim_operand (op1, mode))
12860 op1 = copy_to_mode_reg (mode, op1);
12862 /* Force large constants in 64bit compilation into register
12863 to get them CSEed. */
12864 if (can_create_pseudo_p ()
12865 && (mode == DImode) && TARGET_64BIT
12866 && immediate_operand (op1, mode)
12867 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12868 && !register_operand (op0, mode)
12870 op1 = copy_to_mode_reg (mode, op1);
12872 if (can_create_pseudo_p ()
12873 && FLOAT_MODE_P (mode)
12874 && GET_CODE (op1) == CONST_DOUBLE)
12876 /* If we are loading a floating point constant to a register,
12877 force the value to memory now, since we'll get better code
12878 out the back end. */
12880 op1 = validize_mem (force_const_mem (mode, op1));
12881 if (!register_operand (op0, mode))
12883 rtx temp = gen_reg_rtx (mode);
12884 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12885 emit_move_insn (op0, temp);
12891 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move, forcing awkward constants to memory and
   routing under-aligned SSE operands through the misaligned-move
   expander.  */
12895 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12897 rtx op0 = operands[0], op1 = operands[1];
12898 unsigned int align = GET_MODE_ALIGNMENT (mode);
12900 /* Force constants other than zero into memory. We do not know how
12901 the instructions used to build constants modify the upper 64 bits
12902 of the register, once we have that information we may be able
12903 to handle some of them more efficiently. */
12904 if (can_create_pseudo_p ()
12905 && register_operand (op0, mode)
12906 && (CONSTANT_P (op1)
12907 || (GET_CODE (op1) == SUBREG
12908 && CONSTANT_P (SUBREG_REG (op1))))
12909 && standard_sse_constant_p (op1) <= 0)
12910 op1 = validize_mem (force_const_mem (mode, op1));
12912 /* We need to check memory alignment for SSE mode since attribute
12913 can make operands unaligned. */
12914 if (can_create_pseudo_p ()
12915 && SSE_REG_MODE_P (mode)
12916 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12917 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12921 /* ix86_expand_vector_move_misalign() does not like constants ... */
12922 if (CONSTANT_P (op1)
12923 || (GET_CODE (op1) == SUBREG
12924 && CONSTANT_P (SUBREG_REG (op1))))
12925 op1 = validize_mem (force_const_mem (mode, op1));
12927 /* ... nor both arguments in memory. */
12928 if (!register_operand (op0, mode)
12929 && !register_operand (op1, mode))
12930 op1 = force_reg (mode, op1);
12932 tmp[0] = op0; tmp[1] = op1;
12933 ix86_expand_vector_move_misalign (mode, tmp);
12937 /* Make operand1 a register if it isn't already. */
12938 if (can_create_pseudo_p ()
12939 && !register_operand (op0, mode)
12940 && !register_operand (op1, mode))
12942 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12946 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12949 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12950 straight to ix86_expand_vector_move. */
12951 /* Code generation for scalar reg-reg moves of single and double precision data:
12952 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12956 if (x86_sse_partial_reg_dependency == true)
12961 Code generation for scalar loads of double precision data:
12962 if (x86_sse_split_regs == true)
12963 movlpd mem, reg (gas syntax)
12967 Code generation for unaligned packed loads of single precision data
12968 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12969 if (x86_sse_unaligned_move_optimal)
12972 if (x86_sse_partial_reg_dependency == true)
12984 Code generation for unaligned packed loads of double precision data
12985 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12986 if (x86_sse_unaligned_move_optimal)
12989 if (x86_sse_split_regs == true)
13002 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* AVX branch: unaligned vmovdqu/vmovups/vmovupd of the right width.
   NOTE(review): the TARGET_AVX test and the op0/op1/m declarations are
   elided from this excerpt.  */
13011 switch (GET_MODE_CLASS (mode))
13013 case MODE_VECTOR_INT:
13015 switch (GET_MODE_SIZE (mode))
13018 op0 = gen_lowpart (V16QImode, op0);
13019 op1 = gen_lowpart (V16QImode, op1);
13020 emit_insn (gen_avx_movdqu (op0, op1));
13023 op0 = gen_lowpart (V32QImode, op0);
13024 op1 = gen_lowpart (V32QImode, op1);
13025 emit_insn (gen_avx_movdqu256 (op0, op1));
13028 gcc_unreachable ();
13031 case MODE_VECTOR_FLOAT:
13032 op0 = gen_lowpart (mode, op0);
13033 op1 = gen_lowpart (mode, op1);
/* NOTE(review): the inner switch over MODE selecting among these
   emissions is elided.  */
13038 emit_insn (gen_avx_movups (op0, op1));
13041 emit_insn (gen_avx_movups256 (op0, op1));
13044 emit_insn (gen_avx_movupd (op0, op1));
13047 emit_insn (gen_avx_movupd256 (op0, op1));
13050 gcc_unreachable ();
13055 gcc_unreachable ();
/* Non-AVX, misaligned load (op1 in memory).  */
13063 /* If we're optimizing for size, movups is the smallest. */
13064 if (optimize_insn_for_size_p ())
13066 op0 = gen_lowpart (V4SFmode, op0);
13067 op1 = gen_lowpart (V4SFmode, op1);
13068 emit_insn (gen_sse_movups (op0, op1));
13072 /* ??? If we have typed data, then it would appear that using
13073 movdqu is the only way to get unaligned data loaded with
13075 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13077 op0 = gen_lowpart (V16QImode, op0);
13078 op1 = gen_lowpart (V16QImode, op1);
13079 emit_insn (gen_sse2_movdqu (op0, op1));
13083 if (TARGET_SSE2 && mode == V2DFmode)
13087 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13089 op0 = gen_lowpart (V2DFmode, op0);
13090 op1 = gen_lowpart (V2DFmode, op1);
13091 emit_insn (gen_sse2_movupd (op0, op1));
13095 /* When SSE registers are split into halves, we can avoid
13096 writing to the top half twice. */
13097 if (TARGET_SSE_SPLIT_REGS)
13099 emit_clobber (op0);
13104 /* ??? Not sure about the best option for the Intel chips.
13105 The following would seem to satisfy; the register is
13106 entirely cleared, breaking the dependency chain. We
13107 then store to the upper half, with a dependency depth
13108 of one. A rumor has it that Intel recommends two movsd
13109 followed by an unpacklpd, but this is unconfirmed. And
13110 given that the dependency depth of the unpacklpd would
13111 still be one, I'm not sure why this would be better. */
13112 zero = CONST0_RTX (V2DFmode);
13115 m = adjust_address (op1, DFmode, 0);
13116 emit_insn (gen_sse2_loadlpd (op0, zero, m));
13117 m = adjust_address (op1, DFmode, 8);
13118 emit_insn (gen_sse2_loadhpd (op0, op0, m));
/* V4SF (and other non-V2DF) misaligned loads.  */
13122 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13124 op0 = gen_lowpart (V4SFmode, op0);
13125 op1 = gen_lowpart (V4SFmode, op1);
13126 emit_insn (gen_sse_movups (op0, op1));
13130 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
13131 emit_move_insn (op0, CONST0_RTX (mode));
13133 emit_clobber (op0);
13135 if (mode != V4SFmode)
13136 op0 = gen_lowpart (V4SFmode, op0);
13137 m = adjust_address (op1, V2SFmode, 0);
13138 emit_insn (gen_sse_loadlps (op0, op0, m));
13139 m = adjust_address (op1, V2SFmode, 8);
13140 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Misaligned store (op0 in memory).  */
13143 else if (MEM_P (op0))
13145 /* If we're optimizing for size, movups is the smallest. */
13146 if (optimize_insn_for_size_p ())
13148 op0 = gen_lowpart (V4SFmode, op0);
13149 op1 = gen_lowpart (V4SFmode, op1);
13150 emit_insn (gen_sse_movups (op0, op1));
13154 /* ??? Similar to above, only less clear because of quote
13155 typeless stores unquote. */
13156 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
13157 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13159 op0 = gen_lowpart (V16QImode, op0);
13160 op1 = gen_lowpart (V16QImode, op1);
13161 emit_insn (gen_sse2_movdqu (op0, op1));
13165 if (TARGET_SSE2 && mode == V2DFmode)
13167 m = adjust_address (op0, DFmode, 0);
13168 emit_insn (gen_sse2_storelpd (m, op1));
13169 m = adjust_address (op0, DFmode, 8);
13170 emit_insn (gen_sse2_storehpd (m, op1));
13174 if (mode != V4SFmode)
13175 op1 = gen_lowpart (V4SFmode, op1);
13176 m = adjust_address (op0, V2SFmode, 0);
13177 emit_insn (gen_sse_storelps (m, op1));
13178 m = adjust_address (op0, V2SFmode, 8);
13179 emit_insn (gen_sse_storehps (m, op1));
13183 gcc_unreachable ();
13186 /* Expand a push in MODE. This is some mode for which we do not support
13187 proper push instructions, at least from the registers that we expect
13188 the value to live in. */
13191 ix86_expand_push (enum machine_mode mode, rtx x)
/* Manually decrement the stack pointer by the mode's size ...  */
13195 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
13196 GEN_INT (-GET_MODE_SIZE (mode)),
13197 stack_pointer_rtx, 1, OPTAB_DIRECT);
13198 if (tmp != stack_pointer_rtx)
13199 emit_move_insn (stack_pointer_rtx, tmp);
/* ... then store X into the newly opened stack slot.  */
13201 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
13203 /* When we push an operand onto stack, it has to be aligned at least
13204 at the function argument boundary. However since we don't have
13205 the argument type, we can't determine the actual argument
/* NOTE(review): the rest of this alignment comment is elided here.  */
13207 emit_move_insn (tmp, x);
13210 /* Helper function of ix86_fixup_binary_operands to canonicalize
13211 operand order. Returns true if the operands should be swapped. */
13214 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
13217 rtx dst = operands[0];
13218 rtx src1 = operands[1];
13219 rtx src2 = operands[2];
13221 /* If the operation is not commutative, we can't do anything. */
13222 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
13225 /* Highest priority is that src1 should match dst. */
13226 if (rtx_equal_p (dst, src1))
13228 if (rtx_equal_p (dst, src2))
13231 /* Next highest priority is that immediate constants come second. */
13232 if (immediate_operand (src2, mode))
13234 if (immediate_operand (src1, mode))
13237 /* Lowest priority is that memory references should come second. */
/* NOTE(review): the return statements of each test and the final
   memory-operand checks are elided from this excerpt.  */
13247 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
13248 destination to use for the operation. If different from the true
13249 destination in operands[0], a copy operation will be required. */
13252 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
13255 rtx dst = operands[0];
13256 rtx src1 = operands[1];
13257 rtx src2 = operands[2];
13259 /* Canonicalize operand order. */
13260 if (ix86_swap_binary_operands_p (code, mode, operands))
13264 /* It is invalid to swap operands of different modes. */
13265 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
/* NOTE(review): the actual src1/src2 swap statements are elided.  */
13272 /* Both source operands cannot be in memory. */
13273 if (MEM_P (src1) && MEM_P (src2))
13275 /* Optimization: Only read from memory once. */
13276 if (rtx_equal_p (src1, src2))
13278 src2 = force_reg (mode, src2);
13282 src2 = force_reg (mode, src2);
13285 /* If the destination is memory, and we do not have matching source
13286 operands, do things in registers. */
13287 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13288 dst = gen_reg_rtx (mode);
13290 /* Source 1 cannot be a constant. */
13291 if (CONSTANT_P (src1))
13292 src1 = force_reg (mode, src1);
13294 /* Source 1 cannot be a non-matching memory. */
13295 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13296 src1 = force_reg (mode, src1);
/* Write the fixed-up sources back; DST is returned to the caller.  */
13298 operands[1] = src1;
13299 operands[2] = src2;
13303 /* Similarly, but assume that the destination has already been
13304 set up properly. */
13307 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
13308 enum machine_mode mode, rtx operands[])
/* The fixed-up destination must equal operands[0]; no copy allowed.  */
13310 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
13311 gcc_assert (dst == operands[0]);
13314 /* Attempt to expand a binary operator. Make the expansion closer to the
13315 actual machine, then just general_operand, which will allow 3 separate
13316 memory references (one output, two input) in a single insn. */
13319 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
13322 rtx src1, src2, dst, op, clob;
13324 dst = ix86_fixup_binary_operands (code, mode, operands);
13325 src1 = operands[1];
13326 src2 = operands[2];
13328 /* Emit the instruction. */
13330 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
13331 if (reload_in_progress)
13333 /* Reload doesn't know about the flags register, and doesn't know that
13334 it doesn't want to clobber it. We can only do this with PLUS. */
13335 gcc_assert (code == PLUS);
/* Outside reload, attach an explicit FLAGS_REG clobber, as the
   arithmetic patterns require.  */
13340 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13341 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13344 /* Fix up the destination if needed. */
13345 if (dst != operands[0])
13346 emit_move_insn (operands[0], dst);
13349 /* Return TRUE or FALSE depending on whether the binary operator meets the
13350 appropriate constraints. */
13353 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
13356 rtx dst = operands[0];
13357 rtx src1 = operands[1];
13358 rtx src2 = operands[2];
13360 /* Both source operands cannot be in memory. */
13361 if (MEM_P (src1) && MEM_P (src2))
13364 /* Canonicalize operand order for commutative operators. */
13365 if (ix86_swap_binary_operands_p (code, mode, operands))
/* NOTE(review): the local src1/src2 swap performed here is elided.  */
13372 /* If the destination is memory, we must have a matching source operand. */
13373 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13376 /* Source 1 cannot be a constant. */
13377 if (CONSTANT_P (src1))
13380 /* Source 1 cannot be a non-matching memory. */
13381 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13387 /* Attempt to expand a unary operator. Make the expansion closer to the
13388 actual machine, then just general_operand, which will allow 2 separate
13389 memory references (one output, one input) in a single insn. */
13392 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13395 int matching_memory;
13396 rtx src, dst, op, clob;
/* NOTE(review): the dst/src initialization and the MEM_P (dst) test
   framing this section are elided from this excerpt.  */
13401 /* If the destination is memory, and we do not have matching source
13402 operands, do things in registers. */
13403 matching_memory = 0;
13406 if (rtx_equal_p (dst, src))
13407 matching_memory = 1;
13409 dst = gen_reg_rtx (mode);
13412 /* When source operand is memory, destination must match. */
13413 if (MEM_P (src) && !matching_memory)
13414 src = force_reg (mode, src);
13416 /* Emit the instruction. */
13418 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13419 if (reload_in_progress || code == NOT)
13421 /* Reload doesn't know about the flags register, and doesn't know that
13422 it doesn't want to clobber it. */
13423 gcc_assert (code == NOT);
/* For other codes (e.g. NEG), add the explicit flags clobber.  */
13428 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13429 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13432 /* Fix up the destination if needed. */
13433 if (dst != operands[0])
13434 emit_move_insn (operands[0], dst);
13437 #define LEA_SEARCH_THRESHOLD 12
13439 /* Search backward for non-agu definition of register number REGNO1
13440 or register number REGNO2 in INSN's basic block until
13441 1. Pass LEA_SEARCH_THRESHOLD instructions, or
13442 2. Reach BB boundary, or
13443 3. Reach agu definition.
13444 Returns the distance between the non-agu definition point and INSN.
13445 If no definition point, returns -1. */
13448 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13451 basic_block bb = BLOCK_FOR_INSN (insn);
13454 enum attr_type insn_type;
/* Phase 1: walk backward from INSN within its own basic block.  */
13456 if (insn != BB_HEAD (bb))
13458 rtx prev = PREV_INSN (insn);
13459 while (prev && distance < LEA_SEARCH_THRESHOLD)
/* Scan PREV's defs via the df framework for a def of either regno.  */
13464 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13465 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13466 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13467 && (regno1 == DF_REF_REGNO (*def_rec)
13468 || regno2 == DF_REF_REGNO (*def_rec)))
13470 insn_type = get_attr_type (prev);
13471 if (insn_type != TYPE_LEA)
/* NOTE(review): the distance bookkeeping and early-exit statements
   in this loop are elided from this excerpt.  */
13475 if (prev == BB_HEAD (bb))
13477 prev = PREV_INSN (prev);
/* Phase 2: if the threshold was not exhausted and BB is a simple
   single-predecessor self-loop, continue scanning from the block end.  */
13481 if (distance < LEA_SEARCH_THRESHOLD)
13485 bool simple_loop = false;
13487 FOR_EACH_EDGE (e, ei, bb->preds)
13490 simple_loop = true;
13496 rtx prev = BB_END (bb);
13499 && distance < LEA_SEARCH_THRESHOLD)
13504 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13505 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13506 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13507 && (regno1 == DF_REF_REGNO (*def_rec)
13508 || regno2 == DF_REF_REGNO (*def_rec)))
13510 insn_type = get_attr_type (prev);
13511 if (insn_type != TYPE_LEA)
13515 prev = PREV_INSN (prev);
13523 /* get_attr_type may modify recog data. We want to make sure
13524 that recog data is valid for instruction INSN, on which
13525 distance_non_agu_define is called. INSN is unchanged here. */
13526 extract_insn_cached (insn);
13530 /* Return the distance between INSN and the next insn that uses
13531 register number REGNO0 in memory address. Return -1 if no such
13532 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
13535 distance_agu_use (unsigned int regno0, rtx insn)
13537 basic_block bb = BLOCK_FOR_INSN (insn);
/* Phase 1: walk forward from INSN within its own basic block.  */
13542 if (insn != BB_END (bb))
13544 rtx next = NEXT_INSN (insn);
13545 while (next && distance < LEA_SEARCH_THRESHOLD)
/* A memory-address use of REGNO0 ends the search successfully ...  */
13551 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13552 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13553 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13554 && regno0 == DF_REF_REGNO (*use_rec))
13556 /* Return DISTANCE if OP0 is used in memory
13557 address in NEXT. */
/* ... while a redefinition of REGNO0 ends it with failure.  */
13561 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13562 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13563 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13564 && regno0 == DF_REF_REGNO (*def_rec))
13566 /* Return -1 if OP0 is set in NEXT. */
13570 if (next == BB_END (bb))
13572 next = NEXT_INSN (next);
/* Phase 2: for a simple single-successor self-loop, keep scanning
   from the block head (mirrors distance_non_agu_define).  */
13576 if (distance < LEA_SEARCH_THRESHOLD)
13580 bool simple_loop = false;
13582 FOR_EACH_EDGE (e, ei, bb->succs)
13585 simple_loop = true;
13591 rtx next = BB_HEAD (bb);
13594 && distance < LEA_SEARCH_THRESHOLD)
13600 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13601 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13602 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13603 && regno0 == DF_REF_REGNO (*use_rec))
13605 /* Return DISTANCE if OP0 is used in memory
13606 address in NEXT. */
13610 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13611 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13612 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13613 && regno0 == DF_REF_REGNO (*def_rec))
13615 /* Return -1 if OP0 is set in NEXT. */
13620 next = NEXT_INSN (next);
13628 /* Define this macro to tune LEA priority vs ADD, it take effect when
13629 there is a dilemma of choicing LEA or ADD
13630 Negative value: ADD is more preferred than LEA
13632 Positive value: LEA is more preferred than ADD*/
13633 #define IX86_LEA_PRIORITY 2
13635 /* Return true if it is ok to optimize an ADD operation to LEA
13636 operation to avoid flag register consumation. For the processors
13637 like ATOM, if the destination register of LEA holds an actual
13638 address which will be used soon, LEA is better and otherwise ADD
/* NOTE(review): the closing words of this header comment are elided.  */
13642 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13643 rtx insn, rtx operands[])
13645 unsigned int regno0 = true_regnum (operands[0]);
13646 unsigned int regno1 = true_regnum (operands[1]);
13647 unsigned int regno2;
/* Without AGU tuning (or when optimizing for size) use LEA only when
   it is required, i.e. the destination differs from the source.  */
13649 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13650 return regno0 != regno1;
13652 regno2 = true_regnum (operands[2]);
13654 /* If a = b + c, (a!=b && a!=c), must use lea form. */
13655 if (regno0 != regno1 && regno0 != regno2)
13659 int dist_define, dist_use;
13660 dist_define = distance_non_agu_define (regno1, regno2, insn);
13661 if (dist_define <= 0)
13664 /* If this insn has both backward non-agu dependence and forward
13665 agu dependence, the one with short distance take effect. */
13666 dist_use = distance_agu_use (regno0, insn);
/* NOTE(review): the dist_use <= 0 arm of this condition is elided.  */
13668 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13675 /* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */
13679 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13685 /* Retrieve destination of SET_BODY. */
13686 switch (GET_CODE (set_body))
13689 set_dest = SET_DEST (set_body);
13690 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: recurse over each element of the vector.  */
13694 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13695 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13703 /* Retrieve shift count of USE_BODY. */
13704 switch (GET_CODE (use_body))
13707 shift_rtx = XEXP (use_body, 1);
13710 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13711 if (ix86_dep_by_shift_count_body (set_body,
13712 XVECEXP (use_body, 0, i)))
/* Only shift/rotate rtxes carry a shift-count operand to inspect.  */
13720 && (GET_CODE (shift_rtx) == ASHIFT
13721 || GET_CODE (shift_rtx) == LSHIFTRT
13722 || GET_CODE (shift_rtx) == ASHIFTRT
13723 || GET_CODE (shift_rtx) == ROTATE
13724 || GET_CODE (shift_rtx) == ROTATERT))
13726 rtx shift_count = XEXP (shift_rtx, 1);
13728 /* Return true if shift count is dest of SET_BODY. */
13729 if (REG_P (shift_count)
13730 && true_regnum (set_dest) == true_regnum (shift_count))
13737 /* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */
13741 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
/* Delegate to the body-level check on the two insn patterns.  */
13743 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13744 PATTERN (use_insn));
13747 /* Return TRUE or FALSE depending on whether the unary operator meets the
13748 appropriate constraints. */
13751 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13752 enum machine_mode mode ATTRIBUTE_UNUSED,
13753 rtx operands[2] ATTRIBUTE_UNUSED)
13755 /* If one of operands is memory, source and destination must match. */
13756 if ((MEM_P (operands[0])
13757 || MEM_P (operands[1]))
13758 && ! rtx_equal_p (operands[0], operands[1]))
/* NOTE(review): remainder of the function is elided in this extract;
   presumably it returns true otherwise -- confirm in full source.  */
13763 /* Post-reload splitter for converting an SF or DFmode value in an
13764 SSE register into an unsigned SImode. */
13767 ix86_split_convert_uns_si_sse (rtx operands[])
13769 enum machine_mode vecmode;
13770 rtx value, large, zero_or_two31, input, two31, x;
13772 large = operands[1];
13773 zero_or_two31 = operands[2];
13774 input = operands[3];
13775 two31 = operands[4];
13776 vecmode = GET_MODE (large);
13777 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13779 /* Load up the value into the low element. We must ensure that the other
13780 elements are valid floats -- zero is the easiest such value. */
13783 if (vecmode == V4SFmode)
13784 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13786 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
13790 input = gen_rtx_REG (vecmode, REGNO (input));
13791 emit_move_insn (value, CONST0_RTX (vecmode));
13792 if (vecmode == V4SFmode)
13793 emit_insn (gen_sse_movss (value, value, input));
13795 emit_insn (gen_sse2_movsd (value, value, input));
13798 emit_move_insn (large, two31)
13799 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = element mask of (2**31 <= value).  */
13801 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13802 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* zero_or_two31 = 2**31 where the input was large, else 0.  */
13804 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13805 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
/* Subtract the masked 2**31 bias from VALUE.  */
13807 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13808 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Reduce the mask to just the sign bit for the final integer XOR fixup.  */
13810 large = gen_rtx_REG (V4SImode, REGNO (large));
13811 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13813 x = gen_rtx_REG (V4SImode, REGNO (value));
13814 if (vecmode == V4SFmode)
13815 emit_insn (gen_sse2_cvttps2dq (x, value));
13817 emit_insn (gen_sse2_cvttpd2dq (x, value));
13820 emit_insn (gen_xorv4si3 (value, value, large));
13823 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13824 Expects the 64-bit DImode to be supplied in a pair of integral
13825 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13826 -mfpmath=sse, !optimize_size only. */
13829 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13831 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13832 rtx int_xmm, fp_xmm;
13833 rtx biases, exponents;
/* Get the 64-bit input into the low half of an XMM register.  */
13836 int_xmm = gen_reg_rtx (V4SImode);
13837 if (TARGET_INTER_UNIT_MOVES)
13838 emit_insn (gen_movdi_to_sse (int_xmm, input));
13839 else if (TARGET_SSE_SPLIT_REGS)
13841 emit_clobber (int_xmm);
13842 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13846 x = gen_reg_rtx (V2DImode);
13847 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13848 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
13851 x = gen_rtx_CONST_VECTOR (V4SImode,
13852 gen_rtvec (4, GEN_INT (0x43300000UL),
13853 GEN_INT (0x45300000UL),
13854 const0_rtx, const0_rtx));
13855 exponents = validize_mem (force_const_mem (V4SImode, x));
13857 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13858 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13860 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13861 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13862 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13863 (0x1.0p84 + double(fp_value_hi_xmm)).
13864 Note these exponents differ by 32. */
13866 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13868 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13869 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13870 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13871 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13872 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13873 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13874 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13875 biases = validize_mem (force_const_mem (V2DFmode, biases));
13876 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13878 /* Add the upper and lower DFmode values together. */
/* HADDPD path; presumably gated on TARGET_SSE3 (guard elided in this
   extract), with the unpack-high + add fallback below.  */
13880 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13883 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13884 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13885 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13888 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13891 /* Not used, but eases macroization of patterns. */
13893 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13894 rtx input ATTRIBUTE_UNUSED)
/* Deliberately unreachable: exists only so machine-description macros
   can refer to a uniform function name for every mode.  */
13896 gcc_unreachable ();
13899 /* Convert an unsigned SImode value into a DFmode. Only currently used
13900 for SSE, but applicable anywhere. */
13903 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13905 REAL_VALUE_TYPE TWO31r;
/* Flip the sign bit (input + INT_MIN), convert as signed, then add
   2**31.0 back to compensate.  */
13908 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13909 NULL, 1, OPTAB_DIRECT);
13911 fp = gen_reg_rtx (DFmode);
13912 emit_insn (gen_floatsidf2 (fp, x));
13914 real_ldexp (&TWO31r, &dconst1, 31);
13915 x = const_double_from_real_value (TWO31r, DFmode);
13917 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13919 emit_move_insn (target, x);
13922 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13923 32-bit mode; otherwise we have a direct convert instruction. */
13926 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13928 REAL_VALUE_TYPE TWO32r;
13929 rtx fp_lo, fp_hi, x;
13931 fp_lo = gen_reg_rtx (DFmode);
13932 fp_hi = gen_reg_rtx (DFmode);
/* result = double(signed high word) * 2**32 + double(unsigned low word).  */
13934 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13936 real_ldexp (&TWO32r, &dconst1, 32);
13937 x = const_double_from_real_value (TWO32r, DFmode);
13938 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
13940 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13942 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13945 emit_move_insn (target, x);
13948 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13949 For x86_32, -mfpmath=sse, !optimize_size only. */
13951 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13953 REAL_VALUE_TYPE ONE16r;
13954 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* result = float(input >> 16) * 2**16 + float(input & 0xffff); both
   halves fit exactly in SFmode so no rounding occurs until the end.  */
13956 real_ldexp (&ONE16r, &dconst1, 16);
13957 x = const_double_from_real_value (ONE16r, SFmode);
13958 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13959 NULL, 0, OPTAB_DIRECT);
13960 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13961 NULL, 0, OPTAB_DIRECT);
13962 fp_hi = gen_reg_rtx (SFmode);
13963 fp_lo = gen_reg_rtx (SFmode);
13964 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13965 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13966 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13968 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13970 if (!rtx_equal_p (target, fp_hi))
13971 emit_move_insn (target, fp_hi);
13974 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
13975 then replicate the value for all elements of the vector
   register; otherwise only the low element is VALUE and the rest
   are zero.  */
/* NOTE(review): the switch's mode case labels are elided in this
   extract; branches build V4SI/V2DI/V4SF/V2DF constant vectors.  */
13979 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13986 v = gen_rtvec (4, value, value, value, value);
13987 return gen_rtx_CONST_VECTOR (V4SImode, v);
13991 v = gen_rtvec (2, value, value);
13992 return gen_rtx_CONST_VECTOR (V2DImode, v);
13996 v = gen_rtvec (4, value, value, value, value);
13998 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13999 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
14000 return gen_rtx_CONST_VECTOR (V4SFmode, v);
14004 v = gen_rtvec (2, value, value);
14006 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
14007 return gen_rtx_CONST_VECTOR (V2DFmode, v);
14010 gcc_unreachable ();
14014 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
14015 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
14016 for an SSE register. If VECT is true, then replicate the mask for
14017 all elements of the vector register. If INVERT is true, then create
14018 a mask excluding the sign bit. */
14021 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
14023 enum machine_mode vec_mode, imode;
14024 HOST_WIDE_INT hi, lo;
14029 /* Find the sign bit, sign extended to 2*HWI. */
/* NOTE(review): the switch's mode case labels are elided in this
   extract; LO/HI form the sign-bit constant as a HOST_WIDE_INT pair.  */
14035 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
14036 lo = 0x80000000, hi = lo < 0;
14042 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
14043 if (HOST_BITS_PER_WIDE_INT >= 64)
14044 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
14046 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
14051 vec_mode = VOIDmode;
14052 if (HOST_BITS_PER_WIDE_INT >= 64)
14055 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
/* Narrow host: the sign bit lives entirely in the high word.  */
14062 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* INVERT: complement to get a mask that clears only the sign bit.  */
14066 lo = ~lo, hi = ~hi;
14072 mask = immed_double_const (lo, hi, imode);
14074 vec = gen_rtvec (2, v, mask);
14075 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
14076 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
14083 gcc_unreachable ();
14087 lo = ~lo, hi = ~hi;
14089 /* Force this value into the low part of a fp vector constant. */
14090 mask = immed_double_const (lo, hi, imode);
14091 mask = gen_lowpart (mode, mask);
14093 if (vec_mode == VOIDmode)
14094 return force_reg (mode, mask);
14096 v = ix86_build_const_vector (mode, vect, mask);
14097 return force_reg (vec_mode, v);
14100 /* Generate code for floating point ABS or NEG. */
14103 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
14106 rtx mask, set, use, clob, dst, src;
14107 bool use_sse = false;
14108 bool vector_mode = VECTOR_MODE_P (mode);
14109 enum machine_mode elt_mode = mode;
14113 elt_mode = GET_MODE_INNER (mode);
14116 else if (mode == TFmode)
14118 else if (TARGET_SSE_MATH)
14119 use_sse = SSE_FLOAT_MODE_P (mode);
14121 /* NEG and ABS performed with SSE use bitwise mask operations.
14122 Create the appropriate mask now. */
/* NEG flips the sign bit (XOR with sign mask); ABS clears it (AND with
   the inverted mask -- note the code == ABS invert argument).  */
14124 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
14133 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
14134 set = gen_rtx_SET (VOIDmode, dst, set);
/* Non-SSE path: emit the plain unary rtx.  */
14139 set = gen_rtx_fmt_e (code, mode, src);
14140 set = gen_rtx_SET (VOIDmode, dst, set);
14143 use = gen_rtx_USE (VOIDmode, mask);
14144 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14145 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14146 gen_rtvec (3, set, use, clob)));
14153 /* Expand a copysign operation. Special case operand 0 being a constant. */
14156 ix86_expand_copysign (rtx operands[])
14158 enum machine_mode mode;
14159 rtx dest, op0, op1, mask, nmask;
14161 dest = operands[0];
14165 mode = GET_MODE (dest);
/* Constant magnitude: strip its sign up front, then only the sign of
   OP1 needs to be merged in.  */
14167 if (GET_CODE (op0) == CONST_DOUBLE)
14169 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
14171 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
14172 op0 = simplify_unary_operation (ABS, mode, op0, mode);
14174 if (mode == SFmode || mode == DFmode)
14176 enum machine_mode vmode;
14178 vmode = mode == SFmode ? V4SFmode : V2DFmode;
14180 if (op0 == CONST0_RTX (mode))
14181 op0 = CONST0_RTX (vmode);
/* Embed the scalar constant in the low element of a vector.  */
14186 if (mode == SFmode)
14187 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
14188 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
14190 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
14192 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
14195 else if (op0 != CONST0_RTX (mode))
14196 op0 = force_reg (mode, op0);
14198 mask = ix86_build_signbit_mask (mode, 0, 0);
14200 if (mode == SFmode)
14201 copysign_insn = gen_copysignsf3_const;
14202 else if (mode == DFmode)
14203 copysign_insn = gen_copysigndf3_const;
14205 copysign_insn = gen_copysigntf3_const;
14207 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: needs both the sign mask and its complement.  */
14211 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
14213 nmask = ix86_build_signbit_mask (mode, 0, 1);
14214 mask = ix86_build_signbit_mask (mode, 0, 0);
14216 if (mode == SFmode)
14217 copysign_insn = gen_copysignsf3_var;
14218 else if (mode == DFmode)
14219 copysign_insn = gen_copysigndf3_var;
14221 copysign_insn = gen_copysigntf3_var;
14223 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
14227 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
14228 be a constant, and so has already been expanded into a vector constant. */
14231 ix86_split_copysign_const (rtx operands[])
14233 enum machine_mode mode, vmode;
14234 rtx dest, op0, op1, mask, x;
14236 dest = operands[0];
14239 mask = operands[3];
14241 mode = GET_MODE (dest);
14242 vmode = GET_MODE (mask);
14244 dest = simplify_gen_subreg (vmode, dest, mode, 0);
14245 x = gen_rtx_AND (vmode, dest, mask);
14246 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14248 if (op0 != CONST0_RTX (vmode))
14250 x = gen_rtx_IOR (vmode, dest, op0);
14251 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14255 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
14256 so we have to do two masks. */
14259 ix86_split_copysign_var (rtx operands[])
14261 enum machine_mode mode, vmode;
14262 rtx dest, scratch, op0, op1, mask, nmask, x;
14264 dest = operands[0];
14265 scratch = operands[1];
14268 nmask = operands[4];
14269 mask = operands[5];
14271 mode = GET_MODE (dest);
14272 vmode = GET_MODE (mask);
14274 if (rtx_equal_p (op0, op1))
14276 /* Shouldn't happen often (it's useless, obviously), but when it does
14277 we'd generate incorrect code if we continue below. */
14278 emit_move_insn (dest, op0);
14282 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
14284 gcc_assert (REGNO (op1) == REGNO (scratch));
14286 x = gen_rtx_AND (vmode, scratch, mask);
14287 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14290 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14291 x = gen_rtx_NOT (vmode, dest);
14292 x = gen_rtx_AND (vmode, x, op0);
14293 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14297 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
14299 x = gen_rtx_AND (vmode, scratch, mask);
14301 else /* alternative 2,4 */
14303 gcc_assert (REGNO (mask) == REGNO (scratch));
14304 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
14305 x = gen_rtx_AND (vmode, scratch, op1);
14307 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14309 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
14311 dest = simplify_gen_subreg (vmode, op0, mode, 0);
14312 x = gen_rtx_AND (vmode, dest, nmask);
14314 else /* alternative 3,4 */
14316 gcc_assert (REGNO (nmask) == REGNO (dest));
14318 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14319 x = gen_rtx_AND (vmode, dest, op0);
14321 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14324 x = gen_rtx_IOR (vmode, dest, scratch);
14325 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14328 /* Return TRUE or FALSE depending on whether the first SET in INSN
14329 has source and destination with matching CC modes, and that the
14330 CC mode is at least as constrained as REQ_MODE. */
14333 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
14336 enum machine_mode set_mode;
14338 set = PATTERN (insn);
14339 if (GET_CODE (set) == PARALLEL)
14340 set = XVECEXP (set, 0, 0);
14341 gcc_assert (GET_CODE (set) == SET);
14342 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
14344 set_mode = GET_MODE (SET_DEST (set));
/* NOTE(review): the switch over SET_MODE's cases is elided in this
   extract; the checks below implement the per-mode containment rules.  */
14348 if (req_mode != CCNOmode
14349 && (req_mode != CCmode
14350 || XEXP (SET_SRC (set), 1) != const0_rtx))
14354 if (req_mode == CCGCmode)
14358 if (req_mode == CCGOCmode || req_mode == CCNOmode)
14362 if (req_mode == CCZmode)
14373 gcc_unreachable ();
14376 return (GET_MODE (SET_SRC (set)) == set_mode);
14379 /* Generate insn patterns to do an integer compare of OPERANDS. */
14382 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14384 enum machine_mode cmpmode;
14387 cmpmode = SELECT_CC_MODE (code, op0, op1);
14388 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14390 /* This is very simple, but making the interface the same as in the
14391 FP case makes the rest of the code easier. */
14392 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14393 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14395 /* Return the test that should be put into the flags user, i.e.
14396 the bcc, scc, or cmov instruction. */
14397 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
14400 /* Figure out whether to use ordered or unordered fp comparisons.
14401 Return the appropriate mode to use. */
14404 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14406 /* ??? In order to make all comparisons reversible, we do all comparisons
14407 non-trapping when compiling for IEEE. Once gcc is able to distinguish
14408 all forms trapping and nontrapping comparisons, we can make inequality
14409 comparisons trapping again, since it results in better code when using
14410 FCOM based compares. */
14411 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the condition-code mode to use when comparing OP0 and OP1
   with comparison CODE.  */
14415 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14417 enum machine_mode mode = GET_MODE (op0);
14419 if (SCALAR_FLOAT_MODE_P (mode))
14421 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14422 return ix86_fp_compare_mode (code);
14427 /* Only zero flag is needed. */
14428 case EQ: /* ZF=0 */
14429 case NE: /* ZF!=0 */
14431 /* Codes needing carry flag. */
14432 case GEU: /* CF=0 */
14433 case LTU: /* CF=1 */
14434 /* Detect overflow checks. They need just the carry flag. */
14435 if (GET_CODE (op0) == PLUS
14436 && rtx_equal_p (op1, XEXP (op0, 0)))
14440 case GTU: /* CF=0 & ZF=0 */
14441 case LEU: /* CF=1 | ZF=1 */
14442 /* Detect overflow checks. They need just the carry flag. */
14443 if (GET_CODE (op0) == MINUS
14444 && rtx_equal_p (op1, XEXP (op0, 0)))
14448 /* Codes possibly doable only with sign flag when
14449 comparing against zero. */
14450 case GE: /* SF=OF or SF=0 */
14451 case LT: /* SF<>OF or SF=1 */
14452 if (op1 == const0_rtx)
14455 /* For other cases Carry flag is not required. */
14457 /* Codes doable only with sign flag when comparing
14458 against zero, but we miss jump instruction for it
14459 so we need to use relational tests against overflow
14460 that thus needs to be zero. */
14461 case GT: /* ZF=0 & SF=OF */
14462 case LE: /* ZF=1 | SF<>OF */
14463 if (op1 == const0_rtx)
14467 /* strcmp pattern does (use flags) and combine may ask us for proper
14472 gcc_unreachable ();
14476 /* Return the fixed registers used for condition codes. */
14479 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
/* NOTE(review): body elided in this extract; presumably stores the flags
   register number(s) through P1/P2 -- confirm in full source.  */
14486 /* If two condition code modes are compatible, return a condition code
14487 mode which is compatible with both. Otherwise, return
   VOIDmode (tail of sentence elided in this extract).  */
14490 static enum machine_mode
14491 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
14496 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC are mutually compatible.  */
14499 if ((m1 == CCGCmode && m2 == CCGOCmode)
14500 || (m1 == CCGOCmode && m2 == CCGCmode))
14506 gcc_unreachable ();
14536 /* These are only compatible with themselves, which we already
14543 /* Return a comparison we can do and that it is equivalent to
14544 swap_condition (code) apart possibly from orderedness.
14545 But, never change orderedness if TARGET_IEEE_FP, returning
14546 UNKNOWN in that case if necessary. */
14548 static enum rtx_code
14549 ix86_fp_swap_condition (enum rtx_code code)
/* These four swaps would flip ordered <-> unordered, so they are only
   legal when not honoring IEEE NaN semantics.  */
14553 case GT: /* GTU - CF=0 & ZF=0 */
14554 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
14555 case GE: /* GEU - CF=0 */
14556 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
14557 case UNLT: /* LTU - CF=1 */
14558 return TARGET_IEEE_FP ? UNKNOWN : GT;
14559 case UNLE: /* LEU - CF=1 | ZF=1 */
14560 return TARGET_IEEE_FP ? UNKNOWN : GE;
14562 return swap_condition (code);
14566 /* Return cost of comparison CODE using the best strategy for performance.
14567 All following functions do use number of instructions as a cost metric.
14568 In future this should be tweaked to compute bytes for optimize_size and
14569 take into account performance of various instructions on various CPUs. */
14572 ix86_fp_comparison_cost (enum rtx_code code)
14576 /* The cost of code using bit-twiddling on %ah. */
/* NOTE(review): the switch over CODE assigning ARITH_COST is mostly
   elided in this extract.  */
14593 arith_cost = TARGET_IEEE_FP ? 5 : 4;
14597 arith_cost = TARGET_IEEE_FP ? 6 : 4;
14600 gcc_unreachable ();
14603 switch (ix86_fp_comparison_strategy (code))
14605 case IX86_FPCMP_COMI:
14606 return arith_cost > 4 ? 3 : 2;
14607 case IX86_FPCMP_SAHF:
14608 return arith_cost > 4 ? 4 : 3;
14614 /* Return strategy to use for floating-point. We assume that fcomi is always
14615 preferable where available, since that is also true when looking at size
14616 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
14618 enum ix86_fpcmp_strategy
14619 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
14621 /* Do fcomi/sahf based test when profitable. */
14624 return IX86_FPCMP_COMI;
14626 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
14627 return IX86_FPCMP_SAHF;
/* Fallback: fnstsw + arithmetic on %ah.  */
14629 return IX86_FPCMP_ARITH;
14632 /* Swap, force into registers, or otherwise massage the two operands
14633 to a fp comparison. The operands are updated in place; the new
14634 comparison code is returned. */
14636 static enum rtx_code
14637 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14639 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14640 rtx op0 = *pop0, op1 = *pop1;
14641 enum machine_mode op_mode = GET_MODE (op0);
14642 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14644 /* All of the unordered compare instructions only work on registers.
14645 The same is true of the fcomi compare instructions. The XFmode
14646 compare instructions require registers except when comparing
14647 against zero or when converting operand 1 from fixed point to
14651 && (fpcmp_mode == CCFPUmode
14652 || (op_mode == XFmode
14653 && ! (standard_80387_constant_p (op0) == 1
14654 || standard_80387_constant_p (op1) == 1)
14655 && GET_CODE (op1) != FLOAT)
14656 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
14658 op0 = force_reg (op_mode, op0);
14659 op1 = force_reg (op_mode, op1);
14663 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14664 things around if they appear profitable, otherwise force op0
14665 into a register. */
14667 if (standard_80387_constant_p (op0) == 0
14669 && ! (standard_80387_constant_p (op1) == 0
/* Swapping is only legal if an equivalent swapped condition exists.  */
14672 enum rtx_code new_code = ix86_fp_swap_condition (code);
14673 if (new_code != UNKNOWN)
14676 tmp = op0, op0 = op1, op1 = tmp;
14682 op0 = force_reg (op_mode, op0);
14684 if (CONSTANT_P (op1))
14686 int tmp = standard_80387_constant_p (op1);
/* Non-standard x87 constants must be spilled to the constant pool.  */
14688 op1 = validize_mem (force_const_mem (op_mode, op1));
14692 op1 = force_reg (op_mode, op1);
14695 op1 = force_reg (op_mode, op1);
14699 /* Try to rearrange the comparison to make it cheaper. */
14700 if (ix86_fp_comparison_cost (code)
14701 > ix86_fp_comparison_cost (swap_condition (code))
14702 && (REG_P (op1) || can_create_pseudo_p ()))
14705 tmp = op0, op0 = op1, op1 = tmp;
14706 code = swap_condition (code);
14708 op0 = force_reg (op_mode, op0);
14716 /* Convert comparison codes we use to represent FP comparison to integer
14717 code that will result in proper branch. Return UNKNOWN if no such code
   is available (tail of sentence elided in this extract).  */
14721 ix86_fp_compare_code_to_integer (enum rtx_code code)
/* NOTE(review): function body (a switch over CODE) is elided in this
   extract.  */
14750 /* Generate insn patterns to do a floating point compare of OPERANDS. */
14753 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
14755 enum machine_mode fpcmp_mode, intcmp_mode;
14758 fpcmp_mode = ix86_fp_compare_mode (code);
14759 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14761 /* Do fcomi/sahf based test when profitable. */
14762 switch (ix86_fp_comparison_strategy (code))
14764 case IX86_FPCMP_COMI:
14765 intcmp_mode = fpcmp_mode;
14766 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14767 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14772 case IX86_FPCMP_SAHF:
14773 intcmp_mode = fpcmp_mode;
14774 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14775 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14779 scratch = gen_reg_rtx (HImode);
14780 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14781 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14784 case IX86_FPCMP_ARITH:
14785 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14786 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14787 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14789 scratch = gen_reg_rtx (HImode);
14790 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14792 /* In the unordered case, we have to check C2 for NaN's, which
14793 doesn't happen to work out to anything nice combination-wise.
14794 So do some bit twiddling on the value we've got in AH to come
14795 up with an appropriate set of condition codes. */
/* x87 status bits as seen in AH after fnstsw: 0x01 = C0 (below),
   0x04 = C2 (unordered), 0x40 = C3 (equal); 0x45 = C0|C2|C3.  */
14797 intcmp_mode = CCNOmode;
14802 if (code == GT || !TARGET_IEEE_FP)
14804 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14809 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14810 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14811 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14812 intcmp_mode = CCmode;
14818 if (code == LT && TARGET_IEEE_FP)
14820 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14821 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14822 intcmp_mode = CCmode;
14827 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14833 if (code == GE || !TARGET_IEEE_FP)
14835 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14840 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14841 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14848 if (code == LE && TARGET_IEEE_FP)
14850 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14851 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14852 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14853 intcmp_mode = CCmode;
14858 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14864 if (code == EQ && TARGET_IEEE_FP)
14866 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14867 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14868 intcmp_mode = CCmode;
14873 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14880 if (code == NE && TARGET_IEEE_FP)
14882 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14883 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14889 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14895 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14899 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14904 gcc_unreachable ();
14912 /* Return the test that should be put into the flags user, i.e.
14913 the bcc, scc, or cmov instruction. */
14914 return gen_rtx_fmt_ee (code, VOIDmode,
14915 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the pending comparison (ix86_compare_op0/op1) with CODE and
   return the flags-test rtx for the bcc/scc/cmov consumer.  */
14920 ix86_expand_compare (enum rtx_code code)
14923 op0 = ix86_compare_op0;
14924 op1 = ix86_compare_op1;
/* Already a CC-mode value: no compare insn needed, just wrap it.  */
14926 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14927 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14929 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14931 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14932 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
14935 ret = ix86_expand_int_compare (code, op0, op1);
/* Emit a conditional branch to LABEL based on the pending comparison in
   ix86_compare_op0/op1; double-word (DImode on 32-bit, TImode on 64-bit)
   compares are split into word-sized compare+branch sequences.  */
14941 ix86_expand_branch (enum rtx_code code, rtx label)
14945 switch (GET_MODE (ix86_compare_op0))
14954 tmp = ix86_expand_compare (code);
14955 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14956 gen_rtx_LABEL_REF (VOIDmode, label),
14958 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14965 /* Expand DImode branch into multiple compare+branch. */
14967 rtx lo[2], hi[2], label2;
14968 enum rtx_code code1, code2, code3;
14969 enum machine_mode submode;
/* Canonicalize: constant goes on the right.  */
14971 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14973 tmp = ix86_compare_op0;
14974 ix86_compare_op0 = ix86_compare_op1;
14975 ix86_compare_op1 = tmp;
14976 code = swap_condition (code);
14978 if (GET_MODE (ix86_compare_op0) == DImode)
14980 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14981 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14986 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14987 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14991 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14992 avoid two branches. This costs one extra insn, so disable when
14993 optimizing for size. */
14995 if ((code == EQ || code == NE)
14996 && (!optimize_insn_for_size_p ()
14997 || hi[1] == const0_rtx || lo[1] == const0_rtx))
15002 if (hi[1] != const0_rtx)
15003 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
15004 NULL_RTX, 0, OPTAB_WIDEN);
15007 if (lo[1] != const0_rtx)
15008 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
15009 NULL_RTX, 0, OPTAB_WIDEN);
15011 tmp = expand_binop (submode, ior_optab, xor1, xor0,
15012 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse with the single-word OR-of-XORs compared against zero.  */
15014 ix86_compare_op0 = tmp;
15015 ix86_compare_op1 = const0_rtx;
15016 ix86_expand_branch (code, label);
15020 /* Otherwise, if we are doing less-than or greater-or-equal-than,
15021 op1 is a constant and the low word is zero, then we can just
15022 examine the high word. Similarly for low word -1 and
15023 less-or-equal-than or greater-than. */
15025 if (CONST_INT_P (hi[1]))
15028 case LT: case LTU: case GE: case GEU:
15029 if (lo[1] == const0_rtx)
15031 ix86_compare_op0 = hi[0];
15032 ix86_compare_op1 = hi[1];
15033 ix86_expand_branch (code, label);
15037 case LE: case LEU: case GT: case GTU:
15038 if (lo[1] == constm1_rtx)
15040 ix86_compare_op0 = hi[0];
15041 ix86_compare_op1 = hi[1];
15042 ix86_expand_branch (code, label);
15050 /* Otherwise, we need two or three jumps. */
15052 label2 = gen_label_rtx ();
15055 code2 = swap_condition (code);
15056 code3 = unsigned_condition (code);
15060 case LT: case GT: case LTU: case GTU:
15063 case LE: code1 = LT; code2 = GT; break;
15064 case GE: code1 = GT; code2 = LT; break;
15065 case LEU: code1 = LTU; code2 = GTU; break;
15066 case GEU: code1 = GTU; code2 = LTU; break;
15068 case EQ: code1 = UNKNOWN; code2 = NE; break;
15069 case NE: code2 = UNKNOWN; break;
15072 gcc_unreachable ();
15077 * if (hi(a) < hi(b)) goto true;
15078 * if (hi(a) > hi(b)) goto false;
15079 * if (lo(a) < lo(b)) goto true;
15083 ix86_compare_op0 = hi[0];
15084 ix86_compare_op1 = hi[1];
15086 if (code1 != UNKNOWN)
15087 ix86_expand_branch (code1, label);
15088 if (code2 != UNKNOWN)
15089 ix86_expand_branch (code2, label2);
/* The low words are always compared unsigned.  */
15091 ix86_compare_op0 = lo[0];
15092 ix86_compare_op1 = lo[1];
15093 ix86_expand_branch (code3, label);
15095 if (code2 != UNKNOWN)
15096 emit_label (label2);
15101 /* If we have already emitted a compare insn, go straight to simple.
15102 ix86_expand_compare won't emit anything if ix86_compare_emitted
15104 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
15109 /* Split branch based on floating point condition. */
15111 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
15112 rtx target1, rtx target2, rtx tmp, rtx pushed)
15117 if (target2 != pc_rtx)
15120 code = reverse_condition_maybe_unordered (code);
15125 condition = ix86_expand_fp_compare (code, op1, op2,
15128 /* Remove pushed operand from stack. */
15130 ix86_free_from_memory (GET_MODE (pushed));
15132 i = emit_jump_insn (gen_rtx_SET
15134 gen_rtx_IF_THEN_ELSE (VOIDmode,
15135 condition, target1, target2)));
15136 if (split_branch_probability >= 0)
15137 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
15141 ix86_expand_setcc (enum rtx_code code, rtx dest)
15145 gcc_assert (GET_MODE (dest) == QImode);
15147 ret = ix86_expand_compare (code);
15148 PUT_MODE (ret, QImode);
15149 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
15152 /* Expand comparison setting or clearing carry flag. Return true when
15153 successful and set pop for the operation. */
15155 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
15157 enum machine_mode mode =
15158 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15160 /* Do not handle DImode compares that go through special path. */
15161 if (mode == (TARGET_64BIT ? TImode : DImode))
15164 if (SCALAR_FLOAT_MODE_P (mode))
15166 rtx compare_op, compare_seq;
15168 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15170 /* Shortcut: following common codes never translate
15171 into carry flag compares. */
15172 if (code == EQ || code == NE || code == UNEQ || code == LTGT
15173 || code == ORDERED || code == UNORDERED)
15176 /* These comparisons require zero flag; swap operands so they won't. */
15177 if ((code == GT || code == UNLE || code == LE || code == UNGT)
15178 && !TARGET_IEEE_FP)
15183 code = swap_condition (code);
15186 /* Try to expand the comparison and verify that we end up with
15187 carry flag based comparison. This fails to be true only when
15188 we decide to expand comparison using arithmetic that is not
15189 too common scenario. */
15191 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
15192 compare_seq = get_insns ();
15195 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15196 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15197 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15199 code = GET_CODE (compare_op);
15201 if (code != LTU && code != GEU)
15204 emit_insn (compare_seq);
15209 if (!INTEGRAL_MODE_P (mode))
15218 /* Convert a==0 into (unsigned)a<1. */
15221 if (op1 != const0_rtx)
15224 code = (code == EQ ? LTU : GEU);
15227 /* Convert a>b into b<a or a>=b-1. */
15230 if (CONST_INT_P (op1))
15232 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15233 /* Bail out on overflow. We still can swap operands but that
15234 would force loading of the constant into register. */
15235 if (op1 == const0_rtx
15236 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15238 code = (code == GTU ? GEU : LTU);
15245 code = (code == GTU ? LTU : GEU);
15249 /* Convert a>=0 into (unsigned)a<0x80000000. */
15252 if (mode == DImode || op1 != const0_rtx)
15254 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15255 code = (code == LT ? GEU : LTU);
15259 if (mode == DImode || op1 != constm1_rtx)
15261 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15262 code = (code == LE ? GEU : LTU);
15268 /* Swapping operands may cause constant to appear as first operand. */
15269 if (!nonimmediate_operand (op0, VOIDmode))
15271 if (!can_create_pseudo_p ())
15273 op0 = force_reg (mode, op0);
15275 ix86_compare_op0 = op0;
15276 ix86_compare_op1 = op1;
15277 *pop = ix86_expand_compare (code);
15278 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
15283 ix86_expand_int_movcc (rtx operands[])
15285 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15286 rtx compare_seq, compare_op;
15287 enum machine_mode mode = GET_MODE (operands[0]);
15288 bool sign_bit_compare_p = false;;
15291 ix86_compare_op0 = XEXP (operands[1], 0);
15292 ix86_compare_op1 = XEXP (operands[1], 1);
15293 compare_op = ix86_expand_compare (code);
15294 compare_seq = get_insns ();
15297 compare_code = GET_CODE (compare_op);
15299 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15300 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15301 sign_bit_compare_p = true;
15303 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15304 HImode insns, we'd be swallowed in word prefix ops. */
15306 if ((mode != HImode || TARGET_FAST_PREFIX)
15307 && (mode != (TARGET_64BIT ? TImode : DImode))
15308 && CONST_INT_P (operands[2])
15309 && CONST_INT_P (operands[3]))
15311 rtx out = operands[0];
15312 HOST_WIDE_INT ct = INTVAL (operands[2]);
15313 HOST_WIDE_INT cf = INTVAL (operands[3]);
15314 HOST_WIDE_INT diff;
15317 /* Sign bit compares are better done using shifts than we do by using
15319 if (sign_bit_compare_p
15320 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15321 ix86_compare_op1, &compare_op))
15323 /* Detect overlap between destination and compare sources. */
15326 if (!sign_bit_compare_p)
15328 bool fpcmp = false;
15330 compare_code = GET_CODE (compare_op);
15332 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15333 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15336 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15339 /* To simplify rest of code, restrict to the GEU case. */
15340 if (compare_code == LTU)
15342 HOST_WIDE_INT tmp = ct;
15345 compare_code = reverse_condition (compare_code);
15346 code = reverse_condition (code);
15351 PUT_CODE (compare_op,
15352 reverse_condition_maybe_unordered
15353 (GET_CODE (compare_op)));
15355 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15359 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15360 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15361 tmp = gen_reg_rtx (mode);
15363 if (mode == DImode)
15364 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15366 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15370 if (code == GT || code == GE)
15371 code = reverse_condition (code);
15374 HOST_WIDE_INT tmp = ct;
15379 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15380 ix86_compare_op1, VOIDmode, 0, -1);
15393 tmp = expand_simple_binop (mode, PLUS,
15395 copy_rtx (tmp), 1, OPTAB_DIRECT);
15406 tmp = expand_simple_binop (mode, IOR,
15408 copy_rtx (tmp), 1, OPTAB_DIRECT);
15410 else if (diff == -1 && ct)
15420 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15422 tmp = expand_simple_binop (mode, PLUS,
15423 copy_rtx (tmp), GEN_INT (cf),
15424 copy_rtx (tmp), 1, OPTAB_DIRECT);
15432 * andl cf - ct, dest
15442 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15445 tmp = expand_simple_binop (mode, AND,
15447 gen_int_mode (cf - ct, mode),
15448 copy_rtx (tmp), 1, OPTAB_DIRECT);
15450 tmp = expand_simple_binop (mode, PLUS,
15451 copy_rtx (tmp), GEN_INT (ct),
15452 copy_rtx (tmp), 1, OPTAB_DIRECT);
15455 if (!rtx_equal_p (tmp, out))
15456 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15458 return 1; /* DONE */
15463 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15466 tmp = ct, ct = cf, cf = tmp;
15469 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15471 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15473 /* We may be reversing unordered compare to normal compare, that
15474 is not valid in general (we may convert non-trapping condition
15475 to trapping one), however on i386 we currently emit all
15476 comparisons unordered. */
15477 compare_code = reverse_condition_maybe_unordered (compare_code);
15478 code = reverse_condition_maybe_unordered (code);
15482 compare_code = reverse_condition (compare_code);
15483 code = reverse_condition (code);
15487 compare_code = UNKNOWN;
15488 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15489 && CONST_INT_P (ix86_compare_op1))
15491 if (ix86_compare_op1 == const0_rtx
15492 && (code == LT || code == GE))
15493 compare_code = code;
15494 else if (ix86_compare_op1 == constm1_rtx)
15498 else if (code == GT)
15503 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15504 if (compare_code != UNKNOWN
15505 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15506 && (cf == -1 || ct == -1))
15508 /* If lea code below could be used, only optimize
15509 if it results in a 2 insn sequence. */
15511 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15512 || diff == 3 || diff == 5 || diff == 9)
15513 || (compare_code == LT && ct == -1)
15514 || (compare_code == GE && cf == -1))
15517 * notl op1 (if necessary)
15525 code = reverse_condition (code);
15528 out = emit_store_flag (out, code, ix86_compare_op0,
15529 ix86_compare_op1, VOIDmode, 0, -1);
15531 out = expand_simple_binop (mode, IOR,
15533 out, 1, OPTAB_DIRECT);
15534 if (out != operands[0])
15535 emit_move_insn (operands[0], out);
15537 return 1; /* DONE */
15542 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15543 || diff == 3 || diff == 5 || diff == 9)
15544 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15546 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15552 * lea cf(dest*(ct-cf)),dest
15556 * This also catches the degenerate setcc-only case.
15562 out = emit_store_flag (out, code, ix86_compare_op0,
15563 ix86_compare_op1, VOIDmode, 0, 1);
15566 /* On x86_64 the lea instruction operates on Pmode, so we need
15567 to get arithmetics done in proper mode to match. */
15569 tmp = copy_rtx (out);
15573 out1 = copy_rtx (out);
15574 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15578 tmp = gen_rtx_PLUS (mode, tmp, out1);
15584 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15587 if (!rtx_equal_p (tmp, out))
15590 out = force_operand (tmp, copy_rtx (out));
15592 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15594 if (!rtx_equal_p (out, operands[0]))
15595 emit_move_insn (operands[0], copy_rtx (out));
15597 return 1; /* DONE */
15601 * General case: Jumpful:
15602 * xorl dest,dest cmpl op1, op2
15603 * cmpl op1, op2 movl ct, dest
15604 * setcc dest jcc 1f
15605 * decl dest movl cf, dest
15606 * andl (cf-ct),dest 1:
15609 * Size 20. Size 14.
15611 * This is reasonably steep, but branch mispredict costs are
15612 * high on modern cpus, so consider failing only if optimizing
15616 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15617 && BRANCH_COST (optimize_insn_for_speed_p (),
15622 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15627 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15629 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15631 /* We may be reversing unordered compare to normal compare,
15632 that is not valid in general (we may convert non-trapping
15633 condition to trapping one), however on i386 we currently
15634 emit all comparisons unordered. */
15635 code = reverse_condition_maybe_unordered (code);
15639 code = reverse_condition (code);
15640 if (compare_code != UNKNOWN)
15641 compare_code = reverse_condition (compare_code);
15645 if (compare_code != UNKNOWN)
15647 /* notl op1 (if needed)
15652 For x < 0 (resp. x <= -1) there will be no notl,
15653 so if possible swap the constants to get rid of the
15655 True/false will be -1/0 while code below (store flag
15656 followed by decrement) is 0/-1, so the constants need
15657 to be exchanged once more. */
15659 if (compare_code == GE || !cf)
15661 code = reverse_condition (code);
15666 HOST_WIDE_INT tmp = cf;
15671 out = emit_store_flag (out, code, ix86_compare_op0,
15672 ix86_compare_op1, VOIDmode, 0, -1);
15676 out = emit_store_flag (out, code, ix86_compare_op0,
15677 ix86_compare_op1, VOIDmode, 0, 1);
15679 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15680 copy_rtx (out), 1, OPTAB_DIRECT);
15683 out = expand_simple_binop (mode, AND, copy_rtx (out),
15684 gen_int_mode (cf - ct, mode),
15685 copy_rtx (out), 1, OPTAB_DIRECT);
15687 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15688 copy_rtx (out), 1, OPTAB_DIRECT);
15689 if (!rtx_equal_p (out, operands[0]))
15690 emit_move_insn (operands[0], copy_rtx (out));
15692 return 1; /* DONE */
15696 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15698 /* Try a few things more with specific constants and a variable. */
15701 rtx var, orig_out, out, tmp;
15703 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15704 return 0; /* FAIL */
15706 /* If one of the two operands is an interesting constant, load a
15707 constant with the above and mask it in with a logical operation. */
15709 if (CONST_INT_P (operands[2]))
15712 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15713 operands[3] = constm1_rtx, op = and_optab;
15714 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15715 operands[3] = const0_rtx, op = ior_optab;
15717 return 0; /* FAIL */
15719 else if (CONST_INT_P (operands[3]))
15722 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15723 operands[2] = constm1_rtx, op = and_optab;
15724 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
15725 operands[2] = const0_rtx, op = ior_optab;
15727 return 0; /* FAIL */
15730 return 0; /* FAIL */
15732 orig_out = operands[0];
15733 tmp = gen_reg_rtx (mode);
15736 /* Recurse to get the constant loaded. */
15737 if (ix86_expand_int_movcc (operands) == 0)
15738 return 0; /* FAIL */
15740 /* Mask in the interesting variable. */
15741 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15743 if (!rtx_equal_p (out, orig_out))
15744 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15746 return 1; /* DONE */
15750 * For comparison with above,
15760 if (! nonimmediate_operand (operands[2], mode))
15761 operands[2] = force_reg (mode, operands[2]);
15762 if (! nonimmediate_operand (operands[3], mode))
15763 operands[3] = force_reg (mode, operands[3]);
15765 if (! register_operand (operands[2], VOIDmode)
15767 || ! register_operand (operands[3], VOIDmode)))
15768 operands[2] = force_reg (mode, operands[2]);
15771 && ! register_operand (operands[3], VOIDmode))
15772 operands[3] = force_reg (mode, operands[3]);
15774 emit_insn (compare_seq);
15775 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15776 gen_rtx_IF_THEN_ELSE (mode,
15777 compare_op, operands[2],
15780 return 1; /* DONE */
15783 /* Swap, force into registers, or otherwise massage the two operands
15784 to an sse comparison with a mask result. Thus we differ a bit from
15785 ix86_prepare_fp_compare_args which expects to produce a flags result.
15787 The DEST operand exists to help determine whether to commute commutative
15788 operators. The POP0/POP1 operands are updated in place. The new
15789 comparison code is returned, or UNKNOWN if not implementable. */
15791 static enum rtx_code
15792 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15793 rtx *pop0, rtx *pop1)
15801 /* We have no LTGT as an operator. We could implement it with
15802 NE & ORDERED, but this requires an extra temporary. It's
15803 not clear that it's worth it. */
15810 /* These are supported directly. */
15817 /* For commutative operators, try to canonicalize the destination
15818 operand to be first in the comparison - this helps reload to
15819 avoid extra moves. */
15820 if (!dest || !rtx_equal_p (dest, *pop1))
15828 /* These are not supported directly. Swap the comparison operands
15829 to transform into something that is supported. */
15833 code = swap_condition (code);
15837 gcc_unreachable ();
15843 /* Detect conditional moves that exactly match min/max operational
15844 semantics. Note that this is IEEE safe, as long as we don't
15845 interchange the operands.
15847 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15848 and TRUE if the operation is successful and instructions are emitted. */
15851 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15852 rtx cmp_op1, rtx if_true, rtx if_false)
15854 enum machine_mode mode;
15860 else if (code == UNGE)
15863 if_true = if_false;
15869 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15871 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15876 mode = GET_MODE (dest);
15878 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15879 but MODE may be a vector mode and thus not appropriate. */
15880 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
15882 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15885 if_true = force_reg (mode, if_true);
15886 v = gen_rtvec (2, if_true, if_false);
15887 tmp = gen_rtx_UNSPEC (mode, v, u);
15891 code = is_min ? SMIN : SMAX;
15892 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15895 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15899 /* Expand an sse vector comparison. Return the register with the result. */
15902 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15903 rtx op_true, rtx op_false)
15905 enum machine_mode mode = GET_MODE (dest);
15908 cmp_op0 = force_reg (mode, cmp_op0);
15909 if (!nonimmediate_operand (cmp_op1, mode))
15910 cmp_op1 = force_reg (mode, cmp_op1);
15913 || reg_overlap_mentioned_p (dest, op_true)
15914 || reg_overlap_mentioned_p (dest, op_false))
15915 dest = gen_reg_rtx (mode);
15917 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15918 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15923 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15924 operations. This is used for both scalar and vector conditional moves. */
15927 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15929 enum machine_mode mode = GET_MODE (dest);
15932 if (op_false == CONST0_RTX (mode))
15934 op_true = force_reg (mode, op_true);
15935 x = gen_rtx_AND (mode, cmp, op_true);
15936 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15938 else if (op_true == CONST0_RTX (mode))
15940 op_false = force_reg (mode, op_false);
15941 x = gen_rtx_NOT (mode, cmp);
15942 x = gen_rtx_AND (mode, x, op_false);
15943 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15945 else if (TARGET_SSE5)
15947 rtx pcmov = gen_rtx_SET (mode, dest,
15948 gen_rtx_IF_THEN_ELSE (mode, cmp,
15955 op_true = force_reg (mode, op_true);
15956 op_false = force_reg (mode, op_false);
15958 t2 = gen_reg_rtx (mode);
15960 t3 = gen_reg_rtx (mode);
15964 x = gen_rtx_AND (mode, op_true, cmp);
15965 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15967 x = gen_rtx_NOT (mode, cmp);
15968 x = gen_rtx_AND (mode, x, op_false);
15969 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15971 x = gen_rtx_IOR (mode, t3, t2);
15972 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15976 /* Expand a floating-point conditional move. Return true if successful. */
15979 ix86_expand_fp_movcc (rtx operands[])
15981 enum machine_mode mode = GET_MODE (operands[0]);
15982 enum rtx_code code = GET_CODE (operands[1]);
15983 rtx tmp, compare_op;
15985 ix86_compare_op0 = XEXP (operands[1], 0);
15986 ix86_compare_op1 = XEXP (operands[1], 1);
15987 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15989 enum machine_mode cmode;
15991 /* Since we've no cmove for sse registers, don't force bad register
15992 allocation just to gain access to it. Deny movcc when the
15993 comparison mode doesn't match the move mode. */
15994 cmode = GET_MODE (ix86_compare_op0);
15995 if (cmode == VOIDmode)
15996 cmode = GET_MODE (ix86_compare_op1);
16000 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16002 &ix86_compare_op1);
16003 if (code == UNKNOWN)
16006 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
16007 ix86_compare_op1, operands[2],
16011 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
16012 ix86_compare_op1, operands[2], operands[3]);
16013 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
16017 /* The floating point conditional move instructions don't directly
16018 support conditions resulting from a signed integer comparison. */
16020 compare_op = ix86_expand_compare (code);
16021 if (!fcmov_comparison_operator (compare_op, VOIDmode))
16023 tmp = gen_reg_rtx (QImode);
16024 ix86_expand_setcc (code, tmp);
16026 ix86_compare_op0 = tmp;
16027 ix86_compare_op1 = const0_rtx;
16028 compare_op = ix86_expand_compare (code);
16031 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16032 gen_rtx_IF_THEN_ELSE (mode, compare_op,
16033 operands[2], operands[3])));
16038 /* Expand a floating-point vector conditional move; a vcond operation
16039 rather than a movcc operation. */
16042 ix86_expand_fp_vcond (rtx operands[])
16044 enum rtx_code code = GET_CODE (operands[3]);
16047 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16048 &operands[4], &operands[5]);
16049 if (code == UNKNOWN)
16052 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
16053 operands[5], operands[1], operands[2]))
16056 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
16057 operands[1], operands[2]);
16058 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
16062 /* Expand a signed/unsigned integral vector conditional move. */
16065 ix86_expand_int_vcond (rtx operands[])
16067 enum machine_mode mode = GET_MODE (operands[0]);
16068 enum rtx_code code = GET_CODE (operands[3]);
16069 bool negate = false;
16072 cop0 = operands[4];
16073 cop1 = operands[5];
16075 /* SSE5 supports all of the comparisons on all vector int types. */
16078 /* Canonicalize the comparison to EQ, GT, GTU. */
16089 code = reverse_condition (code);
16095 code = reverse_condition (code);
16101 code = swap_condition (code);
16102 x = cop0, cop0 = cop1, cop1 = x;
16106 gcc_unreachable ();
16109 /* Only SSE4.1/SSE4.2 supports V2DImode. */
16110 if (mode == V2DImode)
16115 /* SSE4.1 supports EQ. */
16116 if (!TARGET_SSE4_1)
16122 /* SSE4.2 supports GT/GTU. */
16123 if (!TARGET_SSE4_2)
16128 gcc_unreachable ();
16132 /* Unsigned parallel compare is not supported by the hardware. Play some
16133 tricks to turn this into a signed comparison against 0. */
16136 cop0 = force_reg (mode, cop0);
16145 /* Perform a parallel modulo subtraction. */
16146 t1 = gen_reg_rtx (mode);
16147 emit_insn ((mode == V4SImode
16149 : gen_subv2di3) (t1, cop0, cop1));
16151 /* Extract the original sign bit of op0. */
16152 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16154 t2 = gen_reg_rtx (mode);
16155 emit_insn ((mode == V4SImode
16157 : gen_andv2di3) (t2, cop0, mask));
16159 /* XOR it back into the result of the subtraction. This results
16160 in the sign bit set iff we saw unsigned underflow. */
16161 x = gen_reg_rtx (mode);
16162 emit_insn ((mode == V4SImode
16164 : gen_xorv2di3) (x, t1, t2));
16172 /* Perform a parallel unsigned saturating subtraction. */
16173 x = gen_reg_rtx (mode);
16174 emit_insn (gen_rtx_SET (VOIDmode, x,
16175 gen_rtx_US_MINUS (mode, cop0, cop1)));
16182 gcc_unreachable ();
16186 cop1 = CONST0_RTX (mode);
16190 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16191 operands[1+negate], operands[2-negate]);
16193 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16194 operands[2-negate]);
16198 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16199 true if we should do zero extension, else sign extension. HIGH_P is
16200 true if we want the N/2 high elements, else the low elements. */
16203 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16205 enum machine_mode imode = GET_MODE (operands[1]);
16206 rtx (*unpack)(rtx, rtx, rtx);
16213 unpack = gen_vec_interleave_highv16qi;
16215 unpack = gen_vec_interleave_lowv16qi;
16219 unpack = gen_vec_interleave_highv8hi;
16221 unpack = gen_vec_interleave_lowv8hi;
16225 unpack = gen_vec_interleave_highv4si;
16227 unpack = gen_vec_interleave_lowv4si;
16230 gcc_unreachable ();
16233 dest = gen_lowpart (imode, operands[0]);
16236 se = force_reg (imode, CONST0_RTX (imode));
16238 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16239 operands[1], pc_rtx, pc_rtx);
16241 emit_insn (unpack (dest, operands[1], se));
16244 /* This function performs the same task as ix86_expand_sse_unpack,
16245 but with SSE4.1 instructions. */
16248 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16250 enum machine_mode imode = GET_MODE (operands[1]);
16251 rtx (*unpack)(rtx, rtx);
16258 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16260 unpack = gen_sse4_1_extendv8qiv8hi2;
16264 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16266 unpack = gen_sse4_1_extendv4hiv4si2;
16270 unpack = gen_sse4_1_zero_extendv2siv2di2;
16272 unpack = gen_sse4_1_extendv2siv2di2;
16275 gcc_unreachable ();
16278 dest = operands[0];
16281 /* Shift higher 8 bytes to lower 8 bytes. */
16282 src = gen_reg_rtx (imode);
16283 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16284 gen_lowpart (TImode, operands[1]),
16290 emit_insn (unpack (dest, src));
16293 /* This function performs the same task as ix86_expand_sse_unpack,
16294 but with sse5 instructions. */
16297 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16299 enum machine_mode imode = GET_MODE (operands[1]);
16300 int pperm_bytes[16];
16302 int h = (high_p) ? 8 : 0;
16305 rtvec v = rtvec_alloc (16);
16308 rtx op0 = operands[0], op1 = operands[1];
16313 vs = rtvec_alloc (8);
16314 h2 = (high_p) ? 8 : 0;
16315 for (i = 0; i < 8; i++)
16317 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16318 pperm_bytes[2*i+1] = ((unsigned_p)
16320 : PPERM_SIGN | PPERM_SRC2 | i | h);
16323 for (i = 0; i < 16; i++)
16324 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16326 for (i = 0; i < 8; i++)
16327 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16329 p = gen_rtx_PARALLEL (VOIDmode, vs);
16330 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16332 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16334 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
16338 vs = rtvec_alloc (4);
16339 h2 = (high_p) ? 4 : 0;
16340 for (i = 0; i < 4; i++)
16342 sign_extend = ((unsigned_p)
16344 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16345 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16346 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16347 pperm_bytes[4*i+2] = sign_extend;
16348 pperm_bytes[4*i+3] = sign_extend;
16351 for (i = 0; i < 16; i++)
16352 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16354 for (i = 0; i < 4; i++)
16355 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16357 p = gen_rtx_PARALLEL (VOIDmode, vs);
16358 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16360 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16362 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
16366 vs = rtvec_alloc (2);
16367 h2 = (high_p) ? 2 : 0;
16368 for (i = 0; i < 2; i++)
16370 sign_extend = ((unsigned_p)
16372 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16373 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16374 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16375 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16376 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16377 pperm_bytes[8*i+4] = sign_extend;
16378 pperm_bytes[8*i+5] = sign_extend;
16379 pperm_bytes[8*i+6] = sign_extend;
16380 pperm_bytes[8*i+7] = sign_extend;
16383 for (i = 0; i < 16; i++)
16384 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16386 for (i = 0; i < 2; i++)
16387 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16389 p = gen_rtx_PARALLEL (VOIDmode, vs);
16390 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16392 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16394 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16398 gcc_unreachable ();
16404 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16405 next narrower integer vector type */
16407 ix86_expand_sse5_pack (rtx operands[3])
16409 enum machine_mode imode = GET_MODE (operands[0]);
16410 int pperm_bytes[16];
16412 rtvec v = rtvec_alloc (16);
16414 rtx op0 = operands[0];
16415 rtx op1 = operands[1];
16416 rtx op2 = operands[2];
16421 for (i = 0; i < 8; i++)
16423 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16424 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16427 for (i = 0; i < 16; i++)
16428 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16430 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16431 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
16435 for (i = 0; i < 4; i++)
16437 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16438 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16439 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16440 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16443 for (i = 0; i < 16; i++)
16444 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16446 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16447 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
16451 for (i = 0; i < 2; i++)
16453 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16454 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16455 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16456 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16457 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16458 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16459 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16460 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16463 for (i = 0; i < 16; i++)
16464 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16466 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16467 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16471 gcc_unreachable ();
16477 /* Expand conditional increment or decrement using adc/sbb instructions.
16478 The default case using setcc followed by the conditional move can be
16479 done by generic code. */
16481 ix86_expand_int_addcc (rtx operands[])
16483 enum rtx_code code = GET_CODE (operands[1]);
16485 rtx val = const0_rtx;
16486 bool fpcmp = false;
16487 enum machine_mode mode = GET_MODE (operands[0]);
16489 ix86_compare_op0 = XEXP (operands[1], 0);
16490 ix86_compare_op1 = XEXP (operands[1], 1);
/* The carry-flag trick only works for +/-1 increments; anything else
   must be handled by the generic setcc + cmove path.  */
16491 if (operands[3] != const1_rtx
16492 && operands[3] != constm1_rtx)
16494 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16495 ix86_compare_op1, &compare_op))
16497 code = GET_CODE (compare_op);
/* FP compares leave their result in CCFP(U)mode; map the condition
   back to an integer condition before reversing it.  */
16499 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16500 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16503 code = ix86_fp_compare_code_to_integer (code)
16510 PUT_CODE (compare_op,
16511 reverse_condition_maybe_unordered
16512 (GET_CODE (compare_op)));
16514 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16516 PUT_MODE (compare_op, mode);
16518 /* Construct either adc or sbb insn. */
/* LTU consumes the carry flag directly: sbb when adding -1, adc when
   adding +1, one pattern per operand mode.  */
16519 if ((code == LTU) == (operands[3] == constm1_rtx))
16521 switch (GET_MODE (operands[0]))
16524 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16527 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16530 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16533 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16536 gcc_unreachable ();
16541 switch (GET_MODE (operands[0]))
16544 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16547 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16550 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16553 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16556 gcc_unreachable ();
16559 return 1; /* DONE */
16563 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16564 works for floating point parameters and nonoffsetable memories.
16565 For pushes, it returns just stack offsets; the values will be saved
16566 in the right order. Maximally three parts are generated. */
/* NOTE(review): the assert below allows size up to 4, which looks
   inconsistent with "maximally three parts" above — confirm whether the
   comment predates 4-part (TImode/TFmode) splitting.  */
16569 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Number of SImode (or, for 64-bit, DImode-based) pieces the value
   occupies; XFmode is special-cased to 3 words.  */
16574 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16576 size = (GET_MODE_SIZE (mode) + 4) / 8;
16578 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16579 gcc_assert (size >= 2 && size <= 4);
16581 /* Optimize constant pool reference to immediates. This is used by fp
16582 moves, that force all constants to memory to allow combining. */
16583 if (MEM_P (operand) && MEM_READONLY_P (operand))
16585 rtx tmp = maybe_get_pool_constant (operand);
16590 if (MEM_P (operand) && !offsettable_memref_p (operand))
16592 /* The only non-offsetable memories we handle are pushes. */
16593 int ok = push_operand (operand, VOIDmode);
/* For a push, all parts alias the same (pre-dec) address; the caller
   relies on emission order to store them correctly.  */
16597 operand = copy_rtx (operand);
16598 PUT_MODE (operand, Pmode);
16599 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16603 if (GET_CODE (operand) == CONST_VECTOR)
16605 enum machine_mode imode = int_mode_for_mode (mode);
16606 /* Caution: if we looked through a constant pool memory above,
16607 the operand may actually have a different mode now. That's
16608 ok, since we want to pun this all the way back to an integer. */
16609 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16610 gcc_assert (operand != NULL);
16616 if (mode == DImode)
16617 split_di (&operand, 1, &parts[0], &parts[1]);
16622 if (REG_P (operand))
/* Hard registers only: consecutive regnos hold consecutive words.  */
16624 gcc_assert (reload_completed);
16625 for (i = 0; i < size; i++)
16626 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16628 else if (offsettable_memref_p (operand))
16630 operand = adjust_address (operand, SImode, 0);
16631 parts[0] = operand;
16632 for (i = 1; i < size; i++)
16633 parts[i] = adjust_address (operand, SImode, 4 * i);
16635 else if (GET_CODE (operand) == CONST_DOUBLE)
/* Decompose an FP constant into its target-format words.  */
16640 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16644 real_to_target (l, &r, mode);
16645 parts[3] = gen_int_mode (l[3], SImode);
16646 parts[2] = gen_int_mode (l[2], SImode);
16649 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16650 parts[2] = gen_int_mode (l[2], SImode);
16653 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16656 gcc_unreachable ();
16658 parts[1] = gen_int_mode (l[1], SImode);
16659 parts[0] = gen_int_mode (l[0], SImode);
16662 gcc_unreachable ();
/* 64-bit target: split into DImode plus an upper SImode/DImode part.  */
16667 if (mode == TImode)
16668 split_ti (&operand, 1, &parts[0], &parts[1]);
16669 if (mode == XFmode || mode == TFmode)
16671 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16672 if (REG_P (operand))
16674 gcc_assert (reload_completed);
16675 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16676 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16678 else if (offsettable_memref_p (operand))
16680 operand = adjust_address (operand, DImode, 0);
16681 parts[0] = operand;
16682 parts[1] = adjust_address (operand, upper_mode, 8);
16684 else if (GET_CODE (operand) == CONST_DOUBLE)
16689 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16690 real_to_target (l, &r, mode);
16692 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16693 if (HOST_BITS_PER_WIDE_INT >= 64)
16696 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16697 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16700 parts[0] = immed_double_const (l[0], l[1], DImode);
16702 if (upper_mode == SImode)
16703 parts[1] = gen_int_mode (l[2], SImode);
16704 else if (HOST_BITS_PER_WIDE_INT >= 64)
16707 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16708 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16711 parts[1] = immed_double_const (l[2], l[3], DImode);
16714 gcc_unreachable ();
16721 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16722 Return false when normal moves are needed; true when all required
16723 insns have been emitted. Operands 2-4 contain the input values
16724 in the correct order; operands 5-7 contain the output values. */
16727 ix86_split_long_move (rtx operands[])
16732 int collisions = 0;
16733 enum machine_mode mode = GET_MODE (operands[0]);
16734 bool collisionparts[4];
16736 /* The DFmode expanders may ask us to move double.
16737 For 64bit target this is single move. By hiding the fact
16738 here we simplify i386.md splitters. */
16739 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16741 /* Optimize constant pool reference to immediates. This is used by
16742 fp moves, that force all constants to memory to allow combining. */
16744 if (MEM_P (operands[1])
16745 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16746 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16747 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16748 if (push_operand (operands[0], VOIDmode))
16750 operands[0] = copy_rtx (operands[0]);
16751 PUT_MODE (operands[0], Pmode);
16754 operands[0] = gen_lowpart (DImode, operands[0]);
16755 operands[1] = gen_lowpart (DImode, operands[1]);
16756 emit_move_insn (operands[0], operands[1]);
16760 /* The only non-offsettable memory we handle is push. */
16761 if (push_operand (operands[0], VOIDmode))
16764 gcc_assert (!MEM_P (operands[0])
16765 || offsettable_memref_p (operands[0]));
/* Decompose both operands into word-sized parts.  */
16767 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16768 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16770 /* When emitting push, take care for source operands on the stack. */
16771 if (push && MEM_P (operands[1])
16772 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16773 for (i = 0; i < nparts - 1; i++)
16774 part[1][i] = change_address (part[1][i],
16775 GET_MODE (part[1][i]),
16776 XEXP (part[1][i + 1], 0));
16778 /* We need to do copy in the right order in case an address register
16779 of the source overlaps the destination. */
16780 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16784 for (i = 0; i < nparts; i++)
16787 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16788 if (collisionparts[i])
16792 /* Collision in the middle part can be handled by reordering. */
16793 if (collisions == 1 && nparts == 3 && collisionparts [1])
16795 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16796 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16798 else if (collisions == 1
16800 && (collisionparts [1] || collisionparts [2]))
16802 if (collisionparts [1])
16804 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16805 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16809 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16810 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16814 /* If there are more collisions, we can't handle it by reordering.
16815 Do an lea to the last part and use only one colliding move. */
16816 else if (collisions > 1)
16822 base = part[0][nparts - 1];
16824 /* Handle the case when the last part isn't valid for lea.
16825 Happens in 64-bit mode storing the 12-byte XFmode. */
16826 if (GET_MODE (base) != Pmode)
16827 base = gen_rtx_REG (Pmode, REGNO (base));
16829 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16830 part[1][0] = replace_equiv_address (part[1][0], base);
16831 for (i = 1; i < nparts; i++)
16833 tmp = plus_constant (base, UNITS_PER_WORD * i);
16834 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push case: XFmode occupies 12 bytes, so pre-adjust SP by -4 to keep
   the 16-byte slot layout when 128-bit long double is in use.  */
16845 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16846 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16847 emit_move_insn (part[0][2], part[1][2]);
16849 else if (nparts == 4)
16851 emit_move_insn (part[0][3], part[1][3]);
16852 emit_move_insn (part[0][2], part[1][2]);
16857 /* In 64bit mode we don't have 32bit push available. In case this is
16858 register, it is OK - we will just use larger counterpart. We also
16859 retype memory - these come from attempt to avoid REX prefix on
16860 moving of second half of TFmode value. */
16861 if (GET_MODE (part[1][1]) == SImode)
16863 switch (GET_CODE (part[1][1]))
16866 part[1][1] = adjust_address (part[1][1], DImode, 0);
16870 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16874 gcc_unreachable ();
16877 if (GET_MODE (part[1][0]) == SImode)
16878 part[1][0] = part[1][1];
16881 emit_move_insn (part[0][1], part[1][1]);
16882 emit_move_insn (part[0][0], part[1][0]);
16886 /* Choose correct order to not overwrite the source before it is copied. */
16887 if ((REG_P (part[0][0])
16888 && REG_P (part[1][1])
16889 && (REGNO (part[0][0]) == REGNO (part[1][1])
16891 && REGNO (part[0][0]) == REGNO (part[1][2]))
16893 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16895 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Copy high-to-low when the first destination word would clobber a
   later source word; low-to-high otherwise.  */
16897 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16899 operands[2 + i] = part[0][j];
16900 operands[6 + i] = part[1][j];
16905 for (i = 0; i < nparts; i++)
16907 operands[2 + i] = part[0][i];
16908 operands[6 + i] = part[1][i];
16912 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16913 if (optimize_insn_for_size_p ())
16915 for (j = 0; j < nparts - 1; j++)
16916 if (CONST_INT_P (operands[6 + j])
16917 && operands[6 + j] != const0_rtx
16918 && REG_P (operands[2 + j]))
16919 for (i = j; i < nparts - 1; i++)
16920 if (CONST_INT_P (operands[7 + i])
16921 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16922 operands[7 + i] = operands[2 + j];
16925 for (i = 0; i < nparts; i++)
16926 emit_move_insn (operands[2 + i], operands[6 + i]);
16931 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16932 left shift by a constant, either using a single shift or
16933 a sequence of add instructions. */
16936 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* x + x == x << 1; a single add handles the count == 1 case.  */
16940 emit_insn ((mode == DImode
16942 : gen_adddi3) (operand, operand, operand));
/* When not optimizing for size and COUNT adds are no more expensive
   than one constant shift per the cost tables, emit COUNT doublings.  */
16944 else if (!optimize_insn_for_size_p ()
16945 && count * ix86_cost->add <= ix86_cost->shift_const)
16948 for (i=0; i<count; i++)
16950 emit_insn ((mode == DImode
16952 : gen_adddi3) (operand, operand, operand));
16956 emit_insn ((mode == DImode
16958 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into single-word operations.  SCRATCH, if non-NULL, enables the
   cmove-based variable-count adjustment.  */
16962 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16964 rtx low[2], high[2];
16966 const int single_width = mode == DImode ? 32 : 64;
16968 if (CONST_INT_P (operands[2]))
16970 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16971 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shift of a whole word or more: low word becomes zero, high word gets
   the old low word shifted by the remainder.  */
16973 if (count >= single_width)
16975 emit_move_insn (high[0], low[1]);
16976 emit_move_insn (low[0], const0_rtx);
16978 if (count > single_width)
16979 ix86_expand_ashl_const (high[0], count - single_width, mode);
16983 if (!rtx_equal_p (operands[0], operands[1]))
16984 emit_move_insn (operands[0], operands[1]);
16985 emit_insn ((mode == DImode
16987 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16988 ix86_expand_ashl_const (low[0], count, mode);
16993 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
/* 1 << N special case: materialize the bit via setcc on the "count >=
   word width" test instead of a real double-word shift sequence.  */
16995 if (operands[1] == const1_rtx)
16997 /* Assuming we've chosen a QImode capable registers, then 1 << N
16998 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16999 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
17001 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
17003 ix86_expand_clear (low[0]);
17004 ix86_expand_clear (high[0]);
17005 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
17007 d = gen_lowpart (QImode, low[0]);
17008 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17009 s = gen_rtx_EQ (QImode, flags, const0_rtx);
17010 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17012 d = gen_lowpart (QImode, high[0]);
17013 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17014 s = gen_rtx_NE (QImode, flags, const0_rtx);
17015 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17018 /* Otherwise, we can get the same results by manually performing
17019 a bit extract operation on bit 5/6, and then performing the two
17020 shifts. The two methods of getting 0/1 into low/high are exactly
17021 the same size. Avoiding the shift in the bit extract case helps
17022 pentium4 a bit; no one else seems to care much either way. */
17027 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
17028 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
17030 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
17031 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) / 6 (TImode) of the count: it selects which
   word receives the single set bit.  */
17033 emit_insn ((mode == DImode
17035 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
17036 emit_insn ((mode == DImode
17038 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
17039 emit_move_insn (low[0], high[0]);
17040 emit_insn ((mode == DImode
17042 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
17045 emit_insn ((mode == DImode
17047 : gen_ashldi3) (low[0], low[0], operands[2]));
17048 emit_insn ((mode == DImode
17050 : gen_ashldi3) (high[0], high[0], operands[2]));
17054 if (operands[1] == constm1_rtx)
17056 /* For -1 << N, we can avoid the shld instruction, because we
17057 know that we're shifting 0...31/63 ones into a -1. */
17058 emit_move_insn (low[0], constm1_rtx)
17059 if (optimize_insn_for_size_p ())
17060 emit_move_insn (high[0], low[0]);
17062 emit_move_insn (high[0], constm1_rtx);
17066 if (!rtx_equal_p (operands[0], operands[1]))
17067 emit_move_insn (operands[0], operands[1]);
17069 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17070 emit_insn ((mode == DImode
17072 : gen_x86_64_shld) (high[0], low[0], operands[2]));
17075 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
/* Fix up for counts >= word width: with cmove+scratch use the branchless
   adjustment, otherwise the branching one.  */
17077 if (TARGET_CMOVE && scratch)
17079 ix86_expand_clear (scratch);
17080 emit_insn ((mode == DImode
17081 ? gen_x86_shift_adj_1
17082 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
17086 emit_insn ((mode == DImode
17087 ? gen_x86_shift_adj_2
17088 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into single-word ops;
   counterpart of ix86_split_ashl for ASHIFTRT.  */
17092 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
17094 rtx low[2], high[2];
17096 const int single_width = mode == DImode ? 32 : 64;
17098 if (CONST_INT_P (operands[2]))
17100 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17101 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximum count: both words become the sign bit broadcast.  */
17103 if (count == single_width * 2 - 1)
17105 emit_move_insn (high[0], high[1]);
17106 emit_insn ((mode == DImode
17108 : gen_ashrdi3) (high[0], high[0],
17109 GEN_INT (single_width - 1)));
17110 emit_move_insn (low[0], high[0]);
/* Word-or-more count: low word gets the shifted high word, high word
   becomes the sign broadcast.  */
17113 else if (count >= single_width)
17115 emit_move_insn (low[0], high[1]);
17116 emit_move_insn (high[0], low[0]);
17117 emit_insn ((mode == DImode
17119 : gen_ashrdi3) (high[0], high[0],
17120 GEN_INT (single_width - 1)));
17121 if (count > single_width)
17122 emit_insn ((mode == DImode
17124 : gen_ashrdi3) (low[0], low[0],
17125 GEN_INT (count - single_width)));
/* Sub-word count: shrd feeds high bits into the low word, then the
   high word is shifted arithmetically.  */
17129 if (!rtx_equal_p (operands[0], operands[1]))
17130 emit_move_insn (operands[0], operands[1]);
17131 emit_insn ((mode == DImode
17133 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17134 emit_insn ((mode == DImode
17136 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
17141 if (!rtx_equal_p (operands[0], operands[1]))
17142 emit_move_insn (operands[0], operands[1]);
17144 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17146 emit_insn ((mode == DImode
17148 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17149 emit_insn ((mode == DImode
17151 : gen_ashrdi3) (high[0], high[0], operands[2]));
/* Variable count: SCRATCH holds the sign broadcast used by the
   branchless (cmove) adjustment for counts >= word width.  */
17153 if (TARGET_CMOVE && scratch)
17155 emit_move_insn (scratch, high[0]);
17156 emit_insn ((mode == DImode
17158 : gen_ashrdi3) (scratch, scratch,
17159 GEN_INT (single_width - 1)));
17160 emit_insn ((mode == DImode
17161 ? gen_x86_shift_adj_1
17162 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17166 emit_insn ((mode == DImode
17167 ? gen_x86_shift_adj_3
17168 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into single-word ops;
   like ix86_split_ashr but zero-fills instead of sign-extending.  */
17173 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17175 rtx low[2], high[2];
17177 const int single_width = mode == DImode ? 32 : 64;
17179 if (CONST_INT_P (operands[2]))
17181 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17182 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Word-or-more count: low word gets the old high word, high word
   is cleared.  */
17184 if (count >= single_width)
17186 emit_move_insn (low[0], high[1]);
17187 ix86_expand_clear (high[0]);
17189 if (count > single_width)
17190 emit_insn ((mode == DImode
17192 : gen_lshrdi3) (low[0], low[0],
17193 GEN_INT (count - single_width)));
17197 if (!rtx_equal_p (operands[0], operands[1]))
17198 emit_move_insn (operands[0], operands[1]);
17199 emit_insn ((mode == DImode
17201 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17202 emit_insn ((mode == DImode
17204 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
17209 if (!rtx_equal_p (operands[0], operands[1]))
17210 emit_move_insn (operands[0], operands[1]);
17212 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17214 emit_insn ((mode == DImode
17216 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17217 emit_insn ((mode == DImode
17219 : gen_lshrdi3) (high[0], high[0], operands[2]));
17221 /* Heh. By reversing the arguments, we can reuse this pattern. */
17222 if (TARGET_CMOVE && scratch)
17224 ix86_expand_clear (scratch);
17225 emit_insn ((mode == DImode
17226 ? gen_x86_shift_adj_1
17227 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17231 emit_insn ((mode == DImode
17232 ? gen_x86_shift_adj_2
17233 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17237 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* PROB is on the REG_BR_PROB_BASE scale; attached as a REG_BR_PROB note
   on the last emitted insn, which must be a jump.  */
17239 predict_jump (int prob)
17241 rtx insn = get_last_insn ();
17242 gcc_assert (JUMP_P (insn));
17243 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17246 /* Helper function for the string operations below. Test whether VARIABLE
17247 is aligned to VALUE bytes (i.e. VARIABLE & VALUE is zero) and emit a jump
to a fresh label when it is; the label is returned to the caller. */
17249 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17251 rtx label = gen_label_rtx ();
17252 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17253 if (GET_MODE (variable) == DImode)
17254 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17256 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
/* Jump when the tested bits are zero.  */
17257 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
/* Epilogue tests are 50/50; prologue alignment tests are usually taken.  */
17260 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17262 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17266 /* Adjust COUNTER by the VALUE. */
/* Subtracts VALUE from COUNTREG in whichever of DImode/SImode it has.  */
17268 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17270 if (GET_MODE (countreg) == DImode)
17271 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17273 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17276 /* Zero extend possibly SImode EXP to Pmode register. */
/* Constants (VOIDmode) are simply forced into a Pmode register; values
   already in Pmode are copied; anything else is zero-extended SI->DI.  */
17278 ix86_zero_extend_to_Pmode (rtx exp)
17281 if (GET_MODE (exp) == VOIDmode)
17282 return force_reg (Pmode, exp);
17283 if (GET_MODE (exp) == Pmode)
17284 return copy_to_mode_reg (Pmode, exp);
17285 r = gen_reg_rtx (Pmode);
17286 emit_insn (gen_zero_extendsidi2 (r, exp));
17290 /* Divide COUNTREG by SCALE. */
/* SCALE must be a power of two: a constant count is divided directly,
   a register count via a logical right shift by log2(SCALE).  */
17292 scale_counter (rtx countreg, int scale)
17295 rtx piece_size_mask;
17299 if (CONST_INT_P (countreg))
17300 return GEN_INT (INTVAL (countreg) / scale);
17301 gcc_assert (REG_P (countreg));
17303 piece_size_mask = GEN_INT (scale - 1);
17304 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17305 GEN_INT (exact_log2 (scale)),
17306 NULL, 1, OPTAB_DIRECT);
17310 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17311 DImode for constant loop counts. */
17313 static enum machine_mode
17314 counter_mode (rtx count_exp)
17316 if (GET_MODE (count_exp) != VOIDmode)
17317 return GET_MODE (count_exp);
17318 if (!CONST_INT_P (count_exp))
/* Constant counts: DImode only when the value doesn't fit in 32 bits.  */
17320 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17325 /* When SRCPTR is non-NULL, output simple loop to move memory
17326 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
17327 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
17328 equivalent loop to set memory by VALUE (supposed to be in MODE).
17330 The size is rounded down to whole number of chunk size moved at once.
17331 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
17335 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17336 rtx destptr, rtx srcptr, rtx value,
17337 rtx count, enum machine_mode mode, int unroll,
17340 rtx out_label, top_label, iter, tmp;
17341 enum machine_mode iter_mode = counter_mode (count);
17342 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17343 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17349 top_label = gen_label_rtx ();
17350 out_label = gen_label_rtx ();
17351 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the per-iteration stride.  */
17353 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17354 NULL, 1, OPTAB_DIRECT);
17355 /* Those two should combine. */
17356 if (piece_size == const1_rtx)
17358 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17360 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17362 emit_move_insn (iter, const0_rtx);
17364 emit_label (top_label);
/* Address each chunk as base pointer + current iteration offset.  */
17366 tmp = convert_modes (Pmode, iter_mode, iter, true);
17367 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17368 destmem = change_address (destmem, mode, x_addr);
17372 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17373 srcmem = change_address (srcmem, mode, y_addr);
17375 /* When unrolling for chips that reorder memory reads and writes,
17376 we can save registers by using single temporary.
17377 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the "&& 0" below disables this branch unconditionally —
   presumably deliberate dead code kept for reference; confirm before
   re-enabling.  */
17378 if (!TARGET_64BIT && 0)
17380 for (i = 0; i < unroll; i++)
17385 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17387 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17389 emit_move_insn (destmem, srcmem);
/* Enabled path: read all UNROLL chunks into temporaries first, then
   write them out, so loads and stores do not interleave.  */
17395 gcc_assert (unroll <= 4);
17396 for (i = 0; i < unroll; i++)
17398 tmpreg[i] = gen_reg_rtx (mode);
17402 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17404 emit_move_insn (tmpreg[i], srcmem);
17406 for (i = 0; i < unroll; i++)
17411 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17413 emit_move_insn (destmem, tmpreg[i]);
/* memset variant (SRCPTR == NULL): store VALUE into each chunk.  */
17418 for (i = 0; i < unroll; i++)
17422 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17423 emit_move_insn (destmem, value);
17426 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17427 true, OPTAB_LIB_WIDEN);
17429 emit_move_insn (iter, tmp);
17431 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive a back-edge probability from the expected trip count.  */
17433 if (expected_size != -1)
17435 expected_size /= GET_MODE_SIZE (mode) * unroll;
17436 if (expected_size == 0)
17438 else if (expected_size > REG_BR_PROB_BASE)
17439 predict_jump (REG_BR_PROB_BASE - 1);
17441 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17444 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the caller's pointers past the region processed by the loop.  */
17445 iter = ix86_zero_extend_to_Pmode (iter);
17446 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17447 true, OPTAB_LIB_WIDEN);
17448 if (tmp != destptr)
17449 emit_move_insn (destptr, tmp);
17452 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17453 true, OPTAB_LIB_WIDEN);
17455 emit_move_insn (srcptr, tmp);
17457 emit_label (out_label);
17460 /* Output "rep; mov" instruction.
17461 Arguments have same meaning as for previous function */
17463 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17464 rtx destptr, rtx srcptr,
17466 enum machine_mode mode)
17472 /* If the size is known, it is shorter to use rep movs. */
17473 if (mode == QImode && CONST_INT_P (count)
17474 && !(INTVAL (count) & 3))
/* Normalize both MEMs to BLKmode at their base pointers so the rep
   pattern's aliasing info is correct.  */
17477 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17478 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17479 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17480 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17481 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Build the final-pointer expressions (ptr + count * chunk size) the
   rep_mov pattern uses to describe its side effects.  */
17482 if (mode != QImode)
17484 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17485 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17486 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17487 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17488 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17489 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17493 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17494 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
/* Record the exact byte count on both MEMs when it is known; otherwise
   drop any stale MEM_SIZE info.  */
17496 if (CONST_INT_P (count))
17498 count = GEN_INT (INTVAL (count)
17499 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17500 destmem = shallow_copy_rtx (destmem);
17501 srcmem = shallow_copy_rtx (srcmem);
17502 set_mem_size (destmem, count);
17503 set_mem_size (srcmem, count);
17507 if (MEM_SIZE (destmem))
17508 set_mem_size (destmem, NULL_RTX);
17509 if (MEM_SIZE (srcmem))
17510 set_mem_size (srcmem, NULL_RTX);
17512 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17516 /* Output "rep; stos" instruction.
17517 Arguments have same meaning as for previous function */
17519 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17520 rtx count, enum machine_mode mode,
17526 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17527 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* VALUE arrives widened; take its low MODE part for the stos pattern.  */
17528 value = force_reg (mode, gen_lowpart (mode, value));
17529 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17530 if (mode != QImode)
17532 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17533 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17534 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17537 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Only a zero fill with known count lets us keep precise MEM_SIZE.  */
17538 if (orig_value == const0_rtx && CONST_INT_P (count))
17540 count = GEN_INT (INTVAL (count)
17541 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17542 destmem = shallow_copy_rtx (destmem);
17543 set_mem_size (destmem, count);
17545 else if (MEM_SIZE (destmem))
17546 set_mem_size (destmem, NULL_RTX);
17547 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single strmov (movs-style copy with pointer auto-update) of one
   MODE-sized piece at byte OFFSET from SRCMEM/SRCPTR to DESTMEM/DESTPTR.  */
17551 emit_strmov (rtx destmem, rtx srcmem,
17552 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17554 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17555 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17556 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17559 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
17561 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17562 rtx destptr, rtx srcptr, rtx count, int max_size)
17565 if (CONST_INT_P (count))
/* Known count: emit straight-line moves for each set bit of the
   residual, from the largest piece down to a single byte.  */
17567 HOST_WIDE_INT countval = INTVAL (count);
17570 if ((countval & 0x10) && max_size > 16)
17574 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17575 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17578 gcc_unreachable ();
17581 if ((countval & 0x08) && max_size > 8)
17584 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17587 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17588 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17592 if ((countval & 0x04) && max_size > 4)
17594 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17597 if ((countval & 0x02) && max_size > 2)
17599 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17602 if ((countval & 0x01) && max_size > 1)
17604 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Residual too large to enumerate: fall back to a byte copy loop over
   count & (max_size - 1).  */
17611 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17612 count, 1, OPTAB_DIRECT);
17613 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17614 count, QImode, 1, 4);
17618 /* When there are stringops, we can cheaply increase dest and src pointers.
17619 Otherwise we save code size by maintaining offset (zero is readily
17620 available from preceding rep operation) and using x86 addressing modes.
/* Unknown count: test each bit of COUNT and conditionally copy a piece
   of that size, using movs (pointer auto-update) when available.  */
17622 if (TARGET_SINGLE_STRINGOP)
17626 rtx label = ix86_expand_aligntest (count, 4, true);
17627 src = change_address (srcmem, SImode, srcptr);
17628 dest = change_address (destmem, SImode, destptr);
17629 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17630 emit_label (label);
17631 LABEL_NUSES (label) = 1;
17635 rtx label = ix86_expand_aligntest (count, 2, true);
17636 src = change_address (srcmem, HImode, srcptr);
17637 dest = change_address (destmem, HImode, destptr);
17638 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17639 emit_label (label);
17640 LABEL_NUSES (label) = 1;
17644 rtx label = ix86_expand_aligntest (count, 1, true);
17645 src = change_address (srcmem, QImode, srcptr);
17646 dest = change_address (destmem, QImode, destptr);
17647 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17648 emit_label (label);
17649 LABEL_NUSES (label) = 1;
/* No cheap stringops: keep a running OFFSET register and use plain
   base+offset addressed moves instead.  */
17654 rtx offset = force_reg (Pmode, const0_rtx);
17659 rtx label = ix86_expand_aligntest (count, 4, true);
17660 src = change_address (srcmem, SImode, srcptr);
17661 dest = change_address (destmem, SImode, destptr);
17662 emit_move_insn (dest, src);
17663 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17664 true, OPTAB_LIB_WIDEN);
17666 emit_move_insn (offset, tmp);
17667 emit_label (label);
17668 LABEL_NUSES (label) = 1;
17672 rtx label = ix86_expand_aligntest (count, 2, true);
17673 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17674 src = change_address (srcmem, HImode, tmp);
17675 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17676 dest = change_address (destmem, HImode, tmp);
17677 emit_move_insn (dest, src);
17678 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17679 true, OPTAB_LIB_WIDEN);
17681 emit_move_insn (offset, tmp);
17682 emit_label (label);
17683 LABEL_NUSES (label) = 1;
17687 rtx label = ix86_expand_aligntest (count, 1, true);
17688 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17689 src = change_address (srcmem, QImode, tmp);
17690 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17691 dest = change_address (destmem, QImode, tmp);
17692 emit_move_insn (dest, src);
17693 emit_label (label);
17694 LABEL_NUSES (label) = 1;
17699 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Byte-granular fallback: mask COUNT down to the residual and reuse the
   generic set/move loop in memset (QImode) form.  */
17701 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17702 rtx count, int max_size)
17705 expand_simple_binop (counter_mode (count), AND, count,
17706 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17707 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17708 gen_lowpart (QImode, value), count, QImode,
17712 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Two strategies:
   - COUNT constant: emit straight-line strset stores, one group per set bit
     of COUNT (16/8/4/2/1 bytes), guarded at compile time by MAX_SIZE.
   - COUNT runtime: emit a jump tree; ix86_expand_aligntest tests each bit
     of COUNT and skips the corresponding store group.
   NOTE(review): several branches (32-bit fallbacks, offset bookkeeping) are
   elided in this excerpt -- the visible DImode stores presumably have SImode
   siblings for !TARGET_64BIT; confirm against the full file.  */
17714 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17718 if (CONST_INT_P (count))
17720 HOST_WIDE_INT countval = INTVAL (count);
/* 16-byte residue: two 8-byte stores.  */
17723 if ((countval & 0x10) && max_size > 16)
17727 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17728 emit_insn (gen_strset (destptr, dest, value));
17729 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17730 emit_insn (gen_strset (destptr, dest, value));
17733 gcc_unreachable ();
/* 8-byte residue: one DImode store (64-bit) or two SImode stores.  */
17736 if ((countval & 0x08) && max_size > 8)
17740 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17741 emit_insn (gen_strset (destptr, dest, value));
17745 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17746 emit_insn (gen_strset (destptr, dest, value));
17747 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17748 emit_insn (gen_strset (destptr, dest, value));
/* 4-, 2- and 1-byte residues use progressively narrower lowparts of VALUE.  */
17752 if ((countval & 0x04) && max_size > 4)
17754 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17755 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17758 if ((countval & 0x02) && max_size > 2)
17760 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17761 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17764 if ((countval & 0x01) && max_size > 1)
17766 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17767 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Residue too large for the jump tree: fall back to the byte loop.  */
17774 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Runtime COUNT: one aligntest-guarded store group per bit, largest first.  */
17779 rtx label = ix86_expand_aligntest (count, 16, true);
17782 dest = change_address (destmem, DImode, destptr);
17783 emit_insn (gen_strset (destptr, dest, value));
17784 emit_insn (gen_strset (destptr, dest, value));
17788 dest = change_address (destmem, SImode, destptr);
17789 emit_insn (gen_strset (destptr, dest, value));
17790 emit_insn (gen_strset (destptr, dest, value));
17791 emit_insn (gen_strset (destptr, dest, value));
17792 emit_insn (gen_strset (destptr, dest, value));
17794 emit_label (label);
17795 LABEL_NUSES (label) = 1;
17799 rtx label = ix86_expand_aligntest (count, 8, true);
17802 dest = change_address (destmem, DImode, destptr);
17803 emit_insn (gen_strset (destptr, dest, value));
17807 dest = change_address (destmem, SImode, destptr);
17808 emit_insn (gen_strset (destptr, dest, value));
17809 emit_insn (gen_strset (destptr, dest, value));
17811 emit_label (label);
17812 LABEL_NUSES (label) = 1;
17816 rtx label = ix86_expand_aligntest (count, 4, true);
17817 dest = change_address (destmem, SImode, destptr);
17818 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17819 emit_label (label);
17820 LABEL_NUSES (label) = 1;
17824 rtx label = ix86_expand_aligntest (count, 2, true);
17825 dest = change_address (destmem, HImode, destptr);
17826 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17827 emit_label (label);
17828 LABEL_NUSES (label) = 1;
17832 rtx label = ix86_expand_aligntest (count, 1, true);
17833 dest = change_address (destmem, QImode, destptr);
17834 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17835 emit_label (label);
17836 LABEL_NUSES (label) = 1;
17840 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
17841 up to DESIRED_ALIGNMENT. */
/* For each alignment step (1 -> 2 -> 4 -> 8) emit a runtime test on DESTPTR;
   when the bit is set, copy one unit of that size and decrement COUNT via
   ix86_adjust_counter.  After all steps DESTPTR is aligned to
   DESIRED_ALIGNMENT (at most 8).  Caller must guarantee the block is at
   least DESIRED_ALIGNMENT bytes long.  */
17843 expand_movmem_prologue (rtx destmem, rtx srcmem,
17844 rtx destptr, rtx srcptr, rtx count,
17845 int align, int desired_alignment)
17847 if (align <= 1 && desired_alignment > 1)
17849 rtx label = ix86_expand_aligntest (destptr, 1, false);
17850 srcmem = change_address (srcmem, QImode, srcptr);
17851 destmem = change_address (destmem, QImode, destptr);
17852 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17853 ix86_adjust_counter (count, 1);
17854 emit_label (label);
17855 LABEL_NUSES (label) = 1;
17857 if (align <= 2 && desired_alignment > 2)
17859 rtx label = ix86_expand_aligntest (destptr, 2, false);
17860 srcmem = change_address (srcmem, HImode, srcptr);
17861 destmem = change_address (destmem, HImode, destptr);
17862 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17863 ix86_adjust_counter (count, 2);
17864 emit_label (label);
17865 LABEL_NUSES (label) = 1;
17867 if (align <= 4 && desired_alignment > 4)
17869 rtx label = ix86_expand_aligntest (destptr, 4, false);
17870 srcmem = change_address (srcmem, SImode, srcptr);
17871 destmem = change_address (destmem, SImode, destptr);
17872 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17873 ix86_adjust_counter (count, 4);
17874 emit_label (label);
17875 LABEL_NUSES (label) = 1;
/* Larger desired alignments are not handled by this jump tree.  */
17877 gcc_assert (desired_alignment <= 8);
17880 /* Copy enough from SRC to DST to align DST, known offset, to DESIRED_ALIGN.
17881 ALIGN_BYTES is how many bytes need to be copied. */
/* Constant-count variant of expand_movmem_prologue: ALIGN_BYTES is known at
   compile time, so the copies are emitted unconditionally (no jump tree) and
   the MEM alias/alignment/size attributes of both operands are kept accurate.
   Returns the adjusted DST; the adjusted SRC is passed back through *SRCP.  */
17883 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17884 int desired_align, int align_bytes)
17887 rtx src_size, dst_size;
/* How far SRC is from DESIRED_ALIGN alignment; negative if unknown.  */
17889 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17890 if (src_align_bytes >= 0)
17891 src_align_bytes = desired_align - src_align_bytes;
17892 src_size = MEM_SIZE (src);
17893 dst_size = MEM_SIZE (dst);
/* Copy 1, then 2, then 4 bytes as dictated by the low bits of ALIGN_BYTES,
   raising the recorded MEM alignment whenever the copy proves it.  */
17894 if (align_bytes & 1)
17896 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17897 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17899 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17901 if (align_bytes & 2)
17903 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17904 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17905 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17906 set_mem_align (dst, 2 * BITS_PER_UNIT);
17907 if (src_align_bytes >= 0
17908 && (src_align_bytes & 1) == (align_bytes & 1)
17909 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17910 set_mem_align (src, 2 * BITS_PER_UNIT);
17912 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17914 if (align_bytes & 4)
17916 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17917 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17918 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17919 set_mem_align (dst, 4 * BITS_PER_UNIT);
17920 if (src_align_bytes >= 0)
17922 unsigned int src_align = 0;
17923 if ((src_align_bytes & 3) == (align_bytes & 3))
17925 else if ((src_align_bytes & 1) == (align_bytes & 1))
17927 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17928 set_mem_align (src, src_align * BITS_PER_UNIT);
17931 emit_insn (gen_strmov (destreg, dst, srcreg, src));
/* Switch both operands to BLKmode for the main copy loop and record the
   alignment that the prologue just established.  */
17933 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off)
17934 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17935 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17936 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17937 if (src_align_bytes >= 0)
17939 unsigned int src_align = 0;
17940 if ((src_align_bytes & 7) == (align_bytes & 7))
17942 else if ((src_align_bytes & 3) == (align_bytes & 3))
17944 else if ((src_align_bytes & 1) == (align_bytes & 1))
17946 if (src_align > (unsigned int) desired_align)
17947 src_align = desired_align;
17948 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17949 set_mem_align (src, src_align * BITS_PER_UNIT);
/* Shrink the recorded MEM sizes by the bytes the prologue consumed.  */
17952 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
/* Fixed copy-paste bug: the source size must be applied to SRC, not DST —
   the original re-set DST's size from SRC_SIZE, corrupting alias info.  */
17954 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17959 /* Set enough of DEST to align DEST, known to be aligned by ALIGN, up to
17960 DESIRED_ALIGNMENT. */
/* memset counterpart of expand_movmem_prologue: for each alignment step
   (1 -> 2 -> 4) test DESTPTR at runtime, store one lowpart of VALUE of that
   width when needed, and decrement COUNT accordingly.  */
17962 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17963 int align, int desired_alignment)
17965 if (align <= 1 && desired_alignment > 1)
17967 rtx label = ix86_expand_aligntest (destptr, 1, false);
17968 destmem = change_address (destmem, QImode, destptr);
17969 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17970 ix86_adjust_counter (count, 1);
17971 emit_label (label);
17972 LABEL_NUSES (label) = 1;
17974 if (align <= 2 && desired_alignment > 2)
17976 rtx label = ix86_expand_aligntest (destptr, 2, false);
17977 destmem = change_address (destmem, HImode, destptr);
17978 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17979 ix86_adjust_counter (count, 2);
17980 emit_label (label);
17981 LABEL_NUSES (label) = 1;
17983 if (align <= 4 && desired_alignment > 4)
17985 rtx label = ix86_expand_aligntest (destptr, 4, false);
17986 destmem = change_address (destmem, SImode, destptr);
17987 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17988 ix86_adjust_counter (count, 4);
17989 emit_label (label);
17990 LABEL_NUSES (label) = 1;
/* Alignments beyond 8 would need more steps than this tree provides.  */
17992 gcc_assert (desired_alignment <= 8);
17995 /* Set enough of DST to align DST, known to be aligned by ALIGN, to
17996 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
/* Constant-count variant of expand_setmem_prologue: ALIGN_BYTES is known at
   compile time, so stores are emitted unconditionally and DST's recorded
   MEM alignment/size attributes are kept accurate.  Returns adjusted DST.  */
17998 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17999 int desired_align, int align_bytes)
18002 rtx dst_size = MEM_SIZE (dst);
/* Store 1, 2, then 4 bytes per the low bits of ALIGN_BYTES, bumping the
   recorded alignment as each store proves it.  */
18003 if (align_bytes & 1)
18005 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18007 emit_insn (gen_strset (destreg, dst,
18008 gen_lowpart (QImode, value)));
18010 if (align_bytes & 2)
18012 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18013 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18014 set_mem_align (dst, 2 * BITS_PER_UNIT);
18016 emit_insn (gen_strset (destreg, dst,
18017 gen_lowpart (HImode, value)));
18019 if (align_bytes & 4)
18021 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18022 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18023 set_mem_align (dst, 4 * BITS_PER_UNIT);
18025 emit_insn (gen_strset (destreg, dst,
18026 gen_lowpart (SImode, value)));
/* Switch to BLKmode for the main loop; record the achieved alignment and
   the reduced remaining size.  */
18028 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18029 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18030 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18032 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18036 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
/* Picks the stringop algorithm (loop, unrolled loop, rep-prefix variant or
   libcall) for a memcpy (MEMSET false) or memset (MEMSET true) of COUNT
   bytes (0 if unknown) with profile-estimated EXPECTED_SIZE (-1 if unknown).
   *DYNAMIC_CHECK is set to a threshold for a runtime inline-vs-libcall test
   when -minline-stringops-dynamically applies, else to -1.  */
18037 static enum stringop_alg
18038 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
18039 int *dynamic_check)
18041 const struct stringop_algs * algs;
18042 bool optimize_for_speed;
18043 /* Algorithms using the rep prefix want at least edi and ecx;
18044 additionally, memset wants eax and memcpy wants esi. Don't
18045 consider such algorithms if the user has appropriated those
18046 registers for their own purposes. */
18047 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
18049 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
18051 #define ALG_USABLE_P(alg) (rep_prefix_usable \
18052 || (alg != rep_prefix_1_byte \
18053 && alg != rep_prefix_4_byte \
18054 && alg != rep_prefix_8_byte))
18055 const struct processor_costs *cost;
18057 /* Even if the string operation call is cold, we still might spend a lot
18058 of time processing large blocks. */
18059 if (optimize_function_for_size_p (cfun)
18060 || (optimize_insn_for_size_p ()
18061 && expected_size != -1 && expected_size < 256))
18062 optimize_for_speed = false;
18064 optimize_for_speed = true;
18066 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
18068 *dynamic_check = -1;
18070 algs = &cost->memset[TARGET_64BIT != 0];
18072 algs = &cost->memcpy[TARGET_64BIT != 0];
/* An explicit -mstringop-strategy wins if its register needs are met.  */
18073 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
18074 return stringop_alg;
18075 /* rep; movq or rep; movl is the smallest variant. */
18076 else if (!optimize_for_speed)
18078 if (!count || (count & 3))
18079 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
18081 return rep_prefix_usable ? rep_prefix_4_byte : loop;
18083 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
18085 else if (expected_size != -1 && expected_size < 4)
18086 return loop_1_byte;
18087 else if (expected_size != -1)
18090 enum stringop_alg alg = libcall;
/* Fixed identifier: the cost-table bound is MAX_STRINGOP_ALGS (the original
   "NAX_STRINGOP_ALGS" is undefined and would not compile).  */
18091 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18093 /* We get here if the algorithms that were not libcall-based
18094 were rep-prefix based and we are unable to use rep prefixes
18095 based on global register usage. Break out of the loop and
18096 use the heuristic below. */
18097 if (algs->size[i].max == 0)
18099 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18101 enum stringop_alg candidate = algs->size[i].alg;
18103 if (candidate != libcall && ALG_USABLE_P (candidate))
18105 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18106 last non-libcall inline algorithm. */
18107 if (TARGET_INLINE_ALL_STRINGOPS)
18109 /* When the current size is best to be copied by a libcall,
18110 but we are still forced to inline, run the heuristic below
18111 that will pick code for medium sized blocks. */
18112 if (alg != libcall)
18116 else if (ALG_USABLE_P (candidate))
18120 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18122 /* When asked to inline the call anyway, try to pick meaningful choice.
18123 We look for maximal size of block that is faster to copy by hand and
18124 take blocks of at most of that size guessing that average size will
18125 be roughly half of the block.
18127 If this turns out to be bad, we might simply specify the preferred
18128 choice in ix86_costs. */
18129 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18130 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18133 enum stringop_alg alg;
18135 bool any_alg_usable_p = true;
/* Same identifier fix as above.  */
18137 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18139 enum stringop_alg candidate = algs->size[i].alg;
18140 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18142 if (candidate != libcall && candidate
18143 && ALG_USABLE_P (candidate))
18144 max = algs->size[i].max;
18146 /* If there aren't any usable algorithms, then recursing on
18147 smaller sizes isn't going to find anything. Just return the
18148 simple byte-at-a-time copy loop. */
18149 if (!any_alg_usable_p)
18151 /* Pick something reasonable. */
18152 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18153 *dynamic_check = 128;
18154 return loop_1_byte;
/* Recurse with half the max inline size as the expected size; the recursion
   terminates because EXPECTED_SIZE is then != -1.  */
18158 alg = decide_alg (count, max / 2, memset, dynamic_check);
18159 gcc_assert (*dynamic_check == -1);
18160 gcc_assert (alg != libcall);
18161 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18162 *dynamic_check = max;
18165 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18166 #undef ALG_USABLE_P
18169 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18170 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* Returns the destination alignment the prologue should establish for the
   chosen algorithm ALG; never below the incoming ALIGN, and alignment work
   is skipped entirely for tiny expected sizes.  */
18172 decide_alignment (int align,
18173 enum stringop_alg alg,
18176 int desired_align = 0;
18180 gcc_unreachable ();
18182 case unrolled_loop:
/* Loop-based algorithms want word-sized aligned accesses.  */
18183 desired_align = GET_MODE_SIZE (Pmode);
18185 case rep_prefix_8_byte:
18188 case rep_prefix_4_byte:
18189 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18190 copying whole cacheline at once. */
18191 if (TARGET_PENTIUMPRO)
18196 case rep_prefix_1_byte:
18197 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18198 copying whole cacheline at once. */
18199 if (TARGET_PENTIUMPRO)
/* Never request less than what is already guaranteed.  */
18213 if (desired_align < align)
18214 desired_align = align;
/* Tiny blocks: aligning would cost more than it saves.  */
18215 if (expected_size != -1 && expected_size < 4)
18216 desired_align = align;
18217 return desired_align;
18220 /* Return the smallest power of 2 greater than VAL. */
/* Used to round EPILOGUE_SIZE_NEEDED up to a power of two in
   ix86_expand_movmem (see Step 1 there).  */
18222 smallest_pow2_greater_than (int val)
18230 /* Expand string move (memcpy) operation. Use i386 string operations when
18231 profitable. expand_setmem contains similar code. The code depends upon
18232 architecture, block size and alignment, but always has the same
18235 1) Prologue guard: Conditional that jumps up to epilogues for small
18236 blocks that can be handled by epilogue alone. This is faster but
18237 also needed for correctness, since prologue assume the block is larger
18238 than the desired alignment.
18240 Optional dynamic check for size and libcall for large
18241 blocks is emitted here too, with -minline-stringops-dynamically.
18243 2) Prologue: copy first few bytes in order to get destination aligned
18244 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18245 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18246 We emit either a jump tree on power of two sized blocks, or a byte loop.
18248 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18249 with specified algorithm.
18251 4) Epilogue: code copying tail of the block that is too small to be
18252 handled by main body (or up to size guarded by prologue guard). */
18255 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18256 rtx expected_align_exp, rtx expected_size_exp)
18262 rtx jump_around_label = NULL;
18263 HOST_WIDE_INT align = 1;
18264 unsigned HOST_WIDE_INT count = 0;
18265 HOST_WIDE_INT expected_size = -1;
18266 int size_needed = 0, epilogue_size_needed;
18267 int desired_align = 0, align_bytes = 0;
18268 enum stringop_alg alg;
18270 bool need_zero_guard = false;
/* Gather the compile-time facts: alignment, count and expected size.  */
18272 if (CONST_INT_P (align_exp))
18273 align = INTVAL (align_exp);
18274 /* i386 can do misaligned access on reasonably increased cost. */
18275 if (CONST_INT_P (expected_align_exp)
18276 && INTVAL (expected_align_exp) > align)
18277 align = INTVAL (expected_align_exp);
18278 /* ALIGN is the minimum of destination and source alignment, but we care here
18279 just about destination alignment. */
18280 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18281 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18283 if (CONST_INT_P (count_exp))
18284 count = expected_size = INTVAL (count_exp);
18285 if (CONST_INT_P (expected_size_exp) && count == 0)
18286 expected_size = INTVAL (expected_size_exp);
18288 /* Make sure we don't need to care about overflow later on. */
18289 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18292 /* Step 0: Decide on preferred algorithm, desired alignment and
18293 size of chunks to be copied by main loop. */
18295 alg = decide_alg (count, expected_size, false, &dynamic_check);
18296 desired_align = decide_alignment (align, alg, expected_size);
18298 if (!TARGET_ALIGN_STRINGOPS)
18299 align = desired_align;
18301 if (alg == libcall)
18303 gcc_assert (alg != no_stringop);
/* Pointers and count must live in registers for the expanders below.  */
18305 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18306 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18307 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Per-algorithm chunk size; loops must also guard against a zero main-loop
   trip count (NEED_ZERO_GUARD).  */
18312 gcc_unreachable ();
18314 need_zero_guard = true;
18315 size_needed = GET_MODE_SIZE (Pmode);
18317 case unrolled_loop:
18318 need_zero_guard = true;
18319 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18321 case rep_prefix_8_byte:
18324 case rep_prefix_4_byte:
18327 case rep_prefix_1_byte:
18331 need_zero_guard = true;
18336 epilogue_size_needed = size_needed;
18338 /* Step 1: Prologue guard. */
18340 /* Alignment code needs count to be in register. */
18341 if (CONST_INT_P (count_exp) && desired_align > align)
18343 if (INTVAL (count_exp) > desired_align
18344 && INTVAL (count_exp) > size_needed)
18347 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18348 if (align_bytes <= 0)
18351 align_bytes = desired_align - align_bytes;
18353 if (align_bytes == 0)
18354 count_exp = force_reg (counter_mode (count_exp), count_exp);
18356 gcc_assert (desired_align >= 1 && align >= 1);
18358 /* Ensure that alignment prologue won't copy past end of block. */
18359 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18361 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18362 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
18363 Make sure it is power of 2. */
18364 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18368 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18370 /* If main algorithm works on QImode, no epilogue is needed.
18371 For small sizes just don't align anything. */
18372 if (size_needed == 1)
18373 desired_align = align;
/* Runtime guard: jump to the epilogue for blocks smaller than
   EPILOGUE_SIZE_NEEDED, with a branch-probability hint.  */
18380 label = gen_label_rtx ();
18381 emit_cmp_and_jump_insns (count_exp,
18382 GEN_INT (epilogue_size_needed),
18383 LTU, 0, counter_mode (count_exp), 1, label);
18384 if (expected_size == -1 || expected_size < epilogue_size_needed)
18385 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18387 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18391 /* Emit code to decide on runtime whether library call or inline should be
18393 if (dynamic_check != -1)
18395 if (CONST_INT_P (count_exp))
18397 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18399 emit_block_move_via_libcall (dst, src, count_exp, false);
18400 count_exp = const0_rtx;
18406 rtx hot_label = gen_label_rtx ();
18407 jump_around_label = gen_label_rtx ();
18408 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18409 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18410 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18411 emit_block_move_via_libcall (dst, src, count_exp, false);
18412 emit_jump (jump_around_label);
18413 emit_label (hot_label);
18417 /* Step 2: Alignment prologue. */
18419 if (desired_align > align)
18421 if (align_bytes == 0)
18423 /* Except for the first move in epilogue, we no longer know
18424 constant offset in aliasing info. It don't seems to worth
18425 the pain to maintain it for the first move, so throw away
18427 src = change_address (src, BLKmode, srcreg);
18428 dst = change_address (dst, BLKmode, destreg);
18429 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18434 /* If we know how many bytes need to be stored before dst is
18435 sufficiently aligned, maintain aliasing info accurately. */
18436 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18437 desired_align, align_bytes);
18438 count_exp = plus_constant (count_exp, -align_bytes);
18439 count -= align_bytes;
18441 if (need_zero_guard
18442 && (count < (unsigned HOST_WIDE_INT) size_needed
18443 || (align_bytes == 0
18444 && count < ((unsigned HOST_WIDE_INT) size_needed
18445 + desired_align - align))))
18447 /* It is possible that we copied enough so the main loop will not
18449 gcc_assert (size_needed > 1);
18450 if (label == NULL_RTX)
18451 label = gen_label_rtx ();
18452 emit_cmp_and_jump_insns (count_exp,
18453 GEN_INT (size_needed),
18454 LTU, 0, counter_mode (count_exp), 1, label);
18455 if (expected_size == -1
18456 || expected_size < (desired_align - align) / 2 + size_needed)
18457 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18459 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18462 if (label && size_needed == 1)
18464 emit_label (label);
18465 LABEL_NUSES (label) = 1;
18467 epilogue_size_needed = 1;
18469 else if (label == NULL_RTX)
18470 epilogue_size_needed = size_needed;
18472 /* Step 3: Main loop. */
18478 gcc_unreachable ();
18480 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18481 count_exp, QImode, 1, expected_size);
18484 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18485 count_exp, Pmode, 1, expected_size);
18487 case unrolled_loop:
18488 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18489 registers for 4 temporaries anyway. */
18490 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18491 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18494 case rep_prefix_8_byte:
18495 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18498 case rep_prefix_4_byte:
18499 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18502 case rep_prefix_1_byte:
18503 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18507 /* Adjust properly the offset of src and dest memory for aliasing. */
18508 if (CONST_INT_P (count_exp))
18510 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18511 (count / size_needed) * size_needed);
18512 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18513 (count / size_needed) * size_needed);
18517 src = change_address (src, BLKmode, srcreg);
18518 dst = change_address (dst, BLKmode, destreg);
18521 /* Step 4: Epilogue to copy the remaining bytes. */
18525 /* When the main loop is done, COUNT_EXP might hold original count,
18526 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18527 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18528 bytes. Compensate if needed. */
18530 if (size_needed < epilogue_size_needed)
18533 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18534 GEN_INT (size_needed - 1), count_exp, 1,
18536 if (tmp != count_exp)
18537 emit_move_insn (count_exp, tmp);
18539 emit_label (label);
18540 LABEL_NUSES (label) = 1;
18543 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18544 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18545 epilogue_size_needed);
18546 if (jump_around_label)
18547 emit_label (jump_around_label);
18551 /* Helper function for memcpy. For QImode value 0xXY produce
18552 0xXYXYXYXY of wide specified by MODE. This is essentially
18553 a * 0x10101010, but we can do slightly better than
18554 synth_mult by unwinding the sequence by hand on CPUs with
/* Broadcasts a byte value across all bytes of an SImode or DImode register.
   Constant values are folded at compile time; otherwise either a multiply by
   0x01010101.. or a shift/or (or insv) sequence is chosen by cost.  */
18557 promote_duplicated_reg (enum machine_mode mode, rtx val)
18559 enum machine_mode valmode = GET_MODE (val);
18561 int nops = mode == DImode ? 3 : 2;
18563 gcc_assert (mode == SImode || mode == DImode);
18564 if (val == const0_rtx)
18565 return copy_to_mode_reg (mode, const0_rtx);
/* Constant byte: compute the replicated pattern at compile time.  */
18566 if (CONST_INT_P (val))
18568 HOST_WIDE_INT v = INTVAL (val) & 255;
18572 if (mode == DImode)
18573 v |= (v << 16) << 16;
18574 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18577 if (valmode == VOIDmode)
18579 if (valmode != QImode)
18580 val = gen_lowpart (QImode, val);
18581 if (mode == QImode)
/* Cost comparison: multiply by the 0x0101.. pattern vs. the shift/or
   unrolled sequence (NOPS steps).  */
18583 if (!TARGET_PARTIAL_REG_STALL)
18585 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18586 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18587 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18588 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18590 rtx reg = convert_modes (mode, QImode, val, true);
18591 tmp = promote_duplicated_reg (mode, const1_rtx);
18592 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
/* Hand-unrolled broadcast: duplicate into bits 8-15 (insv where cheap,
   otherwise shift+or), then widen 16 -> 32 -> 64 by shift+or.  */
18597 rtx reg = convert_modes (mode, QImode, val, true);
18599 if (!TARGET_PARTIAL_REG_STALL)
18600 if (mode == SImode)
18601 emit_insn (gen_movsi_insv_1 (reg, reg));
18603 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18606 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18607 NULL, 1, OPTAB_DIRECT);
18609 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18611 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18612 NULL, 1, OPTAB_DIRECT);
18613 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18614 if (mode == SImode)
18616 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18617 NULL, 1, OPTAB_DIRECT);
18618 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18623 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18624 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18625 alignment from ALIGN to DESIRED_ALIGN. */
/* Picks the widest mode (DI/SI/HI) any part of the expansion will store in;
   if only single bytes are ever stored, VAL is returned unchanged.  */
18627 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18632 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18633 promoted_val = promote_duplicated_reg (DImode, val);
18634 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18635 promoted_val = promote_duplicated_reg (SImode, val);
18636 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18637 promoted_val = promote_duplicated_reg (HImode, val);
18639 promoted_val = val;
18641 return promoted_val;
18644 /* Expand string clear operation (bzero). Use i386 string operations when
18645 profitable. See expand_movmem comment for explanation of individual
18646 steps performed. */
18648 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18649 rtx expected_align_exp, rtx expected_size_exp)
18654 rtx jump_around_label = NULL;
18655 HOST_WIDE_INT align = 1;
18656 unsigned HOST_WIDE_INT count = 0;
18657 HOST_WIDE_INT expected_size = -1;
18658 int size_needed = 0, epilogue_size_needed;
18659 int desired_align = 0, align_bytes = 0;
18660 enum stringop_alg alg;
18661 rtx promoted_val = NULL;
18662 bool force_loopy_epilogue = false;
18664 bool need_zero_guard = false;
18666 if (CONST_INT_P (align_exp))
18667 align = INTVAL (align_exp);
18668 /* i386 can do misaligned access on reasonably increased cost. */
18669 if (CONST_INT_P (expected_align_exp)
18670 && INTVAL (expected_align_exp) > align)
18671 align = INTVAL (expected_align_exp);
18672 if (CONST_INT_P (count_exp))
18673 count = expected_size = INTVAL (count_exp);
18674 if (CONST_INT_P (expected_size_exp) && count == 0)
18675 expected_size = INTVAL (expected_size_exp);
18677 /* Make sure we don't need to care about overflow later on. */
18678 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18681 /* Step 0: Decide on preferred algorithm, desired alignment and
18682 size of chunks to be copied by main loop. */
18684 alg = decide_alg (count, expected_size, true, &dynamic_check);
18685 desired_align = decide_alignment (align, alg, expected_size);
18687 if (!TARGET_ALIGN_STRINGOPS)
18688 align = desired_align;
18690 if (alg == libcall)
18692 gcc_assert (alg != no_stringop);
18694 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18695 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18700 gcc_unreachable ();
18702 need_zero_guard = true;
18703 size_needed = GET_MODE_SIZE (Pmode);
18705 case unrolled_loop:
18706 need_zero_guard = true;
18707 size_needed = GET_MODE_SIZE (Pmode) * 4;
18709 case rep_prefix_8_byte:
18712 case rep_prefix_4_byte:
18715 case rep_prefix_1_byte:
18719 need_zero_guard = true;
18723 epilogue_size_needed = size_needed;
18725 /* Step 1: Prologue guard. */
18727 /* Alignment code needs count to be in register. */
18728 if (CONST_INT_P (count_exp) && desired_align > align)
18730 if (INTVAL (count_exp) > desired_align
18731 && INTVAL (count_exp) > size_needed)
18734 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18735 if (align_bytes <= 0)
18738 align_bytes = desired_align - align_bytes;
18740 if (align_bytes == 0)
18742 enum machine_mode mode = SImode;
18743 if (TARGET_64BIT && (count & ~0xffffffff))
18745 count_exp = force_reg (mode, count_exp);
18748 /* Do the cheap promotion to allow better CSE across the
18749 main loop and epilogue (ie one load of the big constant in the
18750 front of all code. */
18751 if (CONST_INT_P (val_exp))
18752 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18753 desired_align, align);
18754 /* Ensure that alignment prologue won't copy past end of block. */
18755 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18757 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18758 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18759 Make sure it is power of 2. */
18760 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18762 /* To improve performance of small blocks, we jump around the VAL
18763 promoting mode. This mean that if the promoted VAL is not constant,
18764 we might not use it in the epilogue and have to use byte
18766 if (epilogue_size_needed > 2 && !promoted_val)
18767 force_loopy_epilogue = true;
18770 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18772 /* If main algorithm works on QImode, no epilogue is needed.
18773 For small sizes just don't align anything. */
18774 if (size_needed == 1)
18775 desired_align = align;
18782 label = gen_label_rtx ();
18783 emit_cmp_and_jump_insns (count_exp,
18784 GEN_INT (epilogue_size_needed),
18785 LTU, 0, counter_mode (count_exp), 1, label);
18786 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18787 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18789 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18792 if (dynamic_check != -1)
18794 rtx hot_label = gen_label_rtx ();
18795 jump_around_label = gen_label_rtx ();
18796 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18797 LEU, 0, counter_mode (count_exp), 1, hot_label);
18798 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18799 set_storage_via_libcall (dst, count_exp, val_exp, false);
18800 emit_jump (jump_around_label);
18801 emit_label (hot_label);
18804 /* Step 2: Alignment prologue. */
18806 /* Do the expensive promotion once we branched off the small blocks. */
18808 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18809 desired_align, align);
18810 gcc_assert (desired_align >= 1 && align >= 1);
18812 if (desired_align > align)
18814 if (align_bytes == 0)
18816 /* Except for the first move in epilogue, we no longer know
18817	     constant offset in aliasing info.  It does not seem worth
18818 the pain to maintain it for the first move, so throw away
18820 dst = change_address (dst, BLKmode, destreg);
18821 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18826 /* If we know how many bytes need to be stored before dst is
18827 sufficiently aligned, maintain aliasing info accurately. */
18828 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18829 desired_align, align_bytes);
18830 count_exp = plus_constant (count_exp, -align_bytes);
18831 count -= align_bytes;
18833 if (need_zero_guard
18834 && (count < (unsigned HOST_WIDE_INT) size_needed
18835 || (align_bytes == 0
18836 && count < ((unsigned HOST_WIDE_INT) size_needed
18837 + desired_align - align))))
18839 /* It is possible that we copied enough so the main loop will not
18841 gcc_assert (size_needed > 1);
18842 if (label == NULL_RTX)
18843 label = gen_label_rtx ();
18844 emit_cmp_and_jump_insns (count_exp,
18845 GEN_INT (size_needed),
18846 LTU, 0, counter_mode (count_exp), 1, label);
18847 if (expected_size == -1
18848 || expected_size < (desired_align - align) / 2 + size_needed)
18849 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18851 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18854 if (label && size_needed == 1)
18856 emit_label (label);
18857 LABEL_NUSES (label) = 1;
18859 promoted_val = val_exp;
18860 epilogue_size_needed = 1;
18862 else if (label == NULL_RTX)
18863 epilogue_size_needed = size_needed;
18865 /* Step 3: Main loop. */
18871 gcc_unreachable ();
18873 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18874 count_exp, QImode, 1, expected_size);
18877 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18878 count_exp, Pmode, 1, expected_size);
18880 case unrolled_loop:
18881 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18882 count_exp, Pmode, 4, expected_size);
18884 case rep_prefix_8_byte:
18885 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18888 case rep_prefix_4_byte:
18889 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18892 case rep_prefix_1_byte:
18893 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18897 /* Adjust properly the offset of src and dest memory for aliasing. */
18898 if (CONST_INT_P (count_exp))
18899 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18900 (count / size_needed) * size_needed);
18902 dst = change_address (dst, BLKmode, destreg);
18904 /* Step 4: Epilogue to copy the remaining bytes. */
18908 /* When the main loop is done, COUNT_EXP might hold original count,
18909 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18910 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18911 bytes. Compensate if needed. */
18913 if (size_needed < epilogue_size_needed)
18916 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18917 GEN_INT (size_needed - 1), count_exp, 1,
18919 if (tmp != count_exp)
18920 emit_move_insn (count_exp, tmp);
18922 emit_label (label);
18923 LABEL_NUSES (label) = 1;
18926 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18928 if (force_loopy_epilogue)
18929 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18930 epilogue_size_needed);
18932 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18933 epilogue_size_needed);
18935 if (jump_around_label)
18936 emit_label (jump_around_label);
18940 /* Expand the appropriate insns for doing strlen if not just doing
18943 out = result, initialized with the start address
18944 align_rtx = alignment of the address.
18945 scratch = scratch register, initialized with the startaddress when
18946 not aligned, otherwise undefined
18948 This is just the body. It needs the initializations mentioned above and
18949 some address computing at the end. These things are done in i386.md. */
/* Emit the body of an unrolled strlen: scan up to three single bytes
   until OUT reaches a 4-byte boundary, then test one SImode word per
   iteration using the (x - 0x01010101) & ~x & 0x80808080 byte-is-zero
   trick, and finally adjust OUT back to the exact NUL position.  On
   exit OUT holds the address of the terminating zero byte.
   NOTE(review): this listing elides lines (gaps in the embedded line
   numbers); the comments below describe only the visible code.  */
18952 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18956   rtx align_2_label = NULL_RTX;
18957   rtx align_3_label = NULL_RTX;
18958   rtx align_4_label = gen_label_rtx ();
18959   rtx end_0_label = gen_label_rtx ();
18961   rtx tmpreg = gen_reg_rtx (SImode);
18962   rtx scratch = gen_reg_rtx (SImode);
18966   if (CONST_INT_P (align_rtx))
18967     align = INTVAL (align_rtx);
18969   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
18971   /* Is there a known alignment and is it less than 4?  */
18974       rtx scratch1 = gen_reg_rtx (Pmode);
18975       emit_move_insn (scratch1, out);
18976       /* Is there a known alignment and is it not 2? */
18979 	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18980 	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18982 	  /* Leave just the 3 lower bits.  */
18983 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18984 				    NULL_RTX, 0, OPTAB_WIDEN);
18986 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18987 				   Pmode, 1, align_4_label);
18988 	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18989 				   Pmode, 1, align_2_label);
18990 	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18991 				   Pmode, 1, align_3_label);
18995 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
18996 	     check if is aligned to 4 - byte.  */
18998 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18999 				    NULL_RTX, 0, OPTAB_WIDEN);
19001 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19002 				   Pmode, 1, align_4_label);
19005       mem = change_address (src, QImode, out);
19007       /* Now compare the bytes.  */
19009       /* Compare the first n unaligned byte on a byte per byte basis.  */
19010       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
19011 			       QImode, 1, end_0_label);
19013       /* Increment the address.  */
19014       emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19016       /* Not needed with an alignment of 2 */
19019 	  emit_label (align_2_label);
19021 	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19024 	  emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19026 	  emit_label (align_3_label);
19029       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19032       emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19035   /* Generate loop to check 4 bytes at a time.  It is not a good idea to
19036      align this loop.  It gives only huge programs, but does not help to
19038   emit_label (align_4_label);
19040   mem = change_address (src, SImode, out);
19041   emit_move_insn (scratch, mem);
19042   emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
19044   /* This formula yields a nonzero result iff one of the bytes is zero.
19045      This saves three branches inside loop and many cycles.  */
19047   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
19048   emit_insn (gen_one_cmplsi2 (scratch, scratch));
19049   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
19050   emit_insn (gen_andsi3 (tmpreg, tmpreg,
19051 			 gen_int_mode (0x80808080, SImode)));
      /* NOTE(review): the jump target operand is on an elided line;
	 presumably it loops back to align_4_label while no zero byte
	 was found -- confirm against the full source.  */
19052   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
19057       rtx reg = gen_reg_rtx (SImode);
19058       rtx reg2 = gen_reg_rtx (Pmode);
19059       emit_move_insn (reg, tmpreg);
19060       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
19062       /* If zero is not in the first two bytes, move two bytes forward.  */
19063       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19064       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19065       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19066       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
19067 			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
19070       /* Emit lea manually to avoid clobbering of flags.  */
19071       emit_insn (gen_rtx_SET (SImode, reg2,
19072 			      gen_rtx_PLUS (Pmode, out, const2_rtx)));
19074       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19075       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19076       emit_insn (gen_rtx_SET (VOIDmode, out,
19077 			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
19084       rtx end_2_label = gen_label_rtx ();
19085       /* Is zero in the first two bytes? */
19087       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19088       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19089       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19090       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19091 				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
19093       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19094       JUMP_LABEL (tmp) = end_2_label;
19096       /* Not in the first two.  Move two bytes forward.  */
19097       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19098       emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
19100       emit_label (end_2_label);
19104   /* Avoid branch in fixing the byte.  */
      /* Doubling the low byte sets carry iff its 0x80 "zero here" bit was
	 set; the subtract-with-carry of 3 then lands OUT exactly on the
	 NUL byte without a branch.  */
19105   tmpreg = gen_lowpart (QImode, tmpreg);
19106   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19107   cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
19108   emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
19110   emit_label (end_0_label);
19113 /* Expand strlen. */
/* Expand strlen (OUT = strlen of string SRC, terminator EOSCHAR, known
   alignment ALIGN).  Chooses between the unrolled word-at-a-time scan
   (ix86_expand_strlensi_unroll_1) and a rep-scasb sequence via
   UNSPEC_SCAS.  NOTE(review): the return statements are on elided
   lines; presumably it returns nonzero on successful expansion.  */
19116 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19118   rtx addr, scratch1, scratch2, scratch3, scratch4;
19120   /* The generic case of strlen expander is long.  Avoid
19121      expanding it unless TARGET_INLINE_ALL_STRINGOPS.  */
19123   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19124       && !TARGET_INLINE_ALL_STRINGOPS
19125       && !optimize_insn_for_size_p ()
19126       && (!CONST_INT_P (align) || INTVAL (align) < 4))
19129   addr = force_reg (Pmode, XEXP (src, 0));
19130   scratch1 = gen_reg_rtx (Pmode);
19132   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19133       && !optimize_insn_for_size_p ())
19135       /* Well it seems that some optimizer does not combine a call like
19136 	 foo(strlen(bar), strlen(bar));
19137 	 when the move and the subtraction is done here.  It does calculate
19138 	 the length just once when these instructions are done inside of
19139 	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
19140 	 often used and I use one fewer register for the lifetime of
19141 	 output_strlen_unroll() this is better.  */
19143       emit_move_insn (out, addr);
19145       ix86_expand_strlensi_unroll_1 (out, src, align);
19147       /* strlensi_unroll_1 returns the address of the zero at the end of
19148 	 the string, like memchr(), so compute the length by subtracting
19149 	 the start address.  */
19150       emit_insn ((*ix86_gen_sub3) (out, out, addr));
      /* Fall-back path: rep scasb needs eax/ecx/edi free.  */
19156       /* Can't use this if the user has appropriated eax, ecx, or edi.  */
19157       if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19160       scratch2 = gen_reg_rtx (Pmode);
19161       scratch3 = gen_reg_rtx (Pmode);
19162       scratch4 = force_reg (Pmode, constm1_rtx);
19164       emit_move_insn (scratch3, addr);
19165       eoschar = force_reg (QImode, eoschar);
19167       src = replace_equiv_address_nv (src, scratch3);
19169       /* If .md starts supporting :P, this can be done in .md.  */
19170       unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19171 						 scratch4), UNSPEC_SCAS);
19172       emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      /* scasb leaves the pointer one past the terminator with a negated
	 count; ~count - 1 recovers the length.  */
19173       emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19174       emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19179 /* For a given symbol (function) construct code to compute the address of its PLT
19180 entry in large x86-64 PIC model. */
/* Compute SYMBOL@PLTOFF + PIC register into a fresh pseudo and return
   it (the return statement is on an elided line).  Only valid in the
   large x86-64 PIC code model.  */
19182 construct_plt_address (rtx symbol)
19184   rtx tmp = gen_reg_rtx (Pmode);
19185   rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19187   gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19188   gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19190   emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19191   emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Emit a call to FNADDR with argument bytes CALLARG1, optional return
   value RETVAL, optional stack-pop amount POP, SIBCALL nonzero for a
   tail call.  Builds the CALL rtx, wraps it in a PARALLEL for the pop
   and for MS->SysV clobbers, and attaches hard-register uses.
   NOTE(review): elided lines include the callarg2 parameter
   declaration and several conditions; comments describe visible code
   only.  */
19196 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19198 		  rtx pop, int sibcall)
19200   rtx use = NULL, call;
19202   if (pop == const0_rtx)
19204   gcc_assert (!TARGET_64BIT || !pop);
19206   if (TARGET_MACHO && !TARGET_64BIT)
19209       if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19210 	fnaddr = machopic_indirect_call_target (fnaddr);
19215       /* Static functions and indirect calls don't need the pic register.  */
19216       if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19217 	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19218 	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19219 	use_reg (&use, pic_offset_table_rtx);
      /* 64-bit varargs: AL communicates a count to the callee --
	 presumably the number of SSE registers holding arguments;
	 confirm against the elided comment.  */
19222   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19224       rtx al = gen_rtx_REG (QImode, AX_REG);
19225       emit_move_insn (al, callarg2);
19226       use_reg (&use, al);
19229   if (ix86_cmodel == CM_LARGE_PIC
19231       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19232       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19233     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
      /* Force the target into a register when it is not a valid
	 (sib)call operand.  */
19235 	   ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
19236 	   : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
19238       fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19239       fnaddr = gen_rtx_MEM (QImode, fnaddr);
19242   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19244     call = gen_rtx_SET (VOIDmode, retval, call);
19247       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19248       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19249       call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
      /* MS-ABI caller invoking a SysV callee: the SysV callee may
	 clobber xmm6-15, rsi and rdi, which the MS ABI treats as
	 call-saved, so record explicit CLOBBERs.  */
19252       && ix86_cfun_abi () == MS_ABI
19253       && (!callarg2 || INTVAL (callarg2) != -2))
19255       /* We need to represent that SI and DI registers are clobbered
19257       static int clobbered_registers[] = {
19258 	XMM6_REG, XMM7_REG, XMM8_REG,
19259 	XMM9_REG, XMM10_REG, XMM11_REG,
19260 	XMM12_REG, XMM13_REG, XMM14_REG,
19261 	XMM15_REG, SI_REG, DI_REG
19264       rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19265       rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19266 				   UNSPEC_MS_TO_SYSV_CALL);
19270       for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19271 	vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19274 				      (SSE_REGNO_P (clobbered_registers[i])
19276 				       clobbered_registers[i]));
19278       call = gen_rtx_PARALLEL (VOIDmode,
19279 			       gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19283   call = emit_call_insn (call);
19285     CALL_INSN_FUNCTION_USAGE (call) = use;
19289 /* Clear stack slot assignments remembered from previous functions.
19290 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and initialize the per-function machine_function record
   (GC-allocated, zero-initialized by GGC_CNEW).  -1 marks
   use_fast_prologue_epilogue_nregs as not yet computed.  The return
   statement is on an elided line.  */
19293 static struct machine_function *
19294 ix86_init_machine_status (void)
19296   struct machine_function *f;
19298   f = GGC_CNEW (struct machine_function);
19299   f->use_fast_prologue_epilogue_nregs = -1;
19300   f->tls_descriptor_call_expanded_p = 0;
19301   f->call_abi = ix86_abi;
19306 /* Return a MEM corresponding to a stack slot with mode MODE.
19307 Allocate a new slot if necessary.
19309 The RTL for a function can have several slots available: N is
19310 which slot to use. */
/* Return (a copy of) the stack slot of mode MODE for slot index N,
   reusing an existing entry from the ix86_stack_locals list when one
   matches, otherwise allocating a new one and prepending it.  */
19313 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19315   struct stack_local_entry *s;
19317   gcc_assert (n < MAX_386_STACK_LOCALS);
19319   /* Virtual slot is valid only before vregs are instantiated.  */
19320   gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
19322   for (s = ix86_stack_locals; s; s = s->next)
19323     if (s->mode == mode && s->n == n)
      /* copy_rtx so each use gets distinct MEM attributes.  */
19324       return copy_rtx (s->rtl);
19326   s = (struct stack_local_entry *)
19327     ggc_alloc (sizeof (struct stack_local_entry));
19330   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19332   s->next = ix86_stack_locals;
19333   ix86_stack_locals = s;
19337 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* Cached SYMBOL_REF for the TLS resolver function; GTY so it survives
   garbage collection.  */
19339 static GTY(()) rtx ix86_tls_symbol;
/* Lazily construct and return the SYMBOL_REF for the tls_get_addr
   function; the GNU-TLS variant carries an extra leading underscore.  */
19341 ix86_tls_get_addr (void)
19344   if (!ix86_tls_symbol)
19346       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19347 					    (TARGET_ANY_GNU_TLS
19349 					    ? "___tls_get_addr"
19350 					    : "__tls_get_addr");
19353   return ix86_tls_symbol;
19356 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
/* Cached SYMBOL_REF for _TLS_MODULE_BASE_; GTY-rooted.  */
19358 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily construct and return the _TLS_MODULE_BASE_ SYMBOL_REF, marked
   as a global-dynamic TLS symbol.  */
19360 ix86_tls_module_base (void)
19363   if (!ix86_tls_module_base_symbol)
19365       ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19366 							"_TLS_MODULE_BASE_");
19367       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19368 	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19371   return ix86_tls_module_base_symbol;
19374 /* Calculate the length of the memory address in the instruction
19375 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Return the number of bytes the address ADDR contributes to the
   instruction encoding (displacement and SIB), not counting the
   one-byte modrm, opcode, or prefixes.  NOTE(review): the actual
   length accumulation and return statements are on elided lines; the
   visible code classifies the addressing form.  */
19378 memory_address_length (rtx addr)
19380   struct ix86_address parts;
19381   rtx base, index, disp;
19385   if (GET_CODE (addr) == PRE_DEC
19386       || GET_CODE (addr) == POST_INC
19387       || GET_CODE (addr) == PRE_MODIFY
19388       || GET_CODE (addr) == POST_MODIFY)
19391   ok = ix86_decompose_address (addr, &parts);
      /* Strip SUBREGs so register-number tests below see hard regs.  */
19394   if (parts.base && GET_CODE (parts.base) == SUBREG)
19395     parts.base = SUBREG_REG (parts.base);
19396   if (parts.index && GET_CODE (parts.index) == SUBREG)
19397     parts.index = SUBREG_REG (parts.index);
19400   index = parts.index;
19405      - esp as the base always wants an index,
19406      - ebp as the base always wants a displacement,
19407      - r12 as the base always wants an index,
19408      - r13 as the base always wants a displacement.  */
19410   /* Register Indirect.  */
19411   if (base && !index && !disp)
19413       /* esp (for its index) and ebp (for its displacement) need
19414 	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
19417 	  && (addr == arg_pointer_rtx
19418 	      || addr == frame_pointer_rtx
19419 	      || REGNO (addr) == SP_REG
19420 	      || REGNO (addr) == BP_REG
19421 	      || REGNO (addr) == R12_REG
19422 	      || REGNO (addr) == R13_REG))
19426   /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
19427      is not disp32, but disp32(%rip), so for disp32
19428      SIB byte is needed, unless print_operand_address
19429      optimizes it into disp32(%rip) or (%rip) is implied
19431   else if (disp && !base && !index)
19438 	  if (GET_CODE (disp) == CONST)
19439 	    symbol = XEXP (disp, 0);
19440 	  if (GET_CODE (symbol) == PLUS
19441 	      && CONST_INT_P (XEXP (symbol, 1)))
19442 	    symbol = XEXP (symbol, 0);
19444 	  if (GET_CODE (symbol) != LABEL_REF
19445 	      && (GET_CODE (symbol) != SYMBOL_REF
19446 		  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
19447 	      && (GET_CODE (symbol) != UNSPEC
19448 		  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
19449 		      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
19456       /* Find the length of the displacement constant.  */
      /* constraint K = signed 8-bit immediate, so a 1-byte disp8.  */
19459 	  if (base && satisfies_constraint_K (disp))
19464       /* ebp always wants a displacement.  Similarly r13.  */
19465       else if (base && REG_P (base)
19466 	       && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
19469       /* An index requires the two-byte modrm form....  */
19471 	  /* ...like esp (or r12), which always wants an index.  */
19472 	  || base == arg_pointer_rtx
19473 	  || base == frame_pointer_rtx
19474 	  || (base && REG_P (base)
19475 	      && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
19492 /* Compute default value for "length_immediate" attribute. When SHORTFORM
19493 is set, expect that insn have 8bit immediate alternative. */
/* Compute the default value of the "length_immediate" insn attribute:
   the encoded size in bytes of INSN's constant operand.  When
   SHORTFORM is set the insn has an 8-bit immediate alternative, so
   values in [-128, 127] (after truncation to the operand mode) encode
   as one byte.  NOTE(review): the per-mode size returns are on elided
   lines.  */
19495 ix86_attr_length_immediate_default (rtx insn, int shortform)
19499   extract_insn_cached (insn);
19500   for (i = recog_data.n_operands - 1; i >= 0; --i)
19501     if (CONSTANT_P (recog_data.operand[i]))
19503 	enum attr_mode mode = get_attr_mode (insn);
19506 	if (shortform && CONST_INT_P (recog_data.operand[i]))
19508 	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    /* Truncate to the operand mode before the range check, so
	       e.g. 0xffff in HImode counts as -1 and fits in 8 bits.  */
19515 		ival = trunc_int_for_mode (ival, HImode);
19518 		ival = trunc_int_for_mode (ival, SImode);
19523 	    if (IN_RANGE (ival, -128, 127))
19540 	  /* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
19545 	    fatal_insn ("unknown insn mode", insn);
19550 /* Compute default value for "length_address" attribute. */
/* Compute the default value of the "length_address" insn attribute.
   For LEA the address is the SET_SRC itself; otherwise measure the
   address of the first MEM operand that is live in the matched
   constraint alternative.  */
19552 ix86_attr_length_address_default (rtx insn)
19556   if (get_attr_type (insn) == TYPE_LEA)
19558       rtx set = PATTERN (insn), addr;
19560       if (GET_CODE (set) == PARALLEL)
19561 	set = XVECEXP (set, 0, 0);
19563       gcc_assert (GET_CODE (set) == SET);
19565       addr = SET_SRC (set);
19566       if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
	  /* 32-bit LEA in 64-bit mode is wrapped in zero_extend/subreg;
	     strip it to get at the address expression.  */
19568 	  if (GET_CODE (addr) == ZERO_EXTEND)
19569 	    addr = XEXP (addr, 0);
19570 	  if (GET_CODE (addr) == SUBREG)
19571 	    addr = SUBREG_REG (addr);
19574       return memory_address_length (addr);
19577   extract_insn_cached (insn);
19578   for (i = recog_data.n_operands - 1; i >= 0; --i)
19579     if (MEM_P (recog_data.operand[i]))
19581 	constrain_operands_cached (reload_completed);
19582 	if (which_alternative != -1)
19584 	    const char *constraints = recog_data.constraints[i];
19585 	    int alt = which_alternative;
	    /* Walk to the constraint string of the chosen alternative.  */
19587 	    while (*constraints == '=' || *constraints == '+')
19590 	      while (*constraints++ != ',')
19592 	    /* Skip ignored operands.  */
19593 	    if (*constraints == 'X')
19596 	return memory_address_length (XEXP (recog_data.operand[i], 0));
19601 /* Compute default value for "length_vex" attribute. It includes
19602 2 or 3 byte VEX prefix and 1 opcode byte. */
/* Compute the default value of the "length_vex" attribute: the size of
   the VEX prefix (2 or 3 bytes) plus the 1 opcode byte.  A 3-byte VEX
   prefix is required when the opcode map is not 0f, when VEX.W is set,
   or (in 64-bit mode) when REX.W/REX.X/REX.B would be needed.
   NOTE(review): the return statements are on elided lines.  */
19605 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19610   /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
19611      byte VEX prefix.  */
19612   if (!has_0f_opcode || has_vex_w)
19615   /* We can always use 2 byte VEX prefix in 32bit.  */
19619   extract_insn_cached (insn);
19621   for (i = recog_data.n_operands - 1; i >= 0; --i)
19622     if (REG_P (recog_data.operand[i]))
19624 	/* REX.W bit uses 3 byte VEX prefix.  */
19625 	if (GET_MODE (recog_data.operand[i]) == DImode
19626 	    && GENERAL_REG_P (recog_data.operand[i]))
19631 	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
19632 	if (MEM_P (recog_data.operand[i])
19633 	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19640 /* Return the maximum number of instructions a cpu can issue. */
/* Return the maximum number of instructions the scheduled CPU can
   issue per cycle (TARGET_SCHED_ISSUE_RATE).  The per-group return
   values are on elided lines.  */
19643 ix86_issue_rate (void)
19647     case PROCESSOR_PENTIUM:
19648     case PROCESSOR_ATOM:
19652     case PROCESSOR_PENTIUMPRO:
19653     case PROCESSOR_PENTIUM4:
19654     case PROCESSOR_ATHLON:
19656     case PROCESSOR_AMDFAM10:
19657     case PROCESSOR_NOCONA:
19658     case PROCESSOR_GENERIC32:
19659     case PROCESSOR_GENERIC64:
19662     case PROCESSOR_CORE2:
19670 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19671 by DEP_INSN and nothing set by DEP_INSN. */
19674 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19678 /* Simplify the test for uninteresting insns. */
19679 if (insn_type != TYPE_SETCC
19680 && insn_type != TYPE_ICMOV
19681 && insn_type != TYPE_FCMOV
19682 && insn_type != TYPE_IBR)
19685 if ((set = single_set (dep_insn)) != 0)
19687 set = SET_DEST (set);
19690 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19691 && XVECLEN (PATTERN (dep_insn), 0) == 2
19692 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19693 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19695 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19696 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19701 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19704 /* This test is true if the dependent insn reads the flags but
19705 not any other potentially set register. */
19706 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19709 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19715 /* Return true iff USE_INSN has a memory address with operands set by
/* Return true iff USE_INSN has a memory operand whose address uses a
   register set by SET_INSN (an address-generation-interlock hazard).
   Only the first MEM operand found is examined; the "no MEM" return
   is on an elided line.  */
19719 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19722   extract_insn_cached (use_insn);
19723   for (i = recog_data.n_operands - 1; i >= 0; --i)
19724     if (MEM_P (recog_data.operand[i]))
19726 	rtx addr = XEXP (recog_data.operand[i], 0);
19727 	return modified_in_p (addr, set_insn) != 0;
/* TARGET_SCHED_ADJUST_COST hook: adjust the scheduling COST of the
   dependence LINK between DEP_INSN (producer) and INSN (consumer)
   according to processor-specific pipeline behavior.  NOTE(review):
   this listing elides lines (several returns, case labels and the
   switch header); comments describe the visible code only.  */
19733 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19735   enum attr_type insn_type, dep_insn_type;
19736   enum attr_memory memory;
19738   int dep_insn_code_number;
19740   /* Anti and output dependencies have zero cost on all CPUs.  */
19741   if (REG_NOTE_KIND (link) != 0)
19744   dep_insn_code_number = recog_memoized (dep_insn);
19746   /* If we can't recognize the insns, we can't really do anything.  */
19747   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19750   insn_type = get_attr_type (insn);
19751   dep_insn_type = get_attr_type (dep_insn);
19755     case PROCESSOR_PENTIUM:
19756       /* Address Generation Interlock adds a cycle of latency.  */
19757       if (insn_type == TYPE_LEA)
19759 	  rtx addr = PATTERN (insn);
19761 	  if (GET_CODE (addr) == PARALLEL)
19762 	    addr = XVECEXP (addr, 0, 0);
19764 	  gcc_assert (GET_CODE (addr) == SET);
19766 	  addr = SET_SRC (addr);
19767 	  if (modified_in_p (addr, dep_insn))
19770       else if (ix86_agi_dependent (dep_insn, insn))
19773       /* ??? Compares pair with jump/setcc.  */
19774       if (ix86_flags_dependent (insn, dep_insn, insn_type))
19777       /* Floating point stores require value to be ready one cycle earlier.  */
19778       if (insn_type == TYPE_FMOV
19779 	  && get_attr_memory (insn) == MEMORY_STORE
19780 	  && !ix86_agi_dependent (dep_insn, insn))
19784     case PROCESSOR_PENTIUMPRO:
19785       memory = get_attr_memory (insn);
19787       /* INT->FP conversion is expensive.  */
19788       if (get_attr_fp_int_src (dep_insn))
19791       /* There is one cycle extra latency between an FP op and a store.  */
19792       if (insn_type == TYPE_FMOV
19793 	  && (set = single_set (dep_insn)) != NULL_RTX
19794 	  && (set2 = single_set (insn)) != NULL_RTX
19795 	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19796 	  && MEM_P (SET_DEST (set2)))
19799       /* Show ability of reorder buffer to hide latency of load by executing
19800 	 in parallel with previous instruction in case
19801 	 previous instruction is not needed to compute the address.  */
19802       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19803 	  && !ix86_agi_dependent (dep_insn, insn))
19805 	  /* Claim moves to take one cycle, as core can issue one load
19806 	     at time and the next load can start cycle later.  */
19807 	  if (dep_insn_type == TYPE_IMOV
19808 	      || dep_insn_type == TYPE_FMOV)
      /* (case label elided -- presumably K6/older AMD.)  */
19816       memory = get_attr_memory (insn);
19818       /* The esp dependency is resolved before the instruction is really
19820       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19821 	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19824       /* INT->FP conversion is expensive.  */
19825       if (get_attr_fp_int_src (dep_insn))
19828       /* Show ability of reorder buffer to hide latency of load by executing
19829 	 in parallel with previous instruction in case
19830 	 previous instruction is not needed to compute the address.  */
19831       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19832 	  && !ix86_agi_dependent (dep_insn, insn))
19834 	  /* Claim moves to take one cycle, as core can issue one load
19835 	     at time and the next load can start cycle later.  */
19836 	  if (dep_insn_type == TYPE_IMOV
19837 	      || dep_insn_type == TYPE_FMOV)
19846     case PROCESSOR_ATHLON:
19848     case PROCESSOR_AMDFAM10:
19849     case PROCESSOR_ATOM:
19850     case PROCESSOR_GENERIC32:
19851     case PROCESSOR_GENERIC64:
19852       memory = get_attr_memory (insn);
19854       /* Show ability of reorder buffer to hide latency of load by executing
19855 	 in parallel with previous instruction in case
19856 	 previous instruction is not needed to compute the address.  */
19857       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19858 	  && !ix86_agi_dependent (dep_insn, insn))
19860 	  enum attr_unit unit = get_attr_unit (insn);
19863 	  /* Because of the difference between the length of integer and
19864 	     floating unit pipeline preparation stages, the memory operands
19865 	     for floating point are cheaper.
19867 	     ??? For Athlon the difference is most probably 2.  */
19868 	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19871 	    loadcost = TARGET_ATHLON ? 2 : 0;
19873 	  if (cost >= loadcost)
19886 /* How many alternative schedules to try. This should be as wide as the
19887 scheduling freedom in the DFA, but no wider. Making this value too
19888 large results extra work for the scheduler. */
/* Return the number of alternative schedules the scheduler should try
   (TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD); per-processor
   return values are on elided lines.  */
19891 ia32_multipass_dfa_lookahead (void)
19895     case PROCESSOR_PENTIUM:
19898     case PROCESSOR_PENTIUMPRO:
19908 /* Compute the alignment given to a constant that is being placed in memory.
19909 EXP is the constant and ALIGN is the alignment that the object would
19911 The value of this function is used instead of that alignment to align
/* Compute the alignment given to a constant placed in memory: bump
   DFmode scalars to 64 bits, 128-bit-mode constants to 128 bits, and
   long string constants to word alignment (helps the fast string
   routines).  EXP is the constant, ALIGN its default alignment.  */
19915 ix86_constant_alignment (tree exp, int align)
19917   if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19918       || TREE_CODE (exp) == INTEGER_CST)
19920       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19922       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19925   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19926 	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19927     return BITS_PER_WORD;
19932 /* Compute the alignment for a static variable.
19933 TYPE is the data type, and ALIGN is the alignment that
19934 the object would ordinarily have. The value of this function is used
19935 instead of that alignment to align the object. */
/* Compute the alignment for a static variable of TYPE whose default
   alignment is ALIGN: raise large aggregates to MAX_ALIGN, apply the
   x86-64 ABI 16-byte rule for arrays >= 16 bytes, and raise DFmode /
   128-bit-mode data to their natural alignment.  The return statements
   are on elided lines.  */
19938 ix86_data_alignment (tree type, int align)
19940   int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
      /* TREE_INT_CST_HIGH nonzero means the size exceeds the low word,
	 i.e. it is certainly huge enough.  */
19942   if (AGGREGATE_TYPE_P (type)
19943       && TYPE_SIZE (type)
19944       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19945       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19946 	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19947       && align < max_align)
19950   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19951      to 16byte boundary.  */
19954       if (AGGREGATE_TYPE_P (type)
19955 	  && TYPE_SIZE (type)
19956 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19957 	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19958 	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19962   if (TREE_CODE (type) == ARRAY_TYPE)
19964       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19966       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19969   else if (TREE_CODE (type) == COMPLEX_TYPE)
19972       if (TYPE_MODE (type) == DCmode && align < 64)
19974       if ((TYPE_MODE (type) == XCmode
19975 	   || TYPE_MODE (type) == TCmode) && align < 128)
19978   else if ((TREE_CODE (type) == RECORD_TYPE
19979 	    || TREE_CODE (type) == UNION_TYPE
19980 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
19981 	   && TYPE_FIELDS (type))
19983       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19985       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19988   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19989 	   || TREE_CODE (type) == INTEGER_TYPE)
19991       if (TYPE_MODE (type) == DFmode && align < 64)
19993       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20000 /* Compute the alignment for a local variable or a stack slot. EXP is
20001 the data type or decl itself, MODE is the widest mode available and
20002 ALIGN is the alignment that the object would ordinarily have. The
20003 value of this macro is used instead of that alignment to align the
/* Compute the alignment for a local variable or stack slot.  EXP is
   the decl or type (may be NULL for a caller-save slot), MODE the
   widest available mode, ALIGN the default alignment.  Mirrors
   ix86_data_alignment's mode-based bumps, plus two stack-specific
   rules: avoid realignment for DImode with
   -mpreferred-stack-boundary=2, and give XFmode save slots at least
   DFmode alignment.  Returns are on elided lines.  */
20007 ix86_local_alignment (tree exp, enum machine_mode mode,
20008 		      unsigned int align)
20012   if (exp && DECL_P (exp))
20014       type = TREE_TYPE (exp);
20023   /* Don't do dynamic stack realignment for long long objects with
20024      -mpreferred-stack-boundary=2.  */
20027       && ix86_preferred_stack_boundary < 64
20028       && (mode == DImode || (type && TYPE_MODE (type) == DImode))
20029       && (!type || !TYPE_USER_ALIGN (type))
20030       && (!decl || !DECL_USER_ALIGN (decl)))
20033   /* If TYPE is NULL, we are allocating a stack slot for caller-save
20034      register in MODE.  We will return the largest alignment of XF
20038       if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
20039 	align = GET_MODE_ALIGNMENT (DFmode);
20043   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
20044      to 16byte boundary.  */
20047       if (AGGREGATE_TYPE_P (type)
20048 	  && TYPE_SIZE (type)
20049 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20050 	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
20051 	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20054   if (TREE_CODE (type) == ARRAY_TYPE)
20056       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20058       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20061   else if (TREE_CODE (type) == COMPLEX_TYPE)
20063       if (TYPE_MODE (type) == DCmode && align < 64)
20065       if ((TYPE_MODE (type) == XCmode
20066 	   || TYPE_MODE (type) == TCmode) && align < 128)
20069   else if ((TREE_CODE (type) == RECORD_TYPE
20070 	    || TREE_CODE (type) == UNION_TYPE
20071 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
20072 	   && TYPE_FIELDS (type))
20074       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20076       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20079   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20080 	   || TREE_CODE (type) == INTEGER_TYPE)
20083       if (TYPE_MODE (type) == DFmode && align < 64)
20085       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20091 /* Emit RTL insns to initialize the variable parts of a trampoline.
20092 FNADDR is an RTX for the address of the function's pure code.
20093 CXT is an RTX for the static chain value for the function. */
/* NOTE(review): elided extract -- the bare leading numbers are original
   line numbers and gaps mean missing lines (e.g. the TARGET_64BIT split
   and the `offset` bookkeeping are only partially visible).  */
20095 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
20099 /* Compute offset from the end of the jmp to the target function. */
/* The 32-bit trampoline is 10 bytes: mov $cxt,%ecx (5) + jmp rel32 (5),
   so the displacement is relative to tramp+10.  */
20100 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
20101 plus_constant (tramp, 10),
20102 NULL_RTX, 1, OPTAB_DIRECT);
/* 0xb9 = mov $imm32, %ecx: load the static chain.  */
20103 emit_move_insn (gen_rtx_MEM (QImode, tramp),
20104 gen_int_mode (0xb9, QImode));
20105 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
/* 0xe9 = jmp rel32: jump to the target function.  */
20106 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
20107 gen_int_mode (0xe9, QImode));
20108 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
20113 /* Try to load address using shorter movl instead of movabs.
20114 We may want to support movq for kernel mode, but kernel does not use
20115 trampolines at the moment. */
/* 64-bit case: if FNADDR fits in a zero-extended 32-bit immediate,
   use the 6-byte movl form (0x41 0xbb = movl $imm32, %r11d).  */
20116 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
20118 fnaddr = copy_to_mode_reg (DImode, fnaddr);
20119 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20120 gen_int_mode (0xbb41, HImode));
20121 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
20122 gen_lowpart (SImode, fnaddr));
/* Otherwise use the 10-byte movabs form (0x49 0xbb = movabs $imm64,
   %r11).  */
20127 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20128 gen_int_mode (0xbb49, HImode));
20129 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20133 /* Load static chain using movabs to r10. */
/* 0x49 0xba = movabs $imm64, %r10.  */
20134 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20135 gen_int_mode (0xba49, HImode));
20136 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20139 /* Jump to the r11 */
/* 0x49 0xff /3 (modrm 0xe3) = jmp *%r11.  */
20140 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20141 gen_int_mode (0xff49, HImode));
20142 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
20143 gen_int_mode (0xe3, QImode));
/* Sanity-check that the emitted bytes fit the reserved trampoline.  */
20145 gcc_assert (offset <= TRAMPOLINE_SIZE);
/* Some targets must mark the stack executable before trampolines run.  */
20148 #ifdef ENABLE_EXECUTE_STACK
20149 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
20150 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
20154 /* Codes for all the SSE/MMX builtins. */
20157 IX86_BUILTIN_ADDPS,
20158 IX86_BUILTIN_ADDSS,
20159 IX86_BUILTIN_DIVPS,
20160 IX86_BUILTIN_DIVSS,
20161 IX86_BUILTIN_MULPS,
20162 IX86_BUILTIN_MULSS,
20163 IX86_BUILTIN_SUBPS,
20164 IX86_BUILTIN_SUBSS,
20166 IX86_BUILTIN_CMPEQPS,
20167 IX86_BUILTIN_CMPLTPS,
20168 IX86_BUILTIN_CMPLEPS,
20169 IX86_BUILTIN_CMPGTPS,
20170 IX86_BUILTIN_CMPGEPS,
20171 IX86_BUILTIN_CMPNEQPS,
20172 IX86_BUILTIN_CMPNLTPS,
20173 IX86_BUILTIN_CMPNLEPS,
20174 IX86_BUILTIN_CMPNGTPS,
20175 IX86_BUILTIN_CMPNGEPS,
20176 IX86_BUILTIN_CMPORDPS,
20177 IX86_BUILTIN_CMPUNORDPS,
20178 IX86_BUILTIN_CMPEQSS,
20179 IX86_BUILTIN_CMPLTSS,
20180 IX86_BUILTIN_CMPLESS,
20181 IX86_BUILTIN_CMPNEQSS,
20182 IX86_BUILTIN_CMPNLTSS,
20183 IX86_BUILTIN_CMPNLESS,
20184 IX86_BUILTIN_CMPNGTSS,
20185 IX86_BUILTIN_CMPNGESS,
20186 IX86_BUILTIN_CMPORDSS,
20187 IX86_BUILTIN_CMPUNORDSS,
20189 IX86_BUILTIN_COMIEQSS,
20190 IX86_BUILTIN_COMILTSS,
20191 IX86_BUILTIN_COMILESS,
20192 IX86_BUILTIN_COMIGTSS,
20193 IX86_BUILTIN_COMIGESS,
20194 IX86_BUILTIN_COMINEQSS,
20195 IX86_BUILTIN_UCOMIEQSS,
20196 IX86_BUILTIN_UCOMILTSS,
20197 IX86_BUILTIN_UCOMILESS,
20198 IX86_BUILTIN_UCOMIGTSS,
20199 IX86_BUILTIN_UCOMIGESS,
20200 IX86_BUILTIN_UCOMINEQSS,
20202 IX86_BUILTIN_CVTPI2PS,
20203 IX86_BUILTIN_CVTPS2PI,
20204 IX86_BUILTIN_CVTSI2SS,
20205 IX86_BUILTIN_CVTSI642SS,
20206 IX86_BUILTIN_CVTSS2SI,
20207 IX86_BUILTIN_CVTSS2SI64,
20208 IX86_BUILTIN_CVTTPS2PI,
20209 IX86_BUILTIN_CVTTSS2SI,
20210 IX86_BUILTIN_CVTTSS2SI64,
20212 IX86_BUILTIN_MAXPS,
20213 IX86_BUILTIN_MAXSS,
20214 IX86_BUILTIN_MINPS,
20215 IX86_BUILTIN_MINSS,
20217 IX86_BUILTIN_LOADUPS,
20218 IX86_BUILTIN_STOREUPS,
20219 IX86_BUILTIN_MOVSS,
20221 IX86_BUILTIN_MOVHLPS,
20222 IX86_BUILTIN_MOVLHPS,
20223 IX86_BUILTIN_LOADHPS,
20224 IX86_BUILTIN_LOADLPS,
20225 IX86_BUILTIN_STOREHPS,
20226 IX86_BUILTIN_STORELPS,
20228 IX86_BUILTIN_MASKMOVQ,
20229 IX86_BUILTIN_MOVMSKPS,
20230 IX86_BUILTIN_PMOVMSKB,
20232 IX86_BUILTIN_MOVNTPS,
20233 IX86_BUILTIN_MOVNTQ,
20235 IX86_BUILTIN_LOADDQU,
20236 IX86_BUILTIN_STOREDQU,
20238 IX86_BUILTIN_PACKSSWB,
20239 IX86_BUILTIN_PACKSSDW,
20240 IX86_BUILTIN_PACKUSWB,
20242 IX86_BUILTIN_PADDB,
20243 IX86_BUILTIN_PADDW,
20244 IX86_BUILTIN_PADDD,
20245 IX86_BUILTIN_PADDQ,
20246 IX86_BUILTIN_PADDSB,
20247 IX86_BUILTIN_PADDSW,
20248 IX86_BUILTIN_PADDUSB,
20249 IX86_BUILTIN_PADDUSW,
20250 IX86_BUILTIN_PSUBB,
20251 IX86_BUILTIN_PSUBW,
20252 IX86_BUILTIN_PSUBD,
20253 IX86_BUILTIN_PSUBQ,
20254 IX86_BUILTIN_PSUBSB,
20255 IX86_BUILTIN_PSUBSW,
20256 IX86_BUILTIN_PSUBUSB,
20257 IX86_BUILTIN_PSUBUSW,
20260 IX86_BUILTIN_PANDN,
20264 IX86_BUILTIN_PAVGB,
20265 IX86_BUILTIN_PAVGW,
20267 IX86_BUILTIN_PCMPEQB,
20268 IX86_BUILTIN_PCMPEQW,
20269 IX86_BUILTIN_PCMPEQD,
20270 IX86_BUILTIN_PCMPGTB,
20271 IX86_BUILTIN_PCMPGTW,
20272 IX86_BUILTIN_PCMPGTD,
20274 IX86_BUILTIN_PMADDWD,
20276 IX86_BUILTIN_PMAXSW,
20277 IX86_BUILTIN_PMAXUB,
20278 IX86_BUILTIN_PMINSW,
20279 IX86_BUILTIN_PMINUB,
20281 IX86_BUILTIN_PMULHUW,
20282 IX86_BUILTIN_PMULHW,
20283 IX86_BUILTIN_PMULLW,
20285 IX86_BUILTIN_PSADBW,
20286 IX86_BUILTIN_PSHUFW,
20288 IX86_BUILTIN_PSLLW,
20289 IX86_BUILTIN_PSLLD,
20290 IX86_BUILTIN_PSLLQ,
20291 IX86_BUILTIN_PSRAW,
20292 IX86_BUILTIN_PSRAD,
20293 IX86_BUILTIN_PSRLW,
20294 IX86_BUILTIN_PSRLD,
20295 IX86_BUILTIN_PSRLQ,
20296 IX86_BUILTIN_PSLLWI,
20297 IX86_BUILTIN_PSLLDI,
20298 IX86_BUILTIN_PSLLQI,
20299 IX86_BUILTIN_PSRAWI,
20300 IX86_BUILTIN_PSRADI,
20301 IX86_BUILTIN_PSRLWI,
20302 IX86_BUILTIN_PSRLDI,
20303 IX86_BUILTIN_PSRLQI,
20305 IX86_BUILTIN_PUNPCKHBW,
20306 IX86_BUILTIN_PUNPCKHWD,
20307 IX86_BUILTIN_PUNPCKHDQ,
20308 IX86_BUILTIN_PUNPCKLBW,
20309 IX86_BUILTIN_PUNPCKLWD,
20310 IX86_BUILTIN_PUNPCKLDQ,
20312 IX86_BUILTIN_SHUFPS,
20314 IX86_BUILTIN_RCPPS,
20315 IX86_BUILTIN_RCPSS,
20316 IX86_BUILTIN_RSQRTPS,
20317 IX86_BUILTIN_RSQRTPS_NR,
20318 IX86_BUILTIN_RSQRTSS,
20319 IX86_BUILTIN_RSQRTF,
20320 IX86_BUILTIN_SQRTPS,
20321 IX86_BUILTIN_SQRTPS_NR,
20322 IX86_BUILTIN_SQRTSS,
20324 IX86_BUILTIN_UNPCKHPS,
20325 IX86_BUILTIN_UNPCKLPS,
20327 IX86_BUILTIN_ANDPS,
20328 IX86_BUILTIN_ANDNPS,
20330 IX86_BUILTIN_XORPS,
20333 IX86_BUILTIN_LDMXCSR,
20334 IX86_BUILTIN_STMXCSR,
20335 IX86_BUILTIN_SFENCE,
20337 /* 3DNow! Original */
20338 IX86_BUILTIN_FEMMS,
20339 IX86_BUILTIN_PAVGUSB,
20340 IX86_BUILTIN_PF2ID,
20341 IX86_BUILTIN_PFACC,
20342 IX86_BUILTIN_PFADD,
20343 IX86_BUILTIN_PFCMPEQ,
20344 IX86_BUILTIN_PFCMPGE,
20345 IX86_BUILTIN_PFCMPGT,
20346 IX86_BUILTIN_PFMAX,
20347 IX86_BUILTIN_PFMIN,
20348 IX86_BUILTIN_PFMUL,
20349 IX86_BUILTIN_PFRCP,
20350 IX86_BUILTIN_PFRCPIT1,
20351 IX86_BUILTIN_PFRCPIT2,
20352 IX86_BUILTIN_PFRSQIT1,
20353 IX86_BUILTIN_PFRSQRT,
20354 IX86_BUILTIN_PFSUB,
20355 IX86_BUILTIN_PFSUBR,
20356 IX86_BUILTIN_PI2FD,
20357 IX86_BUILTIN_PMULHRW,
20359 /* 3DNow! Athlon Extensions */
20360 IX86_BUILTIN_PF2IW,
20361 IX86_BUILTIN_PFNACC,
20362 IX86_BUILTIN_PFPNACC,
20363 IX86_BUILTIN_PI2FW,
20364 IX86_BUILTIN_PSWAPDSI,
20365 IX86_BUILTIN_PSWAPDSF,
20368 IX86_BUILTIN_ADDPD,
20369 IX86_BUILTIN_ADDSD,
20370 IX86_BUILTIN_DIVPD,
20371 IX86_BUILTIN_DIVSD,
20372 IX86_BUILTIN_MULPD,
20373 IX86_BUILTIN_MULSD,
20374 IX86_BUILTIN_SUBPD,
20375 IX86_BUILTIN_SUBSD,
20377 IX86_BUILTIN_CMPEQPD,
20378 IX86_BUILTIN_CMPLTPD,
20379 IX86_BUILTIN_CMPLEPD,
20380 IX86_BUILTIN_CMPGTPD,
20381 IX86_BUILTIN_CMPGEPD,
20382 IX86_BUILTIN_CMPNEQPD,
20383 IX86_BUILTIN_CMPNLTPD,
20384 IX86_BUILTIN_CMPNLEPD,
20385 IX86_BUILTIN_CMPNGTPD,
20386 IX86_BUILTIN_CMPNGEPD,
20387 IX86_BUILTIN_CMPORDPD,
20388 IX86_BUILTIN_CMPUNORDPD,
20389 IX86_BUILTIN_CMPEQSD,
20390 IX86_BUILTIN_CMPLTSD,
20391 IX86_BUILTIN_CMPLESD,
20392 IX86_BUILTIN_CMPNEQSD,
20393 IX86_BUILTIN_CMPNLTSD,
20394 IX86_BUILTIN_CMPNLESD,
20395 IX86_BUILTIN_CMPORDSD,
20396 IX86_BUILTIN_CMPUNORDSD,
20398 IX86_BUILTIN_COMIEQSD,
20399 IX86_BUILTIN_COMILTSD,
20400 IX86_BUILTIN_COMILESD,
20401 IX86_BUILTIN_COMIGTSD,
20402 IX86_BUILTIN_COMIGESD,
20403 IX86_BUILTIN_COMINEQSD,
20404 IX86_BUILTIN_UCOMIEQSD,
20405 IX86_BUILTIN_UCOMILTSD,
20406 IX86_BUILTIN_UCOMILESD,
20407 IX86_BUILTIN_UCOMIGTSD,
20408 IX86_BUILTIN_UCOMIGESD,
20409 IX86_BUILTIN_UCOMINEQSD,
20411 IX86_BUILTIN_MAXPD,
20412 IX86_BUILTIN_MAXSD,
20413 IX86_BUILTIN_MINPD,
20414 IX86_BUILTIN_MINSD,
20416 IX86_BUILTIN_ANDPD,
20417 IX86_BUILTIN_ANDNPD,
20419 IX86_BUILTIN_XORPD,
20421 IX86_BUILTIN_SQRTPD,
20422 IX86_BUILTIN_SQRTSD,
20424 IX86_BUILTIN_UNPCKHPD,
20425 IX86_BUILTIN_UNPCKLPD,
20427 IX86_BUILTIN_SHUFPD,
20429 IX86_BUILTIN_LOADUPD,
20430 IX86_BUILTIN_STOREUPD,
20431 IX86_BUILTIN_MOVSD,
20433 IX86_BUILTIN_LOADHPD,
20434 IX86_BUILTIN_LOADLPD,
20436 IX86_BUILTIN_CVTDQ2PD,
20437 IX86_BUILTIN_CVTDQ2PS,
20439 IX86_BUILTIN_CVTPD2DQ,
20440 IX86_BUILTIN_CVTPD2PI,
20441 IX86_BUILTIN_CVTPD2PS,
20442 IX86_BUILTIN_CVTTPD2DQ,
20443 IX86_BUILTIN_CVTTPD2PI,
20445 IX86_BUILTIN_CVTPI2PD,
20446 IX86_BUILTIN_CVTSI2SD,
20447 IX86_BUILTIN_CVTSI642SD,
20449 IX86_BUILTIN_CVTSD2SI,
20450 IX86_BUILTIN_CVTSD2SI64,
20451 IX86_BUILTIN_CVTSD2SS,
20452 IX86_BUILTIN_CVTSS2SD,
20453 IX86_BUILTIN_CVTTSD2SI,
20454 IX86_BUILTIN_CVTTSD2SI64,
20456 IX86_BUILTIN_CVTPS2DQ,
20457 IX86_BUILTIN_CVTPS2PD,
20458 IX86_BUILTIN_CVTTPS2DQ,
20460 IX86_BUILTIN_MOVNTI,
20461 IX86_BUILTIN_MOVNTPD,
20462 IX86_BUILTIN_MOVNTDQ,
20464 IX86_BUILTIN_MOVQ128,
20467 IX86_BUILTIN_MASKMOVDQU,
20468 IX86_BUILTIN_MOVMSKPD,
20469 IX86_BUILTIN_PMOVMSKB128,
20471 IX86_BUILTIN_PACKSSWB128,
20472 IX86_BUILTIN_PACKSSDW128,
20473 IX86_BUILTIN_PACKUSWB128,
20475 IX86_BUILTIN_PADDB128,
20476 IX86_BUILTIN_PADDW128,
20477 IX86_BUILTIN_PADDD128,
20478 IX86_BUILTIN_PADDQ128,
20479 IX86_BUILTIN_PADDSB128,
20480 IX86_BUILTIN_PADDSW128,
20481 IX86_BUILTIN_PADDUSB128,
20482 IX86_BUILTIN_PADDUSW128,
20483 IX86_BUILTIN_PSUBB128,
20484 IX86_BUILTIN_PSUBW128,
20485 IX86_BUILTIN_PSUBD128,
20486 IX86_BUILTIN_PSUBQ128,
20487 IX86_BUILTIN_PSUBSB128,
20488 IX86_BUILTIN_PSUBSW128,
20489 IX86_BUILTIN_PSUBUSB128,
20490 IX86_BUILTIN_PSUBUSW128,
20492 IX86_BUILTIN_PAND128,
20493 IX86_BUILTIN_PANDN128,
20494 IX86_BUILTIN_POR128,
20495 IX86_BUILTIN_PXOR128,
20497 IX86_BUILTIN_PAVGB128,
20498 IX86_BUILTIN_PAVGW128,
20500 IX86_BUILTIN_PCMPEQB128,
20501 IX86_BUILTIN_PCMPEQW128,
20502 IX86_BUILTIN_PCMPEQD128,
20503 IX86_BUILTIN_PCMPGTB128,
20504 IX86_BUILTIN_PCMPGTW128,
20505 IX86_BUILTIN_PCMPGTD128,
20507 IX86_BUILTIN_PMADDWD128,
20509 IX86_BUILTIN_PMAXSW128,
20510 IX86_BUILTIN_PMAXUB128,
20511 IX86_BUILTIN_PMINSW128,
20512 IX86_BUILTIN_PMINUB128,
20514 IX86_BUILTIN_PMULUDQ,
20515 IX86_BUILTIN_PMULUDQ128,
20516 IX86_BUILTIN_PMULHUW128,
20517 IX86_BUILTIN_PMULHW128,
20518 IX86_BUILTIN_PMULLW128,
20520 IX86_BUILTIN_PSADBW128,
20521 IX86_BUILTIN_PSHUFHW,
20522 IX86_BUILTIN_PSHUFLW,
20523 IX86_BUILTIN_PSHUFD,
20525 IX86_BUILTIN_PSLLDQI128,
20526 IX86_BUILTIN_PSLLWI128,
20527 IX86_BUILTIN_PSLLDI128,
20528 IX86_BUILTIN_PSLLQI128,
20529 IX86_BUILTIN_PSRAWI128,
20530 IX86_BUILTIN_PSRADI128,
20531 IX86_BUILTIN_PSRLDQI128,
20532 IX86_BUILTIN_PSRLWI128,
20533 IX86_BUILTIN_PSRLDI128,
20534 IX86_BUILTIN_PSRLQI128,
20536 IX86_BUILTIN_PSLLDQ128,
20537 IX86_BUILTIN_PSLLW128,
20538 IX86_BUILTIN_PSLLD128,
20539 IX86_BUILTIN_PSLLQ128,
20540 IX86_BUILTIN_PSRAW128,
20541 IX86_BUILTIN_PSRAD128,
20542 IX86_BUILTIN_PSRLW128,
20543 IX86_BUILTIN_PSRLD128,
20544 IX86_BUILTIN_PSRLQ128,
20546 IX86_BUILTIN_PUNPCKHBW128,
20547 IX86_BUILTIN_PUNPCKHWD128,
20548 IX86_BUILTIN_PUNPCKHDQ128,
20549 IX86_BUILTIN_PUNPCKHQDQ128,
20550 IX86_BUILTIN_PUNPCKLBW128,
20551 IX86_BUILTIN_PUNPCKLWD128,
20552 IX86_BUILTIN_PUNPCKLDQ128,
20553 IX86_BUILTIN_PUNPCKLQDQ128,
20555 IX86_BUILTIN_CLFLUSH,
20556 IX86_BUILTIN_MFENCE,
20557 IX86_BUILTIN_LFENCE,
20559 IX86_BUILTIN_BSRSI,
20560 IX86_BUILTIN_BSRDI,
20561 IX86_BUILTIN_RDPMC,
20562 IX86_BUILTIN_RDTSC,
20563 IX86_BUILTIN_RDTSCP,
20564 IX86_BUILTIN_ROLQI,
20565 IX86_BUILTIN_ROLHI,
20566 IX86_BUILTIN_RORQI,
20567 IX86_BUILTIN_RORHI,
20570 IX86_BUILTIN_ADDSUBPS,
20571 IX86_BUILTIN_HADDPS,
20572 IX86_BUILTIN_HSUBPS,
20573 IX86_BUILTIN_MOVSHDUP,
20574 IX86_BUILTIN_MOVSLDUP,
20575 IX86_BUILTIN_ADDSUBPD,
20576 IX86_BUILTIN_HADDPD,
20577 IX86_BUILTIN_HSUBPD,
20578 IX86_BUILTIN_LDDQU,
20580 IX86_BUILTIN_MONITOR,
20581 IX86_BUILTIN_MWAIT,
20584 IX86_BUILTIN_PHADDW,
20585 IX86_BUILTIN_PHADDD,
20586 IX86_BUILTIN_PHADDSW,
20587 IX86_BUILTIN_PHSUBW,
20588 IX86_BUILTIN_PHSUBD,
20589 IX86_BUILTIN_PHSUBSW,
20590 IX86_BUILTIN_PMADDUBSW,
20591 IX86_BUILTIN_PMULHRSW,
20592 IX86_BUILTIN_PSHUFB,
20593 IX86_BUILTIN_PSIGNB,
20594 IX86_BUILTIN_PSIGNW,
20595 IX86_BUILTIN_PSIGND,
20596 IX86_BUILTIN_PALIGNR,
20597 IX86_BUILTIN_PABSB,
20598 IX86_BUILTIN_PABSW,
20599 IX86_BUILTIN_PABSD,
20601 IX86_BUILTIN_PHADDW128,
20602 IX86_BUILTIN_PHADDD128,
20603 IX86_BUILTIN_PHADDSW128,
20604 IX86_BUILTIN_PHSUBW128,
20605 IX86_BUILTIN_PHSUBD128,
20606 IX86_BUILTIN_PHSUBSW128,
20607 IX86_BUILTIN_PMADDUBSW128,
20608 IX86_BUILTIN_PMULHRSW128,
20609 IX86_BUILTIN_PSHUFB128,
20610 IX86_BUILTIN_PSIGNB128,
20611 IX86_BUILTIN_PSIGNW128,
20612 IX86_BUILTIN_PSIGND128,
20613 IX86_BUILTIN_PALIGNR128,
20614 IX86_BUILTIN_PABSB128,
20615 IX86_BUILTIN_PABSW128,
20616 IX86_BUILTIN_PABSD128,
20618 /* AMDFAM10 - SSE4A New Instructions. */
20619 IX86_BUILTIN_MOVNTSD,
20620 IX86_BUILTIN_MOVNTSS,
20621 IX86_BUILTIN_EXTRQI,
20622 IX86_BUILTIN_EXTRQ,
20623 IX86_BUILTIN_INSERTQI,
20624 IX86_BUILTIN_INSERTQ,
20627 IX86_BUILTIN_BLENDPD,
20628 IX86_BUILTIN_BLENDPS,
20629 IX86_BUILTIN_BLENDVPD,
20630 IX86_BUILTIN_BLENDVPS,
20631 IX86_BUILTIN_PBLENDVB128,
20632 IX86_BUILTIN_PBLENDW128,
20637 IX86_BUILTIN_INSERTPS128,
20639 IX86_BUILTIN_MOVNTDQA,
20640 IX86_BUILTIN_MPSADBW128,
20641 IX86_BUILTIN_PACKUSDW128,
20642 IX86_BUILTIN_PCMPEQQ,
20643 IX86_BUILTIN_PHMINPOSUW128,
20645 IX86_BUILTIN_PMAXSB128,
20646 IX86_BUILTIN_PMAXSD128,
20647 IX86_BUILTIN_PMAXUD128,
20648 IX86_BUILTIN_PMAXUW128,
20650 IX86_BUILTIN_PMINSB128,
20651 IX86_BUILTIN_PMINSD128,
20652 IX86_BUILTIN_PMINUD128,
20653 IX86_BUILTIN_PMINUW128,
20655 IX86_BUILTIN_PMOVSXBW128,
20656 IX86_BUILTIN_PMOVSXBD128,
20657 IX86_BUILTIN_PMOVSXBQ128,
20658 IX86_BUILTIN_PMOVSXWD128,
20659 IX86_BUILTIN_PMOVSXWQ128,
20660 IX86_BUILTIN_PMOVSXDQ128,
20662 IX86_BUILTIN_PMOVZXBW128,
20663 IX86_BUILTIN_PMOVZXBD128,
20664 IX86_BUILTIN_PMOVZXBQ128,
20665 IX86_BUILTIN_PMOVZXWD128,
20666 IX86_BUILTIN_PMOVZXWQ128,
20667 IX86_BUILTIN_PMOVZXDQ128,
20669 IX86_BUILTIN_PMULDQ128,
20670 IX86_BUILTIN_PMULLD128,
20672 IX86_BUILTIN_ROUNDPD,
20673 IX86_BUILTIN_ROUNDPS,
20674 IX86_BUILTIN_ROUNDSD,
20675 IX86_BUILTIN_ROUNDSS,
20677 IX86_BUILTIN_PTESTZ,
20678 IX86_BUILTIN_PTESTC,
20679 IX86_BUILTIN_PTESTNZC,
20681 IX86_BUILTIN_VEC_INIT_V2SI,
20682 IX86_BUILTIN_VEC_INIT_V4HI,
20683 IX86_BUILTIN_VEC_INIT_V8QI,
20684 IX86_BUILTIN_VEC_EXT_V2DF,
20685 IX86_BUILTIN_VEC_EXT_V2DI,
20686 IX86_BUILTIN_VEC_EXT_V4SF,
20687 IX86_BUILTIN_VEC_EXT_V4SI,
20688 IX86_BUILTIN_VEC_EXT_V8HI,
20689 IX86_BUILTIN_VEC_EXT_V2SI,
20690 IX86_BUILTIN_VEC_EXT_V4HI,
20691 IX86_BUILTIN_VEC_EXT_V16QI,
20692 IX86_BUILTIN_VEC_SET_V2DI,
20693 IX86_BUILTIN_VEC_SET_V4SF,
20694 IX86_BUILTIN_VEC_SET_V4SI,
20695 IX86_BUILTIN_VEC_SET_V8HI,
20696 IX86_BUILTIN_VEC_SET_V4HI,
20697 IX86_BUILTIN_VEC_SET_V16QI,
20699 IX86_BUILTIN_VEC_PACK_SFIX,
20702 IX86_BUILTIN_CRC32QI,
20703 IX86_BUILTIN_CRC32HI,
20704 IX86_BUILTIN_CRC32SI,
20705 IX86_BUILTIN_CRC32DI,
20707 IX86_BUILTIN_PCMPESTRI128,
20708 IX86_BUILTIN_PCMPESTRM128,
20709 IX86_BUILTIN_PCMPESTRA128,
20710 IX86_BUILTIN_PCMPESTRC128,
20711 IX86_BUILTIN_PCMPESTRO128,
20712 IX86_BUILTIN_PCMPESTRS128,
20713 IX86_BUILTIN_PCMPESTRZ128,
20714 IX86_BUILTIN_PCMPISTRI128,
20715 IX86_BUILTIN_PCMPISTRM128,
20716 IX86_BUILTIN_PCMPISTRA128,
20717 IX86_BUILTIN_PCMPISTRC128,
20718 IX86_BUILTIN_PCMPISTRO128,
20719 IX86_BUILTIN_PCMPISTRS128,
20720 IX86_BUILTIN_PCMPISTRZ128,
20722 IX86_BUILTIN_PCMPGTQ,
20724 /* AES instructions */
20725 IX86_BUILTIN_AESENC128,
20726 IX86_BUILTIN_AESENCLAST128,
20727 IX86_BUILTIN_AESDEC128,
20728 IX86_BUILTIN_AESDECLAST128,
20729 IX86_BUILTIN_AESIMC128,
20730 IX86_BUILTIN_AESKEYGENASSIST128,
20732 /* PCLMUL instruction */
20733 IX86_BUILTIN_PCLMULQDQ128,
20736 IX86_BUILTIN_ADDPD256,
20737 IX86_BUILTIN_ADDPS256,
20738 IX86_BUILTIN_ADDSUBPD256,
20739 IX86_BUILTIN_ADDSUBPS256,
20740 IX86_BUILTIN_ANDPD256,
20741 IX86_BUILTIN_ANDPS256,
20742 IX86_BUILTIN_ANDNPD256,
20743 IX86_BUILTIN_ANDNPS256,
20744 IX86_BUILTIN_BLENDPD256,
20745 IX86_BUILTIN_BLENDPS256,
20746 IX86_BUILTIN_BLENDVPD256,
20747 IX86_BUILTIN_BLENDVPS256,
20748 IX86_BUILTIN_DIVPD256,
20749 IX86_BUILTIN_DIVPS256,
20750 IX86_BUILTIN_DPPS256,
20751 IX86_BUILTIN_HADDPD256,
20752 IX86_BUILTIN_HADDPS256,
20753 IX86_BUILTIN_HSUBPD256,
20754 IX86_BUILTIN_HSUBPS256,
20755 IX86_BUILTIN_MAXPD256,
20756 IX86_BUILTIN_MAXPS256,
20757 IX86_BUILTIN_MINPD256,
20758 IX86_BUILTIN_MINPS256,
20759 IX86_BUILTIN_MULPD256,
20760 IX86_BUILTIN_MULPS256,
20761 IX86_BUILTIN_ORPD256,
20762 IX86_BUILTIN_ORPS256,
20763 IX86_BUILTIN_SHUFPD256,
20764 IX86_BUILTIN_SHUFPS256,
20765 IX86_BUILTIN_SUBPD256,
20766 IX86_BUILTIN_SUBPS256,
20767 IX86_BUILTIN_XORPD256,
20768 IX86_BUILTIN_XORPS256,
20769 IX86_BUILTIN_CMPSD,
20770 IX86_BUILTIN_CMPSS,
20771 IX86_BUILTIN_CMPPD,
20772 IX86_BUILTIN_CMPPS,
20773 IX86_BUILTIN_CMPPD256,
20774 IX86_BUILTIN_CMPPS256,
20775 IX86_BUILTIN_CVTDQ2PD256,
20776 IX86_BUILTIN_CVTDQ2PS256,
20777 IX86_BUILTIN_CVTPD2PS256,
20778 IX86_BUILTIN_CVTPS2DQ256,
20779 IX86_BUILTIN_CVTPS2PD256,
20780 IX86_BUILTIN_CVTTPD2DQ256,
20781 IX86_BUILTIN_CVTPD2DQ256,
20782 IX86_BUILTIN_CVTTPS2DQ256,
20783 IX86_BUILTIN_EXTRACTF128PD256,
20784 IX86_BUILTIN_EXTRACTF128PS256,
20785 IX86_BUILTIN_EXTRACTF128SI256,
20786 IX86_BUILTIN_VZEROALL,
20787 IX86_BUILTIN_VZEROUPPER,
20788 IX86_BUILTIN_VZEROUPPER_REX64,
20789 IX86_BUILTIN_VPERMILVARPD,
20790 IX86_BUILTIN_VPERMILVARPS,
20791 IX86_BUILTIN_VPERMILVARPD256,
20792 IX86_BUILTIN_VPERMILVARPS256,
20793 IX86_BUILTIN_VPERMILPD,
20794 IX86_BUILTIN_VPERMILPS,
20795 IX86_BUILTIN_VPERMILPD256,
20796 IX86_BUILTIN_VPERMILPS256,
20797 IX86_BUILTIN_VPERM2F128PD256,
20798 IX86_BUILTIN_VPERM2F128PS256,
20799 IX86_BUILTIN_VPERM2F128SI256,
20800 IX86_BUILTIN_VBROADCASTSS,
20801 IX86_BUILTIN_VBROADCASTSD256,
20802 IX86_BUILTIN_VBROADCASTSS256,
20803 IX86_BUILTIN_VBROADCASTPD256,
20804 IX86_BUILTIN_VBROADCASTPS256,
20805 IX86_BUILTIN_VINSERTF128PD256,
20806 IX86_BUILTIN_VINSERTF128PS256,
20807 IX86_BUILTIN_VINSERTF128SI256,
20808 IX86_BUILTIN_LOADUPD256,
20809 IX86_BUILTIN_LOADUPS256,
20810 IX86_BUILTIN_STOREUPD256,
20811 IX86_BUILTIN_STOREUPS256,
20812 IX86_BUILTIN_LDDQU256,
20813 IX86_BUILTIN_MOVNTDQ256,
20814 IX86_BUILTIN_MOVNTPD256,
20815 IX86_BUILTIN_MOVNTPS256,
20816 IX86_BUILTIN_LOADDQU256,
20817 IX86_BUILTIN_STOREDQU256,
20818 IX86_BUILTIN_MASKLOADPD,
20819 IX86_BUILTIN_MASKLOADPS,
20820 IX86_BUILTIN_MASKSTOREPD,
20821 IX86_BUILTIN_MASKSTOREPS,
20822 IX86_BUILTIN_MASKLOADPD256,
20823 IX86_BUILTIN_MASKLOADPS256,
20824 IX86_BUILTIN_MASKSTOREPD256,
20825 IX86_BUILTIN_MASKSTOREPS256,
20826 IX86_BUILTIN_MOVSHDUP256,
20827 IX86_BUILTIN_MOVSLDUP256,
20828 IX86_BUILTIN_MOVDDUP256,
20830 IX86_BUILTIN_SQRTPD256,
20831 IX86_BUILTIN_SQRTPS256,
20832 IX86_BUILTIN_SQRTPS_NR256,
20833 IX86_BUILTIN_RSQRTPS256,
20834 IX86_BUILTIN_RSQRTPS_NR256,
20836 IX86_BUILTIN_RCPPS256,
20838 IX86_BUILTIN_ROUNDPD256,
20839 IX86_BUILTIN_ROUNDPS256,
20841 IX86_BUILTIN_UNPCKHPD256,
20842 IX86_BUILTIN_UNPCKLPD256,
20843 IX86_BUILTIN_UNPCKHPS256,
20844 IX86_BUILTIN_UNPCKLPS256,
20846 IX86_BUILTIN_SI256_SI,
20847 IX86_BUILTIN_PS256_PS,
20848 IX86_BUILTIN_PD256_PD,
20849 IX86_BUILTIN_SI_SI256,
20850 IX86_BUILTIN_PS_PS256,
20851 IX86_BUILTIN_PD_PD256,
20853 IX86_BUILTIN_VTESTZPD,
20854 IX86_BUILTIN_VTESTCPD,
20855 IX86_BUILTIN_VTESTNZCPD,
20856 IX86_BUILTIN_VTESTZPS,
20857 IX86_BUILTIN_VTESTCPS,
20858 IX86_BUILTIN_VTESTNZCPS,
20859 IX86_BUILTIN_VTESTZPD256,
20860 IX86_BUILTIN_VTESTCPD256,
20861 IX86_BUILTIN_VTESTNZCPD256,
20862 IX86_BUILTIN_VTESTZPS256,
20863 IX86_BUILTIN_VTESTCPS256,
20864 IX86_BUILTIN_VTESTNZCPS256,
20865 IX86_BUILTIN_PTESTZ256,
20866 IX86_BUILTIN_PTESTC256,
20867 IX86_BUILTIN_PTESTNZC256,
20869 IX86_BUILTIN_MOVMSKPD256,
20870 IX86_BUILTIN_MOVMSKPS256,
20872 /* TFmode support builtins. */
20874 IX86_BUILTIN_HUGE_VALQ,
20875 IX86_BUILTIN_FABSQ,
20876 IX86_BUILTIN_COPYSIGNQ,
20878 /* SSE5 instructions */
20879 IX86_BUILTIN_FMADDSS,
20880 IX86_BUILTIN_FMADDSD,
20881 IX86_BUILTIN_FMADDPS,
20882 IX86_BUILTIN_FMADDPD,
20883 IX86_BUILTIN_FMSUBSS,
20884 IX86_BUILTIN_FMSUBSD,
20885 IX86_BUILTIN_FMSUBPS,
20886 IX86_BUILTIN_FMSUBPD,
20887 IX86_BUILTIN_FNMADDSS,
20888 IX86_BUILTIN_FNMADDSD,
20889 IX86_BUILTIN_FNMADDPS,
20890 IX86_BUILTIN_FNMADDPD,
20891 IX86_BUILTIN_FNMSUBSS,
20892 IX86_BUILTIN_FNMSUBSD,
20893 IX86_BUILTIN_FNMSUBPS,
20894 IX86_BUILTIN_FNMSUBPD,
20895 IX86_BUILTIN_PCMOV,
20896 IX86_BUILTIN_PCMOV_V2DI,
20897 IX86_BUILTIN_PCMOV_V4SI,
20898 IX86_BUILTIN_PCMOV_V8HI,
20899 IX86_BUILTIN_PCMOV_V16QI,
20900 IX86_BUILTIN_PCMOV_V4SF,
20901 IX86_BUILTIN_PCMOV_V2DF,
20902 IX86_BUILTIN_PPERM,
20903 IX86_BUILTIN_PERMPS,
20904 IX86_BUILTIN_PERMPD,
20905 IX86_BUILTIN_PMACSSWW,
20906 IX86_BUILTIN_PMACSWW,
20907 IX86_BUILTIN_PMACSSWD,
20908 IX86_BUILTIN_PMACSWD,
20909 IX86_BUILTIN_PMACSSDD,
20910 IX86_BUILTIN_PMACSDD,
20911 IX86_BUILTIN_PMACSSDQL,
20912 IX86_BUILTIN_PMACSSDQH,
20913 IX86_BUILTIN_PMACSDQL,
20914 IX86_BUILTIN_PMACSDQH,
20915 IX86_BUILTIN_PMADCSSWD,
20916 IX86_BUILTIN_PMADCSWD,
20917 IX86_BUILTIN_PHADDBW,
20918 IX86_BUILTIN_PHADDBD,
20919 IX86_BUILTIN_PHADDBQ,
20920 IX86_BUILTIN_PHADDWD,
20921 IX86_BUILTIN_PHADDWQ,
20922 IX86_BUILTIN_PHADDDQ,
20923 IX86_BUILTIN_PHADDUBW,
20924 IX86_BUILTIN_PHADDUBD,
20925 IX86_BUILTIN_PHADDUBQ,
20926 IX86_BUILTIN_PHADDUWD,
20927 IX86_BUILTIN_PHADDUWQ,
20928 IX86_BUILTIN_PHADDUDQ,
20929 IX86_BUILTIN_PHSUBBW,
20930 IX86_BUILTIN_PHSUBWD,
20931 IX86_BUILTIN_PHSUBDQ,
20932 IX86_BUILTIN_PROTB,
20933 IX86_BUILTIN_PROTW,
20934 IX86_BUILTIN_PROTD,
20935 IX86_BUILTIN_PROTQ,
20936 IX86_BUILTIN_PROTB_IMM,
20937 IX86_BUILTIN_PROTW_IMM,
20938 IX86_BUILTIN_PROTD_IMM,
20939 IX86_BUILTIN_PROTQ_IMM,
20940 IX86_BUILTIN_PSHLB,
20941 IX86_BUILTIN_PSHLW,
20942 IX86_BUILTIN_PSHLD,
20943 IX86_BUILTIN_PSHLQ,
20944 IX86_BUILTIN_PSHAB,
20945 IX86_BUILTIN_PSHAW,
20946 IX86_BUILTIN_PSHAD,
20947 IX86_BUILTIN_PSHAQ,
20948 IX86_BUILTIN_FRCZSS,
20949 IX86_BUILTIN_FRCZSD,
20950 IX86_BUILTIN_FRCZPS,
20951 IX86_BUILTIN_FRCZPD,
20952 IX86_BUILTIN_CVTPH2PS,
20953 IX86_BUILTIN_CVTPS2PH,
20955 IX86_BUILTIN_COMEQSS,
20956 IX86_BUILTIN_COMNESS,
20957 IX86_BUILTIN_COMLTSS,
20958 IX86_BUILTIN_COMLESS,
20959 IX86_BUILTIN_COMGTSS,
20960 IX86_BUILTIN_COMGESS,
20961 IX86_BUILTIN_COMUEQSS,
20962 IX86_BUILTIN_COMUNESS,
20963 IX86_BUILTIN_COMULTSS,
20964 IX86_BUILTIN_COMULESS,
20965 IX86_BUILTIN_COMUGTSS,
20966 IX86_BUILTIN_COMUGESS,
20967 IX86_BUILTIN_COMORDSS,
20968 IX86_BUILTIN_COMUNORDSS,
20969 IX86_BUILTIN_COMFALSESS,
20970 IX86_BUILTIN_COMTRUESS,
20972 IX86_BUILTIN_COMEQSD,
20973 IX86_BUILTIN_COMNESD,
20974 IX86_BUILTIN_COMLTSD,
20975 IX86_BUILTIN_COMLESD,
20976 IX86_BUILTIN_COMGTSD,
20977 IX86_BUILTIN_COMGESD,
20978 IX86_BUILTIN_COMUEQSD,
20979 IX86_BUILTIN_COMUNESD,
20980 IX86_BUILTIN_COMULTSD,
20981 IX86_BUILTIN_COMULESD,
20982 IX86_BUILTIN_COMUGTSD,
20983 IX86_BUILTIN_COMUGESD,
20984 IX86_BUILTIN_COMORDSD,
20985 IX86_BUILTIN_COMUNORDSD,
20986 IX86_BUILTIN_COMFALSESD,
20987 IX86_BUILTIN_COMTRUESD,
20989 IX86_BUILTIN_COMEQPS,
20990 IX86_BUILTIN_COMNEPS,
20991 IX86_BUILTIN_COMLTPS,
20992 IX86_BUILTIN_COMLEPS,
20993 IX86_BUILTIN_COMGTPS,
20994 IX86_BUILTIN_COMGEPS,
20995 IX86_BUILTIN_COMUEQPS,
20996 IX86_BUILTIN_COMUNEPS,
20997 IX86_BUILTIN_COMULTPS,
20998 IX86_BUILTIN_COMULEPS,
20999 IX86_BUILTIN_COMUGTPS,
21000 IX86_BUILTIN_COMUGEPS,
21001 IX86_BUILTIN_COMORDPS,
21002 IX86_BUILTIN_COMUNORDPS,
21003 IX86_BUILTIN_COMFALSEPS,
21004 IX86_BUILTIN_COMTRUEPS,
21006 IX86_BUILTIN_COMEQPD,
21007 IX86_BUILTIN_COMNEPD,
21008 IX86_BUILTIN_COMLTPD,
21009 IX86_BUILTIN_COMLEPD,
21010 IX86_BUILTIN_COMGTPD,
21011 IX86_BUILTIN_COMGEPD,
21012 IX86_BUILTIN_COMUEQPD,
21013 IX86_BUILTIN_COMUNEPD,
21014 IX86_BUILTIN_COMULTPD,
21015 IX86_BUILTIN_COMULEPD,
21016 IX86_BUILTIN_COMUGTPD,
21017 IX86_BUILTIN_COMUGEPD,
21018 IX86_BUILTIN_COMORDPD,
21019 IX86_BUILTIN_COMUNORDPD,
21020 IX86_BUILTIN_COMFALSEPD,
21021 IX86_BUILTIN_COMTRUEPD,
21023 IX86_BUILTIN_PCOMEQUB,
21024 IX86_BUILTIN_PCOMNEUB,
21025 IX86_BUILTIN_PCOMLTUB,
21026 IX86_BUILTIN_PCOMLEUB,
21027 IX86_BUILTIN_PCOMGTUB,
21028 IX86_BUILTIN_PCOMGEUB,
21029 IX86_BUILTIN_PCOMFALSEUB,
21030 IX86_BUILTIN_PCOMTRUEUB,
21031 IX86_BUILTIN_PCOMEQUW,
21032 IX86_BUILTIN_PCOMNEUW,
21033 IX86_BUILTIN_PCOMLTUW,
21034 IX86_BUILTIN_PCOMLEUW,
21035 IX86_BUILTIN_PCOMGTUW,
21036 IX86_BUILTIN_PCOMGEUW,
21037 IX86_BUILTIN_PCOMFALSEUW,
21038 IX86_BUILTIN_PCOMTRUEUW,
21039 IX86_BUILTIN_PCOMEQUD,
21040 IX86_BUILTIN_PCOMNEUD,
21041 IX86_BUILTIN_PCOMLTUD,
21042 IX86_BUILTIN_PCOMLEUD,
21043 IX86_BUILTIN_PCOMGTUD,
21044 IX86_BUILTIN_PCOMGEUD,
21045 IX86_BUILTIN_PCOMFALSEUD,
21046 IX86_BUILTIN_PCOMTRUEUD,
21047 IX86_BUILTIN_PCOMEQUQ,
21048 IX86_BUILTIN_PCOMNEUQ,
21049 IX86_BUILTIN_PCOMLTUQ,
21050 IX86_BUILTIN_PCOMLEUQ,
21051 IX86_BUILTIN_PCOMGTUQ,
21052 IX86_BUILTIN_PCOMGEUQ,
21053 IX86_BUILTIN_PCOMFALSEUQ,
21054 IX86_BUILTIN_PCOMTRUEUQ,
21056 IX86_BUILTIN_PCOMEQB,
21057 IX86_BUILTIN_PCOMNEB,
21058 IX86_BUILTIN_PCOMLTB,
21059 IX86_BUILTIN_PCOMLEB,
21060 IX86_BUILTIN_PCOMGTB,
21061 IX86_BUILTIN_PCOMGEB,
21062 IX86_BUILTIN_PCOMFALSEB,
21063 IX86_BUILTIN_PCOMTRUEB,
21064 IX86_BUILTIN_PCOMEQW,
21065 IX86_BUILTIN_PCOMNEW,
21066 IX86_BUILTIN_PCOMLTW,
21067 IX86_BUILTIN_PCOMLEW,
21068 IX86_BUILTIN_PCOMGTW,
21069 IX86_BUILTIN_PCOMGEW,
21070 IX86_BUILTIN_PCOMFALSEW,
21071 IX86_BUILTIN_PCOMTRUEW,
21072 IX86_BUILTIN_PCOMEQD,
21073 IX86_BUILTIN_PCOMNED,
21074 IX86_BUILTIN_PCOMLTD,
21075 IX86_BUILTIN_PCOMLED,
21076 IX86_BUILTIN_PCOMGTD,
21077 IX86_BUILTIN_PCOMGED,
21078 IX86_BUILTIN_PCOMFALSED,
21079 IX86_BUILTIN_PCOMTRUED,
21080 IX86_BUILTIN_PCOMEQQ,
21081 IX86_BUILTIN_PCOMNEQ,
21082 IX86_BUILTIN_PCOMLTQ,
21083 IX86_BUILTIN_PCOMLEQ,
21084 IX86_BUILTIN_PCOMGTQ,
21085 IX86_BUILTIN_PCOMGEQ,
21086 IX86_BUILTIN_PCOMFALSEQ,
21087 IX86_BUILTIN_PCOMTRUEQ,
21092 /* Table for the ix86 builtin decls. */
/* GTY(()) registers these tables as garbage-collector roots, since they
   hold tree nodes across passes.  */
21093 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
21095 /* Table of all of the builtin functions that are possible with different ISA's
21096 but are waiting to be built until a function is declared to use that
/* Deferred-declaration record for one builtin; filled in by def_builtin
   and consumed by ix86_add_new_builtins.  */
21098 struct GTY(()) builtin_isa {
21099 tree type; /* builtin type to use in the declaration */
21100 const char *name; /* function name */
21101 int isa; /* isa_flags this builtin is defined for */
21102 bool const_p; /* true if the declaration is constant */
21105 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
21108 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
21109 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
21110 * function decl in the ix86_builtins array. Returns the function decl or
21111 * NULL_TREE, if the builtin was not added.
21113 * If the front end has a special hook for builtin functions, delay adding
21114 * builtin functions that aren't in the current ISA until the ISA is changed
21115 * with function specific optimization. Doing so, can save about 300K for the
21116 * default compiler. When the builtin is expanded, check at that time whether
21119 * If the front end doesn't have a special hook, record all builtins, even if
21120 * it isn't an instruction set in the current ISA in case the user uses
21121 * function specific options for a different ISA, so that we don't get scope
21122 * errors if a builtin is added in the middle of a function scope. */
/* Register the builtin CODE with NAME and TYPE, restricted to the ISAs in
   MASK (see the block comment above).  Returns the function decl, or
   NULL_TREE when registration is deferred or skipped.  NOTE(review):
   elided extract -- leading numbers are original line numbers; braces and
   some statements between them are missing.  */
21125 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
21127 tree decl = NULL_TREE;
/* Skip 64-bit-only builtins when not compiling for 64-bit.  */
21129 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
21131 ix86_builtins_isa[(int) code].isa = mask;
/* Declare now if the ISA is already enabled, or if the front end uses
   the extended-scope hook (in which case deferral buys nothing).  */
21133 if ((mask & ix86_isa_flags) != 0
21134 || (lang_hooks.builtin_function
21135 == lang_hooks.builtin_function_ext_scope))
21138 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
21140 ix86_builtins[(int) code] = decl;
21141 ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Otherwise record everything needed to declare it lazily later
   (see ix86_add_new_builtins).  */
21145 ix86_builtins[(int) code] = NULL_TREE;
21146 ix86_builtins_isa[(int) code].const_p = false;
21147 ix86_builtins_isa[(int) code].type = type;
21148 ix86_builtins_isa[(int) code].name = name;
21155 /* Like def_builtin, but also marks the function decl "const". */
/* NOTE(review): elided extract -- def_builtin can return NULL_TREE for a
   deferred builtin; the guard between the next two statements (presumably
   `if (decl) ... else ...`) is among the missing lines.  When no decl
   exists yet, const-ness is recorded for the deferred declaration.  */
21158 def_builtin_const (int mask, const char *name, tree type,
21159 enum ix86_builtins code)
21161 tree decl = def_builtin (mask, name, type, code);
21163 TREE_READONLY (decl) = 1;
21165 ix86_builtins_isa[(int) code].const_p = true;
21170 /* Add any new builtin functions for a given ISA that may not have been
21171 declared. This saves a bit of space compared to adding all of the
21172 declarations to the tree, even if we didn't use them. */
/* ISA is an ix86_isa_flags mask; every deferred builtin matching it is
   declared now at extended (file) scope.  NOTE(review): elided extract --
   leading numbers are original line numbers.  */
21175 ix86_add_new_builtins (int isa)
21180 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
/* A non-NULL .type marks a builtin still waiting to be declared.  */
21182 if ((ix86_builtins_isa[i].isa & isa) != 0
21183 && ix86_builtins_isa[i].type != NULL_TREE)
21185 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
21186 ix86_builtins_isa[i].type,
21187 i, BUILT_IN_MD, NULL,
/* Record the decl and clear .type so the builtin is not added twice.  */
21190 ix86_builtins[i] = decl;
21191 ix86_builtins_isa[i].type = NULL_TREE;
/* Apply the const-ness def_builtin_const recorded for the deferral.  */
21192 if (ix86_builtins_isa[i].const_p)
21193 TREE_READONLY (decl) = 1;
21198 /* Bits for builtin_description.flag. */
21200 /* Set when we don't support the comparison natively, and should
21201 swap_comparison in order to support it. */
21202 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One entry per builtin in the bdesc_* tables below.  NOTE(review):
   elided extract -- the closing of this struct (and any trailing fields,
   e.g. the flag member referenced above) is among the missing lines.  */
21204 struct builtin_description
21206 const unsigned int mask; /* OPTION_MASK_ISA_* gating the builtin */
21207 const enum insn_code icode; /* insn pattern used to expand it */
21208 const char *const name; /* user-visible __builtin_ia32_* name */
21209 const enum ix86_builtins code; /* IX86_BUILTIN_* enumerator */
21210 const enum rtx_code comparison; /* comparison to generate, if any */
/* Descriptors for the SSE/SSE2 (u)comiss/(u)comisd comparison builtins.
   SSE entries compare scalar singles, SSE2 entries scalar doubles; the
   rtx code (UNEQ, UNLT, ...) is the comparison the expander generates
   from the COMI/UCOMI flags result.  */
21214 static const struct builtin_description bdesc_comi[] =
21216 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21217 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21218 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21219 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21220 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21221 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21222 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21223 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21224 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21225 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21226 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21227 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
21228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21236 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21238 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21239 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 packed-compare string builtins with Explicit length (PCMPESTR*).
   The last field selects how the result is extracted: 0 for the index
   (pcmpestri) / mask (pcmpestrm) forms, or a CC mode naming which flag of
   the comparison is returned for the flag-reading variants.  NOTE(review):
   opening/closing braces of the array fall outside this extracted span.  */
21242 static const struct builtin_description bdesc_pcmpestr[] =
21245   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21246   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21247   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21248   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21249   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21250   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21251   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 packed-compare string builtins with Implicit length (PCMPISTR*).
   Same layout as bdesc_pcmpestr: last field is 0 for the index/mask forms
   or the CC mode of the flag returned by the flag-reading variants.
   NOTE(review): array braces fall outside this extracted span.  */
21254 static const struct builtin_description bdesc_pcmpistr[] =
21257   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21258   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21259   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21260   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21261   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21262   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21263   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21266 /* Special builtin types: function signatures for the "special" builtins
   in bdesc_special_args (loads, stores, fences, non-temporal moves).
   Each enumerator name encodes RESULT_FTYPE_ARGS using mode names
   (V4SF = vector of 4 floats, PC... = pointer-to-const, P... = pointer).
   NOTE(review): several enumerators and the enum's braces were elided
   from this extracted span.  */
21267 enum ix86_special_builtin_type
21269   SPECIAL_FTYPE_UNKNOWN,
21272   UINT64_FTYPE_PUNSIGNED,
21273   V32QI_FTYPE_PCCHAR,
21274   V16QI_FTYPE_PCCHAR,
21276   V8SF_FTYPE_PCFLOAT,
21278   V4DF_FTYPE_PCDOUBLE,
21279   V4SF_FTYPE_PCFLOAT,
21280   V2DF_FTYPE_PCDOUBLE,
21281   V8SF_FTYPE_PCV8SF_V8SF,
21282   V4DF_FTYPE_PCV4DF_V4DF,
21283   V4SF_FTYPE_V4SF_PCV2SF,
21284   V4SF_FTYPE_PCV4SF_V4SF,
21285   V2DF_FTYPE_V2DF_PCDOUBLE,
21286   V2DF_FTYPE_PCV2DF_V2DF,
/* Store-style signatures: void result, destination pointer first.  */
21288   VOID_FTYPE_PV2SF_V4SF,
21289   VOID_FTYPE_PV4DI_V4DI,
21290   VOID_FTYPE_PV2DI_V2DI,
21291   VOID_FTYPE_PCHAR_V32QI,
21292   VOID_FTYPE_PCHAR_V16QI,
21293   VOID_FTYPE_PFLOAT_V8SF,
21294   VOID_FTYPE_PFLOAT_V4SF,
21295   VOID_FTYPE_PDOUBLE_V4DF,
21296   VOID_FTYPE_PDOUBLE_V2DF,
21298   VOID_FTYPE_PINT_INT,
/* Masked-store signatures: destination, mask, value.  */
21299   VOID_FTYPE_PV8SF_V8SF_V8SF,
21300   VOID_FTYPE_PV4DF_V4DF_V4DF,
21301   VOID_FTYPE_PV4SF_V4SF_V4SF,
21302   VOID_FTYPE_PV2DF_V2DF_V2DF
21305 /* Builtin types: function signatures for the ordinary builtins in
   bdesc_args.  Names encode RESULT_FTYPE_ARG1[_ARG2...] using machine
   modes.  Suffix conventions (NOTE(review): presumed from the names and
   their use in bdesc_args — confirm against the expander):
     _COUNT     — last operand is a shift count;
     _SWAP      — operands are swapped before expansion (used for cmpgt
                  expressed via cmplt, see bdesc_args);
     _PTEST     — ptest-style flag-setting comparison;
     _VEC_MERGE — scalar op merged into the destination vector.
   Several enumerators and the enum's braces were elided from this span.  */
21306 enum ix86_builtin_type
21309   FLOAT128_FTYPE_FLOAT128,
21311   FLOAT128_FTYPE_FLOAT128_FLOAT128,
21312   INT_FTYPE_V8SF_V8SF_PTEST,
21313   INT_FTYPE_V4DI_V4DI_PTEST,
21314   INT_FTYPE_V4DF_V4DF_PTEST,
21315   INT_FTYPE_V4SF_V4SF_PTEST,
21316   INT_FTYPE_V2DI_V2DI_PTEST,
21317   INT_FTYPE_V2DF_V2DF_PTEST,
21352   V4SF_FTYPE_V4SF_VEC_MERGE,
21361   V2DF_FTYPE_V2DF_VEC_MERGE,
/* Two-operand vector signatures.  */
21372   V16QI_FTYPE_V16QI_V16QI,
21373   V16QI_FTYPE_V8HI_V8HI,
21374   V8QI_FTYPE_V8QI_V8QI,
21375   V8QI_FTYPE_V4HI_V4HI,
21376   V8HI_FTYPE_V8HI_V8HI,
21377   V8HI_FTYPE_V8HI_V8HI_COUNT,
21378   V8HI_FTYPE_V16QI_V16QI,
21379   V8HI_FTYPE_V4SI_V4SI,
21380   V8HI_FTYPE_V8HI_SI_COUNT,
21381   V8SF_FTYPE_V8SF_V8SF,
21382   V8SF_FTYPE_V8SF_V8SI,
21383   V4SI_FTYPE_V4SI_V4SI,
21384   V4SI_FTYPE_V4SI_V4SI_COUNT,
21385   V4SI_FTYPE_V8HI_V8HI,
21386   V4SI_FTYPE_V4SF_V4SF,
21387   V4SI_FTYPE_V2DF_V2DF,
21388   V4SI_FTYPE_V4SI_SI_COUNT,
21389   V4HI_FTYPE_V4HI_V4HI,
21390   V4HI_FTYPE_V4HI_V4HI_COUNT,
21391   V4HI_FTYPE_V8QI_V8QI,
21392   V4HI_FTYPE_V2SI_V2SI,
21393   V4HI_FTYPE_V4HI_SI_COUNT,
21394   V4DF_FTYPE_V4DF_V4DF,
21395   V4DF_FTYPE_V4DF_V4DI,
21396   V4SF_FTYPE_V4SF_V4SF,
21397   V4SF_FTYPE_V4SF_V4SF_SWAP,
21398   V4SF_FTYPE_V4SF_V4SI,
21399   V4SF_FTYPE_V4SF_V2SI,
21400   V4SF_FTYPE_V4SF_V2DF,
21401   V4SF_FTYPE_V4SF_DI,
21402   V4SF_FTYPE_V4SF_SI,
21403   V2DI_FTYPE_V2DI_V2DI,
21404   V2DI_FTYPE_V2DI_V2DI_COUNT,
21405   V2DI_FTYPE_V16QI_V16QI,
21406   V2DI_FTYPE_V4SI_V4SI,
21407   V2DI_FTYPE_V2DI_V16QI,
21408   V2DI_FTYPE_V2DF_V2DF,
21409   V2DI_FTYPE_V2DI_SI_COUNT,
21410   V2SI_FTYPE_V2SI_V2SI,
21411   V2SI_FTYPE_V2SI_V2SI_COUNT,
21412   V2SI_FTYPE_V4HI_V4HI,
21413   V2SI_FTYPE_V2SF_V2SF,
21414   V2SI_FTYPE_V2SI_SI_COUNT,
21415   V2DF_FTYPE_V2DF_V2DF,
21416   V2DF_FTYPE_V2DF_V2DF_SWAP,
21417   V2DF_FTYPE_V2DF_V4SF,
21418   V2DF_FTYPE_V2DF_V2DI,
21419   V2DF_FTYPE_V2DF_DI,
21420   V2DF_FTYPE_V2DF_SI,
21421   V2SF_FTYPE_V2SF_V2SF,
21422   V1DI_FTYPE_V1DI_V1DI,
21423   V1DI_FTYPE_V1DI_V1DI_COUNT,
21424   V1DI_FTYPE_V8QI_V8QI,
21425   V1DI_FTYPE_V2SI_V2SI,
21426   V1DI_FTYPE_V1DI_SI_COUNT,
/* Scalar integer signatures (crc32, rotates, ...).  */
21427   UINT64_FTYPE_UINT64_UINT64,
21428   UINT_FTYPE_UINT_UINT,
21429   UINT_FTYPE_UINT_USHORT,
21430   UINT_FTYPE_UINT_UCHAR,
21431   UINT16_FTYPE_UINT16_INT,
21432   UINT8_FTYPE_UINT8_INT,
/* Vector-with-immediate signatures.  */
21433   V8HI_FTYPE_V8HI_INT,
21434   V4SI_FTYPE_V4SI_INT,
21435   V4HI_FTYPE_V4HI_INT,
21436   V8SF_FTYPE_V8SF_INT,
21437   V4SI_FTYPE_V8SI_INT,
21438   V4SF_FTYPE_V8SF_INT,
21439   V2DF_FTYPE_V4DF_INT,
21440   V4DF_FTYPE_V4DF_INT,
21441   V4SF_FTYPE_V4SF_INT,
21442   V2DI_FTYPE_V2DI_INT,
21443   V2DI2TI_FTYPE_V2DI_INT,
21444   V2DF_FTYPE_V2DF_INT,
/* Three-operand signatures.  */
21445   V16QI_FTYPE_V16QI_V16QI_V16QI,
21446   V8SF_FTYPE_V8SF_V8SF_V8SF,
21447   V4DF_FTYPE_V4DF_V4DF_V4DF,
21448   V4SF_FTYPE_V4SF_V4SF_V4SF,
21449   V2DF_FTYPE_V2DF_V2DF_V2DF,
21450   V16QI_FTYPE_V16QI_V16QI_INT,
21451   V8SI_FTYPE_V8SI_V8SI_INT,
21452   V8SI_FTYPE_V8SI_V4SI_INT,
21453   V8HI_FTYPE_V8HI_V8HI_INT,
21454   V8SF_FTYPE_V8SF_V8SF_INT,
21455   V8SF_FTYPE_V8SF_V4SF_INT,
21456   V4SI_FTYPE_V4SI_V4SI_INT,
21457   V4DF_FTYPE_V4DF_V4DF_INT,
21458   V4DF_FTYPE_V4DF_V2DF_INT,
21459   V4SF_FTYPE_V4SF_V4SF_INT,
21460   V2DI_FTYPE_V2DI_V2DI_INT,
21461   V2DI2TI_FTYPE_V2DI_V2DI_INT,
21462   V1DI2DI_FTYPE_V1DI_V1DI_INT,
21463   V2DF_FTYPE_V2DF_V2DF_INT,
21464   V2DI_FTYPE_V2DI_UINT_UINT,
21465   V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21468 /* Special builtins with variable number of arguments.  These are builtins
   with memory operands or other side effects (loads, stores, fences,
   non-temporal moves); the last field is the ix86_special_builtin_type
   signature used when expanding.  NOTE(review): ~OPTION_MASK_ISA_64BIT
   entries appear to mark builtins available regardless of 64-bit mode —
   confirm against the definition machinery.  Array braces and some
   entries were elided from this extracted span.  */
21469 static const struct builtin_description bdesc_special_args[] =
21471   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
21472   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
/* MMX */
21475   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* 3DNow! */
21478   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE */
21481   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21482   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21483   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21485   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21486   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21487   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21488   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21490   /* SSE or 3DNow!A */
21491   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21492   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
/* SSE2 */
21495   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21496   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21497   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21498   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21499   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21500   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21501   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21502   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21503   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21505   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21506   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
/* SSE3 */
21509   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
/* SSE4.1 */
21512   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
/* SSE4A */
21515   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21516   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
/* AVX */
21519   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21520   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21521   { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21523   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21524   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21525   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21526   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21527   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21529   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21530   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21531   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21532   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21533   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21534   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21535   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21537   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21538   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21539   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21541   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21542   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21543   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21544   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21545   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21546   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21547   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21548   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21551 /* Builtins with variable number of arguments.  Ordinary (side-effect
   free) builtins: each entry gives the enabling ISA mask, insn pattern,
   __builtin_ia32_* name, builtin code, the RTX comparison code (or
   UNKNOWN when not a comparison), and the ix86_builtin_type signature.
   Note the cmpgt/cmpge entries reuse the lt/le patterns with a _SWAP
   signature instead of a dedicated pattern.  This table continues past
   the end of this extracted span; some entries inside were elided.  */
21552 static const struct builtin_description bdesc_args[] =
21554   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
21555   { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
21556   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
21557   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
21558   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
21559   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
21560   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
/* MMX */
21563   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21564   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21565   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21566   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21567   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21568   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21570   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21571   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21572   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21573   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21574   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21575   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21576   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21577   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21579   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21580   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21582   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21583   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21584   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21585   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21587   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21588   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21589   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21590   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21591   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21592   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21594   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21595   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21596   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21597   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21598   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21599   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21601   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21602   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21603   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21605   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
/* MMX shifts: _SI_COUNT takes an immediate/scalar count, _V*_COUNT a
   vector count operand.  */
21607   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21608   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21609   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21610   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21611   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21612   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21614   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21615   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21616   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21617   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21618   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21619   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21621   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21622   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21623   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21624   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
/* 3DNow! */
21627   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21628   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21629   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21630   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21632   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21633   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21634   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21635   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21636   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21637   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21638   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21639   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21640   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21641   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21642   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21643   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21644   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21645   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21646   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
/* 3DNow!A */
21649   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21650   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21651   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21652   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21653   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21654   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
/* SSE */
21657   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21658   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21659   { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21660   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21661   { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21662   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21663   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21664   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21665   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21666   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21667   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21668   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21670   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21672   { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21673   { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21674   { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21675   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21676   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21677   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21678   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21679   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
/* Packed compares: gt/ge are the lt/le patterns with operands swapped
   (_SWAP signature); ngt/nge likewise mirror nlt/nle.  */
21681   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21682   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21683   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21684   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21685   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21686   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21687   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21688   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21689   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21690   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21691   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21692   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21693   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21694   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21695   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21696   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21697   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21698   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21699   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21700   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21701   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21702   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21704   { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21705   { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21706   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21707   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21709   { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21710   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21711   { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21712   { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21714   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21715   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21716 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21717 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21718 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21720 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21721 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21722   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
21724 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21726 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21727 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21728 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21730 /* SSE MMX or 3Dnow!A */
21731 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21732 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21733 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21735 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21736 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21737 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21738 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21740 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21741 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21743 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21746 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21748 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21749 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21750 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21751 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21752 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21754 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21757 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21760 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21762 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21763 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21764 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21765 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21767 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21768 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21769 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21771 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21772 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21773 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21774 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21775 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21776 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21777 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21778 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21780 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21781 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21782 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21783 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21784 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21785 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21786 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21787 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21788 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21789 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21790 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21791 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21792 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21793 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21794 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21795 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21796 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21797 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21798 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21799 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21801 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21802 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21803 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21804 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21806 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21807 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21808 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21809 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21811 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21812 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21813 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21815 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21817 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21818 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21819 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21820 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21821 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21822 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21823 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21824 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21826 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21827 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21828 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21829 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21830 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21831 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21832 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21833 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21835 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21836 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
21838 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21839 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21840 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21841 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21843 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21844 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21846 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21847 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21848 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21849 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21850 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21851 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21853 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21854 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21855 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21856 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21858 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21859 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21860 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21861 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21862 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21863 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21864 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21865 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21867 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21868 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21869 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21871 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21872 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21874 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21875 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21877 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21879 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21880 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21881 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21882 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21884 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21885 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21886 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21887 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21888 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21889 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21890 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21892 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21893 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21894 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21895 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21896 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21897 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21898 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21900 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21901 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21902 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21903 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21905 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21906 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21907 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21909 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21911 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21912 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21914   /* movq128 expands via the sse2_movq128 pattern (MOVQ xmm,xmm is an
	   SSE2 instruction), so gate the builtin on SSE2, not plain SSE.  */
21914   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21917 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21918 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21921 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
21922 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21924 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21925 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21926 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21927 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21928 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21929 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21932 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21933 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21934 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21935 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21936 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21937 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21939 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21940 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21941 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21942 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21943 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21944 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21945 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21946 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21947 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21948 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21949 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21950 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21951 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21952 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21953 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21954 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21955 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21956 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21957 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21958 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21959 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21960 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21961 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21962 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21965 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21966 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
/* SSE4.1: blends (immediate- and mask-selected), dot products, INSERTPS,
   MPSADBW, the PMOVSX/PMOVZX sign/zero extensions, PHMINPOSUW, PACKUSDW,
   PCMPEQQ, the signed/unsigned byte/word/dword min-max family, and the
   widening (PMULDQ) and full 32-bit (PMULLD) multiplies.  */
21969 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21970 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21971 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21972 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21973 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21974 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21975 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21976 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21977 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21978 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
/* Sign extensions: the source is nominally a full 128-bit vector but only
   the low elements are widened, as the narrower argument type shows.  */
21980 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21981 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21982 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21983 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21984 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21985 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21986 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21987 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21988 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21989 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21990 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21991 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21992 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21994 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21995 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* Min/max use the generic smax/umax/smin/umin patterns rather than
   SSE4.1-specific insn names.  */
21996 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21997 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21998 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21999 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22000 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22001 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22002 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22003 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22004 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
22005 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22007 /* SSE4.1 and SSE5 */
/* ROUND{PD,PS,SD,SS} and PTEST are gated on OPTION_MASK_ISA_ROUND, i.e.
   shared between the SSE4.1 and SSE5 ISAs (hence the comment above) —
   presumably ISA_ROUND is the union of those masks; confirm in i386.opt.
   The three PTEST names share one insn, differing only in the comparison
   code (EQ/LTU/GTU) used to read the resulting flags.  */
22008 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22009 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22010 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22011 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22013 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22014 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22015 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
/* SSE4.2: PCMPGTQ plus the CRC32 accumulate builtins.  CRC32 additionally
   requires OPTION_MASK_ISA_CRC32, and the 64-bit form is 64-bit-only.  */
22018 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22019 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
22020 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
22021 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
22022 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
/* SSE4A (AMD): EXTRQ/INSERTQ bit-field extract and insert, in both the
   immediate ("...qi") and register-operand forms.  */
22025 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
22026 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
22027 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
22028 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* AES and PCLMULQDQ.  Gated only on SSE2 here; the builtin names are NULL
   (0) in these entries — presumably these builtins are registered
   explicitly elsewhere with additional AES/PCLMUL ISA checks; confirm at
   the def_builtin call sites.  */
22031 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
22032 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
22034 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22035 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22036 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22037 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22040 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
/* AVX 256-bit floating-point arithmetic and logic: add/sub/mul/div,
   ADDSUB, horizontal add/sub, and/andn/or/xor, min/max.  Entries use
   generic patterns (addv4df3, iorv8sf3, ...) where one exists and
   avx_-prefixed patterns otherwise.  */
22043 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22044 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22045 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22046 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22047 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22048 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22049 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22050 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22051 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22052 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22053 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22054 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22055 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22056 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22057 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22058 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22059 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22060 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22061 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22062 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22063 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22064 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22065 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22066 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22067 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22068 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
/* AVX permutes (variable and immediate), blends, DPPS, shuffles, the
   immediate-predicate CMP family, 128-bit lane extract/insert, and the
   packed int<->float conversions.  */
22070 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
22071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
22072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
22073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
22075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
22078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
22079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
/* The 128-bit cmpsd/cmpss/cmppd/cmpps builtins are re-registered here
   under the AVX mask — presumably because VEX encoding widens the valid
   immediate predicate range; confirm against the AVX CMPPS spec.  */
22082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22083 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22084 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22085 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22086 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22087 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22088 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
22089 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
22090 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
22091 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
22092 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
22093 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
22094 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22095 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
22096 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22097 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22098 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22099 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22100 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22101 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
22102 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22103 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22104 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22105 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22106 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
22107 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
22108 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
/* AVX element duplication, square root / reciprocal approximations,
   rounding, unpacks, and the 128<->256-bit cast builtins (siNNN_si etc.,
   which reinterpret between vector widths).  Note two sqrt/rsqrt variants
   each: the avx_-prefixed patterns and the generic "_nr" (no-reciprocal-
   refinement) named patterns.  */
22110 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22111 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22112 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22114 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22115 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22116 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22117 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22118 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22120 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22122 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22123 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22125 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22126 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22127 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22128 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22130 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
22131 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
22132 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
22133 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
22134 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
22135 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
/* AVX VTESTPS/VTESTPD (128- and 256-bit), 256-bit PTEST, and MOVMSK.
   As with SSE4.1 PTEST above, each triplet shares a single insn pattern
   and differs only in the flag-reading comparison (EQ=Z, LTU=C,
   GTU=~Z&~C).  */
22137 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22138 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22139 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22140 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22141 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22142 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22143 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22144 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22145 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22146 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22147 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22148 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22149 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22150 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22151 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22153 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
22154 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
/* Argument-shape classification for the multi-operand (SSE5) builtins in
   bdesc_multi_arg below.  The naming encodes operand count and element
   mode (e.g. MULTI_ARG_2_SI = two V4SI operands); _IMM marks a second
   operand that must be an immediate, _CMP marks comparison forms whose
   entries also carry an rtx comparison code.  NOTE(review): presumably
   consumed by the multi-arg builtin expander later in this file —
   confirm there.  */
22158 enum multi_arg_type {
22168   MULTI_ARG_3_PERMPS,
22169   MULTI_ARG_3_PERMPD,
22176   MULTI_ARG_2_DI_IMM,
22177   MULTI_ARG_2_SI_IMM,
22178   MULTI_ARG_2_HI_IMM,
22179   MULTI_ARG_2_QI_IMM,
22180   MULTI_ARG_2_SF_CMP,
22181   MULTI_ARG_2_DF_CMP,
22182   MULTI_ARG_2_DI_CMP,
22183   MULTI_ARG_2_SI_CMP,
22184   MULTI_ARG_2_HI_CMP,
22185   MULTI_ARG_2_QI_CMP,
/* Table of SSE5 multi-operand builtins.  Each entry pairs an ISA mask and
   insn pattern with a builtin name/code; the final field is a
   multi_arg_type classifying the operand shapes.  This first group covers
   the fused multiply-add family (scalar "vm" and packed forms, with all
   four negate combinations) and the PCMOV element-wise selects.  */
22208 static const struct builtin_description bdesc_multi_arg[] =
22210 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22211 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22212 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22213 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22214 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22215 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22216 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22217 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22218 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22219 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22220 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22221 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22222 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22223 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22224 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22225 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
/* The plain "pcmov" name is a deliberate alias of the v2di form — both
   use CODE_FOR_sse5_pcmov_v2di.  */
22226 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
22227 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
22228 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
22229 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
22230 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
22231 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
22232 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
22233 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
/* SSE5 PERMPS/PERMPD permutes and the PMACS*/PMADCS* integer
   multiply-accumulate family (saturating "ss" and wrapping variants;
   _HI_SI and _SI_DI types mark widening accumulation).  */
22234 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, UNKNOWN, (int)MULTI_ARG_3_PERMPS },
22235 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, UNKNOWN, (int)MULTI_ARG_3_PERMPD },
22236 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22237 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22238 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22239 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22240 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22241 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22242 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22243 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22244 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22245 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22246 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22247 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
/* SSE5 rotates and shifts: PROT* (variable rotate via vrotl patterns,
   immediate rotate via the "...i"-named builtins using plain rotl
   patterns), PSHA* arithmetic shifts, and PSHL* logical shifts.  */
22248 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22249 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
22250 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
22251 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
22252 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
22253 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
22254 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
22255 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
22256 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22257 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
22258 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
22259 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
22260 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22261 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
22262 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
22263 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
/* SSE5 FRCZ (extract fraction), half-precision conversions, and the
   horizontal add/sub families.  The _1_QI_HI-style types mark unary
   builtins whose result elements are wider than the source elements.  */
22264 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
22265 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
22266 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
22267 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
22268 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)MULTI_ARG_1_PH2PS },
22269 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int)MULTI_ARG_1_PS2PH },
22270 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22271 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22272 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22273 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22274 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22275 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22276 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22277 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22278 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22279 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22280 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22281 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22282 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22283 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22284 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22286 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22287 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22288 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22289 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22290 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22291 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22292 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22293 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22294 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22295 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22296 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22297 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22298 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22299 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22300 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22301 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22303 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22304 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22305 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22306 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22307 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22308 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22309 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22310 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22311 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22312 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22313 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22314 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22315 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22316 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22317 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22318 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22320 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22321 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22322 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22323 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22324 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22325 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22326 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22327 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22328 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22329 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22330 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22331 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22332 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22333 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22334 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22335 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22337 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22338 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22339 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22340 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22341 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22342 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22343 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22344 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22345 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22346 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22347 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22348 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22349 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22350 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22351 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22352 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22354 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22355 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22356 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22357 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22358 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22359 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22360 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22362 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22363 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22364 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22365 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22366 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22367 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22368 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22370 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22371 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22372 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22373 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22374 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22375 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22376 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22378 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22379 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22380 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22381 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22382 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22383 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22384 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22386 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22387 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22388 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22389 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22390 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22391 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22392 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22394 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22395 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22396 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22397 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22398 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22399 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22400 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22402 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22403 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22404 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22405 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22406 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22407 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22408 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22410 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22411 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22412 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22413 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22414 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22415 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22416 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22418 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22419 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22420 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22421 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22422 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22423 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22424 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22425 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22427 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22428 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22429 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22430 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22431 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22432 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22433 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22434 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22436 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22437 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22438 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22439 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22440 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22441 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22442 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22443 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22446 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
22447 in the current target ISA to allow the user to compile particular modules
22448 with different target specific options that differ from the command line
22451 ix86_init_mmx_sse_builtins (void)
22453 const struct builtin_description * d;
22456 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22457 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22458 tree V1DI_type_node
22459 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22460 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22461 tree V2DI_type_node
22462 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22463 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22464 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22465 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22466 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22467 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22468 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22470 tree pchar_type_node = build_pointer_type (char_type_node);
22471 tree pcchar_type_node
22472 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22473 tree pfloat_type_node = build_pointer_type (float_type_node);
22474 tree pcfloat_type_node
22475 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22476 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22477 tree pcv2sf_type_node
22478 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22479 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22480 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22483 tree int_ftype_v4sf_v4sf
22484 = build_function_type_list (integer_type_node,
22485 V4SF_type_node, V4SF_type_node, NULL_TREE);
22486 tree v4si_ftype_v4sf_v4sf
22487 = build_function_type_list (V4SI_type_node,
22488 V4SF_type_node, V4SF_type_node, NULL_TREE);
22489 /* MMX/SSE/integer conversions. */
22490 tree int_ftype_v4sf
22491 = build_function_type_list (integer_type_node,
22492 V4SF_type_node, NULL_TREE);
22493 tree int64_ftype_v4sf
22494 = build_function_type_list (long_long_integer_type_node,
22495 V4SF_type_node, NULL_TREE);
22496 tree int_ftype_v8qi
22497 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22498 tree v4sf_ftype_v4sf_int
22499 = build_function_type_list (V4SF_type_node,
22500 V4SF_type_node, integer_type_node, NULL_TREE);
22501 tree v4sf_ftype_v4sf_int64
22502 = build_function_type_list (V4SF_type_node,
22503 V4SF_type_node, long_long_integer_type_node,
22505 tree v4sf_ftype_v4sf_v2si
22506 = build_function_type_list (V4SF_type_node,
22507 V4SF_type_node, V2SI_type_node, NULL_TREE);
22509 /* Miscellaneous. */
22510 tree v8qi_ftype_v4hi_v4hi
22511 = build_function_type_list (V8QI_type_node,
22512 V4HI_type_node, V4HI_type_node, NULL_TREE);
22513 tree v4hi_ftype_v2si_v2si
22514 = build_function_type_list (V4HI_type_node,
22515 V2SI_type_node, V2SI_type_node, NULL_TREE);
22516 tree v4sf_ftype_v4sf_v4sf_int
22517 = build_function_type_list (V4SF_type_node,
22518 V4SF_type_node, V4SF_type_node,
22519 integer_type_node, NULL_TREE);
22520 tree v2si_ftype_v4hi_v4hi
22521 = build_function_type_list (V2SI_type_node,
22522 V4HI_type_node, V4HI_type_node, NULL_TREE);
22523 tree v4hi_ftype_v4hi_int
22524 = build_function_type_list (V4HI_type_node,
22525 V4HI_type_node, integer_type_node, NULL_TREE);
22526 tree v2si_ftype_v2si_int
22527 = build_function_type_list (V2SI_type_node,
22528 V2SI_type_node, integer_type_node, NULL_TREE);
22529 tree v1di_ftype_v1di_int
22530 = build_function_type_list (V1DI_type_node,
22531 V1DI_type_node, integer_type_node, NULL_TREE);
22533 tree void_ftype_void
22534 = build_function_type (void_type_node, void_list_node);
22535 tree void_ftype_unsigned
22536 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22537 tree void_ftype_unsigned_unsigned
22538 = build_function_type_list (void_type_node, unsigned_type_node,
22539 unsigned_type_node, NULL_TREE);
22540 tree void_ftype_pcvoid_unsigned_unsigned
22541 = build_function_type_list (void_type_node, const_ptr_type_node,
22542 unsigned_type_node, unsigned_type_node,
22544 tree unsigned_ftype_void
22545 = build_function_type (unsigned_type_node, void_list_node);
22546 tree v2si_ftype_v4sf
22547 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22548 /* Loads/stores. */
22549 tree void_ftype_v8qi_v8qi_pchar
22550 = build_function_type_list (void_type_node,
22551 V8QI_type_node, V8QI_type_node,
22552 pchar_type_node, NULL_TREE);
22553 tree v4sf_ftype_pcfloat
22554 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22555 tree v4sf_ftype_v4sf_pcv2sf
22556 = build_function_type_list (V4SF_type_node,
22557 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22558 tree void_ftype_pv2sf_v4sf
22559 = build_function_type_list (void_type_node,
22560 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22561 tree void_ftype_pfloat_v4sf
22562 = build_function_type_list (void_type_node,
22563 pfloat_type_node, V4SF_type_node, NULL_TREE);
22564 tree void_ftype_pdi_di
22565 = build_function_type_list (void_type_node,
22566 pdi_type_node, long_long_unsigned_type_node,
22568 tree void_ftype_pv2di_v2di
22569 = build_function_type_list (void_type_node,
22570 pv2di_type_node, V2DI_type_node, NULL_TREE);
22571 /* Normal vector unops. */
22572 tree v4sf_ftype_v4sf
22573 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22574 tree v16qi_ftype_v16qi
22575 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22576 tree v8hi_ftype_v8hi
22577 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22578 tree v4si_ftype_v4si
22579 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22580 tree v8qi_ftype_v8qi
22581 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22582 tree v4hi_ftype_v4hi
22583 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22585 /* Normal vector binops. */
22586 tree v4sf_ftype_v4sf_v4sf
22587 = build_function_type_list (V4SF_type_node,
22588 V4SF_type_node, V4SF_type_node, NULL_TREE);
22589 tree v8qi_ftype_v8qi_v8qi
22590 = build_function_type_list (V8QI_type_node,
22591 V8QI_type_node, V8QI_type_node, NULL_TREE);
22592 tree v4hi_ftype_v4hi_v4hi
22593 = build_function_type_list (V4HI_type_node,
22594 V4HI_type_node, V4HI_type_node, NULL_TREE);
22595 tree v2si_ftype_v2si_v2si
22596 = build_function_type_list (V2SI_type_node,
22597 V2SI_type_node, V2SI_type_node, NULL_TREE);
22598 tree v1di_ftype_v1di_v1di
22599 = build_function_type_list (V1DI_type_node,
22600 V1DI_type_node, V1DI_type_node, NULL_TREE);
22601 tree v1di_ftype_v1di_v1di_int
22602 = build_function_type_list (V1DI_type_node,
22603 V1DI_type_node, V1DI_type_node,
22604 integer_type_node, NULL_TREE);
22605 tree v2si_ftype_v2sf
22606 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22607 tree v2sf_ftype_v2si
22608 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22609 tree v2si_ftype_v2si
22610 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22611 tree v2sf_ftype_v2sf
22612 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22613 tree v2sf_ftype_v2sf_v2sf
22614 = build_function_type_list (V2SF_type_node,
22615 V2SF_type_node, V2SF_type_node, NULL_TREE);
22616 tree v2si_ftype_v2sf_v2sf
22617 = build_function_type_list (V2SI_type_node,
22618 V2SF_type_node, V2SF_type_node, NULL_TREE);
22619 tree pint_type_node = build_pointer_type (integer_type_node);
22620 tree pdouble_type_node = build_pointer_type (double_type_node);
22621 tree pcdouble_type_node = build_pointer_type (
22622 build_type_variant (double_type_node, 1, 0));
22623 tree int_ftype_v2df_v2df
22624 = build_function_type_list (integer_type_node,
22625 V2DF_type_node, V2DF_type_node, NULL_TREE);
22627 tree void_ftype_pcvoid
22628 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22629 tree v4sf_ftype_v4si
22630 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22631 tree v4si_ftype_v4sf
22632 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22633 tree v2df_ftype_v4si
22634 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22635 tree v4si_ftype_v2df
22636 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22637 tree v4si_ftype_v2df_v2df
22638 = build_function_type_list (V4SI_type_node,
22639 V2DF_type_node, V2DF_type_node, NULL_TREE);
22640 tree v2si_ftype_v2df
22641 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22642 tree v4sf_ftype_v2df
22643 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22644 tree v2df_ftype_v2si
22645 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22646 tree v2df_ftype_v4sf
22647 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22648 tree int_ftype_v2df
22649 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22650 tree int64_ftype_v2df
22651 = build_function_type_list (long_long_integer_type_node,
22652 V2DF_type_node, NULL_TREE);
22653 tree v2df_ftype_v2df_int
22654 = build_function_type_list (V2DF_type_node,
22655 V2DF_type_node, integer_type_node, NULL_TREE);
22656 tree v2df_ftype_v2df_int64
22657 = build_function_type_list (V2DF_type_node,
22658 V2DF_type_node, long_long_integer_type_node,
22660 tree v4sf_ftype_v4sf_v2df
22661 = build_function_type_list (V4SF_type_node,
22662 V4SF_type_node, V2DF_type_node, NULL_TREE);
22663 tree v2df_ftype_v2df_v4sf
22664 = build_function_type_list (V2DF_type_node,
22665 V2DF_type_node, V4SF_type_node, NULL_TREE);
22666 tree v2df_ftype_v2df_v2df_int
22667 = build_function_type_list (V2DF_type_node,
22668 V2DF_type_node, V2DF_type_node,
22671 tree v2df_ftype_v2df_pcdouble
22672 = build_function_type_list (V2DF_type_node,
22673 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22674 tree void_ftype_pdouble_v2df
22675 = build_function_type_list (void_type_node,
22676 pdouble_type_node, V2DF_type_node, NULL_TREE);
22677 tree void_ftype_pint_int
22678 = build_function_type_list (void_type_node,
22679 pint_type_node, integer_type_node, NULL_TREE);
22680 tree void_ftype_v16qi_v16qi_pchar
22681 = build_function_type_list (void_type_node,
22682 V16QI_type_node, V16QI_type_node,
22683 pchar_type_node, NULL_TREE);
22684 tree v2df_ftype_pcdouble
22685 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22686 tree v2df_ftype_v2df_v2df
22687 = build_function_type_list (V2DF_type_node,
22688 V2DF_type_node, V2DF_type_node, NULL_TREE);
22689 tree v16qi_ftype_v16qi_v16qi
22690 = build_function_type_list (V16QI_type_node,
22691 V16QI_type_node, V16QI_type_node, NULL_TREE);
22692 tree v8hi_ftype_v8hi_v8hi
22693 = build_function_type_list (V8HI_type_node,
22694 V8HI_type_node, V8HI_type_node, NULL_TREE);
22695 tree v4si_ftype_v4si_v4si
22696 = build_function_type_list (V4SI_type_node,
22697 V4SI_type_node, V4SI_type_node, NULL_TREE);
22698 tree v2di_ftype_v2di_v2di
22699 = build_function_type_list (V2DI_type_node,
22700 V2DI_type_node, V2DI_type_node, NULL_TREE);
22701 tree v2di_ftype_v2df_v2df
22702 = build_function_type_list (V2DI_type_node,
22703 V2DF_type_node, V2DF_type_node, NULL_TREE);
22704 tree v2df_ftype_v2df
22705 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22706 tree v2di_ftype_v2di_int
22707 = build_function_type_list (V2DI_type_node,
22708 V2DI_type_node, integer_type_node, NULL_TREE);
22709 tree v2di_ftype_v2di_v2di_int
22710 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22711 V2DI_type_node, integer_type_node, NULL_TREE);
22712 tree v4si_ftype_v4si_int
22713 = build_function_type_list (V4SI_type_node,
22714 V4SI_type_node, integer_type_node, NULL_TREE);
22715 tree v8hi_ftype_v8hi_int
22716 = build_function_type_list (V8HI_type_node,
22717 V8HI_type_node, integer_type_node, NULL_TREE);
22718 tree v4si_ftype_v8hi_v8hi
22719 = build_function_type_list (V4SI_type_node,
22720 V8HI_type_node, V8HI_type_node, NULL_TREE);
22721 tree v1di_ftype_v8qi_v8qi
22722 = build_function_type_list (V1DI_type_node,
22723 V8QI_type_node, V8QI_type_node, NULL_TREE);
22724 tree v1di_ftype_v2si_v2si
22725 = build_function_type_list (V1DI_type_node,
22726 V2SI_type_node, V2SI_type_node, NULL_TREE);
22727 tree v2di_ftype_v16qi_v16qi
22728 = build_function_type_list (V2DI_type_node,
22729 V16QI_type_node, V16QI_type_node, NULL_TREE);
22730 tree v2di_ftype_v4si_v4si
22731 = build_function_type_list (V2DI_type_node,
22732 V4SI_type_node, V4SI_type_node, NULL_TREE);
22733 tree int_ftype_v16qi
22734 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22735 tree v16qi_ftype_pcchar
22736 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22737 tree void_ftype_pchar_v16qi
22738 = build_function_type_list (void_type_node,
22739 pchar_type_node, V16QI_type_node, NULL_TREE);
22741 tree v2di_ftype_v2di_unsigned_unsigned
22742 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22743 unsigned_type_node, unsigned_type_node,
22745 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22746 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22747 unsigned_type_node, unsigned_type_node,
22749 tree v2di_ftype_v2di_v16qi
22750 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22752 tree v2df_ftype_v2df_v2df_v2df
22753 = build_function_type_list (V2DF_type_node,
22754 V2DF_type_node, V2DF_type_node,
22755 V2DF_type_node, NULL_TREE);
22756 tree v4sf_ftype_v4sf_v4sf_v4sf
22757 = build_function_type_list (V4SF_type_node,
22758 V4SF_type_node, V4SF_type_node,
22759 V4SF_type_node, NULL_TREE);
22760 tree v8hi_ftype_v16qi
22761 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22763 tree v4si_ftype_v16qi
22764 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22766 tree v2di_ftype_v16qi
22767 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22769 tree v4si_ftype_v8hi
22770 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22772 tree v2di_ftype_v8hi
22773 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22775 tree v2di_ftype_v4si
22776 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22778 tree v2di_ftype_pv2di
22779 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22781 tree v16qi_ftype_v16qi_v16qi_int
22782 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22783 V16QI_type_node, integer_type_node,
22785 tree v16qi_ftype_v16qi_v16qi_v16qi
22786 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22787 V16QI_type_node, V16QI_type_node,
22789 tree v8hi_ftype_v8hi_v8hi_int
22790 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22791 V8HI_type_node, integer_type_node,
22793 tree v4si_ftype_v4si_v4si_int
22794 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22795 V4SI_type_node, integer_type_node,
22797 tree int_ftype_v2di_v2di
22798 = build_function_type_list (integer_type_node,
22799 V2DI_type_node, V2DI_type_node,
22801 tree int_ftype_v16qi_int_v16qi_int_int
22802 = build_function_type_list (integer_type_node,
22809 tree v16qi_ftype_v16qi_int_v16qi_int_int
22810 = build_function_type_list (V16QI_type_node,
22817 tree int_ftype_v16qi_v16qi_int
22818 = build_function_type_list (integer_type_node,
22824 /* SSE5 instructions */
22825 tree v2di_ftype_v2di_v2di_v2di
22826 = build_function_type_list (V2DI_type_node,
22832 tree v4si_ftype_v4si_v4si_v4si
22833 = build_function_type_list (V4SI_type_node,
22839 tree v4si_ftype_v4si_v4si_v2di
22840 = build_function_type_list (V4SI_type_node,
22846 tree v8hi_ftype_v8hi_v8hi_v8hi
22847 = build_function_type_list (V8HI_type_node,
22853 tree v8hi_ftype_v8hi_v8hi_v4si
22854 = build_function_type_list (V8HI_type_node,
22860 tree v2df_ftype_v2df_v2df_v16qi
22861 = build_function_type_list (V2DF_type_node,
22867 tree v4sf_ftype_v4sf_v4sf_v16qi
22868 = build_function_type_list (V4SF_type_node,
22874 tree v2di_ftype_v2di_si
22875 = build_function_type_list (V2DI_type_node,
22880 tree v4si_ftype_v4si_si
22881 = build_function_type_list (V4SI_type_node,
22886 tree v8hi_ftype_v8hi_si
22887 = build_function_type_list (V8HI_type_node,
22892 tree v16qi_ftype_v16qi_si
22893 = build_function_type_list (V16QI_type_node,
22897 tree v4sf_ftype_v4hi
22898 = build_function_type_list (V4SF_type_node,
22902 tree v4hi_ftype_v4sf
22903 = build_function_type_list (V4HI_type_node,
22907 tree v2di_ftype_v2di
22908 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22910 tree v16qi_ftype_v8hi_v8hi
22911 = build_function_type_list (V16QI_type_node,
22912 V8HI_type_node, V8HI_type_node,
22914 tree v8hi_ftype_v4si_v4si
22915 = build_function_type_list (V8HI_type_node,
22916 V4SI_type_node, V4SI_type_node,
22918 tree v8hi_ftype_v16qi_v16qi
22919 = build_function_type_list (V8HI_type_node,
22920 V16QI_type_node, V16QI_type_node,
22922 tree v4hi_ftype_v8qi_v8qi
22923 = build_function_type_list (V4HI_type_node,
22924 V8QI_type_node, V8QI_type_node,
22926 tree unsigned_ftype_unsigned_uchar
22927 = build_function_type_list (unsigned_type_node,
22928 unsigned_type_node,
22929 unsigned_char_type_node,
22931 tree unsigned_ftype_unsigned_ushort
22932 = build_function_type_list (unsigned_type_node,
22933 unsigned_type_node,
22934 short_unsigned_type_node,
22936 tree unsigned_ftype_unsigned_unsigned
22937 = build_function_type_list (unsigned_type_node,
22938 unsigned_type_node,
22939 unsigned_type_node,
22941 tree uint64_ftype_uint64_uint64
22942 = build_function_type_list (long_long_unsigned_type_node,
22943 long_long_unsigned_type_node,
22944 long_long_unsigned_type_node,
22946 tree float_ftype_float
22947 = build_function_type_list (float_type_node,
22952 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22954 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22956 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22958 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22960 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22962 tree v8sf_ftype_v8sf
22963 = build_function_type_list (V8SF_type_node,
22966 tree v8si_ftype_v8sf
22967 = build_function_type_list (V8SI_type_node,
22970 tree v8sf_ftype_v8si
22971 = build_function_type_list (V8SF_type_node,
22974 tree v4si_ftype_v4df
22975 = build_function_type_list (V4SI_type_node,
22978 tree v4df_ftype_v4df
22979 = build_function_type_list (V4DF_type_node,
22982 tree v4df_ftype_v4si
22983 = build_function_type_list (V4DF_type_node,
22986 tree v4df_ftype_v4sf
22987 = build_function_type_list (V4DF_type_node,
22990 tree v4sf_ftype_v4df
22991 = build_function_type_list (V4SF_type_node,
22994 tree v8sf_ftype_v8sf_v8sf
22995 = build_function_type_list (V8SF_type_node,
22996 V8SF_type_node, V8SF_type_node,
22998 tree v4df_ftype_v4df_v4df
22999 = build_function_type_list (V4DF_type_node,
23000 V4DF_type_node, V4DF_type_node,
23002 tree v8sf_ftype_v8sf_int
23003 = build_function_type_list (V8SF_type_node,
23004 V8SF_type_node, integer_type_node,
23006 tree v4si_ftype_v8si_int
23007 = build_function_type_list (V4SI_type_node,
23008 V8SI_type_node, integer_type_node,
23010 tree v4df_ftype_v4df_int
23011 = build_function_type_list (V4DF_type_node,
23012 V4DF_type_node, integer_type_node,
23014 tree v4sf_ftype_v8sf_int
23015 = build_function_type_list (V4SF_type_node,
23016 V8SF_type_node, integer_type_node,
23018 tree v2df_ftype_v4df_int
23019 = build_function_type_list (V2DF_type_node,
23020 V4DF_type_node, integer_type_node,
23022 tree v8sf_ftype_v8sf_v8sf_int
23023 = build_function_type_list (V8SF_type_node,
23024 V8SF_type_node, V8SF_type_node,
23027 tree v8sf_ftype_v8sf_v8sf_v8sf
23028 = build_function_type_list (V8SF_type_node,
23029 V8SF_type_node, V8SF_type_node,
23032 tree v4df_ftype_v4df_v4df_v4df
23033 = build_function_type_list (V4DF_type_node,
23034 V4DF_type_node, V4DF_type_node,
23037 tree v8si_ftype_v8si_v8si_int
23038 = build_function_type_list (V8SI_type_node,
23039 V8SI_type_node, V8SI_type_node,
23042 tree v4df_ftype_v4df_v4df_int
23043 = build_function_type_list (V4DF_type_node,
23044 V4DF_type_node, V4DF_type_node,
23047 tree v8sf_ftype_pcfloat
23048 = build_function_type_list (V8SF_type_node,
23051 tree v4df_ftype_pcdouble
23052 = build_function_type_list (V4DF_type_node,
23053 pcdouble_type_node,
23055 tree pcv4sf_type_node
23056 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
23057 tree pcv2df_type_node
23058 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
23059 tree v8sf_ftype_pcv4sf
23060 = build_function_type_list (V8SF_type_node,
23063 tree v4df_ftype_pcv2df
23064 = build_function_type_list (V4DF_type_node,
23067 tree v32qi_ftype_pcchar
23068 = build_function_type_list (V32QI_type_node,
23071 tree void_ftype_pchar_v32qi
23072 = build_function_type_list (void_type_node,
23073 pchar_type_node, V32QI_type_node,
23075 tree v8si_ftype_v8si_v4si_int
23076 = build_function_type_list (V8SI_type_node,
23077 V8SI_type_node, V4SI_type_node,
23080 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
23081 tree void_ftype_pv4di_v4di
23082 = build_function_type_list (void_type_node,
23083 pv4di_type_node, V4DI_type_node,
23085 tree v8sf_ftype_v8sf_v4sf_int
23086 = build_function_type_list (V8SF_type_node,
23087 V8SF_type_node, V4SF_type_node,
23090 tree v4df_ftype_v4df_v2df_int
23091 = build_function_type_list (V4DF_type_node,
23092 V4DF_type_node, V2DF_type_node,
23095 tree void_ftype_pfloat_v8sf
23096 = build_function_type_list (void_type_node,
23097 pfloat_type_node, V8SF_type_node,
23099 tree void_ftype_pdouble_v4df
23100 = build_function_type_list (void_type_node,
23101 pdouble_type_node, V4DF_type_node,
23103 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
23104 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
23105 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
23106 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
23107 tree pcv8sf_type_node
23108 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
23109 tree pcv4df_type_node
23110 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
23111 tree v8sf_ftype_pcv8sf_v8sf
23112 = build_function_type_list (V8SF_type_node,
23113 pcv8sf_type_node, V8SF_type_node,
23115 tree v4df_ftype_pcv4df_v4df
23116 = build_function_type_list (V4DF_type_node,
23117 pcv4df_type_node, V4DF_type_node,
23119 tree v4sf_ftype_pcv4sf_v4sf
23120 = build_function_type_list (V4SF_type_node,
23121 pcv4sf_type_node, V4SF_type_node,
23123 tree v2df_ftype_pcv2df_v2df
23124 = build_function_type_list (V2DF_type_node,
23125 pcv2df_type_node, V2DF_type_node,
23127 tree void_ftype_pv8sf_v8sf_v8sf
23128 = build_function_type_list (void_type_node,
23129 pv8sf_type_node, V8SF_type_node,
23132 tree void_ftype_pv4df_v4df_v4df
23133 = build_function_type_list (void_type_node,
23134 pv4df_type_node, V4DF_type_node,
23137 tree void_ftype_pv4sf_v4sf_v4sf
23138 = build_function_type_list (void_type_node,
23139 pv4sf_type_node, V4SF_type_node,
23142 tree void_ftype_pv2df_v2df_v2df
23143 = build_function_type_list (void_type_node,
23144 pv2df_type_node, V2DF_type_node,
23147 tree v4df_ftype_v2df
23148 = build_function_type_list (V4DF_type_node,
23151 tree v8sf_ftype_v4sf
23152 = build_function_type_list (V8SF_type_node,
23155 tree v8si_ftype_v4si
23156 = build_function_type_list (V8SI_type_node,
23159 tree v2df_ftype_v4df
23160 = build_function_type_list (V2DF_type_node,
23163 tree v4sf_ftype_v8sf
23164 = build_function_type_list (V4SF_type_node,
23167 tree v4si_ftype_v8si
23168 = build_function_type_list (V4SI_type_node,
23171 tree int_ftype_v4df
23172 = build_function_type_list (integer_type_node,
23175 tree int_ftype_v8sf
23176 = build_function_type_list (integer_type_node,
23179 tree int_ftype_v8sf_v8sf
23180 = build_function_type_list (integer_type_node,
23181 V8SF_type_node, V8SF_type_node,
23183 tree int_ftype_v4di_v4di
23184 = build_function_type_list (integer_type_node,
23185 V4DI_type_node, V4DI_type_node,
23187 tree int_ftype_v4df_v4df
23188 = build_function_type_list (integer_type_node,
23189 V4DF_type_node, V4DF_type_node,
23191 tree v8sf_ftype_v8sf_v8si
23192 = build_function_type_list (V8SF_type_node,
23193 V8SF_type_node, V8SI_type_node,
23195 tree v4df_ftype_v4df_v4di
23196 = build_function_type_list (V4DF_type_node,
23197 V4DF_type_node, V4DI_type_node,
23199 tree v4sf_ftype_v4sf_v4si
23200 = build_function_type_list (V4SF_type_node,
23201 V4SF_type_node, V4SI_type_node, NULL_TREE);
23202 tree v2df_ftype_v2df_v2di
23203 = build_function_type_list (V2DF_type_node,
23204 V2DF_type_node, V2DI_type_node, NULL_TREE);
23206 /* Integer intrinsics. */
23207 tree uint64_ftype_void
23208 = build_function_type (long_long_unsigned_type_node,
23211 = build_function_type_list (integer_type_node,
23212 integer_type_node, NULL_TREE);
23213 tree int64_ftype_int64
23214 = build_function_type_list (long_long_integer_type_node,
23215 long_long_integer_type_node,
23217 tree uint64_ftype_int
23218 = build_function_type_list (long_long_unsigned_type_node,
23219 integer_type_node, NULL_TREE);
23220 tree punsigned_type_node = build_pointer_type (unsigned_type_node);
23221 tree uint64_ftype_punsigned
23222 = build_function_type_list (long_long_unsigned_type_node,
23223 punsigned_type_node, NULL_TREE);
23224 tree ushort_ftype_ushort_int
23225 = build_function_type_list (short_unsigned_type_node,
23226 short_unsigned_type_node,
23229 tree uchar_ftype_uchar_int
23230 = build_function_type_list (unsigned_char_type_node,
23231 unsigned_char_type_node,
23237 /* Add all special builtins with variable number of operands. */
23238 for (i = 0, d = bdesc_special_args;
23239 i < ARRAY_SIZE (bdesc_special_args);
23247 switch ((enum ix86_special_builtin_type) d->flag)
23249 case VOID_FTYPE_VOID:
23250 type = void_ftype_void;
23252 case UINT64_FTYPE_VOID:
23253 type = uint64_ftype_void;
23255 case UINT64_FTYPE_PUNSIGNED:
23256 type = uint64_ftype_punsigned;
23258 case V32QI_FTYPE_PCCHAR:
23259 type = v32qi_ftype_pcchar;
23261 case V16QI_FTYPE_PCCHAR:
23262 type = v16qi_ftype_pcchar;
23264 case V8SF_FTYPE_PCV4SF:
23265 type = v8sf_ftype_pcv4sf;
23267 case V8SF_FTYPE_PCFLOAT:
23268 type = v8sf_ftype_pcfloat;
23270 case V4DF_FTYPE_PCV2DF:
23271 type = v4df_ftype_pcv2df;
23273 case V4DF_FTYPE_PCDOUBLE:
23274 type = v4df_ftype_pcdouble;
23276 case V4SF_FTYPE_PCFLOAT:
23277 type = v4sf_ftype_pcfloat;
23279 case V2DI_FTYPE_PV2DI:
23280 type = v2di_ftype_pv2di;
23282 case V2DF_FTYPE_PCDOUBLE:
23283 type = v2df_ftype_pcdouble;
23285 case V8SF_FTYPE_PCV8SF_V8SF:
23286 type = v8sf_ftype_pcv8sf_v8sf;
23288 case V4DF_FTYPE_PCV4DF_V4DF:
23289 type = v4df_ftype_pcv4df_v4df;
23291 case V4SF_FTYPE_V4SF_PCV2SF:
23292 type = v4sf_ftype_v4sf_pcv2sf;
23294 case V4SF_FTYPE_PCV4SF_V4SF:
23295 type = v4sf_ftype_pcv4sf_v4sf;
23297 case V2DF_FTYPE_V2DF_PCDOUBLE:
23298 type = v2df_ftype_v2df_pcdouble;
23300 case V2DF_FTYPE_PCV2DF_V2DF:
23301 type = v2df_ftype_pcv2df_v2df;
23303 case VOID_FTYPE_PV2SF_V4SF:
23304 type = void_ftype_pv2sf_v4sf;
23306 case VOID_FTYPE_PV4DI_V4DI:
23307 type = void_ftype_pv4di_v4di;
23309 case VOID_FTYPE_PV2DI_V2DI:
23310 type = void_ftype_pv2di_v2di;
23312 case VOID_FTYPE_PCHAR_V32QI:
23313 type = void_ftype_pchar_v32qi;
23315 case VOID_FTYPE_PCHAR_V16QI:
23316 type = void_ftype_pchar_v16qi;
23318 case VOID_FTYPE_PFLOAT_V8SF:
23319 type = void_ftype_pfloat_v8sf;
23321 case VOID_FTYPE_PFLOAT_V4SF:
23322 type = void_ftype_pfloat_v4sf;
23324 case VOID_FTYPE_PDOUBLE_V4DF:
23325 type = void_ftype_pdouble_v4df;
23327 case VOID_FTYPE_PDOUBLE_V2DF:
23328 type = void_ftype_pdouble_v2df;
23330 case VOID_FTYPE_PDI_DI:
23331 type = void_ftype_pdi_di;
23333 case VOID_FTYPE_PINT_INT:
23334 type = void_ftype_pint_int;
23336 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23337 type = void_ftype_pv8sf_v8sf_v8sf;
23339 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23340 type = void_ftype_pv4df_v4df_v4df;
23342 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23343 type = void_ftype_pv4sf_v4sf_v4sf;
23345 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23346 type = void_ftype_pv2df_v2df_v2df;
23349 gcc_unreachable ();
23352 def_builtin (d->mask, d->name, type, d->code);
23355 /* Add all builtins with variable number of operands. */
23356 for (i = 0, d = bdesc_args;
23357 i < ARRAY_SIZE (bdesc_args);
23365 switch ((enum ix86_builtin_type) d->flag)
23367 case FLOAT_FTYPE_FLOAT:
23368 type = float_ftype_float;
23370 case INT_FTYPE_V8SF_V8SF_PTEST:
23371 type = int_ftype_v8sf_v8sf;
23373 case INT_FTYPE_V4DI_V4DI_PTEST:
23374 type = int_ftype_v4di_v4di;
23376 case INT_FTYPE_V4DF_V4DF_PTEST:
23377 type = int_ftype_v4df_v4df;
23379 case INT_FTYPE_V4SF_V4SF_PTEST:
23380 type = int_ftype_v4sf_v4sf;
23382 case INT_FTYPE_V2DI_V2DI_PTEST:
23383 type = int_ftype_v2di_v2di;
23385 case INT_FTYPE_V2DF_V2DF_PTEST:
23386 type = int_ftype_v2df_v2df;
23388 case INT_FTYPE_INT:
23389 type = int_ftype_int;
23391 case UINT64_FTYPE_INT:
23392 type = uint64_ftype_int;
23394 case INT64_FTYPE_INT64:
23395 type = int64_ftype_int64;
23397 case INT64_FTYPE_V4SF:
23398 type = int64_ftype_v4sf;
23400 case INT64_FTYPE_V2DF:
23401 type = int64_ftype_v2df;
23403 case INT_FTYPE_V16QI:
23404 type = int_ftype_v16qi;
23406 case INT_FTYPE_V8QI:
23407 type = int_ftype_v8qi;
23409 case INT_FTYPE_V8SF:
23410 type = int_ftype_v8sf;
23412 case INT_FTYPE_V4DF:
23413 type = int_ftype_v4df;
23415 case INT_FTYPE_V4SF:
23416 type = int_ftype_v4sf;
23418 case INT_FTYPE_V2DF:
23419 type = int_ftype_v2df;
23421 case V16QI_FTYPE_V16QI:
23422 type = v16qi_ftype_v16qi;
23424 case V8SI_FTYPE_V8SF:
23425 type = v8si_ftype_v8sf;
23427 case V8SI_FTYPE_V4SI:
23428 type = v8si_ftype_v4si;
23430 case V8HI_FTYPE_V8HI:
23431 type = v8hi_ftype_v8hi;
23433 case V8HI_FTYPE_V16QI:
23434 type = v8hi_ftype_v16qi;
23436 case V8QI_FTYPE_V8QI:
23437 type = v8qi_ftype_v8qi;
23439 case V8SF_FTYPE_V8SF:
23440 type = v8sf_ftype_v8sf;
23442 case V8SF_FTYPE_V8SI:
23443 type = v8sf_ftype_v8si;
23445 case V8SF_FTYPE_V4SF:
23446 type = v8sf_ftype_v4sf;
23448 case V4SI_FTYPE_V4DF:
23449 type = v4si_ftype_v4df;
23451 case V4SI_FTYPE_V4SI:
23452 type = v4si_ftype_v4si;
23454 case V4SI_FTYPE_V16QI:
23455 type = v4si_ftype_v16qi;
23457 case V4SI_FTYPE_V8SI:
23458 type = v4si_ftype_v8si;
23460 case V4SI_FTYPE_V8HI:
23461 type = v4si_ftype_v8hi;
23463 case V4SI_FTYPE_V4SF:
23464 type = v4si_ftype_v4sf;
23466 case V4SI_FTYPE_V2DF:
23467 type = v4si_ftype_v2df;
23469 case V4HI_FTYPE_V4HI:
23470 type = v4hi_ftype_v4hi;
23472 case V4DF_FTYPE_V4DF:
23473 type = v4df_ftype_v4df;
23475 case V4DF_FTYPE_V4SI:
23476 type = v4df_ftype_v4si;
23478 case V4DF_FTYPE_V4SF:
23479 type = v4df_ftype_v4sf;
23481 case V4DF_FTYPE_V2DF:
23482 type = v4df_ftype_v2df;
23484 case V4SF_FTYPE_V4SF:
23485 case V4SF_FTYPE_V4SF_VEC_MERGE:
23486 type = v4sf_ftype_v4sf;
23488 case V4SF_FTYPE_V8SF:
23489 type = v4sf_ftype_v8sf;
23491 case V4SF_FTYPE_V4SI:
23492 type = v4sf_ftype_v4si;
23494 case V4SF_FTYPE_V4DF:
23495 type = v4sf_ftype_v4df;
23497 case V4SF_FTYPE_V2DF:
23498 type = v4sf_ftype_v2df;
23500 case V2DI_FTYPE_V2DI:
23501 type = v2di_ftype_v2di;
23503 case V2DI_FTYPE_V16QI:
23504 type = v2di_ftype_v16qi;
23506 case V2DI_FTYPE_V8HI:
23507 type = v2di_ftype_v8hi;
23509 case V2DI_FTYPE_V4SI:
23510 type = v2di_ftype_v4si;
23512 case V2SI_FTYPE_V2SI:
23513 type = v2si_ftype_v2si;
23515 case V2SI_FTYPE_V4SF:
23516 type = v2si_ftype_v4sf;
23518 case V2SI_FTYPE_V2DF:
23519 type = v2si_ftype_v2df;
23521 case V2SI_FTYPE_V2SF:
23522 type = v2si_ftype_v2sf;
23524 case V2DF_FTYPE_V4DF:
23525 type = v2df_ftype_v4df;
23527 case V2DF_FTYPE_V4SF:
23528 type = v2df_ftype_v4sf;
23530 case V2DF_FTYPE_V2DF:
23531 case V2DF_FTYPE_V2DF_VEC_MERGE:
23532 type = v2df_ftype_v2df;
23534 case V2DF_FTYPE_V2SI:
23535 type = v2df_ftype_v2si;
23537 case V2DF_FTYPE_V4SI:
23538 type = v2df_ftype_v4si;
23540 case V2SF_FTYPE_V2SF:
23541 type = v2sf_ftype_v2sf;
23543 case V2SF_FTYPE_V2SI:
23544 type = v2sf_ftype_v2si;
23546 case V16QI_FTYPE_V16QI_V16QI:
23547 type = v16qi_ftype_v16qi_v16qi;
23549 case V16QI_FTYPE_V8HI_V8HI:
23550 type = v16qi_ftype_v8hi_v8hi;
23552 case V8QI_FTYPE_V8QI_V8QI:
23553 type = v8qi_ftype_v8qi_v8qi;
23555 case V8QI_FTYPE_V4HI_V4HI:
23556 type = v8qi_ftype_v4hi_v4hi;
23558 case V8HI_FTYPE_V8HI_V8HI:
23559 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23560 type = v8hi_ftype_v8hi_v8hi;
23562 case V8HI_FTYPE_V16QI_V16QI:
23563 type = v8hi_ftype_v16qi_v16qi;
23565 case V8HI_FTYPE_V4SI_V4SI:
23566 type = v8hi_ftype_v4si_v4si;
23568 case V8HI_FTYPE_V8HI_SI_COUNT:
23569 type = v8hi_ftype_v8hi_int;
23571 case V8SF_FTYPE_V8SF_V8SF:
23572 type = v8sf_ftype_v8sf_v8sf;
23574 case V8SF_FTYPE_V8SF_V8SI:
23575 type = v8sf_ftype_v8sf_v8si;
23577 case V4SI_FTYPE_V4SI_V4SI:
23578 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23579 type = v4si_ftype_v4si_v4si;
23581 case V4SI_FTYPE_V8HI_V8HI:
23582 type = v4si_ftype_v8hi_v8hi;
23584 case V4SI_FTYPE_V4SF_V4SF:
23585 type = v4si_ftype_v4sf_v4sf;
23587 case V4SI_FTYPE_V2DF_V2DF:
23588 type = v4si_ftype_v2df_v2df;
23590 case V4SI_FTYPE_V4SI_SI_COUNT:
23591 type = v4si_ftype_v4si_int;
23593 case V4HI_FTYPE_V4HI_V4HI:
23594 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23595 type = v4hi_ftype_v4hi_v4hi;
23597 case V4HI_FTYPE_V8QI_V8QI:
23598 type = v4hi_ftype_v8qi_v8qi;
23600 case V4HI_FTYPE_V2SI_V2SI:
23601 type = v4hi_ftype_v2si_v2si;
23603 case V4HI_FTYPE_V4HI_SI_COUNT:
23604 type = v4hi_ftype_v4hi_int;
23606 case V4DF_FTYPE_V4DF_V4DF:
23607 type = v4df_ftype_v4df_v4df;
23609 case V4DF_FTYPE_V4DF_V4DI:
23610 type = v4df_ftype_v4df_v4di;
23612 case V4SF_FTYPE_V4SF_V4SF:
23613 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23614 type = v4sf_ftype_v4sf_v4sf;
23616 case V4SF_FTYPE_V4SF_V4SI:
23617 type = v4sf_ftype_v4sf_v4si;
23619 case V4SF_FTYPE_V4SF_V2SI:
23620 type = v4sf_ftype_v4sf_v2si;
23622 case V4SF_FTYPE_V4SF_V2DF:
23623 type = v4sf_ftype_v4sf_v2df;
23625 case V4SF_FTYPE_V4SF_DI:
23626 type = v4sf_ftype_v4sf_int64;
23628 case V4SF_FTYPE_V4SF_SI:
23629 type = v4sf_ftype_v4sf_int;
23631 case V2DI_FTYPE_V2DI_V2DI:
23632 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23633 type = v2di_ftype_v2di_v2di;
23635 case V2DI_FTYPE_V16QI_V16QI:
23636 type = v2di_ftype_v16qi_v16qi;
23638 case V2DI_FTYPE_V4SI_V4SI:
23639 type = v2di_ftype_v4si_v4si;
23641 case V2DI_FTYPE_V2DI_V16QI:
23642 type = v2di_ftype_v2di_v16qi;
23644 case V2DI_FTYPE_V2DF_V2DF:
23645 type = v2di_ftype_v2df_v2df;
23647 case V2DI_FTYPE_V2DI_SI_COUNT:
23648 type = v2di_ftype_v2di_int;
23650 case V2SI_FTYPE_V2SI_V2SI:
23651 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23652 type = v2si_ftype_v2si_v2si;
23654 case V2SI_FTYPE_V4HI_V4HI:
23655 type = v2si_ftype_v4hi_v4hi;
23657 case V2SI_FTYPE_V2SF_V2SF:
23658 type = v2si_ftype_v2sf_v2sf;
23660 case V2SI_FTYPE_V2SI_SI_COUNT:
23661 type = v2si_ftype_v2si_int;
23663 case V2DF_FTYPE_V2DF_V2DF:
23664 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23665 type = v2df_ftype_v2df_v2df;
23667 case V2DF_FTYPE_V2DF_V4SF:
23668 type = v2df_ftype_v2df_v4sf;
23670 case V2DF_FTYPE_V2DF_V2DI:
23671 type = v2df_ftype_v2df_v2di;
23673 case V2DF_FTYPE_V2DF_DI:
23674 type = v2df_ftype_v2df_int64;
23676 case V2DF_FTYPE_V2DF_SI:
23677 type = v2df_ftype_v2df_int;
23679 case V2SF_FTYPE_V2SF_V2SF:
23680 type = v2sf_ftype_v2sf_v2sf;
23682 case V1DI_FTYPE_V1DI_V1DI:
23683 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23684 type = v1di_ftype_v1di_v1di;
23686 case V1DI_FTYPE_V8QI_V8QI:
23687 type = v1di_ftype_v8qi_v8qi;
23689 case V1DI_FTYPE_V2SI_V2SI:
23690 type = v1di_ftype_v2si_v2si;
23692 case V1DI_FTYPE_V1DI_SI_COUNT:
23693 type = v1di_ftype_v1di_int;
23695 case UINT64_FTYPE_UINT64_UINT64:
23696 type = uint64_ftype_uint64_uint64;
23698 case UINT_FTYPE_UINT_UINT:
23699 type = unsigned_ftype_unsigned_unsigned;
23701 case UINT_FTYPE_UINT_USHORT:
23702 type = unsigned_ftype_unsigned_ushort;
23704 case UINT_FTYPE_UINT_UCHAR:
23705 type = unsigned_ftype_unsigned_uchar;
23707 case UINT16_FTYPE_UINT16_INT:
23708 type = ushort_ftype_ushort_int;
23710 case UINT8_FTYPE_UINT8_INT:
23711 type = uchar_ftype_uchar_int;
23713 case V8HI_FTYPE_V8HI_INT:
23714 type = v8hi_ftype_v8hi_int;
23716 case V8SF_FTYPE_V8SF_INT:
23717 type = v8sf_ftype_v8sf_int;
23719 case V4SI_FTYPE_V4SI_INT:
23720 type = v4si_ftype_v4si_int;
23722 case V4SI_FTYPE_V8SI_INT:
23723 type = v4si_ftype_v8si_int;
23725 case V4HI_FTYPE_V4HI_INT:
23726 type = v4hi_ftype_v4hi_int;
23728 case V4DF_FTYPE_V4DF_INT:
23729 type = v4df_ftype_v4df_int;
23731 case V4SF_FTYPE_V4SF_INT:
23732 type = v4sf_ftype_v4sf_int;
23734 case V4SF_FTYPE_V8SF_INT:
23735 type = v4sf_ftype_v8sf_int;
23737 case V2DI_FTYPE_V2DI_INT:
23738 case V2DI2TI_FTYPE_V2DI_INT:
23739 type = v2di_ftype_v2di_int;
23741 case V2DF_FTYPE_V2DF_INT:
23742 type = v2df_ftype_v2df_int;
23744 case V2DF_FTYPE_V4DF_INT:
23745 type = v2df_ftype_v4df_int;
23747 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23748 type = v16qi_ftype_v16qi_v16qi_v16qi;
23750 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23751 type = v8sf_ftype_v8sf_v8sf_v8sf;
23753 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23754 type = v4df_ftype_v4df_v4df_v4df;
23756 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23757 type = v4sf_ftype_v4sf_v4sf_v4sf;
23759 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23760 type = v2df_ftype_v2df_v2df_v2df;
23762 case V16QI_FTYPE_V16QI_V16QI_INT:
23763 type = v16qi_ftype_v16qi_v16qi_int;
23765 case V8SI_FTYPE_V8SI_V8SI_INT:
23766 type = v8si_ftype_v8si_v8si_int;
23768 case V8SI_FTYPE_V8SI_V4SI_INT:
23769 type = v8si_ftype_v8si_v4si_int;
23771 case V8HI_FTYPE_V8HI_V8HI_INT:
23772 type = v8hi_ftype_v8hi_v8hi_int;
23774 case V8SF_FTYPE_V8SF_V8SF_INT:
23775 type = v8sf_ftype_v8sf_v8sf_int;
23777 case V8SF_FTYPE_V8SF_V4SF_INT:
23778 type = v8sf_ftype_v8sf_v4sf_int;
23780 case V4SI_FTYPE_V4SI_V4SI_INT:
23781 type = v4si_ftype_v4si_v4si_int;
23783 case V4DF_FTYPE_V4DF_V4DF_INT:
23784 type = v4df_ftype_v4df_v4df_int;
23786 case V4DF_FTYPE_V4DF_V2DF_INT:
23787 type = v4df_ftype_v4df_v2df_int;
23789 case V4SF_FTYPE_V4SF_V4SF_INT:
23790 type = v4sf_ftype_v4sf_v4sf_int;
23792 case V2DI_FTYPE_V2DI_V2DI_INT:
23793 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23794 type = v2di_ftype_v2di_v2di_int;
23796 case V2DF_FTYPE_V2DF_V2DF_INT:
23797 type = v2df_ftype_v2df_v2df_int;
23799 case V2DI_FTYPE_V2DI_UINT_UINT:
23800 type = v2di_ftype_v2di_unsigned_unsigned;
23802 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23803 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23805 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23806 type = v1di_ftype_v1di_v1di_int;
23809 gcc_unreachable ();
23812 def_builtin_const (d->mask, d->name, type, d->code);
23815 /* pcmpestr[im] insns. */
23816 for (i = 0, d = bdesc_pcmpestr;
23817 i < ARRAY_SIZE (bdesc_pcmpestr);
23820 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23821 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23823 ftype = int_ftype_v16qi_int_v16qi_int_int;
23824 def_builtin_const (d->mask, d->name, ftype, d->code);
23827 /* pcmpistr[im] insns. */
23828 for (i = 0, d = bdesc_pcmpistr;
23829 i < ARRAY_SIZE (bdesc_pcmpistr);
23832 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23833 ftype = v16qi_ftype_v16qi_v16qi_int;
23835 ftype = int_ftype_v16qi_v16qi_int;
23836 def_builtin_const (d->mask, d->name, ftype, d->code);
23839 /* comi/ucomi insns. */
23840 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23841 if (d->mask == OPTION_MASK_ISA_SSE2)
23842 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23844 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23847 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23848 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23850 /* SSE or 3DNow!A */
23851 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23854 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23856 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23857 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23860 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23861 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23864 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23865 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23866 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23867 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23868 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23869 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23872 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23875 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23876 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23878 /* Access to the vec_init patterns. */
23879 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23880 integer_type_node, NULL_TREE);
23881 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23883 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23884 short_integer_type_node,
23885 short_integer_type_node,
23886 short_integer_type_node, NULL_TREE);
23887 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23889 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23890 char_type_node, char_type_node,
23891 char_type_node, char_type_node,
23892 char_type_node, char_type_node,
23893 char_type_node, NULL_TREE);
23894 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23896 /* Access to the vec_extract patterns. */
23897 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23898 integer_type_node, NULL_TREE);
23899 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23901 ftype = build_function_type_list (long_long_integer_type_node,
23902 V2DI_type_node, integer_type_node,
23904 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23906 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23907 integer_type_node, NULL_TREE);
23908 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23910 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23911 integer_type_node, NULL_TREE);
23912 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23914 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23915 integer_type_node, NULL_TREE);
23916 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23918 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23919 integer_type_node, NULL_TREE);
23920 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23922 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23923 integer_type_node, NULL_TREE);
23924 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23926 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23927 integer_type_node, NULL_TREE);
23928 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23930 /* Access to the vec_set patterns. */
23931 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23933 integer_type_node, NULL_TREE);
23934 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23936 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23938 integer_type_node, NULL_TREE);
23939 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23941 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23943 integer_type_node, NULL_TREE);
23944 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23946 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23948 integer_type_node, NULL_TREE);
23949 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23951 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23953 integer_type_node, NULL_TREE);
23954 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23956 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23958 integer_type_node, NULL_TREE);
23959 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23961 /* Add SSE5 multi-arg argument instructions */
23962 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23964 tree mtype = NULL_TREE;
23969 switch ((enum multi_arg_type)d->flag)
23971 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23972 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23973 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23974 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23975 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23976 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23977 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23978 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23979 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23980 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23981 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23982 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23983 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23984 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23985 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23986 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23987 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23988 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23989 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23990 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23991 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23992 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23993 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23994 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23995 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23996 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23997 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23998 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23999 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
24000 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
24001 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
24002 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
24003 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
24004 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
24005 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
24006 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
24007 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
24008 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
24009 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
24010 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
24011 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
24012 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
24013 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
24014 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
24015 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
24016 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
24017 case MULTI_ARG_UNKNOWN:
24019 gcc_unreachable ();
24023 def_builtin_const (d->mask, d->name, mtype, d->code);
24027 /* Internal method for ix86_init_builtins. */
/* Register the calling-convention-specific variants of the stdarg
   builtins: __builtin_ms_va_{start,end,copy} for the Microsoft ABI and
   __builtin_sysv_va_{start,end,copy} for the System V ABI.  Each set is
   tagged with the matching "ms_abi"/"sysv_abi" attribute list so the
   middle end expands it against the right va_list layout.
   NOTE(review): several lines (return type, braces, an early-return
   guard) are elided in this extract — confirm against the full file.  */
24030 ix86_init_builtins_va_builtins_abi (void)
24032 tree ms_va_ref, sysv_va_ref;
24033 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
24034 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
24035 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
24036 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists that mark each builtin with its ABI.  */
24040 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
24041 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* Reference types used for the va_list parameter of each variant.
   The sysv_va_ref assignment's left-hand side is elided here;
   presumably it is sysv_va_ref = build_pointer_type (...) — verify.  */
24042 ms_va_ref = build_reference_type (ms_va_list_type_node);
24044 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Function types: va_end/va_copy are fixed-arity, va_start is varargs.  */
24047 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24048 fnvoid_va_start_ms =
24049 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24050 fnvoid_va_end_sysv =
24051 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
24052 fnvoid_va_start_sysv =
24053 build_varargs_function_type_list (void_type_node, sysv_va_ref,
24055 fnvoid_va_copy_ms =
24056 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
24058 fnvoid_va_copy_sysv =
24059 build_function_type_list (void_type_node, sysv_va_ref,
24060 sysv_va_ref, NULL_TREE);
/* Register all six builtins.  Both ABI variants map onto the generic
   BUILT_IN_VA_START/END/COPY codes; the attribute list distinguishes
   which va_list layout applies.  */
24062 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
24063 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
24064 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
24065 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
24066 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
24067 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
24068 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
24069 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24070 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
24071 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24072 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
24073 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level TARGET_INIT_BUILTINS hook for the x86 back end.  Registers
   the extended float types (__float80/__float128), the TFmode math
   builtins that libgcc relies on, and then delegates to the MMX/SSE and
   va_list-ABI initializers.  NOTE(review): return type line, braces and
   some declarations (e.g. of `ftype'/`decl') are elided in this extract.  */
24077 ix86_init_builtins (void)
24079 tree float128_type_node = make_node (REAL_TYPE);
/* The __float80 type: when long double is already XFmode, just expose
   it under the __float80 name; otherwise build a fresh 80-bit REAL_TYPE.  */
24082 /* The __float80 type. */
24083 if (TYPE_MODE (long_double_type_node) == XFmode)
24084 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
24088 /* The __float80 type. */
24089 tree float80_type_node = make_node (REAL_TYPE);
24091 TYPE_PRECISION (float80_type_node) = 80;
24092 layout_type (float80_type_node);
24093 (*lang_hooks.types.register_builtin_type) (float80_type_node,
24097 /* The __float128 type. */
24098 TYPE_PRECISION (float128_type_node) = 128;
24099 layout_type (float128_type_node);
24100 (*lang_hooks.types.register_builtin_type) (float128_type_node,
/* TFmode builtins: __builtin_infq and __builtin_huge_valq take no
   arguments and return a __float128.  */
24103 /* TFmode support builtins. */
24104 ftype = build_function_type (float128_type_node, void_list_node);
24105 decl = add_builtin_function ("__builtin_infq", ftype,
24106 IX86_BUILTIN_INFQ, BUILT_IN_MD,
24108 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
24110 decl = add_builtin_function ("__builtin_huge_valq", ftype,
24111 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
24113 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
24115 /* We will expand them to normal call if SSE2 isn't available since
24116 they are used by libgcc. */
/* __builtin_fabsq maps to libgcc's __fabstf2; marked TREE_READONLY
   (pure) since it has no side effects.  */
24117 ftype = build_function_type_list (float128_type_node,
24118 float128_type_node,
24120 decl = add_builtin_function ("__builtin_fabsq", ftype,
24121 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
24122 "__fabstf2", NULL_TREE);
24123 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
24124 TREE_READONLY (decl) = 1;
/* __builtin_copysignq maps to libgcc's __copysigntf3.  */
24126 ftype = build_function_type_list (float128_type_node,
24127 float128_type_node,
24128 float128_type_node,
24130 decl = add_builtin_function ("__builtin_copysignq", ftype,
24131 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
24132 "__copysigntf3", NULL_TREE);
24133 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
24134 TREE_READONLY (decl) = 1;
/* Register the vector-ISA builtins and the per-ABI varargs builtins.  */
24136 ix86_init_mmx_sse_builtins ();
24138 ix86_init_builtins_va_builtins_abi ();
24141 /* Errors in the source file can cause expand_expr to return const0_rtx
24142 where we expect a vector. To avoid crashing, use one of the vector
24143 clear instructions. */
/* Return X, but if X is the scalar const0_rtx substitute the all-zero
   vector constant of MODE so downstream vector predicates accept it.
   NOTE(review): the `return x;' line and braces are elided here.  */
24145 safe_vector_operand (rtx x, enum machine_mode mode)
24147 if (x == const0_rtx)
24148 x = CONST0_RTX (mode);
24152 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin EXP using insn pattern ICODE, placing the
   result in TARGET (or a fresh register when TARGET is unsuitable).
   Returns the result rtx; the tail of the function (emitting PAT and the
   return) is elided in this extract.  */
24155 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
24158 tree arg0 = CALL_EXPR_ARG (exp, 0);
24159 tree arg1 = CALL_EXPR_ARG (exp, 1);
24160 rtx op0 = expand_normal (arg0);
24161 rtx op1 = expand_normal (arg1);
24162 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24163 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24164 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx leaking in from erroneous source (see
   safe_vector_operand).  */
24166 if (VECTOR_MODE_P (mode0))
24167 op0 = safe_vector_operand (op0, mode0);
24168 if (VECTOR_MODE_P (mode1))
24169 op1 = safe_vector_operand (op1, mode1);
/* Use a fresh pseudo when optimizing or when TARGET has the wrong mode
   or fails the destination predicate.  */
24171 if (optimize || !target
24172 || GET_MODE (target) != tmode
24173 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24174 target = gen_reg_rtx (tmode);
/* Some patterns want a TImode second operand but the builtin passes an
   SImode value (e.g. shift counts): widen it by loading into a V4SI
   register and taking the TImode lowpart.  */
24176 if (GET_MODE (op1) == SImode && mode1 == TImode)
24178 rtx x = gen_reg_rtx (V4SImode);
24179 emit_insn (gen_sse2_loadd (x, op1));
24180 op1 = gen_lowpart (TImode, x);
/* Force operands into registers when they fail the insn's predicates.  */
24183 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
24184 op0 = copy_to_mode_reg (mode0, op0);
24185 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
24186 op1 = copy_to_mode_reg (mode1, op1);
24188 pat = GEN_FCN (icode) (target, op0, op1);
24197 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expand an SSE5 multi-argument builtin EXP via insn pattern ICODE.
   M_TYPE selects how many operands the pattern takes and whether the
   last argument is an immediate, a comparison, etc.; SUB_CODE is the
   rtx comparison/sub-operation code embedded into the pattern for the
   _CMP/_TF variants.  Returns the result in TARGET or a fresh register.
   NOTE(review): this extract elides the `nargs = N' assignments inside
   the switch, several braces, and the final emit/return — the comments
   below describe only what the visible lines establish.  */
24200 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
24201 enum multi_arg_type m_type,
24202 enum rtx_code sub_code)
24207 bool comparison_p = false;
24209 bool last_arg_constant = false;
24210 int num_memory = 0;
24213 enum machine_mode mode;
24216 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: three-operand forms.  */
24220 case MULTI_ARG_3_SF:
24221 case MULTI_ARG_3_DF:
24222 case MULTI_ARG_3_DI:
24223 case MULTI_ARG_3_SI:
24224 case MULTI_ARG_3_SI_DI:
24225 case MULTI_ARG_3_HI:
24226 case MULTI_ARG_3_HI_SI:
24227 case MULTI_ARG_3_QI:
24228 case MULTI_ARG_3_PERMPS:
24229 case MULTI_ARG_3_PERMPD:
/* Two-operand forms.  */
24233 case MULTI_ARG_2_SF:
24234 case MULTI_ARG_2_DF:
24235 case MULTI_ARG_2_DI:
24236 case MULTI_ARG_2_SI:
24237 case MULTI_ARG_2_HI:
24238 case MULTI_ARG_2_QI:
/* Two operands where the trailing one must be a compile-time
   immediate (rotate/shift counts).  */
24242 case MULTI_ARG_2_DI_IMM:
24243 case MULTI_ARG_2_SI_IMM:
24244 case MULTI_ARG_2_HI_IMM:
24245 case MULTI_ARG_2_QI_IMM:
24247 last_arg_constant = true;
/* Single-operand (unary/conversion) forms.  */
24250 case MULTI_ARG_1_SF:
24251 case MULTI_ARG_1_DF:
24252 case MULTI_ARG_1_DI:
24253 case MULTI_ARG_1_SI:
24254 case MULTI_ARG_1_HI:
24255 case MULTI_ARG_1_QI:
24256 case MULTI_ARG_1_SI_DI:
24257 case MULTI_ARG_1_HI_DI:
24258 case MULTI_ARG_1_HI_SI:
24259 case MULTI_ARG_1_QI_DI:
24260 case MULTI_ARG_1_QI_SI:
24261 case MULTI_ARG_1_QI_HI:
24262 case MULTI_ARG_1_PH2PS:
24263 case MULTI_ARG_1_PS2PH:
/* Two-operand comparisons: the pattern additionally embeds a
   comparison rtx built from SUB_CODE (see comparison_p below).  */
24267 case MULTI_ARG_2_SF_CMP:
24268 case MULTI_ARG_2_DF_CMP:
24269 case MULTI_ARG_2_DI_CMP:
24270 case MULTI_ARG_2_SI_CMP:
24271 case MULTI_ARG_2_HI_CMP:
24272 case MULTI_ARG_2_QI_CMP:
24274 comparison_p = true;
/* Two-operand "true/false" comparison variants.  */
24277 case MULTI_ARG_2_SF_TF:
24278 case MULTI_ARG_2_DF_TF:
24279 case MULTI_ARG_2_DI_TF:
24280 case MULTI_ARG_2_SI_TF:
24281 case MULTI_ARG_2_HI_TF:
24282 case MULTI_ARG_2_QI_TF:
24287 case MULTI_ARG_UNKNOWN:
24289 gcc_unreachable ();
/* Pick a suitable destination register.  */
24292 if (optimize || !target
24293 || GET_MODE (target) != tmode
24294 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24295 target = gen_reg_rtx (tmode);
24297 gcc_assert (nargs <= 4);
/* Expand and legitimize each argument.  For comparison patterns the
   operand indices are shifted by one (ADJUST) because operand 1 of the
   pattern is the comparison rtx itself.  */
24299 for (i = 0; i < nargs; i++)
24301 tree arg = CALL_EXPR_ARG (exp, i);
24302 rtx op = expand_normal (arg);
24303 int adjust = (comparison_p) ? 1 : 0;
24304 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24306 if (last_arg_constant && i == nargs-1)
24308 if (!CONST_INT_P (op))
24310 error ("last argument must be an immediate");
24311 return gen_reg_rtx (tmode);
24316 if (VECTOR_MODE_P (mode))
24317 op = safe_vector_operand (op, mode);
24319 /* If we aren't optimizing, only allow one memory operand to be
24321 if (memory_operand (op, mode))
24324 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24327 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24329 op = force_reg (mode, op);
24333 args[i].mode = mode;
/* Emit the pattern with the right operand count/shape.  */
24339 pat = GEN_FCN (icode) (target, args[0].op);
24344 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24345 GEN_INT ((int)sub_code));
24346 else if (! comparison_p)
24347 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
/* Comparison form: build the SUB_CODE comparison rtx explicitly and
   pass it as the pattern's second operand.  */
24350 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24354 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24359 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24363 gcc_unreachable ();
24373 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24374 insns with vec_merge. */
/* Expand a one-argument builtin EXP via ICODE where the insn pattern is
   a vec_merge form: the single source operand OP0 is also re-used as
   the pattern's third (merge) operand OP1.  NOTE(review): the line
   assigning op1 (presumably `op1 = op0;') and the final emit/return are
   elided in this extract.  */
24377 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24381 tree arg0 = CALL_EXPR_ARG (exp, 0);
24382 rtx op1, op0 = expand_normal (arg0);
24383 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24384 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Pick a suitable destination register.  */
24386 if (optimize || !target
24387 || GET_MODE (target) != tmode
24388 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24389 target = gen_reg_rtx (tmode);
24391 if (VECTOR_MODE_P (mode0))
24392 op0 = safe_vector_operand (op0, mode0);
/* Legitimize both uses of the source operand.  */
24394 if ((optimize && !register_operand (op0, mode0))
24395 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24396 op0 = copy_to_mode_reg (mode0, op0);
24399 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24400 op1 = copy_to_mode_reg (mode0, op1);
24402 pat = GEN_FCN (icode) (target, op0, op1);
24409 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE compare builtin described by D (cmpps/cmppd family):
   emits D->icode with a comparison rtx of code D->comparison as the
   pattern's extra operand.  SWAP requests exchanging op0/op1 for
   comparisons only available in the swapped form.  NOTE(review): the
   body of the swap (beyond the temp copy) and the final emit/return are
   elided in this extract.  */
24412 ix86_expand_sse_compare (const struct builtin_description *d,
24413 tree exp, rtx target, bool swap)
24416 tree arg0 = CALL_EXPR_ARG (exp, 0);
24417 tree arg1 = CALL_EXPR_ARG (exp, 1);
24418 rtx op0 = expand_normal (arg0);
24419 rtx op1 = expand_normal (arg1);
24421 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24422 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24423 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24424 enum rtx_code comparison = d->comparison;
24426 if (VECTOR_MODE_P (mode0))
24427 op0 = safe_vector_operand (op0, mode0);
24428 if (VECTOR_MODE_P (mode1))
24429 op1 = safe_vector_operand (op1, mode1);
24431 /* Swap operands if we have a comparison that isn't available in
/* Copy op1 through a fresh register as part of the operand swap.  */
24435 rtx tmp = gen_reg_rtx (mode1);
24436 emit_move_insn (tmp, op1);
/* Pick a suitable destination register.  */
24441 if (optimize || !target
24442 || GET_MODE (target) != tmode
24443 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24444 target = gen_reg_rtx (tmode);
/* Legitimize both source operands.  */
24446 if ((optimize && !register_operand (op0, mode0))
24447 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24448 op0 = copy_to_mode_reg (mode0, op0);
24449 if ((optimize && !register_operand (op1, mode1))
24450 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24451 op1 = copy_to_mode_reg (mode1, op1);
/* OP2 carries the comparison code; the pattern consumes it as its
   selector operand.  */
24453 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24454 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24461 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comiss/ucomiss-style builtin D: emit the flag-setting
   comparison insn, then materialize the boolean result by setting the
   low byte of a fresh SImode register from the condition code
   D->comparison and returning the SImode SUBREG_REG.  NOTE(review):
   the swap body, the emit of PAT, and part of the condition-register
   rtx are elided in this extract.  */
24464 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24468 tree arg0 = CALL_EXPR_ARG (exp, 0);
24469 tree arg1 = CALL_EXPR_ARG (exp, 1);
24470 rtx op0 = expand_normal (arg0);
24471 rtx op1 = expand_normal (arg1);
24472 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24473 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24474 enum rtx_code comparison = d->comparison;
24476 if (VECTOR_MODE_P (mode0))
24477 op0 = safe_vector_operand (op0, mode0);
24478 if (VECTOR_MODE_P (mode1))
24479 op1 = safe_vector_operand (op1, mode1);
24481 /* Swap operands if we have a comparison that isn't available in
24483 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Result register: zero an SImode pseudo, then write only its low
   QImode part via STRICT_LOW_PART so the upper bits stay zero.  */
24490 target = gen_reg_rtx (SImode);
24491 emit_move_insn (target, const0_rtx);
24492 target = gen_rtx_SUBREG (QImode, target, 0);
24494 if ((optimize && !register_operand (op0, mode0))
24495 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24496 op0 = copy_to_mode_reg (mode0, op0);
24497 if ((optimize && !register_operand (op1, mode1))
24498 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24499 op1 = copy_to_mode_reg (mode1, op1);
24501 pat = GEN_FCN (d->icode) (op0, op1);
/* Set the low byte from the comparison of the flags register.  */
24505 emit_insn (gen_rtx_SET (VOIDmode,
24506 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24507 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode register underlying the QImode subreg.  */
24511 return SUBREG_REG (target);
24514 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a ptest/vtestps-style builtin D: emit the flag-setting test
   insn on the two vector operands, then extract the requested condition
   (D->comparison) into the low byte of a zeroed SImode register, which
   is returned.  Structure parallels ix86_expand_sse_comi above.
   NOTE(review): the emit of PAT and part of the condition rtx are
   elided in this extract.  */
24517 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24521 tree arg0 = CALL_EXPR_ARG (exp, 0);
24522 tree arg1 = CALL_EXPR_ARG (exp, 1);
24523 rtx op0 = expand_normal (arg0);
24524 rtx op1 = expand_normal (arg1);
24525 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24526 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24527 enum rtx_code comparison = d->comparison;
24529 if (VECTOR_MODE_P (mode0))
24530 op0 = safe_vector_operand (op0, mode0);
24531 if (VECTOR_MODE_P (mode1))
24532 op1 = safe_vector_operand (op1, mode1);
/* Zero an SImode pseudo and expose its low byte for the result.  */
24534 target = gen_reg_rtx (SImode);
24535 emit_move_insn (target, const0_rtx);
24536 target = gen_rtx_SUBREG (QImode, target, 0);
24538 if ((optimize && !register_operand (op0, mode0))
24539 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24540 op0 = copy_to_mode_reg (mode0, op0);
24541 if ((optimize && !register_operand (op1, mode1))
24542 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24543 op1 = copy_to_mode_reg (mode1, op1);
24545 pat = GEN_FCN (d->icode) (op0, op1);
/* Capture the flag condition into the low byte.  */
24549 emit_insn (gen_rtx_SET (VOIDmode,
24550 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24551 gen_rtx_fmt_ee (comparison, QImode,
24555 return SUBREG_REG (target);
24558 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expand an SSE4.2 explicit-length string-compare builtin D
   (pcmpestri/pcmpestrm).  The five arguments are: two vectors, their
   two explicit lengths, and an 8-bit immediate control byte.  The insn
   pattern has two outputs (index and mask); depending on D->code one of
   them is the real result and the other is a scratch.  For the flag
   variants, D->flag identifies the condition-code mode and the result
   is the EQ test of the flags register.  NOTE(review): several emit
   and error-return lines are elided in this extract.  */
24561 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24562 tree exp, rtx target)
24565 tree arg0 = CALL_EXPR_ARG (exp, 0);
24566 tree arg1 = CALL_EXPR_ARG (exp, 1);
24567 tree arg2 = CALL_EXPR_ARG (exp, 2);
24568 tree arg3 = CALL_EXPR_ARG (exp, 3);
24569 tree arg4 = CALL_EXPR_ARG (exp, 4);
24570 rtx scratch0, scratch1;
24571 rtx op0 = expand_normal (arg0);
24572 rtx op1 = expand_normal (arg1);
24573 rtx op2 = expand_normal (arg2);
24574 rtx op3 = expand_normal (arg3);
24575 rtx op4 = expand_normal (arg4);
24576 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* Operand modes: 0/1 are the two outputs, 2/4 the vector inputs,
   3/5 the integer lengths, 6 the immediate.  */
24578 tmode0 = insn_data[d->icode].operand[0].mode;
24579 tmode1 = insn_data[d->icode].operand[1].mode;
24580 modev2 = insn_data[d->icode].operand[2].mode;
24581 modei3 = insn_data[d->icode].operand[3].mode;
24582 modev4 = insn_data[d->icode].operand[4].mode;
24583 modei5 = insn_data[d->icode].operand[5].mode;
24584 modeimm = insn_data[d->icode].operand[6].mode;
24586 if (VECTOR_MODE_P (modev2))
24587 op0 = safe_vector_operand (op0, modev2);
24588 if (VECTOR_MODE_P (modev4))
24589 op2 = safe_vector_operand (op2, modev4);
/* Legitimize inputs against the pattern's predicates.  */
24591 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24592 op0 = copy_to_mode_reg (modev2, op0);
24593 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24594 op1 = copy_to_mode_reg (modei3, op1);
24595 if ((optimize && !register_operand (op2, modev4))
24596 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24597 op2 = copy_to_mode_reg (modev4, op2);
24598 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24599 op3 = copy_to_mode_reg (modei5, op3);
/* The control byte must be a compile-time 8-bit immediate.  */
24601 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24603 error ("the fifth argument must be a 8-bit immediate");
/* pcmpestri: operand 0 (the index) is the result, operand 1 is scratch.  */
24607 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24609 if (optimize || !target
24610 || GET_MODE (target) != tmode0
24611 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24612 target = gen_reg_rtx (tmode0);
24614 scratch1 = gen_reg_rtx (tmode1);
24616 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* pcmpestrm: operand 1 (the mask) is the result, operand 0 is scratch.  */
24618 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24620 if (optimize || !target
24621 || GET_MODE (target) != tmode1
24622 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24623 target = gen_reg_rtx (tmode1)
24625 scratch0 = gen_reg_rtx (tmode0);
24627 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-reading variants (pcmpestra/c/o/s/z): both outputs are scratch;
   the result is extracted from the flags register below.  */
24631 gcc_assert (d->flag);
24633 scratch0 = gen_reg_rtx (tmode0);
24634 scratch1 = gen_reg_rtx (tmode1);
24636 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Zero an SImode pseudo and set its low byte from the EQ test of the
   flags register; D->flag presumably encodes the CC mode — verify.  */
24646 target = gen_reg_rtx (SImode);
24647 emit_move_insn (target, const0_rtx);
24648 target = gen_rtx_SUBREG (QImode, target, 0);
24651 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24652 gen_rtx_fmt_ee (EQ, QImode,
24653 gen_rtx_REG ((enum machine_mode) d->flag,
24656 return SUBREG_REG (target);
24663 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expand an SSE4.2 implicit-length string-compare builtin D
   (pcmpistri/pcmpistrm).  Three arguments: two vectors and an 8-bit
   immediate control byte; lengths are implicit (NUL-terminated data).
   Mirrors ix86_expand_sse_pcmpestr: one of the pattern's two outputs is
   the result depending on D->code, and the flag variants read the
   condition codes.  NOTE(review): several emit and error-return lines
   are elided in this extract.  */
24666 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24667 tree exp, rtx target)
24670 tree arg0 = CALL_EXPR_ARG (exp, 0);
24671 tree arg1 = CALL_EXPR_ARG (exp, 1);
24672 tree arg2 = CALL_EXPR_ARG (exp, 2);
24673 rtx scratch0, scratch1;
24674 rtx op0 = expand_normal (arg0);
24675 rtx op1 = expand_normal (arg1);
24676 rtx op2 = expand_normal (arg2);
24677 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Operand modes: 0/1 outputs, 2/3 vector inputs, 4 the immediate.  */
24679 tmode0 = insn_data[d->icode].operand[0].mode;
24680 tmode1 = insn_data[d->icode].operand[1].mode;
24681 modev2 = insn_data[d->icode].operand[2].mode;
24682 modev3 = insn_data[d->icode].operand[3].mode;
24683 modeimm = insn_data[d->icode].operand[4].mode;
24685 if (VECTOR_MODE_P (modev2))
24686 op0 = safe_vector_operand (op0, modev2);
24687 if (VECTOR_MODE_P (modev3))
24688 op1 = safe_vector_operand (op1, modev3);
/* Legitimize the vector inputs.  */
24690 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24691 op0 = copy_to_mode_reg (modev2, op0);
24692 if ((optimize && !register_operand (op1, modev3))
24693 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24694 op1 = copy_to_mode_reg (modev3, op1);
/* The control byte must be a compile-time 8-bit immediate.  */
24696 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24698 error ("the third argument must be a 8-bit immediate");
/* pcmpistri: operand 0 (the index) is the result, operand 1 is scratch.  */
24702 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24704 if (optimize || !target
24705 || GET_MODE (target) != tmode0
24706 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24707 target = gen_reg_rtx (tmode0);
24709 scratch1 = gen_reg_rtx (tmode1);
24711 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* pcmpistrm: operand 1 (the mask) is the result, operand 0 is scratch.  */
24713 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24715 if (optimize || !target
24716 || GET_MODE (target) != tmode1
24717 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24718 target = gen_reg_rtx (tmode1);
24720 scratch0 = gen_reg_rtx (tmode0);
24722 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-reading variants: both outputs are scratch; the result comes
   from the flags register below.  */
24726 gcc_assert (d->flag);
24728 scratch0 = gen_reg_rtx (tmode0);
24729 scratch1 = gen_reg_rtx (tmode1);
24731 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Zero an SImode pseudo and set its low byte from the EQ test of the
   flags register; D->flag presumably encodes the CC mode — verify.  */
24741 target = gen_reg_rtx (SImode);
24742 emit_move_insn (target, const0_rtx);
24743 target = gen_rtx_SUBREG (QImode, target, 0);
24746 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24747 gen_rtx_fmt_ee (EQ, QImode,
24748 gen_rtx_REG ((enum machine_mode) d->flag,
24751 return SUBREG_REG (target);
24757 /* Subroutine of ix86_expand_builtin to take care of insns with
24758 variable number of operands. */
24761 ix86_expand_args_builtin (const struct builtin_description *d,
24762 tree exp, rtx target)
24764 rtx pat, real_target;
24765 unsigned int i, nargs;
24766 unsigned int nargs_constant = 0;
24767 int num_memory = 0;
24771 enum machine_mode mode;
24773 bool last_arg_count = false;
24774 enum insn_code icode = d->icode;
24775 const struct insn_data *insn_p = &insn_data[icode];
24776 enum machine_mode tmode = insn_p->operand[0].mode;
24777 enum machine_mode rmode = VOIDmode;
24779 enum rtx_code comparison = d->comparison;
24781 switch ((enum ix86_builtin_type) d->flag)
24783 case INT_FTYPE_V8SF_V8SF_PTEST:
24784 case INT_FTYPE_V4DI_V4DI_PTEST:
24785 case INT_FTYPE_V4DF_V4DF_PTEST:
24786 case INT_FTYPE_V4SF_V4SF_PTEST:
24787 case INT_FTYPE_V2DI_V2DI_PTEST:
24788 case INT_FTYPE_V2DF_V2DF_PTEST:
24789 return ix86_expand_sse_ptest (d, exp, target);
24790 case FLOAT128_FTYPE_FLOAT128:
24791 case FLOAT_FTYPE_FLOAT:
24792 case INT_FTYPE_INT:
24793 case UINT64_FTYPE_INT:
24794 case INT64_FTYPE_INT64:
24795 case INT64_FTYPE_V4SF:
24796 case INT64_FTYPE_V2DF:
24797 case INT_FTYPE_V16QI:
24798 case INT_FTYPE_V8QI:
24799 case INT_FTYPE_V8SF:
24800 case INT_FTYPE_V4DF:
24801 case INT_FTYPE_V4SF:
24802 case INT_FTYPE_V2DF:
24803 case V16QI_FTYPE_V16QI:
24804 case V8SI_FTYPE_V8SF:
24805 case V8SI_FTYPE_V4SI:
24806 case V8HI_FTYPE_V8HI:
24807 case V8HI_FTYPE_V16QI:
24808 case V8QI_FTYPE_V8QI:
24809 case V8SF_FTYPE_V8SF:
24810 case V8SF_FTYPE_V8SI:
24811 case V8SF_FTYPE_V4SF:
24812 case V4SI_FTYPE_V4SI:
24813 case V4SI_FTYPE_V16QI:
24814 case V4SI_FTYPE_V4SF:
24815 case V4SI_FTYPE_V8SI:
24816 case V4SI_FTYPE_V8HI:
24817 case V4SI_FTYPE_V4DF:
24818 case V4SI_FTYPE_V2DF:
24819 case V4HI_FTYPE_V4HI:
24820 case V4DF_FTYPE_V4DF:
24821 case V4DF_FTYPE_V4SI:
24822 case V4DF_FTYPE_V4SF:
24823 case V4DF_FTYPE_V2DF:
24824 case V4SF_FTYPE_V4SF:
24825 case V4SF_FTYPE_V4SI:
24826 case V4SF_FTYPE_V8SF:
24827 case V4SF_FTYPE_V4DF:
24828 case V4SF_FTYPE_V2DF:
24829 case V2DI_FTYPE_V2DI:
24830 case V2DI_FTYPE_V16QI:
24831 case V2DI_FTYPE_V8HI:
24832 case V2DI_FTYPE_V4SI:
24833 case V2DF_FTYPE_V2DF:
24834 case V2DF_FTYPE_V4SI:
24835 case V2DF_FTYPE_V4DF:
24836 case V2DF_FTYPE_V4SF:
24837 case V2DF_FTYPE_V2SI:
24838 case V2SI_FTYPE_V2SI:
24839 case V2SI_FTYPE_V4SF:
24840 case V2SI_FTYPE_V2SF:
24841 case V2SI_FTYPE_V2DF:
24842 case V2SF_FTYPE_V2SF:
24843 case V2SF_FTYPE_V2SI:
24846 case V4SF_FTYPE_V4SF_VEC_MERGE:
24847 case V2DF_FTYPE_V2DF_VEC_MERGE:
24848 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24849 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24850 case V16QI_FTYPE_V16QI_V16QI:
24851 case V16QI_FTYPE_V8HI_V8HI:
24852 case V8QI_FTYPE_V8QI_V8QI:
24853 case V8QI_FTYPE_V4HI_V4HI:
24854 case V8HI_FTYPE_V8HI_V8HI:
24855 case V8HI_FTYPE_V16QI_V16QI:
24856 case V8HI_FTYPE_V4SI_V4SI:
24857 case V8SF_FTYPE_V8SF_V8SF:
24858 case V8SF_FTYPE_V8SF_V8SI:
24859 case V4SI_FTYPE_V4SI_V4SI:
24860 case V4SI_FTYPE_V8HI_V8HI:
24861 case V4SI_FTYPE_V4SF_V4SF:
24862 case V4SI_FTYPE_V2DF_V2DF:
24863 case V4HI_FTYPE_V4HI_V4HI:
24864 case V4HI_FTYPE_V8QI_V8QI:
24865 case V4HI_FTYPE_V2SI_V2SI:
24866 case V4DF_FTYPE_V4DF_V4DF:
24867 case V4DF_FTYPE_V4DF_V4DI:
24868 case V4SF_FTYPE_V4SF_V4SF:
24869 case V4SF_FTYPE_V4SF_V4SI:
24870 case V4SF_FTYPE_V4SF_V2SI:
24871 case V4SF_FTYPE_V4SF_V2DF:
24872 case V4SF_FTYPE_V4SF_DI:
24873 case V4SF_FTYPE_V4SF_SI:
24874 case V2DI_FTYPE_V2DI_V2DI:
24875 case V2DI_FTYPE_V16QI_V16QI:
24876 case V2DI_FTYPE_V4SI_V4SI:
24877 case V2DI_FTYPE_V2DI_V16QI:
24878 case V2DI_FTYPE_V2DF_V2DF:
24879 case V2SI_FTYPE_V2SI_V2SI:
24880 case V2SI_FTYPE_V4HI_V4HI:
24881 case V2SI_FTYPE_V2SF_V2SF:
24882 case V2DF_FTYPE_V2DF_V2DF:
24883 case V2DF_FTYPE_V2DF_V4SF:
24884 case V2DF_FTYPE_V2DF_V2DI:
24885 case V2DF_FTYPE_V2DF_DI:
24886 case V2DF_FTYPE_V2DF_SI:
24887 case V2SF_FTYPE_V2SF_V2SF:
24888 case V1DI_FTYPE_V1DI_V1DI:
24889 case V1DI_FTYPE_V8QI_V8QI:
24890 case V1DI_FTYPE_V2SI_V2SI:
24891 if (comparison == UNKNOWN)
24892 return ix86_expand_binop_builtin (icode, exp, target);
24895 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24896 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24897 gcc_assert (comparison != UNKNOWN);
24901 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24902 case V8HI_FTYPE_V8HI_SI_COUNT:
24903 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24904 case V4SI_FTYPE_V4SI_SI_COUNT:
24905 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24906 case V4HI_FTYPE_V4HI_SI_COUNT:
24907 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24908 case V2DI_FTYPE_V2DI_SI_COUNT:
24909 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24910 case V2SI_FTYPE_V2SI_SI_COUNT:
24911 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24912 case V1DI_FTYPE_V1DI_SI_COUNT:
24914 last_arg_count = true;
24916 case UINT64_FTYPE_UINT64_UINT64:
24917 case UINT_FTYPE_UINT_UINT:
24918 case UINT_FTYPE_UINT_USHORT:
24919 case UINT_FTYPE_UINT_UCHAR:
24920 case UINT16_FTYPE_UINT16_INT:
24921 case UINT8_FTYPE_UINT8_INT:
24924 case V2DI2TI_FTYPE_V2DI_INT:
24927 nargs_constant = 1;
24929 case V8HI_FTYPE_V8HI_INT:
24930 case V8SF_FTYPE_V8SF_INT:
24931 case V4SI_FTYPE_V4SI_INT:
24932 case V4SI_FTYPE_V8SI_INT:
24933 case V4HI_FTYPE_V4HI_INT:
24934 case V4DF_FTYPE_V4DF_INT:
24935 case V4SF_FTYPE_V4SF_INT:
24936 case V4SF_FTYPE_V8SF_INT:
24937 case V2DI_FTYPE_V2DI_INT:
24938 case V2DF_FTYPE_V2DF_INT:
24939 case V2DF_FTYPE_V4DF_INT:
24941 nargs_constant = 1;
24943 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24944 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24945 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24946 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24947 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24950 case V16QI_FTYPE_V16QI_V16QI_INT:
24951 case V8HI_FTYPE_V8HI_V8HI_INT:
24952 case V8SI_FTYPE_V8SI_V8SI_INT:
24953 case V8SI_FTYPE_V8SI_V4SI_INT:
24954 case V8SF_FTYPE_V8SF_V8SF_INT:
24955 case V8SF_FTYPE_V8SF_V4SF_INT:
24956 case V4SI_FTYPE_V4SI_V4SI_INT:
24957 case V4DF_FTYPE_V4DF_V4DF_INT:
24958 case V4DF_FTYPE_V4DF_V2DF_INT:
24959 case V4SF_FTYPE_V4SF_V4SF_INT:
24960 case V2DI_FTYPE_V2DI_V2DI_INT:
24961 case V2DF_FTYPE_V2DF_V2DF_INT:
24963 nargs_constant = 1;
24965 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24968 nargs_constant = 1;
24970 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24973 nargs_constant = 1;
24975 case V2DI_FTYPE_V2DI_UINT_UINT:
24977 nargs_constant = 2;
24979 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24981 nargs_constant = 2;
24984 gcc_unreachable ();
24987 gcc_assert (nargs <= ARRAY_SIZE (args));
24989 if (comparison != UNKNOWN)
24991 gcc_assert (nargs == 2);
24992 return ix86_expand_sse_compare (d, exp, target, swap);
24995 if (rmode == VOIDmode || rmode == tmode)
24999 || GET_MODE (target) != tmode
25000 || ! (*insn_p->operand[0].predicate) (target, tmode))
25001 target = gen_reg_rtx (tmode);
25002 real_target = target;
25006 target = gen_reg_rtx (rmode);
25007 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
25010 for (i = 0; i < nargs; i++)
25012 tree arg = CALL_EXPR_ARG (exp, i);
25013 rtx op = expand_normal (arg);
25014 enum machine_mode mode = insn_p->operand[i + 1].mode;
25015 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
25017 if (last_arg_count && (i + 1) == nargs)
25019 /* SIMD shift insns take either an 8-bit immediate or
25020 register as count. But builtin functions take int as
25021 count. If count doesn't match, we put it in register. */
25024 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
25025 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
25026 op = copy_to_reg (op);
25029 else if ((nargs - i) <= nargs_constant)
25034 case CODE_FOR_sse4_1_roundpd:
25035 case CODE_FOR_sse4_1_roundps:
25036 case CODE_FOR_sse4_1_roundsd:
25037 case CODE_FOR_sse4_1_roundss:
25038 case CODE_FOR_sse4_1_blendps:
25039 case CODE_FOR_avx_blendpd256:
25040 case CODE_FOR_avx_vpermilv4df:
25041 case CODE_FOR_avx_roundpd256:
25042 case CODE_FOR_avx_roundps256:
25043 error ("the last argument must be a 4-bit immediate");
25046 case CODE_FOR_sse4_1_blendpd:
25047 case CODE_FOR_avx_vpermilv2df:
25048 error ("the last argument must be a 2-bit immediate");
25051 case CODE_FOR_avx_vextractf128v4df:
25052 case CODE_FOR_avx_vextractf128v8sf:
25053 case CODE_FOR_avx_vextractf128v8si:
25054 case CODE_FOR_avx_vinsertf128v4df:
25055 case CODE_FOR_avx_vinsertf128v8sf:
25056 case CODE_FOR_avx_vinsertf128v8si:
25057 error ("the last argument must be a 1-bit immediate");
25060 case CODE_FOR_avx_cmpsdv2df3:
25061 case CODE_FOR_avx_cmpssv4sf3:
25062 case CODE_FOR_avx_cmppdv2df3:
25063 case CODE_FOR_avx_cmppsv4sf3:
25064 case CODE_FOR_avx_cmppdv4df3:
25065 case CODE_FOR_avx_cmppsv8sf3:
25066 error ("the last argument must be a 5-bit immediate");
25070 switch (nargs_constant)
25073 if ((nargs - i) == nargs_constant)
25075 error ("the next to last argument must be an 8-bit immediate");
25079 error ("the last argument must be an 8-bit immediate");
25082 gcc_unreachable ();
25089 if (VECTOR_MODE_P (mode))
25090 op = safe_vector_operand (op, mode);
25092 /* If we aren't optimizing, only allow one memory operand to
25094 if (memory_operand (op, mode))
25097 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
25099 if (optimize || !match || num_memory > 1)
25100 op = copy_to_mode_reg (mode, op);
25104 op = copy_to_reg (op);
25105 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
25110 args[i].mode = mode;
25116 pat = GEN_FCN (icode) (real_target, args[0].op);
25119 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
25122 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25126 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25127 args[2].op, args[3].op);
25130 gcc_unreachable ();
25140 /* Subroutine of ix86_expand_builtin to take care of special insns
25141 with variable number of operands. */
/* D describes the builtin (its insn code and ix86_special_builtin_type
   flag), EXP is the CALL_EXPR being expanded, TARGET is a suggested
   result rtx (may be 0).  Returns the result rtx for "load"-class
   builtins and 0 for "store"-class builtins (see the final return).
   NOTE(review): interior lines of this function are elided in this
   view; comments below describe only the visible code.  */
25144 ix86_expand_special_args_builtin (const struct builtin_description *d,
25145 tree exp, rtx target)
25149 unsigned int i, nargs, arg_adjust, memory;
25153 enum machine_mode mode;
25155 enum insn_code icode = d->icode;
25156 bool last_arg_constant = false;
25157 const struct insn_data *insn_p = &insn_data[icode];
/* Mode of the insn's output operand (operand 0).  */
25158 enum machine_mode tmode = insn_p->operand[0].mode;
25159 enum { load, store } klass;
/* Classify the builtin by its type signature: this switch is expected
   to set klass, nargs, and which operand (if any) is memory.  */
25161 switch ((enum ix86_special_builtin_type) d->flag)
25163 case VOID_FTYPE_VOID:
25164 emit_insn (GEN_FCN (icode) (target));
25166 case UINT64_FTYPE_VOID:
25171 case UINT64_FTYPE_PUNSIGNED:
25172 case V2DI_FTYPE_PV2DI:
25173 case V32QI_FTYPE_PCCHAR:
25174 case V16QI_FTYPE_PCCHAR:
25175 case V8SF_FTYPE_PCV4SF:
25176 case V8SF_FTYPE_PCFLOAT:
25177 case V4SF_FTYPE_PCFLOAT:
25178 case V4DF_FTYPE_PCV2DF:
25179 case V4DF_FTYPE_PCDOUBLE:
25180 case V2DF_FTYPE_PCDOUBLE:
/* Store-class signatures: first builtin argument is the destination
   pointer.  */
25185 case VOID_FTYPE_PV2SF_V4SF:
25186 case VOID_FTYPE_PV4DI_V4DI:
25187 case VOID_FTYPE_PV2DI_V2DI:
25188 case VOID_FTYPE_PCHAR_V32QI:
25189 case VOID_FTYPE_PCHAR_V16QI:
25190 case VOID_FTYPE_PFLOAT_V8SF:
25191 case VOID_FTYPE_PFLOAT_V4SF:
25192 case VOID_FTYPE_PDOUBLE_V4DF:
25193 case VOID_FTYPE_PDOUBLE_V2DF:
25194 case VOID_FTYPE_PDI_DI:
25195 case VOID_FTYPE_PINT_INT:
25198 /* Reserve memory operand for target. */
25199 memory = ARRAY_SIZE (args);
25201 case V4SF_FTYPE_V4SF_PCV2SF:
25202 case V2DF_FTYPE_V2DF_PCDOUBLE:
25207 case V8SF_FTYPE_PCV8SF_V8SF:
25208 case V4DF_FTYPE_PCV4DF_V4DF:
25209 case V4SF_FTYPE_PCV4SF_V4SF:
25210 case V2DF_FTYPE_PCV2DF_V2DF:
/* Masked-store style signatures (pointer destination plus two vector
   sources).  */
25215 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25216 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25217 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25218 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25221 /* Reserve memory operand for target. */
25222 memory = ARRAY_SIZE (args);
25225 gcc_unreachable ();
25228 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, builtin argument 0 is a pointer; wrap it in a MEM of the
   insn's output mode and use that as the insn "target".  */
25230 if (klass == store)
25232 arg = CALL_EXPR_ARG (exp, 0);
25233 op = expand_normal (arg);
25234 gcc_assert (target == 0);
25235 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
/* For loads, reuse TARGET only if its mode and predicate match;
   otherwise grab a fresh pseudo.  */
25243 || GET_MODE (target) != tmode
25244 || ! (*insn_p->operand[0].predicate) (target, tmode))
25245 target = gen_reg_rtx (tmode);
/* Expand and legitimize each remaining builtin argument against the
   corresponding insn operand predicate.  */
25248 for (i = 0; i < nargs; i++)
25250 enum machine_mode mode = insn_p->operand[i + 1].mode;
25253 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25254 op = expand_normal (arg);
25255 match = (*insn_p->operand[i + 1].predicate) (op, mode);
25257 if (last_arg_constant && (i + 1) == nargs)
25263 error ("the last argument must be an 8-bit immediate");
25271 /* This must be the memory operand. */
25272 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25273 gcc_assert (GET_MODE (op) == mode
25274 || GET_MODE (op) == VOIDmode);
25278 /* This must be register. */
25279 if (VECTOR_MODE_P (mode))
25280 op = safe_vector_operand (op, mode);
25282 gcc_assert (GET_MODE (op) == mode
25283 || GET_MODE (op) == VOIDmode);
25284 op = copy_to_mode_reg (mode, op);
25289 args[i].mode = mode;
/* Emit the insn with the number of operands the pattern expects.  */
25295 pat = GEN_FCN (icode) (target);
25298 pat = GEN_FCN (icode) (target, args[0].op);
25301 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25304 gcc_unreachable ();
25310 return klass == store ? 0 : target;
25313 /* Return the integer constant in ARG. Constrain it to be in the range
25314 of the subparts of VEC_TYPE; issue an error if not. */
25317 get_element_number (tree vec_type, tree arg)
/* MAX is the highest valid lane index: number of vector subparts - 1.  */
25319 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject ARG unless it is a host-representable unsigned integer
   constant no greater than MAX (elt is assigned as a side effect).  */
25321 if (!host_integerp (arg, 1)
25322 || (elt = tree_low_cst (arg, 1), elt > max))
25324 error ("selector must be an integer constant in the range 0..%wi", max);
25331 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25332 ix86_expand_vector_init. We DO have language-level syntax for this, in
25333 the form of (type){ init-list }. Except that since we can't place emms
25334 instructions from inside the compiler, we can't allow the use of MMX
25335 registers unless the user explicitly asks for it. So we do *not* define
25336 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25337 we have builtins invoked by mmintrin.h that gives us license to emit
25338 these sorts of instructions. */
/* TYPE is the vector type being built, EXP the CALL_EXPR supplying one
   scalar argument per vector element, TARGET a suggested result rtx.  */
25341 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25343 enum machine_mode tmode = TYPE_MODE (type);
25344 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25345 int i, n_elt = GET_MODE_NUNITS (tmode);
25346 rtvec v = rtvec_alloc (n_elt);
25348 gcc_assert (VECTOR_MODE_P (tmode));
/* The call must supply exactly one argument per element.  */
25349 gcc_assert (call_expr_nargs (exp) == n_elt);
25351 for (i = 0; i < n_elt; ++i)
/* Narrow each expanded argument to the element mode.  */
25353 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25354 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25357 if (!target || !register_operand (target, tmode))
25358 target = gen_reg_rtx (tmode);
25360 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25364 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25365 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25366 had a language-level syntax for referencing vector elements. */
/* EXP is the CALL_EXPR: argument 0 is the source vector, argument 1 the
   constant lane selector (validated by get_element_number).  TARGET is
   a suggested result rtx.  */
25369 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25371 enum machine_mode tmode, mode0;
25376 arg0 = CALL_EXPR_ARG (exp, 0);
25377 arg1 = CALL_EXPR_ARG (exp, 1);
25379 op0 = expand_normal (arg0);
25380 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode is the element mode, mode0 the full vector mode.  */
25382 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25383 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25384 gcc_assert (VECTOR_MODE_P (mode0));
25386 op0 = force_reg (mode0, op0);
25388 if (optimize || !target || !register_operand (target, tmode))
25389 target = gen_reg_rtx (tmode);
25391 ix86_expand_vector_extract (true, target, op0, elt);
25396 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25397 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25398 a language-level syntax for referencing vector elements. */
/* EXP is the CALL_EXPR: argument 0 is the source vector, argument 1 the
   new element value, argument 2 the constant lane selector.  Returns a
   fresh copy of the vector with the lane replaced; the source operand
   itself is not modified.  */
25401 ix86_expand_vec_set_builtin (tree exp)
25403 enum machine_mode tmode, mode1;
25404 tree arg0, arg1, arg2;
25406 rtx op0, op1, target;
25408 arg0 = CALL_EXPR_ARG (exp, 0);
25409 arg1 = CALL_EXPR_ARG (exp, 1);
25410 arg2 = CALL_EXPR_ARG (exp, 2);
/* tmode is the full vector mode, mode1 the element mode.  */
25412 tmode = TYPE_MODE (TREE_TYPE (arg0));
25413 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25414 gcc_assert (VECTOR_MODE_P (tmode));
25416 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25417 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25418 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Convert the scalar to the element mode if it expanded differently.  */
25420 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25421 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25423 op0 = force_reg (tmode, op0);
25424 op1 = force_reg (mode1, op1);
25426 /* OP0 is the source of these builtin functions and shouldn't be
25427 modified. Create a copy, use it and return it as target. */
25428 target = gen_reg_rtx (tmode);
25429 emit_move_insn (target, op0);
25430 ix86_expand_vector_set (true, target, op1, elt);
25435 /* Expand an expression EXP that calls a built-in function,
25436 with result going to TARGET if that's convenient
25437 (and in mode MODE if that's convenient).
25438 SUBTARGET may be used as the target for computing one of EXP's operands.
25439 IGNORE is nonzero if the value is to be ignored. */
/* This is the TARGET_EXPAND_BUILTIN hook for i386: it first handles a
   number of builtins that need hand-written expansion, then falls back
   to the generic table-driven expanders (bdesc_* tables) below.  */
25442 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25443 enum machine_mode mode ATTRIBUTE_UNUSED,
25444 int ignore ATTRIBUTE_UNUSED)
25446 const struct builtin_description *d;
25448 enum insn_code icode;
25449 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25450 tree arg0, arg1, arg2;
25451 rtx op0, op1, op2, pat;
25452 enum machine_mode mode0, mode1, mode2;
25453 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25455 /* Determine whether the builtin function is available under the current ISA.
25456 Originally the builtin was not created if it wasn't applicable to the
25457 current ISA based on the command line switches. With function specific
25458 options, we need to check in the context of the function making the call
25459 whether it is supported. */
25460 if (ix86_builtins_isa[fcode].isa
25461 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
/* Build a human-readable option string naming the missing ISA so the
   error message can suggest it.  */
25463 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25464 NULL, NULL, false);
25467 error ("%qE needs unknown isa option", fndecl);
25470 gcc_assert (opts != NULL);
25471 error ("%qE needs isa option %s", fndecl, opts);
/* Hand-written expansions for builtins whose operand handling doesn't
   fit the generic tables.  */
25479 case IX86_BUILTIN_MASKMOVQ:
25480 case IX86_BUILTIN_MASKMOVDQU:
25481 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25482 ? CODE_FOR_mmx_maskmovq
25483 : CODE_FOR_sse2_maskmovdqu);
25484 /* Note the arg order is different from the operand order. */
25485 arg1 = CALL_EXPR_ARG (exp, 0);
25486 arg2 = CALL_EXPR_ARG (exp, 1);
25487 arg0 = CALL_EXPR_ARG (exp, 2);
25488 op0 = expand_normal (arg0);
25489 op1 = expand_normal (arg1);
25490 op2 = expand_normal (arg2);
25491 mode0 = insn_data[icode].operand[0].mode;
25492 mode1 = insn_data[icode].operand[1].mode;
25493 mode2 = insn_data[icode].operand[2].mode;
/* The destination pointer becomes a MEM in the insn's data mode.  */
25495 op0 = force_reg (Pmode, op0);
25496 op0 = gen_rtx_MEM (mode1, op0);
25498 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25499 op0 = copy_to_mode_reg (mode0, op0);
25500 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25501 op1 = copy_to_mode_reg (mode1, op1);
25502 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25503 op2 = copy_to_mode_reg (mode2, op2);
25504 pat = GEN_FCN (icode) (op0, op1, op2);
/* ldmxcsr/stmxcsr go through a dedicated stack slot.  */
25510 case IX86_BUILTIN_LDMXCSR:
25511 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25512 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25513 emit_move_insn (target, op0);
25514 emit_insn (gen_sse_ldmxcsr (target));
25517 case IX86_BUILTIN_STMXCSR:
25518 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25519 emit_insn (gen_sse_stmxcsr (target));
25520 return copy_to_mode_reg (SImode, target);
25522 case IX86_BUILTIN_CLFLUSH:
25523 arg0 = CALL_EXPR_ARG (exp, 0);
25524 op0 = expand_normal (arg0);
25525 icode = CODE_FOR_sse2_clflush;
25526 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25527 op0 = copy_to_mode_reg (Pmode, op0);
25529 emit_insn (gen_sse2_clflush (op0));
25532 case IX86_BUILTIN_MONITOR:
25533 arg0 = CALL_EXPR_ARG (exp, 0);
25534 arg1 = CALL_EXPR_ARG (exp, 1);
25535 arg2 = CALL_EXPR_ARG (exp, 2);
25536 op0 = expand_normal (arg0);
25537 op1 = expand_normal (arg1);
25538 op2 = expand_normal (arg2);
25540 op0 = copy_to_mode_reg (Pmode, op0);
25542 op1 = copy_to_mode_reg (SImode, op1);
25544 op2 = copy_to_mode_reg (SImode, op2);
/* ix86_gen_monitor is a function pointer selected for 32/64-bit.  */
25545 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25548 case IX86_BUILTIN_MWAIT:
25549 arg0 = CALL_EXPR_ARG (exp, 0);
25550 arg1 = CALL_EXPR_ARG (exp, 1);
25551 op0 = expand_normal (arg0);
25552 op1 = expand_normal (arg1);
25554 op0 = copy_to_mode_reg (SImode, op0);
25556 op1 = copy_to_mode_reg (SImode, op1);
25557 emit_insn (gen_sse3_mwait (op0, op1));
/* MMX vector init/extract/set wrappers — see the subroutines above.  */
25560 case IX86_BUILTIN_VEC_INIT_V2SI:
25561 case IX86_BUILTIN_VEC_INIT_V4HI:
25562 case IX86_BUILTIN_VEC_INIT_V8QI:
25563 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25565 case IX86_BUILTIN_VEC_EXT_V2DF:
25566 case IX86_BUILTIN_VEC_EXT_V2DI:
25567 case IX86_BUILTIN_VEC_EXT_V4SF:
25568 case IX86_BUILTIN_VEC_EXT_V4SI:
25569 case IX86_BUILTIN_VEC_EXT_V8HI:
25570 case IX86_BUILTIN_VEC_EXT_V2SI:
25571 case IX86_BUILTIN_VEC_EXT_V4HI:
25572 case IX86_BUILTIN_VEC_EXT_V16QI:
25573 return ix86_expand_vec_ext_builtin (exp, target);
25575 case IX86_BUILTIN_VEC_SET_V2DI:
25576 case IX86_BUILTIN_VEC_SET_V4SF:
25577 case IX86_BUILTIN_VEC_SET_V4SI:
25578 case IX86_BUILTIN_VEC_SET_V8HI:
25579 case IX86_BUILTIN_VEC_SET_V4HI:
25580 case IX86_BUILTIN_VEC_SET_V16QI:
25581 return ix86_expand_vec_set_builtin (exp);
/* __float128 infinity: materialize the constant through memory.  */
25583 case IX86_BUILTIN_INFQ:
25584 case IX86_BUILTIN_HUGE_VALQ:
25586 REAL_VALUE_TYPE inf;
25590 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25592 tmp = validize_mem (force_const_mem (mode, tmp));
25595 target = gen_reg_rtx (mode);
25597 emit_move_insn (target, tmp);
/* Fall back to the generic table-driven expanders: scan each bdesc_*
   table for a matching builtin code.  */
25605 for (i = 0, d = bdesc_special_args;
25606 i < ARRAY_SIZE (bdesc_special_args);
25608 if (d->code == fcode)
25609 return ix86_expand_special_args_builtin (d, exp, target);
25611 for (i = 0, d = bdesc_args;
25612 i < ARRAY_SIZE (bdesc_args);
25614 if (d->code == fcode)
25617 case IX86_BUILTIN_FABSQ:
25618 case IX86_BUILTIN_COPYSIGNQ:
25620 /* Emit a normal call if SSE2 isn't available. */
25621 return expand_call (exp, target, ignore);
25623 return ix86_expand_args_builtin (d, exp, target);
25626 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25627 if (d->code == fcode)
25628 return ix86_expand_sse_comi (d, exp, target);
25630 for (i = 0, d = bdesc_pcmpestr;
25631 i < ARRAY_SIZE (bdesc_pcmpestr);
25633 if (d->code == fcode)
25634 return ix86_expand_sse_pcmpestr (d, exp, target);
25636 for (i = 0, d = bdesc_pcmpistr;
25637 i < ARRAY_SIZE (bdesc_pcmpistr);
25639 if (d->code == fcode)
25640 return ix86_expand_sse_pcmpistr (d, exp, target);
25642 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25643 if (d->code == fcode)
25644 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25645 (enum multi_arg_type)d->flag,
/* Every valid builtin code must have been handled above.  */
25648 gcc_unreachable ();
25651 /* Returns a function decl for a vectorized version of the builtin function
25652 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25653 if it is not available. */
/* TYPE_OUT / TYPE_IN are the vector result and argument types the
   vectorizer wants; both must be VECTOR_TYPEs.  */
25656 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25659 enum machine_mode in_mode, out_mode;
25662 if (TREE_CODE (type_out) != VECTOR_TYPE
25663 || TREE_CODE (type_in) != VECTOR_TYPE)
/* Element modes and lane counts of the requested vector types.  */
25666 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25667 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25668 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25669 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Match specific scalar builtins to SSE vector equivalents, checking
   that element mode and lane count fit the instruction.  */
25673 case BUILT_IN_SQRT:
25674 if (out_mode == DFmode && out_n == 2
25675 && in_mode == DFmode && in_n == 2)
25676 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25679 case BUILT_IN_SQRTF:
25680 if (out_mode == SFmode && out_n == 4
25681 && in_mode == SFmode && in_n == 4)
25682 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25685 case BUILT_IN_LRINT:
25686 if (out_mode == SImode && out_n == 4
25687 && in_mode == DFmode && in_n == 2)
25688 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25691 case BUILT_IN_LRINTF:
25692 if (out_mode == SImode && out_n == 4
25693 && in_mode == SFmode && in_n == 4)
25694 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25701 /* Dispatch to a handler for a vectorization library. */
25702 if (ix86_veclib_handler)
25703 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
25709 /* Handler for an SVML-style interface to
25710 a library with vectorized intrinsics. */
/* Maps scalar math builtin FN with vector types TYPE_OUT/TYPE_IN onto
   an external SVML entry point (e.g. "vmlsSin4" / "vmldSin2") and
   returns a FUNCTION_DECL for it.  Only DFmode x2 and SFmode x4
   variants of the listed math builtins are handled.  */
25713 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25716 tree fntype, new_fndecl, args;
25719 enum machine_mode el_mode, in_mode;
25722 /* The SVML is suitable for unsafe math only. */
25723 if (!flag_unsafe_math_optimizations)
25726 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25727 n = TYPE_VECTOR_SUBPARTS (type_out);
25728 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25729 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode / lane count must agree.  */
25730 if (el_mode != in_mode
/* Double-precision builtins: require a V2DF-shaped request.  */
25738 case BUILT_IN_LOG10:
25740 case BUILT_IN_TANH:
25742 case BUILT_IN_ATAN:
25743 case BUILT_IN_ATAN2:
25744 case BUILT_IN_ATANH:
25745 case BUILT_IN_CBRT:
25746 case BUILT_IN_SINH:
25748 case BUILT_IN_ASINH:
25749 case BUILT_IN_ASIN:
25750 case BUILT_IN_COSH:
25752 case BUILT_IN_ACOSH:
25753 case BUILT_IN_ACOS:
25754 if (el_mode != DFmode || n != 2)
/* Single-precision builtins: require a V4SF-shaped request.  */
25758 case BUILT_IN_EXPF:
25759 case BUILT_IN_LOGF:
25760 case BUILT_IN_LOG10F:
25761 case BUILT_IN_POWF:
25762 case BUILT_IN_TANHF:
25763 case BUILT_IN_TANF:
25764 case BUILT_IN_ATANF:
25765 case BUILT_IN_ATAN2F:
25766 case BUILT_IN_ATANHF:
25767 case BUILT_IN_CBRTF:
25768 case BUILT_IN_SINHF:
25769 case BUILT_IN_SINF:
25770 case BUILT_IN_ASINHF:
25771 case BUILT_IN_ASINF:
25772 case BUILT_IN_COSHF:
25773 case BUILT_IN_COSF:
25774 case BUILT_IN_ACOSHF:
25775 case BUILT_IN_ACOSF:
25776 if (el_mode != SFmode || n != 4)
/* Derive the SVML symbol name from the scalar builtin's name.  The
   "log" builtins map to SVML's "Ln" names as a special case; otherwise
   the "__builtin_" prefix (10 chars, hence bname+10) is replaced with
   "vmls"/"vmld" and a lane-count suffix.  */
25784 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25786 if (fn == BUILT_IN_LOGF)
25787 strcpy (name, "vmlsLn4");
25788 else if (fn == BUILT_IN_LOG)
25789 strcpy (name, "vmldLn2");
25792 sprintf (name, "vmls%s", bname+10);
/* Single-precision names end in '4' instead of the trailing 'f'.  */
25793 name[strlen (name)-1] = '4';
25796 sprintf (name, "vmld%s2", bname+10);
25798 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to decide between a one- and a
   two-argument vector function type (e.g. pow/atan2 take two).  */
25802 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25803 args = TREE_CHAIN (args))
25807 fntype = build_function_type_list (type_out, type_in, NULL);
25809 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25811 /* Build a function declaration for the vectorized function. */
25812 new_fndecl = build_decl (BUILTINS_LOCATION,
25813 FUNCTION_DECL, get_identifier (name), fntype);
25814 TREE_PUBLIC (new_fndecl) = 1;
25815 DECL_EXTERNAL (new_fndecl) = 1;
/* Pure math routine: no virtual operands, read-only.  */
25816 DECL_IS_NOVOPS (new_fndecl) = 1;
25817 TREE_READONLY (new_fndecl) = 1;
25822 /* Handler for an ACML-style interface to
25823 a library with vectorized intrinsics. */
/* Like ix86_veclibabi_svml but for AMD's ACML naming scheme
   ("__vrd2_sin" / "__vrs4_sinf" style, filled into the template
   below).  Returns a FUNCTION_DECL or bails out.  */
25826 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Template: the ".." is overwritten with "d2"/"s4" and the scalar name
   is appended after the final '_'.  */
25828 char name[20] = "__vr.._";
25829 tree fntype, new_fndecl, args;
25832 enum machine_mode el_mode, in_mode;
25835 /* The ACML is 64bits only and suitable for unsafe math only as
25836 it does not correctly support parts of IEEE with the required
25837 precision such as denormals. */
25839 || !flag_unsafe_math_optimizations)
25842 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25843 n = TYPE_VECTOR_SUBPARTS (type_out);
25844 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25845 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode / lane count must agree.  */
25846 if (el_mode != in_mode
25856 case BUILT_IN_LOG2:
25857 case BUILT_IN_LOG10:
25860 if (el_mode != DFmode
25865 case BUILT_IN_SINF:
25866 case BUILT_IN_COSF:
25867 case BUILT_IN_EXPF:
25868 case BUILT_IN_POWF:
25869 case BUILT_IN_LOGF:
25870 case BUILT_IN_LOG2F:
25871 case BUILT_IN_LOG10F:
25874 if (el_mode != SFmode
/* Append the scalar builtin name sans its "__builtin_" prefix (10
   chars, hence bname+10) after the 7-char "__vr.._" template.  */
25883 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25884 sprintf (name + 7, "%s", bname+10);
/* Count scalar arguments to choose a one- or two-argument signature.  */
25887 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25888 args = TREE_CHAIN (args))
25892 fntype = build_function_type_list (type_out, type_in, NULL);
25894 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25896 /* Build a function declaration for the vectorized function. */
25897 new_fndecl = build_decl (BUILTINS_LOCATION,
25898 FUNCTION_DECL, get_identifier (name), fntype);
25899 TREE_PUBLIC (new_fndecl) = 1;
25900 DECL_EXTERNAL (new_fndecl) = 1;
/* Pure math routine: no virtual operands, read-only.  */
25901 DECL_IS_NOVOPS (new_fndecl) = 1;
25902 TREE_READONLY (new_fndecl) = 1;
25908 /* Returns a decl of a function that implements conversion of an integer vector
25909 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25910 side of the conversion.
25911 Return NULL_TREE if it is not available. */
/* CODE is the conversion tree code (e.g. FIX_TRUNC_EXPR below).  */
25914 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25916 if (TREE_CODE (type) != VECTOR_TYPE
25917 /* There are only conversions from/to signed integers. */
25918 || TYPE_UNSIGNED (TREE_TYPE (type)))
/* int -> float direction: cvtdq2ps for the V4SI case.  */
25924 switch (TYPE_MODE (type))
25927 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int (truncating) direction: cvttps2dq.  */
25932 case FIX_TRUNC_EXPR:
25933 switch (TYPE_MODE (type))
25936 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25946 /* Returns a code for a target-specific builtin that implements
25947 reciprocal of the function, or NULL_TREE if not available. */
/* MD_FN is presumably nonzero for machine-dependent builtin codes (the
   IX86_BUILTIN_* branch) vs. generic BUILT_IN_* codes — TODO confirm
   against the caller.  Reciprocal approximations are allowed only when
   SSE math plus -mrecip plus unsafe/finite math are all in effect and
   we are optimizing for speed.  */
25950 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25951 bool sqrt ATTRIBUTE_UNUSED)
25953 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25954 && flag_finite_math_only && !flag_trapping_math
25955 && flag_unsafe_math_optimizations))
25959 /* Machine dependent builtins. */
25962 /* Vectorized version of sqrt to rsqrt conversion. */
25963 case IX86_BUILTIN_SQRTPS_NR:
25964 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25970 /* Normal builtins. */
25973 /* Sqrt to rsqrt conversion. */
25974 case BUILT_IN_SQRTF:
25975 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25982 /* Store OPERAND to the memory after reload is completed. This means
25983 that we can't easily use assign_stack_local. */
/* Returns a MEM of MODE holding OPERAND's value.  Uses the red zone
   below the stack pointer when available, otherwise pushes onto the
   stack with PRE_DEC addressing (the matching pop/release is done by
   ix86_free_from_memory below).  */
25985 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25989 gcc_assert (reload_completed);
/* Red zone available (64-bit SysV): store below the stack pointer
   without adjusting it.  */
25990 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25992 result = gen_rtx_MEM (mode,
25993 gen_rtx_PLUS (Pmode,
25995 GEN_INT (-RED_ZONE_SIZE)));
25996 emit_move_insn (result, operand);
/* 64-bit without red zone: push a DImode word.  */
25998 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
26004 operand = gen_lowpart (DImode, operand);
26008 gen_rtx_SET (VOIDmode,
26009 gen_rtx_MEM (DImode,
26010 gen_rtx_PRE_DEC (DImode,
26011 stack_pointer_rtx)),
26015 gcc_unreachable ();
26017 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: split into two SImode words and push both halves.  */
26026 split_di (&operand, 1, operands, operands + 1);
26028 gen_rtx_SET (VOIDmode,
26029 gen_rtx_MEM (SImode,
26030 gen_rtx_PRE_DEC (Pmode,
26031 stack_pointer_rtx)),
26034 gen_rtx_SET (VOIDmode,
26035 gen_rtx_MEM (SImode,
26036 gen_rtx_PRE_DEC (Pmode,
26037 stack_pointer_rtx)),
26042 /* Store HImodes as SImodes. */
26043 operand = gen_lowpart (SImode, operand);
26047 gen_rtx_SET (VOIDmode,
26048 gen_rtx_MEM (GET_MODE (operand),
26049 gen_rtx_PRE_DEC (SImode,
26050 stack_pointer_rtx)),
26054 gcc_unreachable ();
26056 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26061 /* Free operand from the memory. */
/* Releases the stack space pushed by ix86_force_to_memory.  A no-op
   when the red zone was used (nothing was pushed).  */
26063 ix86_free_from_memory (enum machine_mode mode)
26065 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
/* DImode (or any mode on 64-bit) occupied a full 8-byte slot.  */
26069 if (mode == DImode || TARGET_64BIT)
26073 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26074 to pop or add instruction if registers are available. */
26075 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26076 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26081 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26082 QImode must go into class Q_REGS.
26083 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26084 movdf to do mem-to-mem moves through integer regs. */
/* PREFERRED_RELOAD_CLASS implementation: returns a (sub)class of
   REGCLASS that is preferable for loading X, or NO_REGS to force the
   value to memory.  */
26086 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26088 enum machine_mode mode = GET_MODE (x);
26090 /* We're only allowed to return a subclass of CLASS. Many of the
26091 following checks fail for NO_REGS, so eliminate that early. */
26092 if (regclass == NO_REGS)
26095 /* All classes can load zeros. */
26096 if (x == CONST0_RTX (mode))
26099 /* Force constants into memory if we are loading a (nonzero) constant into
26100 an MMX or SSE register. This is because there are no MMX/SSE instructions
26101 to load from a constant. */
26103 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26106 /* Prefer SSE regs only, if we can use them for math. */
26107 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26108 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26110 /* Floating-point constants need more complex checks. */
26111 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26113 /* General regs can load everything. */
26114 if (reg_class_subset_p (regclass, GENERAL_REGS))
26117 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26118 zero above. We only want to wind up preferring 80387 registers if
26119 we plan on doing computation with them. */
26121 && standard_80387_constant_p (x))
26123 /* Limit class to non-sse. */
26124 if (regclass == FLOAT_SSE_REGS)
26126 if (regclass == FP_TOP_SSE_REGS)
26128 if (regclass == FP_SECOND_SSE_REGS)
26129 return FP_SECOND_REG;
26130 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26137 /* Generally when we see PLUS here, it's the function invariant
26138 (plus soft-fp const_int). Which can only be computed into general
26140 if (GET_CODE (x) == PLUS)
26141 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26143 /* QImode constants are easy to load, but non-constant QImode data
26144 must go into Q_REGS. */
26145 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26147 if (reg_class_subset_p (regclass, Q_REGS))
26149 if (reg_class_subset_p (Q_REGS, regclass))
26157 /* Discourage putting floating-point values in SSE registers unless
26158 SSE math is being used, and likewise for the 387 registers. */
/* PREFERRED_OUTPUT_RELOAD_CLASS implementation; mirror of
   ix86_preferred_reload_class above but for outputs.  */
26160 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26162 enum machine_mode mode = GET_MODE (x);
26164 /* Restrict the output reload class to the register bank that we are doing
26165 math on. If we would like not to return a subset of CLASS, reject this
26166 alternative: if reload cannot do this, it will still use its choice. */
26167 mode = GET_MODE (x);
26168 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26169 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26171 if (X87_FLOAT_MODE_P (mode))
/* Strip the SSE half out of mixed x87/SSE classes.  */
26173 if (regclass == FP_TOP_SSE_REGS)
26175 else if (regclass == FP_SECOND_SSE_REGS)
26176 return FP_SECOND_REG;
26178 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* TARGET_SECONDARY_RELOAD hook: return the class of an intermediate
   register needed to move X (of MODE) in/out of RCLASS, or no
   secondary otherwise.  */
26184 static enum reg_class
26185 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
26186 enum machine_mode mode,
26187 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26189 /* QImode spills from non-QI registers require
26190 intermediate register on 32bit targets. */
26191 if (!in_p && mode == QImode && !TARGET_64BIT
26192 && (rclass == GENERAL_REGS
26193 || rclass == LEGACY_REGS
26194 || rclass == INDEX_REGS))
/* Resolve pseudos/subregs to a hard register number if possible.  */
26203 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26204 regno = true_regnum (x);
26206 /* Return Q_REGS if the operand is in memory. */
26214 /* If we are copying between general and FP registers, we need a memory
26215 location. The same is true for SSE and MMX registers.
26217 To optimize register_move_cost performance, allow inline variant.
26219 The macro can't work reliably when one of the CLASSES is class containing
26220 registers from multiple units (SSE, MMX, integer). We avoid this by never
26221 combining those units in single alternative in the machine description.
26222 Ensure that this constraint holds to avoid unexpected surprises.
26224 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26225 enforce these sanity checks. */
26228 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26229 enum machine_mode mode, int strict)
/* Mixed-unit classes violate the invariant described above: assert in
   strict mode, otherwise answer conservatively.  */
26231 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26232 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26233 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26234 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26235 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26236 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26238 gcc_assert (!strict);
/* x87 <-> anything else always goes through memory.  */
26242 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26245 /* ??? This is a lie. We do have moves between mmx/general, and for
26246 mmx/sse2. But by saying we need secondary memory we discourage the
26247 register allocator from using the mmx registers unless needed. */
26248 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26251 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26253 /* SSE1 doesn't have any direct moves from other classes. */
26257 /* If the target says that inter-unit moves are more expensive
26258 than moving through memory, then don't generate them. */
26259 if (!TARGET_INTER_UNIT_MOVES)
26262 /* Between SSE and general, we have moves no larger than word size. */
26263 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper for inline_secondary_memory_needed; this is the
   entry point used by the SECONDARY_MEMORY_NEEDED target macro.  */
26271 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26272 enum machine_mode mode, int strict)
26274   return inline_secondary_memory_needed (class1, class2, mode, strict);
26277 /* Return true if the registers in CLASS cannot represent the change from
26278 modes FROM to TO. */
/* NOTE(review): elided listing lines hide the return statements that
   follow each visible test; comments below describe only what is shown.  */
26281 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26282 enum reg_class regclass)
26287 /* x87 registers can't do subreg at all, as all values are reformatted
26288 to extended precision. */
26289   if (MAYBE_FLOAT_CLASS_P (regclass))
26292   if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26294 /* Vector registers do not support QI or HImode loads. If we don't
26295 disallow a change to these modes, reload will assume it's ok to
26296 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26297 the vec_dupv4hi pattern. */
26298 if (GET_MODE_SIZE (from) < 4)
26301 /* Vector registers do not support subreg with nonzero offsets, which
26302 are otherwise valid for integer registers. Since we can't see
26303 whether we have a nonzero offset from here, prohibit all
26304 nonparadoxical subregs changing size. */
26305 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26312 /* Return the cost of moving data of mode M between a
26313 register and memory. A value of 2 is the default; this cost is
26314 relative to those in `REGISTER_MOVE_COST'.
26316 This function is used extensively by register_move_cost that is used to
26317 build tables at startup. Make it inline in this case.
26318 When IN is 2, return maximum of in and out move cost.
26320 If moving between registers and memory is more expensive than
26321 between two registers, you should define this macro to express the
26324 Model also increased moving costs of QImode registers in non
/* NOTE(review): the switch skeletons (case labels, index computation from
   GET_MODE_SIZE) are elided from this listing excerpt; only the cost
   lookups survive.  Each class (x87 / SSE / MMX / integer) indexes its
   own load/store table, with IN==2 returning max(load, store).  */
26328 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26332   if (FLOAT_CLASS_P (regclass))
26350 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26351 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26353   if (SSE_CLASS_P (regclass))
26356 switch (GET_MODE_SIZE (mode))
26371 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26372 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26374   if (MMX_CLASS_P (regclass))
26377 switch (GET_MODE_SIZE (mode))
26389 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26390 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26392   switch (GET_MODE_SIZE (mode))
/* QImode in non-Q regs: stores need the whole register; loads can use
   movzbl to avoid partial-register dependencies when tuning for speed.  */
26395 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26398 return ix86_cost->int_store[0];
26399 if (TARGET_PARTIAL_REG_DEPENDENCY
26400 && optimize_function_for_speed_p (cfun))
26401 cost = ix86_cost->movzbl_load;
26403 cost = ix86_cost->int_load[0];
26405 return MAX (cost, ix86_cost->int_store[0]);
26411 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26413 return ix86_cost->movzbl_load;
26415 return ix86_cost->int_store[0] + 4;
26420 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26421 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26423 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26424 if (mode == TFmode)
26427 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26429 cost = ix86_cost->int_load[2];
26431 cost = ix86_cost->int_store[2];
26432 return (cost * (((int) GET_MODE_SIZE (mode)
26433 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper for inline_memory_move_cost; entry point for the
   MEMORY_MOVE_COST target macro.  */
26438 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26440   return inline_memory_move_cost (mode, regclass, in);
26444 /* Return the cost of moving data from a register in class CLASS1 to
26445 one in class CLASS2.
26447 It is not required that the cost always equal 2 when FROM is the same as TO;
26448 on some machines it is expensive to move between registers if they are not
26449 general registers. */
/* NOTE(review): listing lines are elided between the visible statements
   (e.g. the declaration of `cost' and several returns); treat the flow
   below as a sketch and confirm against the full file.  */
26452 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26453 enum reg_class class2)
26455   /* In case we require secondary memory, compute cost of the store followed
26456 by load. In order to avoid bad register allocation choices, we need
26457 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26459   if (inline_secondary_memory_needed (class1, class2, mode, 0))
26463 cost += inline_memory_move_cost (mode, class1, 2);
26464 cost += inline_memory_move_cost (mode, class2, 2);
26466 /* In case of copying from general_purpose_register we may emit multiple
26467 stores followed by single load causing memory size mismatch stall.
26468 Count this as arbitrarily high cost of 20. */
26469 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26472 /* In the case of FP/MMX moves, the registers actually overlap, and we
26473 have to switch modes in order to treat them differently. */
26474   if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26475 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26481 /* Moves between SSE/MMX and integer unit are expensive. */
26482   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26483 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26485 /* ??? By keeping returned value relatively high, we limit the number
26486 of moves between integer and MMX/SSE registers for all targets.
26487 Additionally, high value prevents problem with x86_modes_tieable_p(),
26488 where integer modes in MMX/SSE registers are not tieable
26489 because of missing QImode and HImode moves to, from or between
26490 MMX/SSE registers. */
26491 return MAX (8, ix86_cost->mmxsse_to_integer);
26493   if (MAYBE_FLOAT_CLASS_P (class1))
26494 return ix86_cost->fp_move;
26495   if (MAYBE_SSE_CLASS_P (class1))
26496 return ix86_cost->sse_move;
26497   if (MAYBE_MMX_CLASS_P (class1))
26498 return ix86_cost->mmx_move;
26502 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): interior listing lines (several `return 0/1;' statements
   and braces) are elided in this excerpt.  */
26505 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26507   /* Flags and only flags can only hold CCmode values. */
26508   if (CC_REGNO_P (regno))
26509 return GET_MODE_CLASS (mode) == MODE_CC;
26510   if (GET_MODE_CLASS (mode) == MODE_CC
26511 || GET_MODE_CLASS (mode) == MODE_RANDOM
26512 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26514   if (FP_REGNO_P (regno))
26515 return VALID_FP_MODE_P (mode);
26516   if (SSE_REGNO_P (regno))
26518 /* We implement the move patterns for all vector modes into and
26519 out of SSE registers, even when no operation instructions
26520 are available. OImode move is available only when AVX is
26522 return ((TARGET_AVX && mode == OImode)
26523 || VALID_AVX256_REG_MODE (mode)
26524 || VALID_SSE_REG_MODE (mode)
26525 || VALID_SSE2_REG_MODE (mode)
26526 || VALID_MMX_REG_MODE (mode)
26527 || VALID_MMX_REG_MODE_3DNOW (mode));
26529   if (MMX_REGNO_P (regno))
26531 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26532 so if the register is available at all, then we can move data of
26533 the given mode into or out of it. */
26534 return (VALID_MMX_REG_MODE (mode)
26535 || VALID_MMX_REG_MODE_3DNOW (mode));
26538   if (mode == QImode)
26540 /* Take care for QImode values - they can be in non-QI regs,
26541 but then they do cause partial register stalls. */
26542 if (regno <= BX_REG || TARGET_64BIT)
26544 if (!TARGET_PARTIAL_REG_STALL)
26546 return reload_in_progress || reload_completed;
26548 /* We handle both integer and floats in the general purpose registers. */
26549   else if (VALID_INT_MODE_P (mode))
26551   else if (VALID_FP_MODE_P (mode))
26553   else if (VALID_DFP_MODE_P (mode))
26555 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26556 on to use that value in smaller contexts, this can easily force a
26557 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26558 supporting DImode, allow it. */
26559   else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26565 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26566 tieable integer mode. */
/* NOTE(review): the switch over MODE is elided here; QI/HI tie unless
   partial-register stalls matter, SI/DI per the visible returns.  */
26569 ix86_tieable_integer_mode_p (enum machine_mode mode)
26578 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26581 return TARGET_64BIT;
26588 /* Return true if MODE1 is accessible in a register that can hold MODE2
26589 without copying. That is, all register classes that can hold MODE2
26590 can also hold MODE1. */
26593 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26595   if (mode1 == mode2)
26598   if (ix86_tieable_integer_mode_p (mode1)
26599 && ix86_tieable_integer_mode_p (mode2))
26602 /* MODE2 being XFmode implies fp stack or general regs, which means we
26603 can tie any smaller floating point modes to it. Note that we do not
26604 tie this with TFmode. */
26605   if (mode2 == XFmode)
26606 return mode1 == SFmode || mode1 == DFmode;
26608 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26609 that we can tie it with SFmode. */
26610   if (mode2 == DFmode)
26611 return mode1 == SFmode;
26613 /* If MODE2 is only appropriate for an SSE register, then tie with
26614 any other mode acceptable to SSE registers. */
26615   if (GET_MODE_SIZE (mode2) == 16
26616 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26617 return (GET_MODE_SIZE (mode1) == 16
26618 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26620 /* If MODE2 is appropriate for an MMX register, then tie
26621 with any other mode acceptable to MMX registers. */
26622   if (GET_MODE_SIZE (mode2) == 8
26623 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26624 return (GET_MODE_SIZE (mode1) == 8
26625 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26630 /* Compute a (partial) cost for rtx X. Return true if the complete
26631 cost has been computed, and false if subexpressions should be
26632 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): the `switch (code)' skeleton and its case labels (CONST_INT,
   ZERO_EXTEND, MULT, DIV, PLUS, NEG, COMPARE, ABS, SQRT, UNSPEC, ...) are
   elided from this listing excerpt; only the per-case cost assignments
   survive.  Comments below annotate the visible lines only.  */
26635 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26637   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26638   enum machine_mode mode = GET_MODE (x);
26639   const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26647 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26649 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26651 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): "!GET_CODE (x) != LABEL_REF" looks wrong -- `!' binds to
   GET_CODE, so the left operand is 0/1 compared against the enum value
   LABEL_REF, making this subterm almost always true.  Presumably intended
   as "GET_CODE (x) != LABEL_REF"; confirm against upstream history.  */
26653 || (!GET_CODE (x) != LABEL_REF
26654 && (GET_CODE (x) != SYMBOL_REF
26655 || !SYMBOL_REF_LOCAL_P (x)))))
26662 if (mode == VOIDmode)
26665 switch (standard_80387_constant_p (x))
26670 default: /* Other constants */
26675 /* Start with (MEM (SYMBOL_REF)), since that's where
26676 it'll probably end up. Add a penalty for size. */
26677 *total = (COSTS_N_INSNS (1)
26678 + (flag_pic != 0 && !TARGET_64BIT)
26679 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26685 /* The zero extensions is often completely free on x86_64, so make
26686 it as cheap as possible. */
26687 if (TARGET_64BIT && mode == DImode
26688 && GET_MODE (XEXP (x, 0)) == SImode)
26690 else if (TARGET_ZERO_EXTEND_WITH_AND)
26691 *total = cost->add;
26693 *total = cost->movzx;
26697 *total = cost->movsx;
/* Shift costs: constant shifts may be replaced by add/lea when cheaper.  */
26701 if (CONST_INT_P (XEXP (x, 1))
26702 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26704 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26707 *total = cost->add;
26710 if ((value == 2 || value == 3)
26711 && cost->lea <= cost->shift_const)
26713 *total = cost->lea;
/* DImode shifts on 32-bit targets are synthesized from 32-bit shifts.  */
26723 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26725 if (CONST_INT_P (XEXP (x, 1)))
26727 if (INTVAL (XEXP (x, 1)) > 32)
26728 *total = cost->shift_const + COSTS_N_INSNS (2);
26730 *total = cost->shift_const * 2;
26734 if (GET_CODE (XEXP (x, 1)) == AND)
26735 *total = cost->shift_var * 2;
26737 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26742 if (CONST_INT_P (XEXP (x, 1)))
26743 *total = cost->shift_const;
26745 *total = cost->shift_var;
26750 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26752 /* ??? SSE scalar cost should be used here. */
26753 *total = cost->fmul;
26756 else if (X87_FLOAT_MODE_P (mode))
26758 *total = cost->fmul;
26761 else if (FLOAT_MODE_P (mode))
26763 /* ??? SSE vector cost should be used here. */
26764 *total = cost->fmul;
/* Integer multiply: cost scales with the popcount of a constant
   multiplier (nbits) and handles widening multiplies specially.  */
26769 rtx op0 = XEXP (x, 0);
26770 rtx op1 = XEXP (x, 1);
26772 if (CONST_INT_P (XEXP (x, 1)))
26774 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26775 for (nbits = 0; value != 0; value &= value - 1)
26779 /* This is arbitrary. */
26782 /* Compute costs correctly for widening multiplication. */
26783 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26784 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26785 == GET_MODE_SIZE (mode))
26787 int is_mulwiden = 0;
26788 enum machine_mode inner_mode = GET_MODE (op0);
26790 if (GET_CODE (op0) == GET_CODE (op1))
26791 is_mulwiden = 1, op1 = XEXP (op1, 0);
26792 else if (CONST_INT_P (op1))
26794 if (GET_CODE (op0) == SIGN_EXTEND)
26795 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26798 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26802 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26805 *total = (cost->mult_init[MODE_INDEX (mode)]
26806 + nbits * cost->mult_bit
26807 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26816 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26817 /* ??? SSE cost should be used here. */
26818 *total = cost->fdiv;
26819 else if (X87_FLOAT_MODE_P (mode))
26820 *total = cost->fdiv;
26821 else if (FLOAT_MODE_P (mode))
26822 /* ??? SSE vector cost should be used here. */
26823 *total = cost->fdiv;
26825 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize address-like shapes ((mult r c) + r + const etc.)
   that a single lea can compute.  */
26829 if (GET_MODE_CLASS (mode) == MODE_INT
26830 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26832 if (GET_CODE (XEXP (x, 0)) == PLUS
26833 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26834 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26835 && CONSTANT_P (XEXP (x, 1)))
26837 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26838 if (val == 2 || val == 4 || val == 8)
26840 *total = cost->lea;
26841 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26842 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26843 outer_code, speed);
26844 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26848 else if (GET_CODE (XEXP (x, 0)) == MULT
26849 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26851 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26852 if (val == 2 || val == 4 || val == 8)
26854 *total = cost->lea;
26855 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26856 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26860 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26862 *total = cost->lea;
26863 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26864 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26865 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26872 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26874 /* ??? SSE cost should be used here. */
26875 *total = cost->fadd;
26878 else if (X87_FLOAT_MODE_P (mode))
26880 *total = cost->fadd;
26883 else if (FLOAT_MODE_P (mode))
26885 /* ??? SSE vector cost should be used here. */
26886 *total = cost->fadd;
/* DImode add/sub on 32-bit: two adds, plus double-counted operands when
   they need widening (the shift-by-boolean doubles the operand cost).  */
26894 if (!TARGET_64BIT && mode == DImode)
26896 *total = (cost->add * 2
26897 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26898 << (GET_MODE (XEXP (x, 0)) != DImode))
26899 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26900 << (GET_MODE (XEXP (x, 1)) != DImode)));
26906 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26908 /* ??? SSE cost should be used here. */
26909 *total = cost->fchs;
26912 else if (X87_FLOAT_MODE_P (mode))
26914 *total = cost->fchs;
26917 else if (FLOAT_MODE_P (mode))
26919 /* ??? SSE vector cost should be used here. */
26920 *total = cost->fchs;
26926 if (!TARGET_64BIT && mode == DImode)
26927 *total = cost->add * 2;
26929 *total = cost->add;
26933 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26934 && XEXP (XEXP (x, 0), 1) == const1_rtx
26935 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26936 && XEXP (x, 1) == const0_rtx)
26938 /* This kind of construct is implemented using test[bwl].
26939 Treat it as if we had an AND. */
26940 *total = (cost->add
26941 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26942 + rtx_cost (const1_rtx, outer_code, speed));
26948 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26953 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26954 /* ??? SSE cost should be used here. */
26955 *total = cost->fabs;
26956 else if (X87_FLOAT_MODE_P (mode))
26957 *total = cost->fabs;
26958 else if (FLOAT_MODE_P (mode))
26959 /* ??? SSE vector cost should be used here. */
26960 *total = cost->fabs;
26964 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26965 /* ??? SSE cost should be used here. */
26966 *total = cost->fsqrt;
26967 else if (X87_FLOAT_MODE_P (mode))
26968 *total = cost->fsqrt;
26969 else if (FLOAT_MODE_P (mode))
26970 /* ??? SSE vector cost should be used here. */
26971 *total = cost->fsqrt;
26975 if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique LPC$/L$lz labels per stub.  */
26986 static int current_machopic_label_num;
26988 /* Given a symbol name and its associated stub, write out the
26989 definition of the stub. */
/* NOTE(review): this is Darwin (Mach-O) lazy-symbol stub emission;
   the MACHOPIC_ATT_STUB / pure-indirection #if branches are partly
   elided from this listing excerpt.  32-bit only (asserted below).  */
26992 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26994   unsigned int length;
26995   char *binder_name, *symbol_name, lazy_ptr_name[32];
26996   int label = ++current_machopic_label_num;
26998   /* For 64-bit we shouldn't get here. */
26999   gcc_assert (!TARGET_64BIT);
27001 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27002   symb = (*targetm.strip_name_encoding) (symb);
27004   length = strlen (stub);
27005   binder_name = XALLOCAVEC (char, length + 32);
27006   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27008   length = strlen (symb);
27009   symbol_name = XALLOCAVEC (char, length + 32);
27010   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27012   sprintf (lazy_ptr_name, "L%d$lz", label);
27015 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27017 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27019   fprintf (file, "%s:\n", stub);
27020   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC form: materialize PC in %eax, load the lazy pointer, jump.  */
27024 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27025 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27026 fprintf (file, "\tjmp\t*%%edx\n");
27029 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27031   fprintf (file, "%s:\n", binder_name);
27035 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27036 fprintf (file, "\tpushl\t%%eax\n");
27039 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27041   fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Emit the lazy pointer itself, initially pointing at the binder.  */
27043   switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27044   fprintf (file, "%s:\n", lazy_ptr_name);
27045   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27046   fprintf (file, "\t.long %s\n", binder_name);
/* TARGET_ASM_FILE_END hook for Darwin/x86: delegates to the generic
   Darwin end-of-file handling.  */
27050 darwin_x86_file_end (void)
27052   darwin_file_end ();
27055 #endif /* TARGET_MACHO */
27057 /* Order the registers for register allocator. */
/* Preference order: call-clobbered GPRs, call-saved GPRs, then x87
   before SSE when doing x87 FP math (and after SSE otherwise), SSE,
   REX SSE, MMX; remaining slots are zero-filled.  */
27060 x86_order_regs_for_local_alloc (void)
27065    /* First allocate the local general purpose registers. */
27066   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27067 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27068 reg_alloc_order [pos++] = i;
27070    /* Global general purpose registers. */
27071   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27072 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27073 reg_alloc_order [pos++] = i;
27075    /* x87 registers come first in case we are doing FP math
27077    if (!TARGET_SSE_MATH)
27078 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27079 reg_alloc_order [pos++] = i;
27081    /* SSE registers. */
27082   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27083 reg_alloc_order [pos++] = i;
27084   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27085 reg_alloc_order [pos++] = i;
27087    /* x87 registers. */
27088   if (TARGET_SSE_MATH)
27089 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27090 reg_alloc_order [pos++] = i;
27092   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27093 reg_alloc_order [pos++] = i;
27095    /* Initialize the rest of array as we do not allocate some registers
27097    while (pos < FIRST_PSEUDO_REGISTER)
27098 reg_alloc_order [pos++] = 0;
27101 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
27102 struct attribute_spec.handler. */
/* Rejects the attribute (with a warning) on non-function entities and on
   non-64-bit targets, and errors if ms_abi and sysv_abi are combined.
   NOTE(review): the returns between the visible branches are elided in
   this listing excerpt.  */
27104 ix86_handle_abi_attribute (tree *node, tree name,
27105 tree args ATTRIBUTE_UNUSED,
27106 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27108   if (TREE_CODE (*node) != FUNCTION_TYPE
27109 && TREE_CODE (*node) != METHOD_TYPE
27110 && TREE_CODE (*node) != FIELD_DECL
27111 && TREE_CODE (*node) != TYPE_DECL)
27113 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27115 *no_add_attrs = true;
27120 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27122 *no_add_attrs = true;
27126   /* Can combine regparm with all attributes but fastcall. */
27127   if (is_attribute_p ("ms_abi", name))
27129 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27131 error ("ms_abi and sysv_abi attributes are not compatible");
27136   else if (is_attribute_p ("sysv_abi", name))
27138 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27140 error ("ms_abi and sysv_abi attributes are not compatible");
27149 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27150 struct attribute_spec.handler. */
/* Accepts the attribute only on RECORD/UNION types (or TYPE_DECLs of
   them) and warns if the two mutually-exclusive attributes are mixed.  */
27152 ix86_handle_struct_attribute (tree *node, tree name,
27153 tree args ATTRIBUTE_UNUSED,
27154 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27157   if (DECL_P (*node))
27159 if (TREE_CODE (*node) == TYPE_DECL)
27160 type = &TREE_TYPE (*node);
27165   if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27166 || TREE_CODE (*type) == UNION_TYPE)))
27168 warning (OPT_Wattributes, "%qE attribute ignored",
27170 *no_add_attrs = true;
27173   else if ((is_attribute_p ("ms_struct", name)
27174 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27175 || ((is_attribute_p ("gcc_struct", name)
27176 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27178 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27180 *no_add_attrs = true;
/* TARGET_MS_BITFIELD_LAYOUT_P hook: use MS bit-field layout when the
   target default says so and the record is not marked gcc_struct, or
   when the record is explicitly marked ms_struct.  */
27187 ix86_ms_bitfield_layout_p (const_tree record_type)
27189   return (TARGET_MS_BITFIELD_LAYOUT &&
27190 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27191 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
27194 /* Returns an expression indicating where the this parameter is
27195 located on entry to the FUNCTION. */
/* 64-bit: `this' is in the first (or second, when returning an aggregate
   via hidden pointer) integer parameter register of the active ABI.
   32-bit: in a regparm/fastcall register when available, otherwise on
   the stack just above the return address.  NOTE(review): some interior
   listing lines (regno selection) are elided here.  */
27198 x86_this_parameter (tree function)
27200   tree type = TREE_TYPE (function);
27201   bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27206 const int *parm_regs;
27208 if (ix86_function_type_abi (type) == MS_ABI)
27209 parm_regs = x86_64_ms_abi_int_parameter_registers;
27211 parm_regs = x86_64_int_parameter_registers;
27212 return gen_rtx_REG (DImode, parm_regs[aggr]);
27215   nregs = ix86_function_regparm (type, function);
27217   if (nregs > 0 && !stdarg_p (type))
27221 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27222 regno = aggr ? DX_REG : CX_REG;
27230 return gen_rtx_MEM (SImode,
27231 plus_constant (stack_pointer_rtx, 4));
27234 return gen_rtx_REG (SImode, regno);
27237   return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27240 /* Determine whether x86_output_mi_thunk can succeed. */
/* 32-bit thunks need a scratch register for the vcall offset and for PIC
   GOT references; the elided lines return the corresponding results.  */
27243 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27244 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27245 HOST_WIDE_INT vcall_offset, const_tree function)
27247   /* 64-bit can handle anything. */
27251   /* For 32-bit, everything's fine if we have one free register. */
27252   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27255   /* Need a free register for vcall_offset. */
27259   /* Need a free register for GOT references. */
27260   if (flag_pic && !(*targetm.binds_local_p) (function))
27263   /* Otherwise ok. */
27267 /* Output the assembler code for a thunk function. THUNK_DECL is the
27268 declaration for the thunk function itself, FUNCTION is the decl for
27269 the target function. DELTA is an immediate constant offset to be
27270 added to THIS. If VCALL_OFFSET is nonzero, the word at
27271 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): TARGET_64BIT / TARGET_MACHO conditionals and several
   braces are elided in this listing excerpt; the visible code adjusts
   `this' by DELTA (and optionally by a vtable-loaded VCALL_OFFSET),
   then emits a tail jump to FUNCTION, using PIC/GOT or Mach-O stub
   forms where direct jumps are not possible.  */
27274 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
27275 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27276 HOST_WIDE_INT vcall_offset, tree function)
27279   rtx this_param = x86_this_parameter (function);
27282   /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27283 pull it in now and let DELTA benefit. */
27284   if (REG_P (this_param))
27285 this_reg = this_param;
27286   else if (vcall_offset)
27288 /* Put the this parameter into %eax. */
27289 xops[0] = this_param;
27290 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27291 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27294 this_reg = NULL_RTX;
27296   /* Adjust the this parameter by a fixed constant. */
27299 xops[0] = GEN_INT (delta);
27300 xops[1] = this_reg ? this_reg : this_param;
/* 64-bit: DELTA may not fit an immediate; stage it through %r10.  */
27303 if (!x86_64_general_operand (xops[0], DImode))
27305 tmp = gen_rtx_REG (DImode, R10_REG);
27307 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27309 xops[1] = this_param;
27311 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27314 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27317   /* Adjust the this parameter by a value stored in the vtable. */
27321 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit scratch: %ecx normally, %eax for fastcall (which uses ecx).  */
27324 int tmp_regno = CX_REG;
27325 if (lookup_attribute ("fastcall",
27326 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27327 tmp_regno = AX_REG;
27328 tmp = gen_rtx_REG (SImode, tmp_regno);
27331 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27333 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27335 /* Adjust the this parameter. */
27336 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27337 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27339 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27340 xops[0] = GEN_INT (vcall_offset);
27342 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27343 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27345 xops[1] = this_reg;
27346 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27349   /* If necessary, drop THIS back to its stack slot. */
27350   if (this_reg && this_reg != this_param)
27352 xops[0] = this_reg;
27353 xops[1] = this_param;
27354 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27357   xops[0] = XEXP (DECL_RTL (function), 0);
27360 if (!flag_pic || (*targetm.binds_local_p) (function))
27361 output_asm_insn ("jmp\t%P0", xops);
27362 /* All thunks should be in the same object as their target,
27363 and thus binds_local_p should be true. */
27364 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27365 gcc_unreachable ();
27368 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27369 tmp = gen_rtx_CONST (Pmode, tmp);
27370 tmp = gen_rtx_MEM (QImode, tmp);
27372 output_asm_insn ("jmp\t%A0", xops);
27377 if (!flag_pic || (*targetm.binds_local_p) (function))
27378 output_asm_insn ("jmp\t%P0", xops);
27383 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27384 tmp = (gen_rtx_SYMBOL_REF
27386 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27387 tmp = gen_rtx_MEM (QImode, tmp);
27389 output_asm_insn ("jmp\t%0", xops);
27392 #endif /* TARGET_MACHO */
/* 32-bit PIC: set up the GOT pointer in %ecx and jump through the GOT.  */
27394 tmp = gen_rtx_REG (SImode, CX_REG);
27395 output_set_got (tmp, NULL_RTX);
27398 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27399 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: emit standard preamble plus target-specific
   directives (.version, __fltused, Intel syntax selection).  */
27405 x86_file_start (void)
27407   default_file_start ();
27409   darwin_file_start ();
27411   if (X86_FILE_START_VERSION_DIRECTIVE)
27412 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27413   if (X86_FILE_START_FLTUSED)
27414 fputs ("\t.global\t__fltused\n", asm_out_file);
27415   if (ix86_asm_dialect == ASM_INTEL)
27416 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: on 32-bit non-align-double targets, cap the
   alignment of double/integer-class fields at 32 bits (classic i386 ABI).
   NOTE(review): the returns of COMPUTED on the other paths are elided in
   this listing excerpt.  */
27420 x86_field_alignment (tree field, int computed)
27422   enum machine_mode mode;
27423   tree type = TREE_TYPE (field);
27425   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27427   mode = TYPE_MODE (strip_array_types (type));
27428   if (mode == DFmode || mode == DCmode
27429 || GET_MODE_CLASS (mode) == MODE_INT
27430 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27431 return MIN (32, computed);
27435 /* Output assembler code to FILE to increment profiler label # LABELNO
27436 for profiling a function entry. */
/* Emits the mcount call: 64-bit uses RIP-relative forms (via GOTPCREL
   when PIC under the SysV ABI); 32-bit uses GOT-relative forms when PIC
   and absolute forms otherwise.  The #if/else scaffolding between the
   variants is elided in this listing excerpt.  */
27438 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27442 #ifndef NO_PROFILE_COUNTERS
27443 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27446 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27447 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27449 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27453 #ifndef NO_PROFILE_COUNTERS
27454 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27455 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27457 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
27461 #ifndef NO_PROFILE_COUNTERS
27462 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27463 PROFILE_COUNT_REGISTER);
27465 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27469 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27470 /* We don't have exact information about the insn sizes, but we may assume
27471 quite safely that we are informed about all 1 byte insns and memory
27472 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound on INSN's encoded size in bytes;
   used by ix86_avoid_jump_mispredicts.  NOTE(review): the return
   statements between the visible tests are elided in this excerpt.  */
27476 min_insn_size (rtx insn)
27480   if (!INSN_P (insn) || !active_insn_p (insn))
27483   /* Discard alignments we've emit and jump instructions. */
27484   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27485 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27487   if (JUMP_TABLE_DATA_P (insn))
27490   /* Important case - calls are always 5 bytes.
27491 It is common to have many calls in the row. */
27493 && symbolic_reference_mentioned_p (PATTERN (insn))
27494 && !SIBLING_CALL_P (insn))
27496   len = get_attr_length (insn);
27500   /* For normal instructions we rely on get_attr_length being exact,
27501 with a few exceptions. */
27502   if (!JUMP_P (insn))
27504 enum attr_type type = get_attr_type (insn);
27509 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27510 || asm_noperands (PATTERN (insn)) >= 0)
27517 /* Otherwise trust get_attr_length. */
27521 l = get_attr_length_address (insn);
27522 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27531 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Slides a window [START, INSN] over the insn stream tracking total byte
   size (NBYTES) and jump count (NJUMPS); when four jumps could land in
   one 16-byte window, pads before the last jump.  Some interior listing
   lines (gcc_asserts, continue statements) are elided in this excerpt.  */
27535 ix86_avoid_jump_mispredicts (void)
27537   rtx insn, start = get_insns ();
27538   int nbytes = 0, njumps = 0;
27541   /* Look for all minimal intervals of instructions containing 4 jumps.
27542 The intervals are bounded by START and INSN. NBYTES is the total
27543 size of instructions in the interval including INSN and not including
27544 START. When the NBYTES is smaller than 16 bytes, it is possible
27545 that the end of START and INSN ends up in the same 16byte page.
27547 The smallest offset in the page INSN can start is the case where START
27548 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
27549 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
27551   for (insn = start; insn; insn = NEXT_INSN (insn))
27555 if (LABEL_P (insn))
27557 int align = label_to_alignment (insn);
27558 int max_skip = label_to_max_skip (insn);
27562 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27563 already in the current 16 byte page, because otherwise
27564 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27565 bytes to reach 16 byte boundary. */
27567 || (align <= 3 && max_skip != (1 << align) - 1))
27570 fprintf (dump_file, "Label %i with max_skip %i\n",
27571 INSN_UID (insn), max_skip);
/* An alignment label resets the window: drain insns from START until
   the skipped bytes can no longer share a 16-byte page with INSN.  */
27574 while (nbytes + max_skip >= 16)
27576 start = NEXT_INSN (start);
27577 if ((JUMP_P (start)
27578 && GET_CODE (PATTERN (start)) != ADDR_VEC
27579 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27581 njumps--, isjump = 1;
27584 nbytes -= min_insn_size (start);
27590 min_size = min_insn_size (insn);
27591 nbytes += min_size;
27593 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27594 INSN_UID (insn), min_size);
27596 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27597 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Keep at most 3 jumps in the window by advancing START.  */
27605 start = NEXT_INSN (start);
27606 if ((JUMP_P (start)
27607 && GET_CODE (PATTERN (start)) != ADDR_VEC
27608 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27610 njumps--, isjump = 1;
27613 nbytes -= min_insn_size (start);
27615 gcc_assert (njumps >= 0);
27617 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27618 INSN_UID (start), INSN_UID (insn), nbytes);
27620 if (njumps == 3 && isjump && nbytes < 16)
27622 int padsize = 15 - nbytes + min_insn_size (insn);
27625 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27626 INSN_UID (insn), padsize);
27627 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27633 /* AMD Athlon works faster
27634 when RET is not destination of conditional jump or directly preceded
27635 by other jump instruction. We avoid the penalty by inserting NOP just
27636 before the RET instructions in such cases. */
27638 ix86_pad_returns (void)
   /* Every predecessor edge of the exit block ends in a candidate RET.
      NOTE(review): declarations of E/EI/PREV and several guards are elided
      in this view of the file.  */
27643 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27645 basic_block bb = e->src;
27646 rtx ret = BB_END (bb);
27648 bool replace = false;
   /* Skip blocks not ending in a plain RETURN, and cold blocks where the
      size cost of padding is not worth it.  */
27650 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27651 || optimize_bb_for_size_p (bb))
27653 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27654 if (active_insn_p (prev) || LABEL_P (prev))
   /* RET preceded directly by a label: it may be a branch target, so it
      is replaced if any non-fallthru predecessor edge reaches it.  */
27656 if (prev && LABEL_P (prev))
27661 FOR_EACH_EDGE (e, ei, bb->preds)
27662 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27663 && !(e->flags & EDGE_FALLTHRU))
27668 prev = prev_active_insn (ret);
27670 && ((JUMP_P (prev) && any_condjump_p (prev))
27673 /* Empty functions get branch mispredict even when the jump destination
27674 is not visible to us. */
27675 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
   /* Swap in the long return form; presumably the padded encoding from
      i386.md — TODO confirm gen_return_internal_long's expansion.  */
27680 emit_jump_insn_before (gen_return_internal_long (), ret);
27686 /* Implement machine specific optimizations. We implement padding of returns
27687 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
   /* NOTE(review): the function definition line is elided in this view;
      from context this is the machine-dependent reorg entry point that
      dispatches to the two passes above.  Both run only when optimizing
      for speed.  */
27691 if (optimize && optimize_function_for_speed_p (cfun))
27693 if (TARGET_PAD_RETURNS)
27694 ix86_pad_returns ();
   /* The jump-limit pass needs the assembler's max-skip alignment macro.  */
27695 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27696 if (TARGET_FOUR_JUMP_LIMIT)
27697 ix86_avoid_jump_mispredicts ();
27702 /* Return nonzero when QImode register that must be represented via REX prefix
27705 x86_extended_QIreg_mentioned_p (rtx insn)
27708 extract_insn_cached (insn);
   /* Scan recognized operands; any register operand numbered above BX
      needs a REX prefix in QImode.  NOTE(review): the QImode check and
      the return statements are elided in this view.  */
27709 for (i = 0; i < recog_data.n_operands; i++)
27710 if (REG_P (recog_data.operand[i])
27711 && REGNO (recog_data.operand[i]) > BX_REG)
27716 /* Return nonzero when P points to register encoded via REX prefix.
27717 Called via for_each_rtx. */
27719 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27721 unsigned int regno;
   /* NOTE(review): the REG_P guard between the declaration and this use
      is elided in this view of the file.  */
27724 regno = REGNO (*p);
   /* Both the extended integer (r8-r15) and extended SSE (xmm8-xmm15)
      register classes require a REX prefix.  */
27725 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27728 /* Return true when INSN mentions register that must be encoded using REX
   prefix.  Walks the whole pattern (or the bare rtx, if INSN is not an
   insn) with the per-subexpression callback above.  */
27731 x86_extended_reg_mentioned_p (rtx insn)
27733 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27734 extended_reg_mentioned_1, NULL);
27737 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27738 optabs would emit if we didn't have TFmode patterns. */
27741 x86_emit_floatuns (rtx operands[2])
27743 rtx neglab, donelab, i0, i1, f0, in, out;
27744 enum machine_mode mode, inmode;
27746 inmode = GET_MODE (operands[1]);
27747 gcc_assert (inmode == SImode || inmode == DImode);
27750 in = force_reg (inmode, operands[1]);
27751 mode = GET_MODE (out);
27752 neglab = gen_label_rtx ();
27753 donelab = gen_label_rtx ();
27754 f0 = gen_reg_rtx (mode);
   /* If the value is non-negative as a signed number, a plain signed
      conversion is correct.  */
27756 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27758 expand_float (out, in, 0);
27760 emit_jump_insn (gen_jump (donelab));
27763 emit_label (neglab);
   /* High bit set: halve the value (i0 = in >> 1), OR back the dropped
      low bit so rounding of odd values is preserved, convert signed,
      then double the result (f0 + f0).  */
27765 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27767 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27769 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27771 expand_float (f0, i0, 0);
27773 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27775 emit_label (donelab);
27778 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27779 with all elements equal to VAR. Return true if successful. */
   /* NOTE(review): this view is elided — the mode switch cases and
      several guards between the visible lines are not shown.  */
27782 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27783 rtx target, rtx val)
27785 enum machine_mode hmode, smode, wsmode, wvmode;
   /* Simple case: a direct VEC_DUPLICATE of the scalar.  */
27800 val = force_reg (GET_MODE_INNER (mode), val);
27801 x = gen_rtx_VEC_DUPLICATE (mode, val);
27802 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27808 if (TARGET_SSE || TARGET_3DNOW_A)
27810 val = gen_lowpart (SImode, val);
27811 x = gen_rtx_TRUNCATE (HImode, val);
27812 x = gen_rtx_VEC_DUPLICATE (mode, x);
27813 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27835 /* Extend HImode to SImode using a paradoxical SUBREG. */
27836 tmp1 = gen_reg_rtx (SImode);
27837 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27838 /* Insert the SImode value as low element of V4SImode vector. */
27839 tmp2 = gen_reg_rtx (V4SImode);
27840 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27841 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27842 CONST0_RTX (V4SImode),
27844 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27845 /* Cast the V4SImode vector back to a V8HImode vector. */
27846 tmp1 = gen_reg_rtx (V8HImode);
27847 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27848 /* Duplicate the low short through the whole low SImode word. */
27849 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27850 /* Cast the V8HImode vector back to a V4SImode vector. */
27851 tmp2 = gen_reg_rtx (V4SImode);
27852 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27853 /* Replicate the low element of the V4SImode vector. */
27854 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27855 /* Cast the V2SImode back to V8HImode, and store in target. */
27856 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27867 /* Extend QImode to SImode using a paradoxical SUBREG. */
27868 tmp1 = gen_reg_rtx (SImode);
27869 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27870 /* Insert the SImode value as low element of V4SImode vector. */
27871 tmp2 = gen_reg_rtx (V4SImode);
27872 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27873 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27874 CONST0_RTX (V4SImode),
27876 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27877 /* Cast the V4SImode vector back to a V16QImode vector. */
27878 tmp1 = gen_reg_rtx (V16QImode);
27879 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27880 /* Duplicate the low byte through the whole low SImode word. */
27881 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27882 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27883 /* Cast the V16QImode vector back to a V4SImode vector. */
27884 tmp2 = gen_reg_rtx (V4SImode);
27885 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27886 /* Replicate the low element of the V4SImode vector. */
27887 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27888 /* Cast the V2SImode back to V16QImode, and store in target. */
27889 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27897 /* Replicate the value once into the next wider mode and recurse. */
27898 val = convert_modes (wsmode, smode, val, true);
27899 x = expand_simple_binop (wsmode, ASHIFT, val,
27900 GEN_INT (GET_MODE_BITSIZE (smode)),
27901 NULL_RTX, 1, OPTAB_LIB_WIDEN);
   /* val now holds two copies of the scalar packed into the wider mode.  */
27902 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27904 x = gen_reg_rtx (wvmode);
27905 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27906 gcc_unreachable ();
27907 emit_move_insn (target, gen_lowpart (mode, x));
   /* Wide (e.g. 256-bit) modes: duplicate into a half-width vector and
      concatenate it with itself.  */
27930 rtx tmp = gen_reg_rtx (hmode);
27931 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27932 emit_insn (gen_rtx_SET (VOIDmode, target,
27933 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27942 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27943 whose ONE_VAR element is VAR, and other elements are zero. Return true
   NOTE(review): elided view — the mode switch labels and several guard
   lines between the visible lines are not shown.  */
27947 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27948 rtx target, rtx var, int one_var)
27950 enum machine_mode vsimode;
27953 bool use_vector_set = false;
27958 /* For SSE4.1, we normally use vector set. But if the second
27959 element is zero and inter-unit moves are OK, we use movq
27961 use_vector_set = (TARGET_64BIT
27963 && !(TARGET_INTER_UNIT_MOVES
27969 use_vector_set = TARGET_SSE4_1;
27972 use_vector_set = TARGET_SSE2;
27975 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27982 use_vector_set = TARGET_AVX;
27985 /* Use ix86_expand_vector_set in 64bit mode only. */
27986 use_vector_set = TARGET_AVX && TARGET_64BIT;
   /* Preferred path: zero the whole vector, then insert VAR at ONE_VAR.  */
27992 if (use_vector_set)
27994 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27995 var = force_reg (GET_MODE_INNER (mode), var);
27996 ix86_expand_vector_set (mmx_ok, target, var, one_var);
   /* Two-element vectors: VEC_CONCAT the scalar with a zero.  */
28012 var = force_reg (GET_MODE_INNER (mode), var);
28013 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28014 emit_insn (gen_rtx_SET (VOIDmode, target, x));
   /* Use a fresh pseudo unless TARGET already is one, since the value is
      built up across several insns.  */
28019 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28020 new_target = gen_reg_rtx (mode);
28022 new_target = target;
28023 var = force_reg (GET_MODE_INNER (mode), var);
28024 x = gen_rtx_VEC_DUPLICATE (mode, var);
28025 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28026 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28029 /* We need to shuffle the value to the correct position, so
28030 create a new pseudo to store the intermediate result. */
28032 /* With SSE2, we can use the integer shuffle insns. */
28033 if (mode != V4SFmode && TARGET_SSE2)
28035 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28037 GEN_INT (one_var == 1 ? 0 : 1),
28038 GEN_INT (one_var == 2 ? 0 : 1),
28039 GEN_INT (one_var == 3 ? 0 : 1)));
28040 if (target != new_target)
28041 emit_move_insn (target, new_target);
28045 /* Otherwise convert the intermediate result to V4SFmode and
28046 use the SSE1 shuffle instructions. */
28047 if (mode != V4SFmode)
28049 tmp = gen_reg_rtx (V4SFmode);
28050 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28055 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28057 GEN_INT (one_var == 1 ? 0 : 1),
28058 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28059 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28061 if (mode != V4SFmode)
28062 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28063 else if (tmp != target)
28064 emit_move_insn (target, tmp);
28066 else if (target != new_target)
28067 emit_move_insn (target, new_target);
28072 vsimode = V4SImode;
28078 vsimode = V2SImode;
28084 /* Zero extend the variable element to SImode and recurse. */
28085 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28087 x = gen_reg_rtx (vsimode);
28088 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28090 gcc_unreachable ();
28092 emit_move_insn (target, gen_lowpart (mode, x));
28100 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28101 consisting of the values in VALS. It is known that all elements
28102 except ONE_VAR are constants. Return true if successful. */
28105 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28106 rtx target, rtx vals, int one_var)
28108 rtx var = XVECEXP (vals, 0, one_var);
28109 enum machine_mode wmode;
   /* Build the constant part: VALS with the variable slot zeroed.  */
28112 const_vec = copy_rtx (vals);
28113 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28114 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28122 /* For the two element vectors, it's just as easy to use
28123 the general case. */
28127 /* Use ix86_expand_vector_set in 64bit mode only. */
28150 /* There's no way to set one QImode entry easily. Combine
28151 the variable value with its adjacent constant value, and
28152 promote to an HImode set. */
28153 x = XVECEXP (vals, 0, one_var ^ 1)
   /* NOTE(review): the branch on whether ONE_VAR is the low or high byte
      of the HImode pair is elided in this view; one arm shifts VAR left
      by 8, the other shifts the constant neighbor.  */
28156 var = convert_modes (HImode, QImode, var, true);
28157 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28158 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28159 x = GEN_INT (INTVAL (x) & 0xff);
28163 var = convert_modes (HImode, QImode, var, true);
28164 x = gen_int_mode (INTVAL (x) << 8, HImode);
28166 if (x != const0_rtx)
28167 var = expand_simple_binop (HImode, IOR, var, x, var,
28168 1, OPTAB_LIB_WIDEN);
28170 x = gen_reg_rtx (wmode);
28171 emit_move_insn (x, gen_lowpart (wmode, const_vec));
   /* ONE_VAR >> 1 converts the QImode index into an HImode slot index.  */
28172 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28174 emit_move_insn (target, gen_lowpart (mode, x));
   /* Default: load the constant vector, then overwrite the one slot.  */
28181 emit_move_insn (target, const_vec);
28182 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28186 /* A subroutine of ix86_expand_vector_init_general. Use vector
28187 concatenate to handle the most general case: all values variable,
28188 and none identical. */
   /* NOTE(review): elided view — the switch over N and the per-mode CMODE
      assignments between the visible lines are not shown.  */
28191 ix86_expand_vector_init_concat (enum machine_mode mode,
28192 rtx target, rtx *ops, int n)
28194 enum machine_mode cmode, hmode = VOIDmode;
28195 rtx first[8], second[4];
28235 gcc_unreachable ();
   /* N == 2: concatenate the two operands directly.  */
28238 if (!register_operand (ops[1], cmode))
28239 ops[1] = force_reg (cmode, ops[1]);
28240 if (!register_operand (ops[0], cmode))
28241 ops[0] = force_reg (cmode, ops[0]);
28242 emit_insn (gen_rtx_SET (VOIDmode, target,
28243 gen_rtx_VEC_CONCAT (mode, ops[0],
28263 gcc_unreachable ();
28279 gcc_unreachable ();
28284 /* FIXME: We process inputs backward to help RA. PR 36222. */
   /* Pair up adjacent operands into half-width vectors, then recurse on
      the halves until a single concat produces TARGET.  */
28287 for (; i > 0; i -= 2, j--)
28289 first[j] = gen_reg_rtx (cmode);
28290 v = gen_rtvec (2, ops[i - 1], ops[i]);
28291 ix86_expand_vector_init (false, first[j],
28292 gen_rtx_PARALLEL (cmode, v));
28298 gcc_assert (hmode != VOIDmode);
28299 for (i = j = 0; i < n; i += 2, j++)
28301 second[j] = gen_reg_rtx (hmode);
28302 ix86_expand_vector_init_concat (hmode, second [j],
28306 ix86_expand_vector_init_concat (mode, target, second, n);
28309 ix86_expand_vector_init_concat (mode, target, first, n);
28313 gcc_unreachable ();
28317 /* A subroutine of ix86_expand_vector_init_general. Use vector
28318 interleave to handle the most general case: all values variable,
28319 and none identical. */
28322 ix86_expand_vector_init_interleave (enum machine_mode mode,
28323 rtx target, rtx *ops, int n)
28325 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28328 rtx (*gen_load_even) (rtx, rtx, rtx);
28329 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28330 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
   /* Select the insert and unpack-low generators for V8HI vs. V16QI;
      the *_imode chain records the widening steps of the interleave.  */
28335 gen_load_even = gen_vec_setv8hi;
28336 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28337 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28338 inner_mode = HImode;
28339 first_imode = V4SImode;
28340 second_imode = V2DImode;
28341 third_imode = VOIDmode;
28344 gen_load_even = gen_vec_setv16qi;
28345 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28346 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28347 inner_mode = QImode;
28348 first_imode = V8HImode;
28349 second_imode = V4SImode;
28350 third_imode = V2DImode;
28353 gcc_unreachable ();
   /* Build N vectors, each holding one odd/even pair of input elements.  */
28356 for (i = 0; i < n; i++)
28358 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
28359 op0 = gen_reg_rtx (SImode);
28360 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28362 /* Insert the SImode value as low element of V4SImode vector. */
28363 op1 = gen_reg_rtx (V4SImode);
28364 op0 = gen_rtx_VEC_MERGE (V4SImode,
28365 gen_rtx_VEC_DUPLICATE (V4SImode,
28367 CONST0_RTX (V4SImode),
28369 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28371 /* Cast the V4SImode vector back to a vector in orignal mode. */
28372 op0 = gen_reg_rtx (mode);
28373 emit_move_insn (op0, gen_lowpart (mode, op1));
28375 /* Load even elements into the second positon. */
28376 emit_insn ((*gen_load_even) (op0,
28377 force_reg (inner_mode,
28381 /* Cast vector to FIRST_IMODE vector. */
28382 ops[i] = gen_reg_rtx (first_imode);
28383 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28386 /* Interleave low FIRST_IMODE vectors. */
28387 for (i = j = 0; i < n; i += 2, j++)
28389 op0 = gen_reg_rtx (first_imode);
28390 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28392 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28393 ops[j] = gen_reg_rtx (second_imode);
28394 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28397 /* Interleave low SECOND_IMODE vectors. */
28398 switch (second_imode)
28401 for (i = j = 0; i < n / 2; i += 2, j++)
28403 op0 = gen_reg_rtx (second_imode);
28404 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28407 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28409 ops[j] = gen_reg_rtx (third_imode);
28410 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
   /* After the V4SI round, one more V2DI interleave finishes the merge.  */
28412 second_imode = V2DImode;
28413 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28417 op0 = gen_reg_rtx (second_imode);
28418 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28421 /* Cast the SECOND_IMODE vector back to a vector on original
28423 emit_insn (gen_rtx_SET (VOIDmode, target,
28424 gen_lowpart (mode, op0)));
28428 gcc_unreachable ();
28432 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28433 all values variable, and none identical. */
   /* NOTE(review): elided view — the switch over MODE and several case
      labels between the visible lines are not shown.  */
28436 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28437 rtx target, rtx vals)
28439 rtx ops[32], op0, op1;
28440 enum machine_mode half_mode = VOIDmode;
28447 if (!mmx_ok && !TARGET_SSE)
   /* Wide-element modes: build by recursive concatenation.  */
28459 n = GET_MODE_NUNITS (mode);
28460 for (i = 0; i < n; i++)
28461 ops[i] = XVECEXP (vals, 0, i);
28462 ix86_expand_vector_init_concat (mode, target, ops, n);
28466 half_mode = V16QImode;
28470 half_mode = V8HImode;
   /* 256-bit byte/short vectors: interleave each 128-bit half, then
      concatenate the halves.  */
28474 n = GET_MODE_NUNITS (mode);
28475 for (i = 0; i < n; i++)
28476 ops[i] = XVECEXP (vals, 0, i);
28477 op0 = gen_reg_rtx (half_mode);
28478 op1 = gen_reg_rtx (half_mode);
28479 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28481 ix86_expand_vector_init_interleave (half_mode, op1,
28482 &ops [n >> 1], n >> 2);
28483 emit_insn (gen_rtx_SET (VOIDmode, target,
28484 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28488 if (!TARGET_SSE4_1)
28496 /* Don't use ix86_expand_vector_init_interleave if we can't
28497 move from GPR to SSE register directly. */
28498 if (!TARGET_INTER_UNIT_MOVES)
28501 n = GET_MODE_NUNITS (mode);
28502 for (i = 0; i < n; i++)
28503 ops[i] = XVECEXP (vals, 0, i);
28504 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28512 gcc_unreachable ();
   /* Fallback: assemble word-sized chunks with shift/IOR in GPRs, then
      move the words into the vector register.  */
28516 int i, j, n_elts, n_words, n_elt_per_word;
28517 enum machine_mode inner_mode;
28518 rtx words[4], shift;
28520 inner_mode = GET_MODE_INNER (mode);
28521 n_elts = GET_MODE_NUNITS (mode);
28522 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28523 n_elt_per_word = n_elts / n_words;
28524 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28526 for (i = 0; i < n_words; ++i)
28528 rtx word = NULL_RTX;
   /* Pack elements into the word from most- to least-significant slot.  */
28530 for (j = 0; j < n_elt_per_word; ++j)
28532 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28533 elt = convert_modes (word_mode, inner_mode, elt, true);
28539 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28540 word, 1, OPTAB_LIB_WIDEN);
28541 word = expand_simple_binop (word_mode, IOR, word, elt,
28542 word, 1, OPTAB_LIB_WIDEN);
28550 emit_move_insn (target, gen_lowpart (mode, words[0]));
28551 else if (n_words == 2)
   /* The clobber tells the RA the full register is being written before
      the two half writes, avoiding a false dependence.  */
28553 rtx tmp = gen_reg_rtx (mode);
28554 emit_clobber (tmp);
28555 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28556 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28557 emit_move_insn (target, tmp);
28559 else if (n_words == 4)
28561 rtx tmp = gen_reg_rtx (V4SImode);
28562 gcc_assert (word_mode == SImode);
28563 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28564 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28565 emit_move_insn (target, gen_lowpart (mode, tmp));
28568 gcc_unreachable ();
28572 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28573 instructions unless MMX_OK is true. */
28576 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28578 enum machine_mode mode = GET_MODE (target);
28579 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28580 int n_elts = GET_MODE_NUNITS (mode);
28581 int n_var = 0, one_var = -1;
28582 bool all_same = true, all_const_zero = true;
   /* One pass over VALS classifying it: count variable elements, record
      the last variable index, and track the all-same / all-zero cases.  */
28586 for (i = 0; i < n_elts; ++i)
28588 x = XVECEXP (vals, 0, i);
28589 if (!(CONST_INT_P (x)
28590 || GET_CODE (x) == CONST_DOUBLE
28591 || GET_CODE (x) == CONST_FIXED))
28592 n_var++, one_var = i;
28593 else if (x != CONST0_RTX (inner_mode))
28594 all_const_zero = false;
28595 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28599 /* Constants are best loaded from the constant pool. */
28602 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28606 /* If all values are identical, broadcast the value. */
28608 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28609 XVECEXP (vals, 0, 0)))
28612 /* Values where only one field is non-constant are best loaded from
28613 the pool and overwritten via move later. */
28617 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28618 XVECEXP (vals, 0, one_var),
28622 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
   /* Everything else falls through to the fully general expander.  */
28626 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET, using MMX insns
   only if MMX_OK.  NOTE(review): elided view — the switch over MODE and
   many case labels between the visible lines are not shown.  */
28630 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28632 enum machine_mode mode = GET_MODE (target);
28633 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28634 enum machine_mode half_mode;
28635 bool use_vec_merge = false;
   /* Generator tables for 256-bit modes, indexed [mode-row][lo/hi half].  */
28637 static rtx (*gen_extract[6][2]) (rtx, rtx)
28639 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28640 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28641 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28642 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28643 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28644 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28646 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28648 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28649 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28650 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28651 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28652 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28653 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
   /* Two-element case: extract the untouched element, then rebuild the
      vector with a VEC_CONCAT in the right order.  */
28663 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28664 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28666 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28668 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28669 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28675 use_vec_merge = TARGET_SSE4_1;
28683 /* For the two element vectors, we implement a VEC_CONCAT with
28684 the extraction of the other element. */
28686 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28687 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28690 op0 = val, op1 = tmp;
28692 op0 = tmp, op1 = val;
28694 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28695 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28700 use_vec_merge = TARGET_SSE4_1;
28707 use_vec_merge = true;
   /* V4SF without SSE4.1: position-specific unpck/shufps sequences.
      The letter comments trace the element layout at each step.  */
28711 /* tmp = target = A B C D */
28712 tmp = copy_to_reg (target);
28713 /* target = A A B B */
28714 emit_insn (gen_sse_unpcklps (target, target, target));
28715 /* target = X A B B */
28716 ix86_expand_vector_set (false, target, val, 0);
28717 /* target = A X C D */
28718 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28719 GEN_INT (1), GEN_INT (0),
28720 GEN_INT (2+4), GEN_INT (3+4)));
28724 /* tmp = target = A B C D */
28725 tmp = copy_to_reg (target);
28726 /* tmp = X B C D */
28727 ix86_expand_vector_set (false, tmp, val, 0);
28728 /* target = A B X D */
28729 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28730 GEN_INT (0), GEN_INT (1),
28731 GEN_INT (0+4), GEN_INT (3+4)));
28735 /* tmp = target = A B C D */
28736 tmp = copy_to_reg (target);
28737 /* tmp = X B C D */
28738 ix86_expand_vector_set (false, tmp, val, 0);
28739 /* target = A B X D */
28740 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28741 GEN_INT (0), GEN_INT (1),
28742 GEN_INT (2+4), GEN_INT (0+4)));
28746 gcc_unreachable ();
28751 use_vec_merge = TARGET_SSE4_1;
28755 /* Element 0 handled by vec_merge below. */
28758 use_vec_merge = true;
28764 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28765 store into element 0, then shuffle them back. */
   /* ORDER is a self-inverse permutation, so the same pshufd undoes it.  */
28769 order[0] = GEN_INT (elt);
28770 order[1] = const1_rtx;
28771 order[2] = const2_rtx;
28772 order[3] = GEN_INT (3);
28773 order[elt] = const0_rtx;
28775 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28776 order[1], order[2], order[3]));
28778 ix86_expand_vector_set (false, target, val, 0);
28780 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28781 order[1], order[2], order[3]));
28785 /* For SSE1, we have to reuse the V4SF code. */
28786 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28787 gen_lowpart (SFmode, val), elt);
28792 use_vec_merge = TARGET_SSE2;
28795 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28799 use_vec_merge = TARGET_SSE4_1;
28806 half_mode = V16QImode;
28812 half_mode = V8HImode;
28818 half_mode = V4SImode;
28824 half_mode = V2DImode;
28830 half_mode = V4SFmode;
28836 half_mode = V2DFmode;
28842 /* Compute offset. */
28846 gcc_assert (i <= 1);
28848 /* Extract the half. */
28849 tmp = gen_reg_rtx (half_mode);
28850 emit_insn ((*gen_extract[j][i]) (tmp, target));
28852 /* Put val in tmp at elt. */
28853 ix86_expand_vector_set (false, tmp, val, elt);
   /* Write the modified 128-bit half back into the 256-bit TARGET.  */
28856 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
   /* Single vec_merge path: duplicate VAL and merge it in at bit ELT.  */
28865 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28866 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28867 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
   /* Last resort: bounce the vector through a stack slot and store the
      scalar element directly in memory.  */
28871 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28873 emit_move_insn (mem, target);
28875 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28876 emit_move_insn (tmp, val);
28878 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET, using MMX insns
   only if MMX_OK.  NOTE(review): elided view — the switch over MODE and
   several case labels between the visible lines are not shown.  */
28883 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28885 enum machine_mode mode = GET_MODE (vec);
28886 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28887 bool use_vec_extr = false;
28900 use_vec_extr = true;
28904 use_vec_extr = TARGET_SSE4_1;
   /* V4SF without SSE4.1: shuffle the wanted element to slot 0 first.  */
28916 tmp = gen_reg_rtx (mode);
28917 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28918 GEN_INT (elt), GEN_INT (elt),
28919 GEN_INT (elt+4), GEN_INT (elt+4)));
28923 tmp = gen_reg_rtx (mode);
28924 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28928 gcc_unreachable ();
28931 use_vec_extr = true;
28936 use_vec_extr = TARGET_SSE4_1;
   /* V4SI with SSE2: pshufd broadcasts the wanted element to slot 0.  */
28950 tmp = gen_reg_rtx (mode);
28951 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28952 GEN_INT (elt), GEN_INT (elt),
28953 GEN_INT (elt), GEN_INT (elt)));
28957 tmp = gen_reg_rtx (mode);
28958 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28962 gcc_unreachable ();
28965 use_vec_extr = true;
28970 /* For SSE1, we have to reuse the V4SF code. */
28971 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28972 gen_lowpart (V4SFmode, vec), elt);
28978 use_vec_extr = TARGET_SSE2;
28981 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28985 use_vec_extr = TARGET_SSE4_1;
28989 /* ??? Could extract the appropriate HImode element and shift. */
   /* Direct VEC_SELECT extraction path.  */
28996 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28997 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28999 /* Let the rtl optimizers know about the zero extension performed. */
29000 if (inner_mode == QImode || inner_mode == HImode)
29002 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29003 target = gen_lowpart (SImode, target);
29006 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
   /* Fallback: spill the vector to a stack slot and load the element.  */
29010 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29012 emit_move_insn (mem, vec);
29014 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29015 emit_move_insn (target, tmp);
29019 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29020 pattern to reduce; DEST is the destination; IN is the input vector. */
29023 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29025 rtx tmp1, tmp2, tmp3;
29027 tmp1 = gen_reg_rtx (V4SFmode);
29028 tmp2 = gen_reg_rtx (V4SFmode);
29029 tmp3 = gen_reg_rtx (V4SFmode);
   /* Step 1: combine the high pair with the low pair (movhlps brings
      elements 2,3 down into slots 0,1).  */
29031 emit_insn (gen_sse_movhlps (tmp1, in, in));
29032 emit_insn (fn (tmp2, tmp1, in));
   /* Step 2: combine the two remaining partial results (shufps moves
      element 1 of tmp2 into slot 0 of tmp3).  */
29034 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29035 GEN_INT (1), GEN_INT (1),
29036 GEN_INT (1+4), GEN_INT (1+4)));
29037 emit_insn (fn (dest, tmp2, tmp3));
29040 /* Target hook for scalar_mode_supported_p. */
29042 ix86_scalar_mode_supported_p (enum machine_mode mode)
   /* Decimal float and TFmode get explicit answers; everything else
      defers to the generic hook.  NOTE(review): the return values for
      the first two cases are elided in this view.  */
29044 if (DECIMAL_FLOAT_MODE_P (mode))
29046 else if (mode == TFmode)
29049 return default_scalar_mode_supported_p (mode);
29052 /* Implements target hook vector_mode_supported_p. */
29054 ix86_vector_mode_supported_p (enum machine_mode mode)
   /* A vector mode is supported when any enabled ISA extension covers it;
      checks go from SSE up through AVX, then the MMX/3DNow! modes.
      NOTE(review): the return statements are elided in this view.  */
29056 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29058 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29060 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29062 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29064 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29069 /* Target hook for c_mode_for_suffix. */
   /* Maps a C literal suffix character to a machine mode.
      NOTE(review): the entire function body is elided in this view.  */
29070 static enum machine_mode
29071 ix86_c_mode_for_suffix (char suffix)
29081 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29083 We do this in the new i386 backend to maintain source compatibility
29084 with the old cc0-based compiler. */
29087 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29088 tree inputs ATTRIBUTE_UNUSED,
   /* Every asm implicitly clobbers the flags and the x87 status word.
      NOTE(review): the incoming CLOBBERS parameter line and the return
      are elided in this view.  */
29091 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29093 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29098 /* Implements target vector targetm.asm.encode_section_info. This
29099 is not used by netware. */
29101 static void ATTRIBUTE_UNUSED
29102 ix86_encode_section_info (tree decl, rtx rtl, int first)
29104 default_encode_section_info (decl, rtl, first);
   /* Variables placed in the large data section need far addressing;
      mark the symbol so addressing code can tell.  */
29106 if (TREE_CODE (decl) == VAR_DECL
29107 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29108 && ix86_in_large_data_p (decl))
29109 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29112 /* Worker function for REVERSE_CONDITION. */
   /* FP compares (CCFPmode/CCFPUmode) must account for unordered operands
      when reversed; integer compares use the plain reversal.  */
29115 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29117 return (mode != CCFPmode && mode != CCFPUmode
29118 ? reverse_condition (code)
29119 : reverse_condition_maybe_unordered (code));
29122 /* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0]; returns the assembler template string.  */
29126 output_387_reg_move (rtx insn, rtx *operands)
29128 if (REG_P (operands[0]))
   /* Source register dies here: use the popping forms to keep the x87
      stack balanced.  */
29130 if (REG_P (operands[1])
29131 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29133 if (REGNO (operands[0]) == FIRST_STACK_REG)
29134 return output_387_ffreep (operands, 0);
29135 return "fstp\t%y0";
29137 if (STACK_TOP_P (operands[0]))
29138 return "fld%Z1\t%y1";
29141 else if (MEM_P (operands[0]))
29143 gcc_assert (REG_P (operands[1]));
29144 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29145 return "fstp%Z0\t%y0";
29148 /* There is no non-popping store to memory for XFmode.
29149 So if we need one, follow the store with a load. */
29150 if (GET_MODE (operands[0]) == XFmode)
29151 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29153 return "fst%Z0\t%y0";
29160 /* Output code to perform a conditional jump to LABEL, if C2 flag in
29161 FP status register is set. */
29164 ix86_emit_fp_unordered_jump (rtx label)
29166 rtx reg = gen_reg_rtx (HImode);
   /* Read the x87 status word into REG.  */
29169 emit_insn (gen_x86_fnstsw_1 (reg));
   /* Fast path: SAHF copies the status flags into EFLAGS directly.  */
29171 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29173 emit_insn (gen_x86_sahf_1 (reg));
29175 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29176 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
   /* Otherwise test the C2 bit (0x04) of the status word explicitly.  */
29180 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29182 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29183 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29186 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29187 gen_rtx_LABEL_REF (VOIDmode, label),
29189 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29191 emit_jump_insn (temp);
   /* The unordered case is expected to be rare.  */
29192 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29195 /* Output code to perform a log1p XFmode calculation.  */
/* Computes OP0 = log1p (OP1) on the i387.  fyl2xp1 is only accurate
   for |x| < 1 - sqrt(2)/2 (~0.2929); for larger magnitudes we fall
   back to fyl2x on (1 + x).  Both paths scale by ln(2) (fldln2) to
   convert from log2 to natural log.  */
29197 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29199 rtx label1 = gen_label_rtx ();
29200 rtx label2 = gen_label_rtx ();
29202 rtx tmp = gen_reg_rtx (XFmode);
29203 rtx tmp2 = gen_reg_rtx (XFmode);
/* Branch to the fallback path when |op1| >= 1 - sqrt(2)/2.  */
29206 emit_insn (gen_absxf2 (tmp, op1));
29207 test = gen_rtx_GE (VOIDmode, tmp,
29208 CONST_DOUBLE_FROM_REAL_VALUE (
29209 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29211 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
/* Fast path: op0 = ln(2) * log2(op1 + 1) via fyl2xp1.  */
29213 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29214 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29215 emit_jump (label2);
/* Fallback: op0 = ln(2) * log2(1 + op1) via fyl2x.  */
29217 emit_label (label1);
29218 emit_move_insn (tmp, CONST1_RTX (XFmode));
29219 emit_insn (gen_addxf3 (tmp, op1, tmp));
29220 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29221 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29223 emit_label (label2);
29226 /* Output code to perform a Newton-Raphson approximation of a single precision
29227 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
/* Emits RES = A / B as A * rcp(B) refined by one Newton-Raphson step.
   MODE may be SFmode or a single-precision vector mode; the constant
   2.0 is broadcast for vector modes.  The hardware rcpps/rcpss
   estimate has ~12 bits of precision, one iteration roughly doubles
   that.  */
29229 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29231 rtx x0, x1, e0, e1, two;
29233 x0 = gen_reg_rtx (mode);
29234 e0 = gen_reg_rtx (mode);
29235 e1 = gen_reg_rtx (mode);
29236 x1 = gen_reg_rtx (mode);
29238 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29240 if (VECTOR_MODE_P (mode))
29241 two = ix86_build_const_vector (SFmode, true, two);
29243 two = force_reg (mode, two);
29245 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
29247 /* x0 = rcp(b) estimate */
29248 emit_insn (gen_rtx_SET (VOIDmode, x0,
29249 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = x0 * b */
29252 emit_insn (gen_rtx_SET (VOIDmode, e0,
29253 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 */
29255 emit_insn (gen_rtx_SET (VOIDmode, e1,
29256 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1  (refined reciprocal)  */
29258 emit_insn (gen_rtx_SET (VOIDmode, x1,
29259 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 */
29261 emit_insn (gen_rtx_SET (VOIDmode, res,
29262 gen_rtx_MULT (mode, a, x1)));
29265 /* Output code to perform a Newton-Raphson approximation of a
29266 single precision floating point [reciprocal] square root.  */
/* Emits RES = sqrt(A) or rsqrt(A) from the rsqrtss/rsqrtps estimate
   refined by one Newton-Raphson step.  MODE may be SFmode or a
   single-precision vector mode.  NOTE(review): this excerpt elides
   some lines (including, presumably, the `recip' parameter and the
   if/else that selects between the two e3 assignments below) —
   confirm against the full source.  */
29268 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29271 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29274 x0 = gen_reg_rtx (mode);
29275 e0 = gen_reg_rtx (mode);
29276 e1 = gen_reg_rtx (mode);
29277 e2 = gen_reg_rtx (mode);
29278 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5.  */
29280 real_from_integer (&r, VOIDmode, -3, -1, 0);
29281 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29283 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29284 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29286 if (VECTOR_MODE_P (mode))
29288 mthree = ix86_build_const_vector (SFmode, true, mthree);
29289 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29292 /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29293 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29295 /* x0 = rsqrt(a) estimate */
29296 emit_insn (gen_rtx_SET (VOIDmode, x0,
29297 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29300 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
29305 zero = gen_reg_rtx (mode);
29306 mask = gen_reg_rtx (mode);
/* mask = (a != 0) ? all-ones : 0; AND it into x0 so that x0 is 0
   (not +inf) when a == 0.  */
29308 zero = force_reg (mode, CONST0_RTX(mode));
29309 emit_insn (gen_rtx_SET (VOIDmode, mask,
29310 gen_rtx_NE (mode, zero, a)));
29312 emit_insn (gen_rtx_SET (VOIDmode, x0,
29313 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a */
29317 emit_insn (gen_rtx_SET (VOIDmode, e0,
29318 gen_rtx_MULT (mode, x0, a)));
/* e1 = e0 * x0 */
29320 emit_insn (gen_rtx_SET (VOIDmode, e1,
29321 gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3.0 */
29324 mthree = force_reg (mode, mthree);
29325 emit_insn (gen_rtx_SET (VOIDmode, e2,
29326 gen_rtx_PLUS (mode, e1, mthree)));
29328 mhalf = force_reg (mode, mhalf);
29330 /* e3 = -.5 * x0 */
29331 emit_insn (gen_rtx_SET (VOIDmode, e3,
29332 gen_rtx_MULT (mode, x0, mhalf)));
29334 /* e3 = -.5 * e0 */
29335 emit_insn (gen_rtx_SET (VOIDmode, e3,
29336 gen_rtx_MULT (mode, e0, mhalf)));
29337 /* ret = e2 * e3 */
29338 emit_insn (gen_rtx_SET (VOIDmode, res,
29339 gen_rtx_MULT (mode, e2, e3)));
29342 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
/* Like the default ELF handler, but emits the "@unwind" section type
   marker required by Solaris Binutils for .eh_frame.  */
29344 static void ATTRIBUTE_UNUSED
29345 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29348 /* With Binutils 2.15, the "@unwind" marker must be specified on
29349 every occurrence of the ".eh_frame" section, not just the first
29352 && strcmp (name, ".eh_frame") == 0)
29354 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29355 flags & SECTION_WRITE ? "aw" : "a");
/* Everything else goes through the generic ELF section handler.  */
29358 default_elf_asm_named_section (name, flags, decl);
29361 /* Return the mangling of TYPE if it is an extended fundamental type.  */
/* Implements TARGET_MANGLE_TYPE: supplies C++ manglings for the x86
   extended FP types ("g" for __float128, "e" for long double /
   __float80); returns NULL (via elided branches) for everything
   else.  */
29363 static const char *
29364 ix86_mangle_type (const_tree type)
29366 type = TYPE_MAIN_VARIANT (type);
/* Only scalar arithmetic types can need a special mangling.  */
29368 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29369 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29372 switch (TYPE_MODE (type))
29375 /* __float128 is "g".  */
29378 /* "long double" or __float80 is "e".  */
29385 /* For 32-bit code we can save PIC register setup by using
29386 __stack_chk_fail_local hidden function instead of calling
29387 __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
29388 register, so it is better to call __stack_chk_fail directly.  */
/* Implements TARGET_STACK_PROTECT_FAIL.  */
29391 ix86_stack_protect_fail (void)
29393 return TARGET_64BIT
29394 ? default_external_stack_protect_fail ()
29395 : default_hidden_stack_protect_fail ();
29398 /* Select a format to encode pointers in exception handling data.  CODE
29399 is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
29400 true if the symbol may be affected by dynamic relocations.
29402 ??? All x86 object file formats are capable of representing this.
29403 After all, the relocation needed is the same as for the call insn.
29404 Whether or not a particular assembler allows us to enter such, I
29405 guess we'll have to see.  */
/* PIC (elided condition): pc-relative, indirect for global symbols;
   sdata4 suffices for the small/medium code models, sdata8 otherwise.
   Non-PIC: absolute, with udata4 when the code model guarantees
   32-bit reachability.  */
29407 asm_preferred_eh_data_format (int code, int global)
29411 int type = DW_EH_PE_sdata8;
29413 || ix86_cmodel == CM_SMALL_PIC
29414 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29415 type = DW_EH_PE_sdata4;
29416 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29418 if (ix86_cmodel == CM_SMALL
29419 || (ix86_cmodel == CM_MEDIUM && code))
29420 return DW_EH_PE_udata4;
29421 return DW_EH_PE_absptr;
29424 /* Expand copysign from SIGN to the positive value ABS_VALUE
29425 storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit; otherwise a fresh sign-bit mask for MODE is built.
   RESULT = ABS_VALUE | (SIGN & signbit-mask).  ABS_VALUE must already
   be non-negative for the IOR to implement copysign correctly.  */
29428 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29430 enum machine_mode mode = GET_MODE (sign);
29431 rtx sgn = gen_reg_rtx (mode);
29432 if (mask == NULL_RTX)
29434 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29435 if (!VECTOR_MODE_P (mode))
29437 /* We need to generate a scalar mode mask in this case.  */
29438 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29439 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29440 mask = gen_reg_rtx (mode);
29441 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* A caller-supplied MASK has the sign bit clear, so invert it.  */
29445 mask = gen_rtx_NOT (mode, mask);
/* sgn = SIGN with everything but the sign bit cleared.  */
29446 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29447 gen_rtx_AND (mode, mask, sign)));
29448 emit_insn (gen_rtx_SET (VOIDmode, result,
29449 gen_rtx_IOR (mode, abs_value, sgn)));
29452 /* Expand fabs (OP0) and return a new rtx that holds the result.  The
29453 mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null, so callers can reuse it for a later copysign.  Implemented
   as OP0 & ~signbit.  */
29456 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29458 enum machine_mode mode = GET_MODE (op0);
29461 xa = gen_reg_rtx (mode);
/* Mask with the sign bit clear (third argument false... actually true
   here requests the inverted mask — see ix86_build_signbit_mask).  */
29462 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29463 if (!VECTOR_MODE_P (mode))
29465 /* We need to generate a scalar mode mask in this case.  */
29466 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29467 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29468 mask = gen_reg_rtx (mode);
29469 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29471 emit_insn (gen_rtx_SET (VOIDmode, xa,
29472 gen_rtx_AND (mode, op0, mask)));
29480 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29481 swapping the operands if SWAP_OPERANDS is true.  The expanded
29482 code is a forward jump to a newly created label in case the
29483 comparison is true.  The generated label rtx is returned.  */
29485 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29486 bool swap_operands)
/* CCFPUmode: an unordered-tolerant FP compare (ucomiss/ucomisd).  */
29497 label = gen_label_rtx ();
29498 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29499 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29500 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29501 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29502 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29503 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29504 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Wire up the jump's label for downstream passes.  */
29505 JUMP_LABEL (tmp) = label;
29510 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29511 using comparison code CODE.  Operands are swapped for the comparison if
29512 SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
/* The mask is all-ones where the comparison holds, all-zeros
   elsewhere (cmpsd/cmpss semantics).  */
29514 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29515 bool swap_operands)
29517 enum machine_mode mode = GET_MODE (op0);
29518 rtx mask = gen_reg_rtx (mode);
/* Pick the DF or SF maskcmp pattern to match MODE.  */
29527 if (mode == DFmode)
29528 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29529 gen_rtx_fmt_ee (code, mode, op0, op1)));
29531 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29532 gen_rtx_fmt_ee (code, mode, op0, op1)));
29537 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29538 of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
/* 2^52 (DFmode) / 2^23 (SFmode) is the threshold above which every
   representable value is already an integer — the rounding expanders
   below use it both as an early-out test and as the add/sub trick.  */
29540 ix86_gen_TWO52 (enum machine_mode mode)
29542 REAL_VALUE_TYPE TWO52r;
29545 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29546 TWO52 = const_double_from_real_value (TWO52r, mode);
29547 TWO52 = force_reg (mode, TWO52);
29552 /* Expand SSE sequence for computing lround from OP1 storing
   into OP0 (an integer-mode register).  */
29555 ix86_expand_lround (rtx op0, rtx op1)
29557 /* C code for the stuff we're doing below:
29558 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
   return (long) tmp;
   nextafter (0.5, 0.0) is used instead of 0.5 so that values exactly
   between two integers round away from zero despite the truncating
   conversion.  */
29561 enum machine_mode mode = GET_MODE (op1);
29562 const struct real_format *fmt;
29563 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29566 /* load nextafter (0.5, 0.0) */
29567 fmt = REAL_MODE_FORMAT (mode);
29568 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29569 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29571 /* adj = copysign (0.5, op1) */
29572 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29573 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29575 /* adj = op1 + adj */
29576 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29578 /* op0 = (imode)adj */
29579 expand_fix (op0, adj, 0);
29582 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
   into OPERAND0: lfloor when DO_FLOOR, lceil otherwise.  */
29585 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29587 /* C code for the stuff we're doing below (for do_floor):
   xi = (long)op1;
29589 xi -= (double)xi > op1 ? 1 : 0;
   return xi;
   i.e. truncate, then compensate by one when truncation rounded the
   wrong way.  For ceil the comparison/adjustment are mirrored via
   !do_floor and PLUS.  */
29592 enum machine_mode fmode = GET_MODE (op1);
29593 enum machine_mode imode = GET_MODE (op0);
29594 rtx ireg, freg, label, tmp;
29596 /* reg = (long)op1 */
29597 ireg = gen_reg_rtx (imode);
29598 expand_fix (ireg, op1, 0);
29600 /* freg = (double)reg */
29601 freg = gen_reg_rtx (fmode);
29602 expand_float (freg, ireg, 0);
29604 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29605 label = ix86_expand_sse_compare_and_jump (UNLE,
29606 freg, op1, !do_floor);
29607 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29608 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29609 emit_move_insn (ireg, tmp);
29611 emit_label (label);
29612 LABEL_NUSES (label) = 1;
29614 emit_move_insn (op0, ireg);
29617 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29618 result in OPERAND0.  */
29620 ix86_expand_rint (rtx operand0, rtx operand1)
29622 /* C code for the stuff we're doing below:
29623 xa = fabs (operand1);
29624 if (!isless (xa, 2**52))
   return operand1;   (already an integer — and NaN also takes this path)
29626 xa = xa + 2**52 - 2**52;   (forces round-to-nearest in current mode)
29627 return copysign (xa, operand1);
   */
29629 enum machine_mode mode = GET_MODE (operand0);
29630 rtx res, xa, label, TWO52, mask;
/* res starts as a copy of the input so the out-of-range path just
   falls through to the final move.  */
29632 res = gen_reg_rtx (mode);
29633 emit_move_insn (res, operand1);
29635 /* xa = abs (operand1) */
29636 xa = ix86_expand_sse_fabs (res, &mask);
29638 /* if (!isless (xa, TWO52)) goto label; */
29639 TWO52 = ix86_gen_TWO52 (mode);
29640 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29642 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29643 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the sign (preserves -0.0).  */
29645 ix86_sse_copysign_to_positive (res, xa, res, mask);
29647 emit_label (label);
29648 LABEL_NUSES (label) = 1;
29650 emit_move_insn (operand0, res);
29653 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  32-bit-safe variant: avoids DImode fix/float, using
   the TWO52 add/sub trick plus a one-ulp compensation instead.  */
29656 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29658 /* C code for the stuff we expand below.
29659 double xa = fabs (x), x2;
29660 if (!isless (xa, TWO52))
   return x;
29662 xa = xa + TWO52 - TWO52;
29663 x2 = copysign (xa, x);
   then compensate by +/-1 when the round-to-nearest result landed on
   the wrong side of x (elided lines).  */
29672 enum machine_mode mode = GET_MODE (operand0);
29673 rtx xa, TWO52, tmp, label, one, res, mask;
29675 TWO52 = ix86_gen_TWO52 (mode);
29677 /* Temporary for holding the result, initialized to the input
29678 operand to ease control flow.  */
29679 res = gen_reg_rtx (mode);
29680 emit_move_insn (res, operand1);
29682 /* xa = abs (operand1) */
29683 xa = ix86_expand_sse_fabs (res, &mask);
29685 /* if (!isless (xa, TWO52)) goto label; */
29686 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29688 /* xa = xa + TWO52 - TWO52; */
29689 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29690 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29692 /* xa = copysign (xa, operand1) */
29693 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29695 /* generate 1.0 or -1.0 */
29696 one = force_reg (mode,
29697 const_double_from_real_value (do_floor
29698 ? dconst1 : dconstm1, mode));
29700 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29701 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29702 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29703 gen_rtx_AND (mode, one, tmp)));
29704 /* We always need to subtract here to preserve signed zero.  */
29705 tmp = expand_simple_binop (mode, MINUS,
29706 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29707 emit_move_insn (res, tmp);
29709 emit_label (label);
29710 LABEL_NUSES (label) = 1;
29712 emit_move_insn (operand0, res);
29715 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0, using truncating fix/float (needs DImode cvttsd2siq
   for DFmode, i.e. 64-bit targets).  */
29718 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29720 /* C code for the stuff we expand below.
29721 double xa = fabs (x), x2;
29722 if (!isless (xa, TWO52))
   return x;
29724 x2 = (double)(long)x;
   then compensate by one ulp when truncation rounded the wrong way:
29731 if (HONOR_SIGNED_ZEROS (mode))
29732 return copysign (x2, x);
   return x2;  */
29735 enum machine_mode mode = GET_MODE (operand0);
29736 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29738 TWO52 = ix86_gen_TWO52 (mode);
29740 /* Temporary for holding the result, initialized to the input
29741 operand to ease control flow.  */
29742 res = gen_reg_rtx (mode);
29743 emit_move_insn (res, operand1);
29745 /* xa = abs (operand1) */
29746 xa = ix86_expand_sse_fabs (res, &mask);
29748 /* if (!isless (xa, TWO52)) goto label; */
29749 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29751 /* xa = (double)(long)x */
29752 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29753 expand_fix (xi, res, 0);
29754 expand_float (xa, xi, 0);
/* generate 1.0 */
29757 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29759 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29760 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29761 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29762 gen_rtx_AND (mode, one, tmp)));
29763 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29764 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29765 emit_move_insn (res, tmp);
29767 if (HONOR_SIGNED_ZEROS (mode))
29768 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29770 emit_label (label);
29771 LABEL_NUSES (label) = 1;
29773 emit_move_insn (operand0, res);
29776 /* Expand SSE sequence for computing round from OPERAND1 storing
29777 into OPERAND0.  Sequence that works without relying on DImode truncation
29778 via cvttsd2siq that is only available on 64bit targets.  */
29780 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29782 /* C code for the stuff we expand below.
29783 double xa = fabs (x), xa2, x2;
29784 if (!isless (xa, TWO52))
   return x;
29786 Using the absolute value and copying back sign makes
29787 -0.0 -> -0.0 correct.
29788 xa2 = xa + TWO52 - TWO52;   (round-to-nearest-even)
   dxa = xa2 - xa;
   if (dxa <= -0.5) xa2 += 1;  (fix up to round-half-away-from-zero)
29793 else if (dxa > 0.5)
   xa2 -= 1;
29795 x2 = copysign (xa2, x);
   return x2;  */
29798 enum machine_mode mode = GET_MODE (operand0);
29799 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29801 TWO52 = ix86_gen_TWO52 (mode);
29803 /* Temporary for holding the result, initialized to the input
29804 operand to ease control flow.  */
29805 res = gen_reg_rtx (mode);
29806 emit_move_insn (res, operand1);
29808 /* xa = abs (operand1) */
29809 xa = ix86_expand_sse_fabs (res, &mask);
29811 /* if (!isless (xa, TWO52)) goto label; */
29812 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29814 /* xa2 = xa + TWO52 - TWO52; */
29815 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29816 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29818 /* dxa = xa2 - xa; */
29819 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29821 /* generate 0.5, 1.0 and -0.5 */
29822 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29823 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29824 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29828 tmp = gen_reg_rtx (mode);
29829 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29830 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29831 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29832 gen_rtx_AND (mode, one, tmp)));
29833 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29834 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29835 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29836 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29837 gen_rtx_AND (mode, one, tmp)));
29838 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29840 /* res = copysign (xa2, operand1) */
29841 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29843 emit_label (label);
29844 LABEL_NUSES (label) = 1;
29846 emit_move_insn (operand0, res);
29849 /* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0 via a fix/float round trip (needs DImode fix for
   DFmode, i.e. 64-bit targets).  */
29852 ix86_expand_trunc (rtx operand0, rtx operand1)
29854 /* C code for SSE variant we expand below.
29855 double xa = fabs (x), x2;
29856 if (!isless (xa, TWO52))
   return x;
29858 x2 = (double)(long)x;
29859 if (HONOR_SIGNED_ZEROS (mode))
29860 return copysign (x2, x);
   return x2;  */
29863 enum machine_mode mode = GET_MODE (operand0);
29864 rtx xa, xi, TWO52, label, res, mask;
29866 TWO52 = ix86_gen_TWO52 (mode);
29868 /* Temporary for holding the result, initialized to the input
29869 operand to ease control flow.  */
29870 res = gen_reg_rtx (mode);
29871 emit_move_insn (res, operand1);
29873 /* xa = abs (operand1) */
29874 xa = ix86_expand_sse_fabs (res, &mask);
29876 /* if (!isless (xa, TWO52)) goto label; */
29877 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29879 /* x = (double)(long)x */
29880 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29881 expand_fix (xi, res, 0);
29882 expand_float (res, xi, 0);
29884 if (HONOR_SIGNED_ZEROS (mode))
29885 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29887 emit_label (label);
29888 LABEL_NUSES (label) = 1;
29890 emit_move_insn (operand0, res);
29893 /* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  32-bit-safe variant: uses the TWO52 add/sub trick
   on |x| plus a one-ulp compensation instead of DImode fix/float.  */
29896 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29898 enum machine_mode mode = GET_MODE (operand0);
29899 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29901 /* C code for SSE variant we expand below.
29902 double xa = fabs (x), x2;
29903 if (!isless (xa, TWO52))
   return x;
29905 xa2 = xa + TWO52 - TWO52;   (round-to-nearest on |x|)
   if (xa2 > xa) xa2 -= 1;     (turn nearest into trunc)
29909 x2 = copysign (xa2, x);
   return x2;  */
29913 TWO52 = ix86_gen_TWO52 (mode);
29915 /* Temporary for holding the result, initialized to the input
29916 operand to ease control flow.  */
29917 res = gen_reg_rtx (mode);
29918 emit_move_insn (res, operand1);
29920 /* xa = abs (operand1) */
29921 xa = ix86_expand_sse_fabs (res, &smask);
29923 /* if (!isless (xa, TWO52)) goto label; */
29924 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29926 /* res = xa + TWO52 - TWO52; */
29927 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29928 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29929 emit_move_insn (res, tmp);
/* generate 1.0 */
29932 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29934 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29935 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29936 emit_insn (gen_rtx_SET (VOIDmode, mask,
29937 gen_rtx_AND (mode, mask, one)));
29938 tmp = expand_simple_binop (mode, MINUS,
29939 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29940 emit_move_insn (res, tmp);
29942 /* res = copysign (res, operand1) */
29943 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29945 emit_label (label);
29946 LABEL_NUSES (label) = 1;
29948 emit_move_insn (operand0, res);
29951 /* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0 (round half away from zero) via truncating fix/float
   after adding just-under-0.5.  */
29954 ix86_expand_round (rtx operand0, rtx operand1)
29956 /* C code for the stuff we're doing below:
29957 double xa = fabs (x);
29958 if (!isless (xa, TWO52))
   return x;
29960 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29961 return copysign (xa, x);
   nextafter (0.5, 0.0) rather than 0.5, so x just below an exact .5
   boundary does not get bumped to the next integer.  */
29963 enum machine_mode mode = GET_MODE (operand0);
29964 rtx res, TWO52, xa, label, xi, half, mask;
29965 const struct real_format *fmt;
29966 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29968 /* Temporary for holding the result, initialized to the input
29969 operand to ease control flow.  */
29970 res = gen_reg_rtx (mode);
29971 emit_move_insn (res, operand1);
29973 TWO52 = ix86_gen_TWO52 (mode);
29974 xa = ix86_expand_sse_fabs (res, &mask);
29975 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29977 /* load nextafter (0.5, 0.0) */
29978 fmt = REAL_MODE_FORMAT (mode);
29979 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29980 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29982 /* xa = xa + 0.5 */
29983 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29984 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29986 /* xa = (double)(int64_t)xa */
29987 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29988 expand_fix (xi, xa, 0);
29989 expand_float (xa, xi, 0);
29991 /* res = copysign (xa, operand1) */
29992 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29994 emit_label (label);
29995 LABEL_NUSES (label) = 1;
29997 emit_move_insn (operand0, res);
30001 /* Validate whether a SSE5 instruction is valid or not.
30002    OPERANDS is the array of operands.
30003    NUM is the number of operands.
30004    USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
30005    NUM_MEMORY is the maximum number of memory operands to accept.
30006    when COMMUTATIVE is set, operand 1 and 2 can be swapped.
   Returns true when the operand/memory combination matches one of the
   encodable SSE5 instruction formats.  mem_mask collects a bit per
   memory operand (bit i set <=> operands[i] is a memory ref).  */
30009 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
30010 bool uses_oc0, int num_memory, bool commutative)
30016 /* Count the number of memory arguments */
30019 for (i = 0; i < num; i++)
30021 enum machine_mode mode = GET_MODE (operands[i]);
30022 if (register_operand (operands[i], mode))
30025 else if (memory_operand (operands[i], mode))
30027 mem_mask |= (1 << i);
30033 rtx pattern = PATTERN (insn);
30035 /* allow 0 for pcmov */
30036 if (GET_CODE (pattern) != SET
30037 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
30039 || operands[i] != CONST0_RTX (mode))
30044 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
30045 a memory operation.  */
30046 if (num_memory < 0)
30048 num_memory = -num_memory;
30049 if ((mem_mask & (1 << (num-1))) != 0)
30051 mem_mask &= ~(1 << (num-1));
30056 /* If there were no memory operations, allow the insn */
30060 /* Do not allow the destination register to be a memory operand.  */
30061 else if (mem_mask & (1 << 0))
30064 /* If there are too many memory operations, disallow the instruction.  While
30065 the hardware only allows 1 memory reference, before register allocation
30066 for some insns, we allow two memory operations sometimes in order to allow
30067 code like the following to be optimized:
30069 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
30071 or similar cases that are vectorized into using the fmaddss
30073 else if (mem_count > num_memory)
30076 /* Don't allow more than one memory operation if not optimizing.  */
30077 else if (mem_count > 1 && !optimize)
30080 else if (num == 4 && mem_count == 1)
30082 /* formats (destination is the first argument), example fmaddss:
30083 xmm1, xmm1, xmm2, xmm3/mem
30084 xmm1, xmm1, xmm2/mem, xmm3
30085 xmm1, xmm2, xmm3/mem, xmm1
30086 xmm1, xmm2/mem, xmm3, xmm1 */
30088 return ((mem_mask == (1 << 1))
30089 || (mem_mask == (1 << 2))
30090 || (mem_mask == (1 << 3)));
30092 /* format, example pmacsdd:
30093 xmm1, xmm2, xmm3/mem, xmm1 */
30095 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
30097 return (mem_mask == (1 << 2));
30100 else if (num == 4 && num_memory == 2)
30102 /* If there are two memory operations, we can load one of the memory ops
30103 into the destination register.  This is for optimizing the
30104 multiply/add ops, which the combiner has optimized both the multiply
30105 and the add insns to have a memory operation.  We have to be careful
30106 that the destination doesn't overlap with the inputs.  */
30107 rtx op0 = operands[0];
30109 if (reg_mentioned_p (op0, operands[1])
30110 || reg_mentioned_p (op0, operands[2])
30111 || reg_mentioned_p (op0, operands[3]))
30114 /* formats (destination is the first argument), example fmaddss:
30115 xmm1, xmm1, xmm2, xmm3/mem
30116 xmm1, xmm1, xmm2/mem, xmm3
30117 xmm1, xmm2, xmm3/mem, xmm1
30118 xmm1, xmm2/mem, xmm3, xmm1
30120 For the oc0 case, we will load either operands[1] or operands[3] into
30121 operands[0], so any combination of 2 memory operands is ok.  */
30125 /* format, example pmacsdd:
30126 xmm1, xmm2, xmm3/mem, xmm1
30128 For the integer multiply/add instructions be more restrictive and
30129 require operands[2] and operands[3] to be the memory operands.
   Fixed: the second disjunct previously read `|| ((1 << 2) | (1 << 3))',
   a bare nonzero constant that made the whole condition always true and
   accepted invalid memory-operand combinations.  It must compare
   mem_mask against the (2,3) pattern, parallel to the first disjunct.  */
30131 return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
30133 return (mem_mask == ((1 << 2) | (1 << 3)));
30136 else if (num == 3 && num_memory == 1)
30138 /* formats, example protb:
30139 xmm1, xmm2, xmm3/mem
30140 xmm1, xmm2/mem, xmm3 */
30142 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
30144 /* format, example comeq:
30145 xmm1, xmm2, xmm3/mem */
30147 return (mem_mask == (1 << 2));
/* Unexpected operand-count/memory combination — internal error.  */
30151 gcc_unreachable ();
30157 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
30158 hardware will allow by using the destination register to load one of the
30159 memory operations.  Presently this is used by the multiply/add routines to
30160 allow 2 memory references.  */
/* Preconditions (asserted): operands[0] is a register that does not
   overlap any input, and either operands[1] or operands[3] is a
   memory reference to be loaded into it.  */
30163 ix86_expand_sse5_multiple_memory (rtx operands[],
30165 enum machine_mode mode)
30167 rtx op0 = operands[0];
30169 || memory_operand (op0, mode)
30170 || reg_mentioned_p (op0, operands[1])
30171 || reg_mentioned_p (op0, operands[2])
30172 || reg_mentioned_p (op0, operands[3]))
30173 gcc_unreachable ();
30175 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
30176 the destination register.  */
30177 if (memory_operand (operands[1], mode))
30179 emit_move_insn (op0, operands[1]);
30182 else if (memory_operand (operands[3], mode))
30184 emit_move_insn (op0, operands[3]);
/* Caller promised a memory operand in slot 1 or 3.  */
30188 gcc_unreachable ();
30194 /* Table of valid machine attributes.  */
/* Consumed via TARGET_ATTRIBUTE_TABLE; terminated by the all-NULL
   sentinel entry.  */
30195 static const struct attribute_spec ix86_attribute_table[] =
30197 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30198 /* Stdcall attribute says callee is responsible for popping arguments
30199 if they are not variable.  */
30200 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30201 /* Fastcall attribute says callee is responsible for popping arguments
30202 if they are not variable.  */
30203 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30204 /* Cdecl attribute says the callee is a normal C declaration */
30205 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30206 /* Regparm attribute specifies how many integer arguments are to be
30207 passed in registers.  */
30208 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30209 /* Sseregparm attribute says we are using x86_64 calling conventions
30210 for FP arguments.  */
30211 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30212 /* force_align_arg_pointer says this function realigns the stack at entry.  */
30213 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30214 false, true, true, ix86_handle_cconv_attribute },
30215 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30216 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30217 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30218 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* ms_struct/gcc_struct select the struct-layout convention.  */
30220 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30221 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30222 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30223 SUBTARGET_ATTRIBUTE_TABLE,
30225 /* ms_abi and sysv_abi calling convention function attributes.  */
30226 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30227 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
/* Sentinel.  */
30229 { NULL, 0, 0, false, false, false, NULL }
30232 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
30234 x86_builtin_vectorization_cost (bool runtime_test)
30236 /* If the branch of the runtime test is taken - i.e. - the vectorized
30237 version is skipped - this incurs a misprediction cost (because the
30238 vectorized version is expected to be the fall-through).  So we subtract
30239 the latency of a mispredicted branch from the costs that are incurred
30240 when the vectorized version is executed.
30242 TODO: The values in individual target tables have to be tuned or new
30243 fields may be needed.  For eg. on K8, the default branch path is the
30244 not-taken path.  If the taken path is predicted correctly, the minimum
30245 penalty of going down the taken-path is 1 cycle.  If the taken-path is
30246 not predicted correctly, then the minimum penalty is 10 cycles.  */
/* Negative cost credit for the runtime-test branch; the non-test
   case (elided) returns a different value.  */
30250 return (-(ix86_cost->cond_taken_branch_cost));
30256 /* This function returns the calling abi specific va_list type node.
30257 It returns the FNDECL specific va_list type.  */
/* Implements TARGET_FN_ABI_VA_LIST: on biarch x86-64 the va_list type
   depends on whether FNDECL uses the MS or the SysV calling ABI; the
   elided early return hands back the generic node when only one ABI
   is in play.  */
30260 ix86_fn_abi_va_list (tree fndecl)
30263 return va_list_type_node;
30264 gcc_assert (fndecl != NULL_TREE);
30266 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
30267 return ms_va_list_type_node;
30269 return sysv_va_list_type_node;
30272 /* Returns the canonical va_list type specified by TYPE.  If there
30273 is no valid TYPE provided, it return NULL_TREE.  */
/* Implements TARGET_CANONICAL_VA_LIST_TYPE.  Tries, in order, the
   generic va_list, the SysV va_list and the MS va_list node; the same
   array-decay unwrapping is repeated for each candidate.  Falls back
   to the generic std_canonical_va_list_type at the end.  */
30276 ix86_canonical_va_list_type (tree type)
30280 /* Resolve references and pointers to va_list type.  */
30281 if (INDIRECT_REF_P (type))
30282 type = TREE_TYPE (type);
30283 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
30284 type = TREE_TYPE (type);
/* Candidate 1: the default va_list node.  */
30288 wtype = va_list_type_node;
30289 gcc_assert (wtype != NULL_TREE);
30291 if (TREE_CODE (wtype) == ARRAY_TYPE)
30293 /* If va_list is an array type, the argument may have decayed
30294 to a pointer type, e.g. by being passed to another function.
30295 In that case, unwrap both types so that we can compare the
30296 underlying records.  */
30297 if (TREE_CODE (htype) == ARRAY_TYPE
30298 || POINTER_TYPE_P (htype))
30300 wtype = TREE_TYPE (wtype);
30301 htype = TREE_TYPE (htype);
30304 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30305 return va_list_type_node;
/* Candidate 2: the SysV ABI va_list node.  */
30306 wtype = sysv_va_list_type_node;
30307 gcc_assert (wtype != NULL_TREE);
30309 if (TREE_CODE (wtype) == ARRAY_TYPE)
30311 /* If va_list is an array type, the argument may have decayed
30312 to a pointer type, e.g. by being passed to another function.
30313 In that case, unwrap both types so that we can compare the
30314 underlying records.  */
30315 if (TREE_CODE (htype) == ARRAY_TYPE
30316 || POINTER_TYPE_P (htype))
30318 wtype = TREE_TYPE (wtype);
30319 htype = TREE_TYPE (htype);
30322 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30323 return sysv_va_list_type_node;
/* Candidate 3: the MS ABI va_list node.  */
30324 wtype = ms_va_list_type_node;
30325 gcc_assert (wtype != NULL_TREE);
30327 if (TREE_CODE (wtype) == ARRAY_TYPE)
30329 /* If va_list is an array type, the argument may have decayed
30330 to a pointer type, e.g. by being passed to another function.
30331 In that case, unwrap both types so that we can compare the
30332 underlying records.  */
30333 if (TREE_CODE (htype) == ARRAY_TYPE
30334 || POINTER_TYPE_P (htype))
30336 wtype = TREE_TYPE (wtype);
30337 htype = TREE_TYPE (htype);
30340 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30341 return ms_va_list_type_node;
/* No match: defer to the language-independent handler.  */
30344 return std_canonical_va_list_type (type);
30347 /* Iterate through the target-specific builtin types for va_list.
30348 IDX denotes the iterator, *PTREE is set to the result type of
30349 the va_list builtin, and *PNAME to its internal type.
30350 Returns zero if there is no element for this index, otherwise
30351 IDX should be increased upon the next call.
30352 Note, do not iterate a base builtin's name like __builtin_va_list.
30353 Used from c_common_nodes_and_builtins. */
30356 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* NOTE(review): the dispatch on IDX and the return statements sit on
   lines elided from this listing; the first case reports the MS
   va_list, the second the SysV va_list -- confirm against the full
   file.  */
30362 *ptree = ms_va_list_type_node;
30363 *pname = "__builtin_ms_va_list";
30366 *ptree = sysv_va_list_type_node;
30367 *pname = "__builtin_sysv_va_list";
30375 /* Initialize the GCC target structure.
   Each hook below is first #undef'd (to override the defaults from
   target-def.h) and then #define'd to the i386-specific implementation;
   TARGET_INITIALIZER at the bottom collects them all into TARGETM.
   NOTE(review): several #else/#endif directives of the conditional
   groups below appear to be elided from this listing -- confirm
   against the full file.  */
30376 #undef TARGET_RETURN_IN_MEMORY
30377 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30379 #undef TARGET_LEGITIMIZE_ADDRESS
30380 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
/* Attribute handling hooks.  */
30382 #undef TARGET_ATTRIBUTE_TABLE
30383 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30384 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30385 # undef TARGET_MERGE_DECL_ATTRIBUTES
30386 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30389 #undef TARGET_COMP_TYPE_ATTRIBUTES
30390 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
/* Builtin and vectorizer hooks.  */
30392 #undef TARGET_INIT_BUILTINS
30393 #define TARGET_INIT_BUILTINS ix86_init_builtins
30394 #undef TARGET_EXPAND_BUILTIN
30395 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30397 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30398 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30399 ix86_builtin_vectorized_function
30401 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30402 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30404 #undef TARGET_BUILTIN_RECIPROCAL
30405 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
/* Assembler output hooks.  */
30407 #undef TARGET_ASM_FUNCTION_EPILOGUE
30408 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30410 #undef TARGET_ENCODE_SECTION_INFO
30411 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30412 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30414 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30417 #undef TARGET_ASM_OPEN_PAREN
30418 #define TARGET_ASM_OPEN_PAREN ""
30419 #undef TARGET_ASM_CLOSE_PAREN
30420 #define TARGET_ASM_CLOSE_PAREN ""
30422 #undef TARGET_ASM_ALIGNED_HI_OP
30423 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30424 #undef TARGET_ASM_ALIGNED_SI_OP
30425 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30427 #undef TARGET_ASM_ALIGNED_DI_OP
30428 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* x86 has no special unaligned directives; reuse the aligned ones.  */
30431 #undef TARGET_ASM_UNALIGNED_HI_OP
30432 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30433 #undef TARGET_ASM_UNALIGNED_SI_OP
30434 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30435 #undef TARGET_ASM_UNALIGNED_DI_OP
30436 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Instruction scheduler hooks.  */
30438 #undef TARGET_SCHED_ADJUST_COST
30439 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30440 #undef TARGET_SCHED_ISSUE_RATE
30441 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30442 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30443 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30444 ia32_multipass_dfa_lookahead
30446 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30447 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30450 #undef TARGET_HAVE_TLS
30451 #define TARGET_HAVE_TLS true
30453 #undef TARGET_CANNOT_FORCE_CONST_MEM
30454 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30455 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30456 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30458 #undef TARGET_DELEGITIMIZE_ADDRESS
30459 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30461 #undef TARGET_MS_BITFIELD_LAYOUT_P
30462 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* Subtarget overrides for symbol binding (Darwin, then PE/COFF).  */
30465 #undef TARGET_BINDS_LOCAL_P
30466 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30468 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30469 #undef TARGET_BINDS_LOCAL_P
30470 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30473 #undef TARGET_ASM_OUTPUT_MI_THUNK
30474 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30475 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30476 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30478 #undef TARGET_ASM_FILE_START
30479 #define TARGET_ASM_FILE_START x86_file_start
30481 #undef TARGET_DEFAULT_TARGET_FLAGS
30482 #define TARGET_DEFAULT_TARGET_FLAGS \
30484 | TARGET_SUBTARGET_DEFAULT \
30485 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
30487 #undef TARGET_HANDLE_OPTION
30488 #define TARGET_HANDLE_OPTION ix86_handle_option
/* RTL cost-model hooks.  */
30490 #undef TARGET_RTX_COSTS
30491 #define TARGET_RTX_COSTS ix86_rtx_costs
30492 #undef TARGET_ADDRESS_COST
30493 #define TARGET_ADDRESS_COST ix86_address_cost
30495 #undef TARGET_FIXED_CONDITION_CODE_REGS
30496 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30497 #undef TARGET_CC_MODES_COMPATIBLE
30498 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
30500 #undef TARGET_MACHINE_DEPENDENT_REORG
30501 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30503 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30504 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
/* Calling convention and varargs hooks (see the ix86_*_va_list
   functions above).  */
30506 #undef TARGET_BUILD_BUILTIN_VA_LIST
30507 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30509 #undef TARGET_FN_ABI_VA_LIST
30510 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30512 #undef TARGET_CANONICAL_VA_LIST_TYPE
30513 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30515 #undef TARGET_EXPAND_BUILTIN_VA_START
30516 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
30518 #undef TARGET_MD_ASM_CLOBBERS
30519 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
30521 #undef TARGET_PROMOTE_PROTOTYPES
30522 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30523 #undef TARGET_STRUCT_VALUE_RTX
30524 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30525 #undef TARGET_SETUP_INCOMING_VARARGS
30526 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30527 #undef TARGET_MUST_PASS_IN_STACK
30528 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30529 #undef TARGET_PASS_BY_REFERENCE
30530 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30531 #undef TARGET_INTERNAL_ARG_POINTER
30532 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
/* Stack realignment / DRAP hooks.  */
30533 #undef TARGET_UPDATE_STACK_BOUNDARY
30534 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30535 #undef TARGET_GET_DRAP_RTX
30536 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
30537 #undef TARGET_STRICT_ARGUMENT_NAMING
30538 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
30540 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
30541 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
30543 #undef TARGET_SCALAR_MODE_SUPPORTED_P
30544 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30546 #undef TARGET_VECTOR_MODE_SUPPORTED_P
30547 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30549 #undef TARGET_C_MODE_FOR_SUFFIX
30550 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
30553 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30554 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
30557 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30558 #undef TARGET_INSERT_ATTRIBUTES
30559 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30562 #undef TARGET_MANGLE_TYPE
30563 #define TARGET_MANGLE_TYPE ix86_mangle_type
30565 #undef TARGET_STACK_PROTECT_FAIL
30566 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30568 #undef TARGET_FUNCTION_VALUE
30569 #define TARGET_FUNCTION_VALUE ix86_function_value
30571 #undef TARGET_SECONDARY_RELOAD
30572 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30574 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30575 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
/* Per-function target-option ("target" attribute) hooks.  */
30577 #undef TARGET_SET_CURRENT_FUNCTION
30578 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30580 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30581 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30583 #undef TARGET_OPTION_SAVE
30584 #define TARGET_OPTION_SAVE ix86_function_specific_save
30586 #undef TARGET_OPTION_RESTORE
30587 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30589 #undef TARGET_OPTION_PRINT
30590 #define TARGET_OPTION_PRINT ix86_function_specific_print
30592 #undef TARGET_OPTION_CAN_INLINE_P
30593 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
30595 #undef TARGET_EXPAND_TO_RTL_HOOK
30596 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
30598 #undef TARGET_LEGITIMATE_ADDRESS_P
30599 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
30601 #undef TARGET_FRAME_POINTER_REQUIRED
30602 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
/* The one definition of the target hook vector for this back end.  */
30604 struct gcc_target targetm = TARGET_INITIALIZER;
/* Garbage-collector roots generated by gengtype for this file.  */
30606 #include "gt-i386.h"