1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
54 #include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Provide a default stack-probe limit when the target headers do not
   define one.  NOTE(review): -1 is presumably the "no limit" sentinel —
   confirm against the uses of CHECK_STACK_LIMIT.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The
   mult/div arrays in struct processor_costs have five entries: QI, HI,
   SI, DI and "other"; index 4 is the catch-all "other" slot.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
72 /* Processor costs (relative to an add) */
73 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
/* Express a cost of N average instructions in bytes when tuning for
   size: each instruction is assumed to occupy 2 bytes.  */
74 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop strategy entry: always fall back to a library
   call.  Used below to fill stringop table slots a processor entry is
   not tuned for — presumably the 64-bit variant; verify against the
   stringop_algs declaration.  */
76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Size-tuned cost table: all costs are expressed in bytes via
   COSTS_N_BYTES rather than latencies, cache/prefetch parameters are
   zeroed, and stringops always use rep_prefix_1_byte.  Field order must
   match struct processor_costs — verify against its declaration.  */
79 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
/* Stringop strategy tables — presumably {memcpy, memset} pairs, each
   with a 32-bit and a 64-bit entry; verify against stringop_algs.  */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
/* i386 cost table: multiply (6) and divide (23) latencies are uniform
   across operand widths, and all cache/prefetch parameters are zero.  */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
/* i486 cost table.  The 8kB unified cache is approximated below as
   4kB L1 + 4kB L2 (see the inline note on the cache fields).  */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
/* Pentium cost table.  */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
/* PentiumPro (P6) cost table; see the stringop note before the
   strategy tables below.  */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.  */
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
/* AMD Geode cost table: near-uniform unit costs for register moves and
   loads; divide cost varies with operand width.  */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
/* AMD K6 cost table; see the inline note on L2 cache modelling.  */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
/* AMD Athlon cost table; see the REP-prefix note before the stringop
   strategy tables.  */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
/* AMD K8 cost table; note the distinct vectorizer costs at the end
   (scalar 4/2/2 vs. vector 5) compared with the older tables.  */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
   time).  */
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
/* AMD Family 10h cost table.  */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
/* NOTE(review): the following latency examples are comment text whose
   opening marker is missing here:
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3  */
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
   time).  */
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
/* Pentium 4 cost table: note the high SSE move (12) and SSE-to-integer
   (10) costs relative to the other tables.  */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
/* Cost table for Nocona.  Note the expensive MMX/SSE register loads and
   stores (12) relative to integer register moves in the rows below.  */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
/* Cost table for Core 2.  */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
995 2, /* cost of moving MMX register */
996 {6, 6}, /* cost of loading MMX registers
997 in SImode and DImode */
998 {4, 4}, /* cost of storing MMX registers
999 in SImode and DImode */
1000 2, /* cost of moving SSE register */
1001 {6, 6, 6}, /* cost of loading SSE registers
1002 in SImode, DImode and TImode */
1003 {4, 4, 4}, /* cost of storing SSE registers
1004 in SImode, DImode and TImode */
1005 2, /* MMX or SSE register to integer */
1006 32, /* size of l1 cache. */
1007 2048, /* size of l2 cache. */
1008 128, /* size of prefetch block */
1009 8, /* number of parallel prefetches */
1010 3, /* Branch cost */
1011 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1012 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1013 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1014 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1015 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1016 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1017 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1018 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1020 {{libcall, {{8, loop}, {15, unrolled_loop},
1021 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022 {libcall, {{24, loop}, {32, unrolled_loop},
1023 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024 1, /* scalar_stmt_cost. */
1025 1, /* scalar load_cost. */
1026 1, /* scalar_store_cost. */
1027 1, /* vec_stmt_cost. */
1028 1, /* vec_to_scalar_cost. */
1029 1, /* scalar_to_vec_cost. */
1030 1, /* vec_align_load_cost. */
1031 2, /* vec_unalign_load_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1037 /* Generic64 should produce code tuned for Nocona and K8. */
1039 struct processor_costs generic64_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 /* On all chips taken into consideration lea is 2 cycles and more. With
1042 this cost however our current implementation of synth_mult results in
1043 use of unnecessary temporary registers causing regression on several
1044 SPECfp benchmarks. */
1045 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1046 COSTS_N_INSNS (1), /* variable shift costs */
1047 COSTS_N_INSNS (1), /* constant shift costs */
1048 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1049 COSTS_N_INSNS (4), /* HI */
1050 COSTS_N_INSNS (3), /* SI */
1051 COSTS_N_INSNS (4), /* DI */
1052 COSTS_N_INSNS (2)}, /* other */
1053 0, /* cost of multiply per each bit set */
1054 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1055 COSTS_N_INSNS (26), /* HI */
1056 COSTS_N_INSNS (42), /* SI */
1057 COSTS_N_INSNS (74), /* DI */
1058 COSTS_N_INSNS (74)}, /* other */
1059 COSTS_N_INSNS (1), /* cost of movsx */
1060 COSTS_N_INSNS (1), /* cost of movzx */
1061 8, /* "large" insn */
1062 17, /* MOVE_RATIO */
1063 4, /* cost for loading QImode using movzbl */
1064 {4, 4, 4}, /* cost of loading integer registers
1065 in QImode, HImode and SImode.
1066 Relative to reg-reg move (2). */
1067 {4, 4, 4}, /* cost of storing integer registers */
1068 4, /* cost of reg,reg fld/fst */
1069 {12, 12, 12}, /* cost of loading fp registers
1070 in SFmode, DFmode and XFmode */
1071 {6, 6, 8}, /* cost of storing fp registers
1072 in SFmode, DFmode and XFmode */
1073 2, /* cost of moving MMX register */
1074 {8, 8}, /* cost of loading MMX registers
1075 in SImode and DImode */
1076 {8, 8}, /* cost of storing MMX registers
1077 in SImode and DImode */
1078 2, /* cost of moving SSE register */
1079 {8, 8, 8}, /* cost of loading SSE registers
1080 in SImode, DImode and TImode */
1081 {8, 8, 8}, /* cost of storing SSE registers
1082 in SImode, DImode and TImode */
1083 5, /* MMX or SSE register to integer */
1084 32, /* size of l1 cache. */
1085 512, /* size of l2 cache. */
1086 64, /* size of prefetch block */
1087 6, /* number of parallel prefetches */
1088 /* Benchmarks show large regressions on K8 sixtrack benchmark when this value
1089 is increased to perhaps more appropriate value of 5. */
1090 3, /* Branch cost */
1091 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 1, /* scalar_stmt_cost. */
1102 1, /* scalar load_cost. */
1103 1, /* scalar_store_cost. */
1104 1, /* vec_stmt_cost. */
1105 1, /* vec_to_scalar_cost. */
1106 1, /* scalar_to_vec_cost. */
1107 1, /* vec_align_load_cost. */
1108 2, /* vec_unalign_load_cost. */
1109 1, /* vec_store_cost. */
1110 3, /* cond_taken_branch_cost. */
1111 1, /* cond_not_taken_branch_cost. */
1114 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1116 struct processor_costs generic32_cost = {
1117 COSTS_N_INSNS (1), /* cost of an add instruction */
1118 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1119 COSTS_N_INSNS (1), /* variable shift costs */
1120 COSTS_N_INSNS (1), /* constant shift costs */
1121 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1122 COSTS_N_INSNS (4), /* HI */
1123 COSTS_N_INSNS (3), /* SI */
1124 COSTS_N_INSNS (4), /* DI */
1125 COSTS_N_INSNS (2)}, /* other */
1126 0, /* cost of multiply per each bit set */
1127 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1128 COSTS_N_INSNS (26), /* HI */
1129 COSTS_N_INSNS (42), /* SI */
1130 COSTS_N_INSNS (74), /* DI */
1131 COSTS_N_INSNS (74)}, /* other */
1132 COSTS_N_INSNS (1), /* cost of movsx */
1133 COSTS_N_INSNS (1), /* cost of movzx */
1134 8, /* "large" insn */
1135 17, /* MOVE_RATIO */
1136 4, /* cost for loading QImode using movzbl */
1137 {4, 4, 4}, /* cost of loading integer registers
1138 in QImode, HImode and SImode.
1139 Relative to reg-reg move (2). */
1140 {4, 4, 4}, /* cost of storing integer registers */
1141 4, /* cost of reg,reg fld/fst */
1142 {12, 12, 12}, /* cost of loading fp registers
1143 in SFmode, DFmode and XFmode */
1144 {6, 6, 8}, /* cost of storing fp registers
1145 in SFmode, DFmode and XFmode */
1146 2, /* cost of moving MMX register */
1147 {8, 8}, /* cost of loading MMX registers
1148 in SImode and DImode */
1149 {8, 8}, /* cost of storing MMX registers
1150 in SImode and DImode */
1151 2, /* cost of moving SSE register */
1152 {8, 8, 8}, /* cost of loading SSE registers
1153 in SImode, DImode and TImode */
1154 {8, 8, 8}, /* cost of storing SSE registers
1155 in SImode, DImode and TImode */
1156 5, /* MMX or SSE register to integer */
1157 32, /* size of l1 cache. */
1158 256, /* size of l2 cache. */
1159 64, /* size of prefetch block */
1160 6, /* number of parallel prefetches */
1161 3, /* Branch cost */
1162 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* NOTE(review): the second element of each stringop pair appears to be the
   64-bit variant (DUMMY here, populated in generic64_cost above) — confirm
   against the struct processor_costs declaration.  */
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 1, /* scalar_stmt_cost. */
1173 1, /* scalar load_cost. */
1174 1, /* scalar_store_cost. */
1175 1, /* vec_stmt_cost. */
1176 1, /* vec_to_scalar_cost. */
1177 1, /* scalar_to_vec_cost. */
1178 1, /* vec_align_load_cost. */
1179 2, /* vec_unalign_load_cost. */
1180 1, /* vec_store_cost. */
1181 3, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
/* Cost table currently in effect; defaults to the Pentium table and is
   presumably repointed during option processing per -mtune — TODO confirm.  */
1185 const struct processor_costs *ix86_cost = &pentium_cost;
1187 /* Processor feature/optimization bitmasks. */
1188 #define m_386 (1<<PROCESSOR_I386)
1189 #define m_486 (1<<PROCESSOR_I486)
1190 #define m_PENT (1<<PROCESSOR_PENTIUM)
1191 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1192 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1193 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1194 #define m_CORE2 (1<<PROCESSOR_CORE2)
1196 #define m_GEODE (1<<PROCESSOR_GEODE)
1197 #define m_K6 (1<<PROCESSOR_K6)
1198 #define m_K6_GEODE (m_K6 | m_GEODE)
1199 #define m_K8 (1<<PROCESSOR_K8)
1200 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1201 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1202 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1203 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1205 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1206 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1208 /* Generic instruction choice should be common subset of supported CPUs
1209 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1210 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1212 /* Feature tests against the various tunings. */
1213 unsigned char ix86_tune_features[X86_TUNE_LAST];
1215 /* Feature tests against the various tunings used to create ix86_tune_features
1216 based on the processor mask. */
1217 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1218 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1219 negatively, so enabling for Generic64 seems like good code size
1220 tradeoff. We can't enable it for 32bit generic because it does not
1221 work well with PPro base chips. */
1222 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1224 /* X86_TUNE_PUSH_MEMORY */
1225 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1226 | m_NOCONA | m_CORE2 | m_GENERIC,
1228 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1231 /* X86_TUNE_USE_BIT_TEST */
1234 /* X86_TUNE_UNROLL_STRLEN */
1235 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1237 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1238 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1240 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1241 on simulation result. But after P4 was made, no performance benefit
1242 was observed with branch hints. It also increases the code size.
1243 As a result, icc never generates branch hints. */
1246 /* X86_TUNE_DOUBLE_WITH_ADD */
1249 /* X86_TUNE_USE_SAHF */
1250 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1251 | m_NOCONA | m_CORE2 | m_GENERIC,
1253 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1254 partial dependencies. */
1255 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1256 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1258 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1259 register stalls on Generic32 compilation setting as well. However
1260 in current implementation the partial register stalls are not eliminated
1261 very well - they can be introduced via subregs synthesized by combine
1262 and can happen in caller/callee saving sequences. Because this option
1263 pays back little on PPro based chips and is in conflict with partial reg
1264 dependencies used by Athlon/P4 based chips, it is better to leave it off
1265 for generic32 for now. */
1268 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1269 m_CORE2 | m_GENERIC,
1271 /* X86_TUNE_USE_HIMODE_FIOP */
1272 m_386 | m_486 | m_K6_GEODE,
1274 /* X86_TUNE_USE_SIMODE_FIOP */
1275 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1277 /* X86_TUNE_USE_MOV0 */
1280 /* X86_TUNE_USE_CLTD */
1281 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1283 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1286 /* X86_TUNE_SPLIT_LONG_MOVES */
1289 /* X86_TUNE_READ_MODIFY_WRITE */
1292 /* X86_TUNE_READ_MODIFY */
1295 /* X86_TUNE_PROMOTE_QIMODE */
1296 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1297 | m_GENERIC /* | m_PENT4 ? */,
1299 /* X86_TUNE_FAST_PREFIX */
1300 ~(m_PENT | m_486 | m_386),
1302 /* X86_TUNE_SINGLE_STRINGOP */
1303 m_386 | m_PENT4 | m_NOCONA,
1305 /* X86_TUNE_QIMODE_MATH */
1308 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1309 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1310 might be considered for Generic32 if our scheme for avoiding partial
1311 stalls was more effective. */
1314 /* X86_TUNE_PROMOTE_QI_REGS */
1317 /* X86_TUNE_PROMOTE_HI_REGS */
1320 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1321 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1323 /* X86_TUNE_ADD_ESP_8 */
1324 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1325 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1327 /* X86_TUNE_SUB_ESP_4 */
1328 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1330 /* X86_TUNE_SUB_ESP_8 */
1331 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1332 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1334 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1335 for DFmode copies */
1336 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1337 | m_GENERIC | m_GEODE),
1339 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1340 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1342 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1343 conflict here between PPro/Pentium4 based chips that treat 128bit
1344 SSE registers as single units versus K8 based chips that divide SSE
1345 registers to two 64bit halves. This knob promotes all store destinations
1346 to be 128bit to allow register renaming on 128bit SSE units, but usually
1347 results in one extra microop on 64bit SSE units. Experimental results
1348 show that disabling this option on P4 brings over 20% SPECfp regression,
1349 while enabling it on K8 brings roughly 2.4% regression that can be partly
1350 masked by careful scheduling of moves. */
1351 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1353 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1356 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1357 are resolved on SSE register parts instead of whole registers, so we may
1358 maintain just lower part of scalar values in proper format leaving the
1359 upper part undefined. */
1362 /* X86_TUNE_SSE_TYPELESS_STORES */
1365 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1366 m_PPRO | m_PENT4 | m_NOCONA,
1368 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1369 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1371 /* X86_TUNE_PROLOGUE_USING_MOVE */
1372 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1374 /* X86_TUNE_EPILOGUE_USING_MOVE */
1375 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1377 /* X86_TUNE_SHIFT1 */
1380 /* X86_TUNE_USE_FFREEP */
1383 /* X86_TUNE_INTER_UNIT_MOVES */
1384 ~(m_AMD_MULTIPLE | m_GENERIC),
1386 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1389 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1390 than 4 branch instructions in the 16 byte window. */
1391 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1393 /* X86_TUNE_SCHEDULE */
1394 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1396 /* X86_TUNE_USE_BT */
1397 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1399 /* X86_TUNE_USE_INCDEC */
1400 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1402 /* X86_TUNE_PAD_RETURNS */
1403 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1405 /* X86_TUNE_EXT_80387_CONSTANTS */
1406 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1408 /* X86_TUNE_SHORTEN_X87_SSE */
1411 /* X86_TUNE_AVOID_VECTOR_DECODE */
1414 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1415 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1418 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1419 vector path on AMD machines. */
1420 m_K8 | m_GENERIC64 | m_AMDFAM10,
1422 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1424 m_K8 | m_GENERIC64 | m_AMDFAM10,
1426 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1430 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1431 but one byte longer. */
1434 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1435 operand that cannot be represented using a modRM byte. The XOR
1436 replacement is long decoded, so this split helps here as well. */
1439 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1440 from integer to FP. */
1443 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1444 with a subsequent conditional jump instruction into a single
1445 compare-and-branch uop. */
1449 /* Feature tests against the various architecture variations. */
1450 unsigned char ix86_arch_features[X86_ARCH_LAST];
1452 /* Feature tests against the various architecture variations, used to create
1453 ix86_arch_features based on the processor mask. */
1454 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1455 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1456 ~(m_386 | m_486 | m_PENT | m_K6),
1458 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1461 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1464 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1467 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1471 static const unsigned int x86_accumulate_outgoing_args
1472 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1474 static const unsigned int x86_arch_always_fancy_math_387
1475 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1476 | m_NOCONA | m_CORE2 | m_GENERIC;
1478 static enum stringop_alg stringop_alg = no_stringop;
1480 /* In case the average insn count for single function invocation is
1481 lower than this constant, emit fast (but longer) prologue and
1483 #define FAST_PROLOGUE_INSN_COUNT 20
1485 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1486 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1487 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1488 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1490 /* Array of the smallest class containing reg number REGNO, indexed by
1491 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1493 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1495 /* ax, dx, cx, bx */
1496 AREG, DREG, CREG, BREG,
1497 /* si, di, bp, sp */
1498 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1500 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1501 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1504 /* flags, fpsr, fpcr, frame */
1505 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1507 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1510 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1513 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1514 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1515 /* SSE REX registers */
1516 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1520 /* The "default" register map used in 32bit mode. */
1522 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1524 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1525 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1526 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1527 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1528 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1529 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1530 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1533 static int const x86_64_int_parameter_registers[6] =
1535 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1536 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1539 static int const x86_64_ms_abi_int_parameter_registers[4] =
1541 2 /*RCX*/, 1 /*RDX*/,
1542 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1545 static int const x86_64_int_return_registers[4] =
1547 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1550 /* The "default" register map used in 64bit mode. */
1551 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1553 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1554 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1555 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1556 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1557 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1558 8,9,10,11,12,13,14,15, /* extended integer registers */
1559 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1562 /* Define the register numbers to be used in Dwarf debugging information.
1563 The SVR4 reference port C compiler uses the following register numbers
1564 in its Dwarf output code:
1565 0 for %eax (gcc regno = 0)
1566 1 for %ecx (gcc regno = 2)
1567 2 for %edx (gcc regno = 1)
1568 3 for %ebx (gcc regno = 3)
1569 4 for %esp (gcc regno = 7)
1570 5 for %ebp (gcc regno = 6)
1571 6 for %esi (gcc regno = 4)
1572 7 for %edi (gcc regno = 5)
1573 The following three DWARF register numbers are never generated by
1574 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1575 believes these numbers have these meanings.
1576 8 for %eip (no gcc equivalent)
1577 9 for %eflags (gcc regno = 17)
1578 10 for %trapno (no gcc equivalent)
1579 It is not at all clear how we should number the FP stack registers
1580 for the x86 architecture. If the version of SDB on x86/svr4 were
1581 a bit less brain dead with respect to floating-point then we would
1582 have a precedent to follow with respect to DWARF register numbers
1583 for x86 FP registers, but the SDB on x86/svr4 is so completely
1584 broken with respect to FP registers that it is hardly worth thinking
1585 of it as something to strive for compatibility with.
1586 The version of x86/svr4 SDB I have at the moment does (partially)
1587 seem to believe that DWARF register number 11 is associated with
1588 the x86 register %st(0), but that's about all. Higher DWARF
1589 register numbers don't seem to be associated with anything in
1590 particular, and even for DWARF regno 11, SDB only seems to under-
1591 stand that it should say that a variable lives in %st(0) (when
1592 asked via an `=' command) if we said it was in DWARF regno 11,
1593 but SDB still prints garbage when asked for the value of the
1594 variable in question (via a `/' command).
1595 (Also note that the labels SDB prints for various FP stack regs
1596 when doing an `x' command are all wrong.)
1597 Note that these problems generally don't affect the native SVR4
1598 C compiler because it doesn't allow the use of -O with -g and
1599 because when it is *not* optimizing, it allocates a memory
1600 location for each floating-point variable, and the memory
1601 location is what gets described in the DWARF AT_location
1602 attribute for the variable in question.
1603 Regardless of the severe mental illness of the x86/svr4 SDB, we
1604 do something sensible here and we use the following DWARF
1605 register numbers. Note that these are all stack-top-relative
1607 11 for %st(0) (gcc regno = 8)
1608 12 for %st(1) (gcc regno = 9)
1609 13 for %st(2) (gcc regno = 10)
1610 14 for %st(3) (gcc regno = 11)
1611 15 for %st(4) (gcc regno = 12)
1612 16 for %st(5) (gcc regno = 13)
1613 17 for %st(6) (gcc regno = 14)
1614 18 for %st(7) (gcc regno = 15)
1616 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1618 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1619 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1620 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1627 /* Test and compare insns in i386.md store the information needed to
1628 generate branch and scc insns here. */
1630 rtx ix86_compare_op0 = NULL_RTX;
1631 rtx ix86_compare_op1 = NULL_RTX;
1632 rtx ix86_compare_emitted = NULL_RTX;
1634 /* Define the structure for the machine field in struct function. */
1636 struct stack_local_entry GTY(())
1638 unsigned short mode;
1641 struct stack_local_entry *next;
1644 /* Structure describing stack frame layout.
1645 Stack grows downward:
1651 saved frame pointer if frame_pointer_needed
1652 <- HARD_FRAME_POINTER
1657 [va_arg registers] (
1658 > to_allocate <- FRAME_POINTER
1668 HOST_WIDE_INT frame;
1670 int outgoing_arguments_size;
1673 HOST_WIDE_INT to_allocate;
1674 /* The offsets relative to ARG_POINTER. */
1675 HOST_WIDE_INT frame_pointer_offset;
1676 HOST_WIDE_INT hard_frame_pointer_offset;
1677 HOST_WIDE_INT stack_pointer_offset;
1679 /* When save_regs_using_mov is set, emit prologue using
1680 move instead of push instructions. */
1681 bool save_regs_using_mov;
1684 /* Code model option. */
1685 enum cmodel ix86_cmodel;
/* Assembler dialect selected by -masm= (AT&T is the default). */
1687 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS access dialect; GNU sequences by default. */
1689 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1691 /* Which unit we are generating floating point math for. */
1692 enum fpmath_unit ix86_fpmath;
1694 /* Which cpu are we scheduling for. */
1695 enum processor_type ix86_tune;
1697 /* Which instruction set architecture to use. */
1698 enum processor_type ix86_arch;
1700 /* true if sse prefetch instruction is not NOOP. */
1701 int x86_prefetch_sse;
1703 /* ix86_regparm_string as a number */
1704 static int ix86_regparm;
1706 /* -mstackrealign option */
1707 extern int ix86_force_align_arg_pointer;
1708 static const char ix86_force_align_arg_pointer_string[]
1709 = "force_align_arg_pointer";
/* Pointers to mode-dependent insn generators (e.g. SImode vs DImode
   variants) — presumably selected once per word size during option
   processing; TODO confirm where they are initialized.  */
1711 static rtx (*ix86_gen_leave) (void);
1712 static rtx (*ix86_gen_pop1) (rtx);
1713 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1714 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1715 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1716 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1717 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1718 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1720 /* Preferred alignment for stack boundary in bits. */
1721 unsigned int ix86_preferred_stack_boundary;
1723 /* Alignment for incoming stack boundary in bits specified at
1725 static unsigned int ix86_user_incoming_stack_boundary;
1727 /* Default alignment for incoming stack boundary in bits. */
1728 static unsigned int ix86_default_incoming_stack_boundary;
1730 /* Alignment for incoming stack boundary in bits. */
1731 unsigned int ix86_incoming_stack_boundary;
1733 /* Values 1-5: see jump.c */
1734 int ix86_branch_cost;
1736 /* Calling abi specific va_list type nodes. */
1737 static GTY(()) tree sysv_va_list_type_node;
1738 static GTY(()) tree ms_va_list_type_node;
1740 /* Variables which are this size or smaller are put in the data/bss
1741 or ldata/lbss sections. */
1743 int ix86_section_threshold = 65536;
1745 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1746 char internal_label_prefix[16];
1747 int internal_label_prefix_len;
1749 /* Fence to use after loop using movnt. */
1752 /* Register class used for passing given 64bit part of the argument.
1753 These represent classes as documented by the PS ABI, with the exception
1754 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1755 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1757 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1758 whenever possible (upper half does contain padding). */
1759 enum x86_64_reg_class
1762 X86_64_INTEGER_CLASS,
1763 X86_64_INTEGERSI_CLASS,
1771 X86_64_COMPLEX_X87_CLASS,
/* Debug names for the classes above; order must track the enum.  */
1774 static const char * const x86_64_reg_class_name[] =
1776 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1777 "sseup", "x87", "x87up", "cplx87", "no"
/* Maximum number of eightbyte classes a single argument can occupy.  */
1780 #define MAX_CLASSES 4
1782 /* Table of constants used by fldpi, fldln2, etc.... */
1783 static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Lazily set when ext_80387_constants_table has been filled in.  */
1784 static bool ext_80387_constants_init = 0;
/* Forward declarations of local helpers defined later in this file.  */
1787 static struct machine_function * ix86_init_machine_status (void);
1788 static rtx ix86_function_value (const_tree, const_tree, bool);
1789 static int ix86_function_regparm (const_tree, const_tree);
1790 static void ix86_compute_frame_layout (struct ix86_frame *);
1791 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1793 static void ix86_add_new_builtins (int);
/* Indices of the per-function target-attribute option strings
   (arch, tune, fpmath) plus the array length.  */
1795 enum ix86_function_specific_strings
1797 IX86_FUNCTION_SPECIFIC_ARCH,
1798 IX86_FUNCTION_SPECIFIC_TUNE,
1799 IX86_FUNCTION_SPECIFIC_FPMATH,
1800 IX86_FUNCTION_SPECIFIC_MAX
1803 static char *ix86_target_string (int, int, const char *, const char *,
1804 const char *, bool);
1805 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1806 static void ix86_function_specific_save (struct cl_target_option *);
1807 static void ix86_function_specific_restore (struct cl_target_option *);
1808 static void ix86_function_specific_print (FILE *, int,
1809 struct cl_target_option *);
1810 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1811 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1812 static bool ix86_can_inline_p (tree, tree);
1813 static void ix86_set_current_function (tree);
1816 /* The svr4 ABI for the i386 says that records and unions are returned
1818 #ifndef DEFAULT_PCC_STRUCT_RETURN
1819 #define DEFAULT_PCC_STRUCT_RETURN 1
1822 /* Whether -mtune= or -march= were specified */
1823 static int ix86_tune_defaulted;
1824 static int ix86_arch_specified;
1826 /* Bit flags that specify the ISA we are compiling for. */
1827 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1829 /* A mask of ix86_isa_flags that includes bit X if X
1830 was set or cleared on the command line. */
1831 static int ix86_isa_flags_explicit;
1833 /* Define a set of ISAs which are available when a given ISA is
1834 enabled. MMX and SSE ISAs are handled separately. */
/* Each *_SET mask is the option's own bit OR'd with the *_SET mask of the
   ISA it builds on, so enabling e.g. SSE3 transitively enables SSE2 and
   SSE.  */
1836 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1837 #define OPTION_MASK_ISA_3DNOW_SET \
1838 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1840 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1841 #define OPTION_MASK_ISA_SSE2_SET \
1842 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1843 #define OPTION_MASK_ISA_SSE3_SET \
1844 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1845 #define OPTION_MASK_ISA_SSSE3_SET \
1846 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1847 #define OPTION_MASK_ISA_SSE4_1_SET \
1848 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1849 #define OPTION_MASK_ISA_SSE4_2_SET \
1850 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1851 #define OPTION_MASK_ISA_AVX_SET \
1852 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1853 #define OPTION_MASK_ISA_FMA_SET \
1854 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1856 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1858 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
/* AMD extensions branch off SSE3 rather than the Intel SSE4 chain.  */
1860 #define OPTION_MASK_ISA_SSE4A_SET \
1861 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1862 #define OPTION_MASK_ISA_SSE5_SET \
1863 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1865 /* AES and PCLMUL need SSE2 because they use xmm registers */
1866 #define OPTION_MASK_ISA_AES_SET \
1867 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1868 #define OPTION_MASK_ISA_PCLMUL_SET \
1869 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
/* ABM implies POPCNT; the remaining options stand alone.  */
1871 #define OPTION_MASK_ISA_ABM_SET \
1872 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1873 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1874 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1875 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1877 /* Define a set of ISAs which aren't available when a given ISA is
1878 disabled. MMX and SSE ISAs are handled separately. */
/* Mirror image of the *_SET masks: each *_UNSET mask is the option's own
   bit OR'd with the *_UNSET masks of every ISA that depends on it, so
   disabling e.g. SSE2 also turns off SSE3 and everything above it.  */
1880 #define OPTION_MASK_ISA_MMX_UNSET \
1881 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1882 #define OPTION_MASK_ISA_3DNOW_UNSET \
1883 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1884 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1886 #define OPTION_MASK_ISA_SSE_UNSET \
1887 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1888 #define OPTION_MASK_ISA_SSE2_UNSET \
1889 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1890 #define OPTION_MASK_ISA_SSE3_UNSET \
1891 (OPTION_MASK_ISA_SSE3 \
1892 | OPTION_MASK_ISA_SSSE3_UNSET \
1893 | OPTION_MASK_ISA_SSE4A_UNSET )
1894 #define OPTION_MASK_ISA_SSSE3_UNSET \
1895 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1896 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1897 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1898 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1899 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1900 #define OPTION_MASK_ISA_AVX_UNSET \
1901 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1902 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
1904 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
1906 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1908 #define OPTION_MASK_ISA_SSE4A_UNSET \
1909 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1910 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
/* Leaf ISAs: nothing else depends on them, so they unset only
   themselves.  Note -mno-abm does NOT clear POPCNT even though -mabm
   sets it.  */
1911 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
1912 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
1913 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
1914 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
1915 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
1916 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
1918 /* Vectorization library interface and handlers. */
/* NULL unless -mveclibabi= selects one of the handlers below.  */
1919 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1920 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1921 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1923 /* Processor target table, indexed by processor number */
1926 const struct processor_costs *cost; /* Processor costs */
1927 const int align_loop; /* Default alignments. */
1928 const int align_loop_max_skip;
1929 const int align_jump;
1930 const int align_jump_max_skip;
1931 const int align_func;
/* One row per processor; row order must match enum processor_type,
   since the table is indexed by ix86_tune.  */
1934 static const struct ptt processor_target_table[PROCESSOR_max] =
1936 {&i386_cost, 4, 3, 4, 3, 4},
1937 {&i486_cost, 16, 15, 16, 15, 16},
1938 {&pentium_cost, 16, 7, 16, 7, 16},
1939 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1940 {&geode_cost, 0, 0, 0, 0, 0},
1941 {&k6_cost, 32, 7, 32, 7, 32},
1942 {&athlon_cost, 16, 7, 16, 7, 16},
1943 {&pentium4_cost, 0, 0, 0, 0, 0},
1944 {&k8_cost, 16, 7, 16, 7, 16},
1945 {&nocona_cost, 0, 0, 0, 0, 0},
1946 {&core2_cost, 16, 10, 16, 10, 16},
1947 {&generic32_cost, 16, 7, 16, 7, 16},
1948 {&generic64_cost, 16, 10, 16, 10, 16},
1949 {&amdfam10_cost, 32, 24, 32, 7, 32}
/* Names for the TARGET_CPU_DEFAULT_* values used when no -march/-mtune
   is given.  */
1952 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1977 /* Implement TARGET_HANDLE_OPTION. */
/* Called once per -m command-line switch.  For each ISA option pair
   (-mfoo / -mno-foo): the enable path ORs the *_SET mask (the ISA plus
   everything it implies) into ix86_isa_flags, the disable path clears the
   *_UNSET mask (the ISA plus everything depending on it).  Both paths
   record the same mask in ix86_isa_flags_explicit so later defaulting
   (e.g. from -march) won't override an explicit user choice.  */
1980 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
/* -mmmx / -mno-mmx */
1987 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1988 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1992 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1993 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
/* -m3dnow / -mno-3dnow */
2000 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2001 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2005 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2006 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
/* -msse / -mno-sse */
2016 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2017 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2021 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2022 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
/* -msse2 / -mno-sse2 */
2029 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2030 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2034 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2035 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
/* -msse3 / -mno-sse3 */
2042 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2043 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2047 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2048 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
/* -mssse3 / -mno-ssse3 */
2055 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2056 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2060 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2061 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
/* -msse4.1 / -mno-sse4.1 */
2068 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2069 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2073 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2074 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
/* -msse4.2 / -mno-sse4.2 */
2081 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2082 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2086 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2087 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
/* -mavx / -mno-avx */
2094 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2095 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2099 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2100 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
/* -mfma / -mno-fma */
2107 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2108 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2112 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2113 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
/* -msse4 / -mno-sse4 (umbrella for 4.1 + 4.2) */
2118 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2119 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2123 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2124 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
/* -msse4a / -mno-sse4a */
2130 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2131 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2135 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2136 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
/* -msse5 / -mno-sse5 */
2143 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2144 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2148 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2149 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
/* -mabm / -mno-abm */
2156 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2157 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2161 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2162 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
/* -mpopcnt / -mno-popcnt */
2169 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2170 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2174 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2175 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
/* -msahf / -mno-sahf */
2182 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2183 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2187 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2188 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
/* -mcx16 / -mno-cx16 */
2195 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2196 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2200 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2201 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
/* -maes / -mno-aes */
2208 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2209 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2213 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2214 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
/* -mpclmul / -mno-pclmul */
2221 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2222 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2226 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2227 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2236 /* Return a string the documents the current -m options. The caller is
2237 responsible for freeing the string. */
/* Builds a single malloc'd string such as "-march=k8 -msse2 -mcld" from
   the ISA bits in ISA, the target flags in FLAGS, and the ARCH/TUNE/
   FPMATH names.  When ADD_NL_P, long lines are wrapped and leftover
   unrecognized bits are appended as "(other ...)" diagnostics.  */
2240 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2241 const char *fpmath, bool add_nl_p)
2243 struct ix86_target_opts
2245 const char *option; /* option string */
2246 int mask; /* isa mask options */
2249 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2250 preceding options will be matched first. */
2251 static struct ix86_target_opts isa_opts[] =
2253 { "-m64", OPTION_MASK_ISA_64BIT },
2254 { "-msse5", OPTION_MASK_ISA_SSE5 },
2255 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2256 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2257 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2258 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2259 { "-msse3", OPTION_MASK_ISA_SSE3 },
2260 { "-msse2", OPTION_MASK_ISA_SSE2 },
2261 { "-msse", OPTION_MASK_ISA_SSE },
2262 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2263 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2264 { "-mmmx", OPTION_MASK_ISA_MMX },
2265 { "-mabm", OPTION_MASK_ISA_ABM },
2266 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2267 { "-maes", OPTION_MASK_ISA_AES },
2268 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
/* Non-ISA target_flags bits and their option spellings.  */
2272 static struct ix86_target_opts flag_opts[] =
2274 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2275 { "-m80387", MASK_80387 },
2276 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2277 { "-malign-double", MASK_ALIGN_DOUBLE },
2278 { "-mcld", MASK_CLD },
2279 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2280 { "-mieee-fp", MASK_IEEE_FP },
2281 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2282 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2283 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2284 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2285 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2286 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2287 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2288 { "-mno-red-zone", MASK_NO_RED_ZONE },
2289 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2290 { "-mrecip", MASK_RECIP },
2291 { "-mrtd", MASK_RTD },
2292 { "-msseregparm", MASK_SSEREGPARM },
2293 { "-mstack-arg-probe", MASK_STACK_PROBE },
2294 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
/* Worst case: every table entry plus arch/tune/fpmath/other slots.  */
2297 const char *opts[ (sizeof (isa_opts) / sizeof (isa_opts[0])
2298 + sizeof (flag_opts) / sizeof (flag_opts[0])
2302 char target_other[40];
2311 memset (opts, '\0', sizeof (opts));
2313 /* Add -march= option. */
2316 opts[num][0] = "-march=";
2317 opts[num++][1] = arch;
2320 /* Add -mtune= option. */
2323 opts[num][0] = "-mtune=";
2324 opts[num++][1] = tune;
2327 /* Pick out the options in isa options. */
2328 for (i = 0; i < sizeof (isa_opts) / sizeof (isa_opts[0]); i++)
2330 if ((isa & isa_opts[i].mask) != 0)
2332 opts[num++][0] = isa_opts[i].option;
2333 isa &= ~ isa_opts[i].mask;
/* Any ISA bits with no table entry are reported raw.  */
2337 if (isa && add_nl_p)
2339 opts[num++][0] = isa_other;
2340 sprintf (isa_other, "(other isa: 0x%x)", isa)\u003b
2343 /* Add flag options. */
2344 for (i = 0; i < sizeof (flag_opts) / sizeof (flag_opts[0]); i++)
2346 if ((flags & flag_opts[i].mask) != 0)
2348 opts[num++][0] = flag_opts[i].option;
2349 flags &= ~ flag_opts[i].mask;
/* Report leftover target_flags bits.  Bug fix: this message previously
   printed the leftover ISA bits ("isa") instead of the leftover FLAGS
   bits it is guarded on and describes.  */
2353 if (flags && add_nl_p)
2355 opts[num++][0] = target_other;
2356 sprintf (target_other, "(other flags: 0x%x)", flags);
2359 /* Add -fpmath= option. */
2362 opts[num][0] = "-mfpmath=";
2363 opts[num++][1] = fpmath;
2370 gcc_assert (num < sizeof (opts) / sizeof (opts[0]));
2372 /* Size the string. */
2374 sep_len = (add_nl_p) ? 3 : 1;
2375 for (i = 0; i < num; i++)
2378 for (j = 0; j < 2; j++)
2380 len += strlen (opts[i][j]);
2383 /* Build the string. */
2384 ret = ptr = (char *) xmalloc (len);
2387 for (i = 0; i < num; i++)
2391 for (j = 0; j < 2; j++)
2392 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
/* Wrap to a new line once the current one would exceed ~70 columns.  */
2399 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2407 for (j = 0; j < 2; j++)
2410 memcpy (ptr, opts[i][j], len2[j]);
2412 line_len += len2[j];
2417 gcc_assert (ret + len >= ptr);
2422 /* Function that is callable from the debugger to print the current
/* Dumps the result of ix86_target_string for the current global option
   state to stderr; falls back to a placeholder when no string is built.  */
2425 ix86_debug_options (void)
2427 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2428 ix86_arch_string, ix86_tune_string,
2429 ix86_fpmath_string, true);
2433 fprintf (stderr, "%s\n\n", opts);
2437 fprintf (stderr, "<no options>\n\n");
2442 /* Sometimes certain combinations of command options do not make
2443 sense on a particular target machine. You can define a macro
2444 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2445 defined, is executed once just after all the command options have
2448 Don't use this macro to turn on various extra optimizations for
2449 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2452 override_options (bool main_args_p)
2455 unsigned int ix86_arch_mask, ix86_tune_mask;
2460 /* Comes from final.c -- no real reason to change it. */
2461 #define MAX_CODE_ALIGN 16
2469 PTA_PREFETCH_SSE = 1 << 4,
2471 PTA_3DNOW_A = 1 << 6,
2475 PTA_POPCNT = 1 << 10,
2477 PTA_SSE4A = 1 << 12,
2478 PTA_NO_SAHF = 1 << 13,
2479 PTA_SSE4_1 = 1 << 14,
2480 PTA_SSE4_2 = 1 << 15,
2483 PTA_PCLMUL = 1 << 18,
2490 const char *const name; /* processor name or nickname. */
2491 const enum processor_type processor;
2492 const unsigned /*enum pta_flags*/ flags;
2494 const processor_alias_table[] =
2496 {"i386", PROCESSOR_I386, 0},
2497 {"i486", PROCESSOR_I486, 0},
2498 {"i586", PROCESSOR_PENTIUM, 0},
2499 {"pentium", PROCESSOR_PENTIUM, 0},
2500 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2501 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2502 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2503 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2504 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2505 {"i686", PROCESSOR_PENTIUMPRO, 0},
2506 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2507 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2508 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2509 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2510 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2511 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2512 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2513 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2514 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2515 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2516 | PTA_CX16 | PTA_NO_SAHF)},
2517 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2518 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2521 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2522 |PTA_PREFETCH_SSE)},
2523 {"k6", PROCESSOR_K6, PTA_MMX},
2524 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2525 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2526 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2527 | PTA_PREFETCH_SSE)},
2528 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2529 | PTA_PREFETCH_SSE)},
2530 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2532 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2534 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2536 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2537 | PTA_MMX | PTA_SSE | PTA_SSE2
2539 {"k8", PROCESSOR_K8, (PTA_64BIT
2540 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2541 | PTA_SSE | PTA_SSE2
2543 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2544 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2545 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2547 {"opteron", PROCESSOR_K8, (PTA_64BIT
2548 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2549 | PTA_SSE | PTA_SSE2
2551 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2552 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2553 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2555 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2556 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2557 | PTA_SSE | PTA_SSE2
2559 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2560 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2561 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2563 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2564 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2565 | PTA_SSE | PTA_SSE2
2567 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2568 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2569 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2571 | PTA_CX16 | PTA_ABM)},
2572 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2573 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2574 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2576 | PTA_CX16 | PTA_ABM)},
2577 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2578 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2581 int const pta_size = ARRAY_SIZE (processor_alias_table);
2583 /* Set up prefix/suffix so the error messages refer to either the command
2584 line argument, or the attribute(target). */
2593 prefix = "option(\"";
2598 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2599 SUBTARGET_OVERRIDE_OPTIONS;
2602 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2603 SUBSUBTARGET_OVERRIDE_OPTIONS;
2606 /* -fPIC is the default for x86_64. */
2607 if (TARGET_MACHO && TARGET_64BIT)
2610 /* Set the default values for switches whose default depends on TARGET_64BIT
2611 in case they weren't overwritten by command line options. */
2614 /* Mach-O doesn't support omitting the frame pointer for now. */
2615 if (flag_omit_frame_pointer == 2)
2616 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2617 if (flag_asynchronous_unwind_tables == 2)
2618 flag_asynchronous_unwind_tables = 1;
2619 if (flag_pcc_struct_return == 2)
2620 flag_pcc_struct_return = 0;
2624 if (flag_omit_frame_pointer == 2)
2625 flag_omit_frame_pointer = 0;
2626 if (flag_asynchronous_unwind_tables == 2)
2627 flag_asynchronous_unwind_tables = 0;
2628 if (flag_pcc_struct_return == 2)
2629 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2632 /* Need to check -mtune=generic first. */
2633 if (ix86_tune_string)
2635 if (!strcmp (ix86_tune_string, "generic")
2636 || !strcmp (ix86_tune_string, "i686")
2637 /* As special support for cross compilers we read -mtune=native
2638 as -mtune=generic. With native compilers we won't see the
2639 -mtune=native, as it was changed by the driver. */
2640 || !strcmp (ix86_tune_string, "native"))
2643 ix86_tune_string = "generic64";
2645 ix86_tune_string = "generic32";
2647 /* If this call is for setting the option attribute, allow the
2648 generic32/generic64 that was previously set. */
2649 else if (!main_args_p
2650 && (!strcmp (ix86_tune_string, "generic32")
2651 || !strcmp (ix86_tune_string, "generic64")))
2653 else if (!strncmp (ix86_tune_string, "generic", 7))
2654 error ("bad value (%s) for %stune=%s %s",
2655 ix86_tune_string, prefix, suffix, sw);
2659 if (ix86_arch_string)
2660 ix86_tune_string = ix86_arch_string;
2661 if (!ix86_tune_string)
2663 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2664 ix86_tune_defaulted = 1;
2667 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2668 need to use a sensible tune option. */
2669 if (!strcmp (ix86_tune_string, "generic")
2670 || !strcmp (ix86_tune_string, "x86-64")
2671 || !strcmp (ix86_tune_string, "i686"))
2674 ix86_tune_string = "generic64";
2676 ix86_tune_string = "generic32";
2679 if (ix86_stringop_string)
2681 if (!strcmp (ix86_stringop_string, "rep_byte"))
2682 stringop_alg = rep_prefix_1_byte;
2683 else if (!strcmp (ix86_stringop_string, "libcall"))
2684 stringop_alg = libcall;
2685 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2686 stringop_alg = rep_prefix_4_byte;
2687 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2688 stringop_alg = rep_prefix_8_byte;
2689 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2690 stringop_alg = loop_1_byte;
2691 else if (!strcmp (ix86_stringop_string, "loop"))
2692 stringop_alg = loop;
2693 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2694 stringop_alg = unrolled_loop;
2696 error ("bad value (%s) for %sstringop-strategy=%s %s",
2697 ix86_stringop_string, prefix, suffix, sw);
2699 if (!strcmp (ix86_tune_string, "x86-64"))
2700 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2701 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2702 prefix, suffix, prefix, suffix, prefix, suffix);
2704 if (!ix86_arch_string)
2705 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2707 ix86_arch_specified = 1;
2709 if (!strcmp (ix86_arch_string, "generic"))
2710 error ("generic CPU can be used only for %stune=%s %s",
2711 prefix, suffix, sw);
2712 if (!strncmp (ix86_arch_string, "generic", 7))
2713 error ("bad value (%s) for %sarch=%s %s",
2714 ix86_arch_string, prefix, suffix, sw);
2716 if (ix86_cmodel_string != 0)
2718 if (!strcmp (ix86_cmodel_string, "small"))
2719 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2720 else if (!strcmp (ix86_cmodel_string, "medium"))
2721 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2722 else if (!strcmp (ix86_cmodel_string, "large"))
2723 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2725 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2726 else if (!strcmp (ix86_cmodel_string, "32"))
2727 ix86_cmodel = CM_32;
2728 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2729 ix86_cmodel = CM_KERNEL;
2731 error ("bad value (%s) for %scmodel=%s %s",
2732 ix86_cmodel_string, prefix, suffix, sw);
2736 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2737 use of rip-relative addressing. This eliminates fixups that
2738 would otherwise be needed if this object is to be placed in a
2739 DLL, and is essentially just as efficient as direct addressing. */
2740 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2741 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2742 else if (TARGET_64BIT)
2743 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2745 ix86_cmodel = CM_32;
2747 if (ix86_asm_string != 0)
2750 && !strcmp (ix86_asm_string, "intel"))
2751 ix86_asm_dialect = ASM_INTEL;
2752 else if (!strcmp (ix86_asm_string, "att"))
2753 ix86_asm_dialect = ASM_ATT;
2755 error ("bad value (%s) for %sasm=%s %s",
2756 ix86_asm_string, prefix, suffix, sw);
2758 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2759 error ("code model %qs not supported in the %s bit mode",
2760 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2761 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2762 sorry ("%i-bit mode not compiled in",
2763 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2765 for (i = 0; i < pta_size; i++)
2766 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2768 ix86_arch = processor_alias_table[i].processor;
2769 /* Default cpu tuning to the architecture. */
2770 ix86_tune = ix86_arch;
2772 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2773 error ("CPU you selected does not support x86-64 "
2776 if (processor_alias_table[i].flags & PTA_MMX
2777 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2778 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2779 if (processor_alias_table[i].flags & PTA_3DNOW
2780 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2781 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2782 if (processor_alias_table[i].flags & PTA_3DNOW_A
2783 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2784 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2785 if (processor_alias_table[i].flags & PTA_SSE
2786 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2787 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2788 if (processor_alias_table[i].flags & PTA_SSE2
2789 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2790 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2791 if (processor_alias_table[i].flags & PTA_SSE3
2792 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2793 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2794 if (processor_alias_table[i].flags & PTA_SSSE3
2795 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2796 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2797 if (processor_alias_table[i].flags & PTA_SSE4_1
2798 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2799 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2800 if (processor_alias_table[i].flags & PTA_SSE4_2
2801 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2802 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2803 if (processor_alias_table[i].flags & PTA_AVX
2804 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2805 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2806 if (processor_alias_table[i].flags & PTA_FMA
2807 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2808 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2809 if (processor_alias_table[i].flags & PTA_SSE4A
2810 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2811 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2812 if (processor_alias_table[i].flags & PTA_SSE5
2813 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2814 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2815 if (processor_alias_table[i].flags & PTA_ABM
2816 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2817 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2818 if (processor_alias_table[i].flags & PTA_CX16
2819 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2820 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2821 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2822 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2823 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2824 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2825 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2826 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2827 if (processor_alias_table[i].flags & PTA_AES
2828 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2829 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2830 if (processor_alias_table[i].flags & PTA_PCLMUL
2831 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2832 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2833 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2834 x86_prefetch_sse = true;
2840 error ("bad value (%s) for %sarch=%s %s",
2841 ix86_arch_string, prefix, suffix, sw);
2843 ix86_arch_mask = 1u << ix86_arch;
2844 for (i = 0; i < X86_ARCH_LAST; ++i)
2845 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2847 for (i = 0; i < pta_size; i++)
2848 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2850 ix86_tune = processor_alias_table[i].processor;
2851 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2853 if (ix86_tune_defaulted)
2855 ix86_tune_string = "x86-64";
2856 for (i = 0; i < pta_size; i++)
2857 if (! strcmp (ix86_tune_string,
2858 processor_alias_table[i].name))
2860 ix86_tune = processor_alias_table[i].processor;
2863 error ("CPU you selected does not support x86-64 "
2866 /* Intel CPUs have always interpreted SSE prefetch instructions as
2867 NOPs; so, we can enable SSE prefetch instructions even when
2868 -mtune (rather than -march) points us to a processor that has them.
2869 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2870 higher processors. */
2872 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2873 x86_prefetch_sse = true;
2877 error ("bad value (%s) for %stune=%s %s",
2878 ix86_tune_string, prefix, suffix, sw);
2880 ix86_tune_mask = 1u << ix86_tune;
2881 for (i = 0; i < X86_TUNE_LAST; ++i)
2882 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2885 ix86_cost = &ix86_size_cost;
2887 ix86_cost = processor_target_table[ix86_tune].cost;
2889 /* Arrange to set up i386_stack_locals for all functions. */
2890 init_machine_status = ix86_init_machine_status;
2892 /* Validate -mregparm= value. */
2893 if (ix86_regparm_string)
2896 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2897 i = atoi (ix86_regparm_string);
2898 if (i < 0 || i > REGPARM_MAX)
2899 error ("%sregparm=%d%s is not between 0 and %d",
2900 prefix, i, suffix, REGPARM_MAX);
2905 ix86_regparm = REGPARM_MAX;
2907 /* If the user has provided any of the -malign-* options,
2908 warn and use that value only if -falign-* is not set.
2909 Remove this code in GCC 3.2 or later. */
2910 if (ix86_align_loops_string)
2912 warning (0, "%salign-loops%s is obsolete, use %salign-loops%s",
2913 prefix, suffix, prefix, suffix);
2914 if (align_loops == 0)
2916 i = atoi (ix86_align_loops_string);
2917 if (i < 0 || i > MAX_CODE_ALIGN)
2918 error ("%salign-loops=%d%s is not between 0 and %d",
2919 prefix, i, suffix, MAX_CODE_ALIGN);
2921 align_loops = 1 << i;
2925 if (ix86_align_jumps_string)
2927 warning (0, "%salign-jumps%s is obsolete, use %salign-jumps%s",
2928 prefix, suffix, prefix, suffix);
2929 if (align_jumps == 0)
2931 i = atoi (ix86_align_jumps_string);
2932 if (i < 0 || i > MAX_CODE_ALIGN)
2933 error ("%salign-loops=%d%s is not between 0 and %d",
2934 prefix, i, suffix, MAX_CODE_ALIGN);
2936 align_jumps = 1 << i;
2940 if (ix86_align_funcs_string)
2942 warning (0, "%salign-functions%s is obsolete, use %salign-functions%s",
2943 prefix, suffix, prefix, suffix);
2944 if (align_functions == 0)
2946 i = atoi (ix86_align_funcs_string);
2947 if (i < 0 || i > MAX_CODE_ALIGN)
2948 error ("%salign-loops=%d%s is not between 0 and %d",
2949 prefix, i, suffix, MAX_CODE_ALIGN);
2951 align_functions = 1 << i;
2955 /* Default align_* from the processor table. */
2956 if (align_loops == 0)
2958 align_loops = processor_target_table[ix86_tune].align_loop;
2959 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2961 if (align_jumps == 0)
2963 align_jumps = processor_target_table[ix86_tune].align_jump;
2964 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2966 if (align_functions == 0)
2968 align_functions = processor_target_table[ix86_tune].align_func;
2971 /* Validate -mbranch-cost= value, or provide default. */
2972 ix86_branch_cost = ix86_cost->branch_cost;
2973 if (ix86_branch_cost_string)
2975 i = atoi (ix86_branch_cost_string);
2977 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
2979 ix86_branch_cost = i;
2981 if (ix86_section_threshold_string)
2983 i = atoi (ix86_section_threshold_string);
2985 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
2987 ix86_section_threshold = i;
2990 if (ix86_tls_dialect_string)
2992 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2993 ix86_tls_dialect = TLS_DIALECT_GNU;
2994 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2995 ix86_tls_dialect = TLS_DIALECT_GNU2;
2996 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2997 ix86_tls_dialect = TLS_DIALECT_SUN;
2999 error ("bad value (%s) for %stls-dialect=%s %s",
3000 ix86_tls_dialect_string, prefix, suffix, sw);
3003 if (ix87_precision_string)
3005 i = atoi (ix87_precision_string);
3006 if (i != 32 && i != 64 && i != 80)
3007 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3012 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3014 /* Enable by default the SSE and MMX builtins. Do allow the user to
3015 explicitly disable any of these. In particular, disabling SSE and
3016 MMX for kernel code is extremely useful. */
3017 if (!ix86_arch_specified)
3019 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3020 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3023 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3027 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3029 if (!ix86_arch_specified)
3031 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3033 /* i386 ABI does not specify red zone.  It still makes sense to use it
3034 when the programmer takes care to keep the stack from being destroyed. */
3035 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3036 target_flags |= MASK_NO_RED_ZONE;
3039 /* Keep nonleaf frame pointers. */
3040 if (flag_omit_frame_pointer)
3041 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3042 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3043 flag_omit_frame_pointer = 1;
3045 /* If we're doing fast math, we don't care about comparison order
3046 wrt NaNs. This lets us use a shorter comparison sequence. */
3047 if (flag_finite_math_only)
3048 target_flags &= ~MASK_IEEE_FP;
3050 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3051 since the insns won't need emulation. */
3052 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3053 target_flags &= ~MASK_NO_FANCY_MATH_387;
3055 /* Likewise, if the target doesn't have a 387, or we've specified
3056 software floating point, don't use 387 inline intrinsics. */
3058 target_flags |= MASK_NO_FANCY_MATH_387;
3060 /* Turn on MMX builtins for -msse. */
3063 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3064 x86_prefetch_sse = true;
3067 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3068 if (TARGET_SSE4_2 || TARGET_ABM)
3069 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3071 /* Validate -mpreferred-stack-boundary= value or default it to
3072 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3073 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3074 if (ix86_preferred_stack_boundary_string)
3076 i = atoi (ix86_preferred_stack_boundary_string);
3077 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3078 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3079 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3081 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3084 /* Set the default value for -mstackrealign. */
3085 if (ix86_force_align_arg_pointer == -1)
3086 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3088 /* Validate -mincoming-stack-boundary= value or default it to
3089 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3090 if (ix86_force_align_arg_pointer)
3091 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3093 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3094 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3095 if (ix86_incoming_stack_boundary_string)
3097 i = atoi (ix86_incoming_stack_boundary_string);
3098 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3099 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3100 i, TARGET_64BIT ? 4 : 2);
3103 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3104 ix86_incoming_stack_boundary
3105 = ix86_user_incoming_stack_boundary;
3109 /* Accept -msseregparm only if at least SSE support is enabled. */
3110 if (TARGET_SSEREGPARM
3112 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3114 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3115 if (ix86_fpmath_string != 0)
3117 if (! strcmp (ix86_fpmath_string, "387"))
3118 ix86_fpmath = FPMATH_387;
3119 else if (! strcmp (ix86_fpmath_string, "sse"))
3123 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3124 ix86_fpmath = FPMATH_387;
3127 ix86_fpmath = FPMATH_SSE;
3129 else if (! strcmp (ix86_fpmath_string, "387,sse")
3130 || ! strcmp (ix86_fpmath_string, "387+sse")
3131 || ! strcmp (ix86_fpmath_string, "sse,387")
3132 || ! strcmp (ix86_fpmath_string, "sse+387")
3133 || ! strcmp (ix86_fpmath_string, "both"))
3137 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3138 ix86_fpmath = FPMATH_387;
3140 else if (!TARGET_80387)
3142 warning (0, "387 instruction set disabled, using SSE arithmetics");
3143 ix86_fpmath = FPMATH_SSE;
3146 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3149 error ("bad value (%s) for %sfpmath=%s %s",
3150 ix86_fpmath_string, prefix, suffix, sw);
3153 /* If the i387 is disabled, then do not return values in it. */
3155 target_flags &= ~MASK_FLOAT_RETURNS;
3157 /* Use external vectorized library in vectorizing intrinsics. */
3158 if (ix86_veclibabi_string)
3160 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3161 ix86_veclib_handler = ix86_veclibabi_svml;
3162 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3163 ix86_veclib_handler = ix86_veclibabi_acml;
3165 error ("unknown vectorization library ABI type (%s) for "
3166 "%sveclibabi=%s %s", ix86_veclibabi_string,
3167 prefix, suffix, sw);
3170 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3171 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3173 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3175 /* ??? Unwind info is not correct around the CFG unless either a frame
3176 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3177 unwind info generation to be aware of the CFG and propagating states
3179 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3180 || flag_exceptions || flag_non_call_exceptions)
3181 && flag_omit_frame_pointer
3182 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3184 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3185 warning (0, "unwind tables currently require either a frame pointer "
3186 "or %saccumulate-outgoing-args%s for correctness",
3188 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3191 /* If stack probes are required, the space used for large function
3192 arguments on the stack must also be probed, so enable
3193 -maccumulate-outgoing-args so this happens in the prologue. */
3194 if (TARGET_STACK_PROBE
3195 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3197 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3198 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3199 "for correctness", prefix, suffix);
3200 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3203 /* For sane SSE instruction set generation we need fcomi instruction.
3204 It is safe to enable all CMOVE instructions. */
3208 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3211 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3212 p = strchr (internal_label_prefix, 'X');
3213 internal_label_prefix_len = p - internal_label_prefix;
3217 /* When scheduling description is not available, disable scheduler pass
3218 so it won't slow down the compilation and make x87 code slower. */
3219 if (!TARGET_SCHEDULE)
3220 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3222 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3223 set_param_value ("simultaneous-prefetches",
3224 ix86_cost->simultaneous_prefetches);
3225 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3226 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3227 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3228 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3229 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3230 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3232 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3233 can be optimized to ap = __builtin_next_arg (0). */
3235 targetm.expand_builtin_va_start = NULL;
3239 ix86_gen_leave = gen_leave_rex64;
3240 ix86_gen_pop1 = gen_popdi1;
3241 ix86_gen_add3 = gen_adddi3;
3242 ix86_gen_sub3 = gen_subdi3;
3243 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3244 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3245 ix86_gen_monitor = gen_sse3_monitor64;
3246 ix86_gen_andsp = gen_anddi3;
3250 ix86_gen_leave = gen_leave;
3251 ix86_gen_pop1 = gen_popsi1;
3252 ix86_gen_add3 = gen_addsi3;
3253 ix86_gen_sub3 = gen_subsi3;
3254 ix86_gen_sub3_carry = gen_subsi3_carry;
3255 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3256 ix86_gen_monitor = gen_sse3_monitor;
3257 ix86_gen_andsp = gen_andsi3;
3261 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3263 target_flags |= MASK_CLD & ~target_flags_explicit;
3266 /* Save the initial options in case the user does function specific options */
3268 target_option_default_node = target_option_current_node
3269 = build_target_option_node ();
3272 /* Save the current options */
3275 ix86_function_specific_save (struct cl_target_option *ptr)
3277 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3278 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3279 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3280 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3282 ptr->arch = ix86_arch;
3283 ptr->tune = ix86_tune;
3284 ptr->fpmath = ix86_fpmath;
3285 ptr->branch_cost = ix86_branch_cost;
3286 ptr->tune_defaulted = ix86_tune_defaulted;
3287 ptr->arch_specified = ix86_arch_specified;
3288 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3289 ptr->target_flags_explicit = target_flags_explicit;
3292 /* Restore the current options */
3295 ix86_function_specific_restore (struct cl_target_option *ptr)
3297 enum processor_type old_tune = ix86_tune;
3298 enum processor_type old_arch = ix86_arch;
3299 unsigned int ix86_arch_mask, ix86_tune_mask;
3302 ix86_arch = ptr->arch;
3303 ix86_tune = ptr->tune;
3304 ix86_fpmath = ptr->fpmath;
3305 ix86_branch_cost = ptr->branch_cost;
3306 ix86_tune_defaulted = ptr->tune_defaulted;
3307 ix86_arch_specified = ptr->arch_specified;
3308 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3309 target_flags_explicit = ptr->target_flags_explicit;
3311 /* Recreate the arch feature tests if the arch changed */
3312 if (old_arch != ix86_arch)
3314 ix86_arch_mask = 1u << ix86_arch;
3315 for (i = 0; i < X86_ARCH_LAST; ++i)
3316 ix86_arch_features[i]
3317 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3320 /* Recreate the tune optimization tests */
3321 if (old_tune != ix86_tune)
3323 ix86_tune_mask = 1u << ix86_tune;
3324 for (i = 0; i < X86_TUNE_LAST; ++i)
3325 ix86_tune_features[i]
3326 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3330 /* Print the current options */
/* Dump the target-option state saved in PTR to FILE at column INDENT,
   for debugging attribute((target(...))) handling.
   NOTE(review): this listing is missing interior lines (the declaration
   of target_string, some fprintf arguments, braces); commentary below
   covers only what is visible.  */
3333 ix86_function_specific_print (FILE *file, int indent,
3334 struct cl_target_option *ptr)
/* Build a printable "-m..." option string from the saved ISA/target
   flags.  It is freed at the bottom of this function, so it is
   presumably heap-allocated by ix86_target_string.  */
3337 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3338 NULL, NULL, NULL, false);
/* Print arch and tune indices, with symbolic names when in range.  */
3340 fprintf (file, "%*sarch = %d (%s)\n",
3343 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3344 ? cpu_names[ptr->arch]
3347 fprintf (file, "%*stune = %d (%s)\n",
3350 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3351 ? cpu_names[ptr->tune]
/* fpmath is a bit mask; both FPMATH_387 and FPMATH_SSE may be set.  */
3354 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3355 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3356 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3357 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3361 fprintf (file, "%*s%s\n", indent, "", target_string);
3362 free (target_string);
3367 /* Inner function to process the attribute((target(...))), take an argument and
3368 set the current options from the argument. If we have a list, recursively go
/* over the list.  Returns false (after emitting an error) for unknown or
   duplicated options.  String-valued options (arch=, tune=, fpmath=) are
   stored into P_STRINGS for the caller to apply and later free.
   NOTE(review): several interior lines of this listing are missing
   (struct declaration, braces, some assignments).  */
3372 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Table-entry builders: S is the option name, O the OPT_* enumerator,
   M a target_flags mask (0 where unused); sizeof (S)-1 precomputes the
   name length used by the matcher below.  */
3377 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3378 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3379 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3380 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3395 enum ix86_opt_type type;
/* ISA options are dispatched through ix86_handle_option below.  */
3400 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3401 IX86_ATTR_ISA ("abm", OPT_mabm),
3402 IX86_ATTR_ISA ("aes", OPT_maes),
3403 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3404 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3405 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3406 IX86_ATTR_ISA ("sse", OPT_msse),
3407 IX86_ATTR_ISA ("sse2", OPT_msse2),
3408 IX86_ATTR_ISA ("sse3", OPT_msse3),
3409 IX86_ATTR_ISA ("sse4", OPT_msse4),
3410 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3411 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3412 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3413 IX86_ATTR_ISA ("sse5", OPT_msse5),
3414 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3416 /* string options */
3417 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3418 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3419 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Flag options set or clear a mask bit in target_flags.  */
3422 IX86_ATTR_YES ("cld",
3426 IX86_ATTR_NO ("fancy-math-387",
3427 OPT_mfancy_math_387,
3428 MASK_NO_FANCY_MATH_387),
3430 IX86_ATTR_NO ("fused-madd",
3432 MASK_NO_FUSED_MADD),
3434 IX86_ATTR_YES ("ieee-fp",
3438 IX86_ATTR_YES ("inline-all-stringops",
3439 OPT_minline_all_stringops,
3440 MASK_INLINE_ALL_STRINGOPS),
3442 IX86_ATTR_YES ("inline-stringops-dynamically",
3443 OPT_minline_stringops_dynamically,
3444 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3446 IX86_ATTR_NO ("align-stringops",
3447 OPT_mno_align_stringops,
3448 MASK_NO_ALIGN_STRINGOPS),
3450 IX86_ATTR_YES ("recip",
3456 /* If this is a list, recurse to get the options. */
3457 if (TREE_CODE (args) == TREE_LIST)
3461 for (; args; args = TREE_CHAIN (args))
3462 if (TREE_VALUE (args)
3463 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3469 else if (TREE_CODE (args) != STRING_CST)
3472 /* Handle multiple arguments separated by commas. */
/* ASTRDUP makes an obstack copy, so the tree's string is not mutated.  */
3473 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3475 while (next_optstr && *next_optstr != '\0')
3477 char *p = next_optstr;
3479 char *comma = strchr (next_optstr, ',');
3480 const char *opt_string;
3481 size_t len, opt_len;
3486 enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the current comma-separated token.  */
3492 len = comma - next_optstr;
3493 next_optstr = comma + 1;
3501 /* Recognize no-xxx. */
3502 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3511 /* Find the option. */
/* Linear scan of the attrs table; first-character check is a cheap
   pre-filter before the memcmp.  For string options ("arch=" etc.) the
   token must be strictly longer than the prefix so a value follows.  */
3514 for (i = 0; i < sizeof (attrs) / sizeof (attrs[0]); i++)
3516 type = attrs[i].type;
3517 opt_len = attrs[i].len;
3518 if (ch == attrs[i].string[0]
3519 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3520 && memcmp (p, attrs[i].string, opt_len) == 0)
3523 mask = attrs[i].mask;
3524 opt_string = attrs[i].string;
3529 /* Process the option. */
3532 error ("attribute(target(\"%s\")) is unknown", orig_p);
3536 else if (type == ix86_opt_isa)
3537 ix86_handle_option (opt, p, opt_set_p);
3539 else if (type == ix86_opt_yes || type == ix86_opt_no)
/* ix86_opt_no entries have inverted sense (mask names a "NO_" flag).  */
3541 if (type == ix86_opt_no)
3542 opt_set_p = !opt_set_p;
3545 target_flags |= mask;
3547 target_flags &= ~mask;
3550 else if (type == ix86_opt_str)
3554 error ("option(\"%s\") was already specified", opt_string);
/* Record the value part (after the "xxx=" prefix) for the caller.  */
3558 p_strings[opt] = xstrdup (p + opt_len);
3568 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* NOTE(review): interior lines of this listing are missing (return type,
   declaration of t/i, braces, some returns); comments cover only what is
   visible.  */
3571 ix86_valid_target_attribute_tree (tree args)
/* Remember the global option strings so they can be put back after
   override_options has been rerun with the attribute's values.  */
3573 const char *orig_arch_string = ix86_arch_string;
3574 const char *orig_tune_string = ix86_tune_string;
3575 const char *orig_fpmath_string = ix86_fpmath_string;
3576 int orig_tune_defaulted = ix86_tune_defaulted;
3577 int orig_arch_specified = ix86_arch_specified;
3578 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3581 struct cl_target_option *def
3582 = TREE_TARGET_OPTION (target_option_default_node);
3584 /* Process each of the options on the chain. */
3585 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3588 /* If the changed options are different from the default, rerun override_options,
3589 and then save the options away. The string options are attribute options,
3590 and will be undone when we copy the save structure. */
3591 if (ix86_isa_flags != def->ix86_isa_flags
3592 || target_flags != def->target_flags
3593 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3594 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3595 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3597 /* If we are using the default tune= or arch=, undo the string assigned,
3598 and use the default. */
3599 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3600 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3601 else if (!orig_arch_specified)
3602 ix86_arch_string = NULL;
3604 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3605 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3606 else if (orig_tune_defaulted)
3607 ix86_tune_string = NULL;
3609 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3610 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3611 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3612 else if (!TARGET_64BIT && TARGET_SSE)
3613 ix86_fpmath_string = "sse,387";
3615 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3616 override_options (false);
3618 /* Add any builtin functions with the new isa if any. */
3619 ix86_add_new_builtins (ix86_isa_flags);
3621 /* Save the current options unless we are validating options for
3623 t = build_target_option_node ();
/* Restore the original global strings.  */
3625 ix86_arch_string = orig_arch_string;
3626 ix86_tune_string = orig_tune_string;
3627 ix86_fpmath_string = orig_fpmath_string;
3629 /* Free up memory allocated to hold the strings */
3630 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3631 if (option_strings[i])
3632 free (option_strings[i]);
3638 /* Hook to validate attribute((target("string"))). */
/* Validates ARGS for FNDECL: rebuilds the target (and, if affected,
   optimization) option nodes, attaches them to FNDECL, then restores the
   previous global state.  NOTE(review): interior lines (return type,
   ret variable, braces) are missing from this listing.  */
3641 ix86_valid_target_attribute_p (tree fndecl,
3642 tree ARG_UNUSED (name),
3644 int ARG_UNUSED (flags))
3646 struct cl_target_option cur_target;
3648 tree old_optimize = build_optimization_node ();
3649 tree new_target, new_optimize;
3650 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3652 /* If the function changed the optimization levels as well as setting target
3653 options, start with the optimizations specified. */
3654 if (func_optimize && func_optimize != old_optimize)
3655 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3657 /* The target attributes may also change some optimization flags, so update
3658 the optimization options if necessary. */
/* Save current state so it can be restored once the new nodes are built.  */
3659 cl_target_option_save (&cur_target)
3660 new_target = ix86_valid_target_attribute_tree (args);
3661 new_optimize = build_optimization_node ();
3668 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3670 if (old_optimize != new_optimize)
3671 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Put the globals back the way they were before validation.  */
3674 cl_target_option_restore (&cur_target);
3676 if (old_optimize != new_optimize)
3677 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3683 /* Hook to determine if one function can safely inline another. */
/* Compares the per-function target options of CALLER and CALLEE.
   NOTE(review): the return statements for each branch are missing from
   this listing; only the comparison structure is visible.  */
3686 ix86_can_inline_p (tree caller, tree callee)
3689 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3690 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3692 /* If callee has no option attributes, then it is ok to inline. */
3696 /* If caller has no option attributes, but callee does then it is not ok to
3698 else if (!caller_tree)
3703 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3704 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3706 /* Callee's isa options should be a subset of the caller's, i.e. a SSE5 function
3707 can inline a SSE2 function but a SSE2 function can't inline a SSE5
/* Subset test: every ISA bit the callee needs must be set in the caller.  */
3709 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3710 != callee_opts->ix86_isa_flags)
3713 /* See if we have the same non-isa options. */
3714 else if (caller_opts->target_flags != callee_opts->target_flags)
3717 /* See if arch, tune, etc. are the same. */
3718 else if (caller_opts->arch != callee_opts->arch)
3721 else if (caller_opts->tune != callee_opts->tune)
3724 else if (caller_opts->fpmath != callee_opts->fpmath)
3727 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3738 /* Remember the last target of ix86_set_current_function. */
3739 static GTY(()) tree ix86_previous_fndecl;
3741 /* Establish appropriate back-end context for processing the function
3742 FNDECL. The argument might be NULL to indicate processing at top
3743 level, outside of any function scope. */
/* NOTE(review): interior lines (return type, braces, else branches) are
   missing from this listing.  */
3745 ix86_set_current_function (tree fndecl)
3747 /* Only change the context if the function changes. This hook is called
3748 several times in the course of compiling a function, and we don't want to
3749 slow things down too much or call target_reinit when it isn't safe. */
3750 if (fndecl && fndecl != ix86_previous_fndecl)
/* Compare the previous function's target node with the new one; only
   when they differ do we restore option state.  */
3752 tree old_tree = (ix86_previous_fndecl
3753 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3756 tree new_tree = (fndecl
3757 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3760 ix86_previous_fndecl = fndecl;
3761 if (old_tree == new_tree)
3766 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* Fall back to the options current at top level.  */
3772 struct cl_target_option *def
3773 = TREE_TARGET_OPTION (target_option_current_node);
3775 cl_target_option_restore (def);
3782 /* Return true if this goes in large data/bss. */
/* Only meaningful for the medium code models; decisions for explicit
   .ldata/.lbss sections and for objects above ix86_section_threshold.
   NOTE(review): the return statements are missing from this listing.  */
3785 ix86_in_large_data_p (tree exp)
/* Large data sections only exist under the medium code models.  */
3787 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3790 /* Functions are never large data. */
3791 if (TREE_CODE (exp) == FUNCTION_DECL)
/* A variable with an explicit .ldata/.lbss section is large data.  */
3794 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3796 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3797 if (strcmp (section, ".ldata") == 0
3798 || strcmp (section, ".lbss") == 0)
3804 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3806 /* If this is an incomplete type with size 0, then we can't put it
3807 in data because it might be too big when completed. */
3808 if (!size || size > ix86_section_threshold)
3815 /* Switch to the appropriate section for output of DECL.
3816 DECL is either a `VAR_DECL' node or a constant of some sort.
3817 RELOC indicates whether forming the initial value of DECL requires
3818 link-time relocations. */
3820 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
/* NOTE(review): several case labels, break statements and sname
   assignments are missing from this listing.  */
3824 x86_64_elf_select_section (tree decl, int reloc,
3825 unsigned HOST_WIDE_INT align)
/* Large objects under the medium models get ".l"-prefixed sections.  */
3827 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3828 && ix86_in_large_data_p (decl))
3830 const char *sname = NULL;
3831 unsigned int flags = SECTION_WRITE;
3832 switch (categorize_decl_for_section (decl, reloc))
3837 case SECCAT_DATA_REL:
3838 sname = ".ldata.rel";
3840 case SECCAT_DATA_REL_LOCAL:
3841 sname = ".ldata.rel.local";
3843 case SECCAT_DATA_REL_RO:
3844 sname = ".ldata.rel.ro";
3846 case SECCAT_DATA_REL_RO_LOCAL:
3847 sname = ".ldata.rel.ro.local";
3851 flags |= SECTION_BSS;
3854 case SECCAT_RODATA_MERGE_STR:
3855 case SECCAT_RODATA_MERGE_STR_INIT:
3856 case SECCAT_RODATA_MERGE_CONST:
3860 case SECCAT_SRODATA:
3867 /* We don't split these for medium model. Place them into
3868 default sections and hope for best. */
3870 case SECCAT_EMUTLS_VAR:
3871 case SECCAT_EMUTLS_TMPL:
3876 /* We might get called with string constants, but get_named_section
3877 doesn't like them as they are not DECLs. Also, we need to set
3878 flags in that case. */
3880 return get_section (sname, flags, NULL);
3881 return get_named_section (decl, sname, reloc);
/* Not large data: defer to the generic ELF selection logic.  */
3884 return default_elf_select_section (decl, reloc, align);
3887 /* Build up a unique section name, expressed as a
3888 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
3889 RELOC indicates whether the initial value of EXP requires
3890 link-time relocations. */
/* NOTE(review): several case labels, breaks and prefix assignments are
   missing from this listing.  */
3892 static void ATTRIBUTE_UNUSED
3893 x86_64_elf_unique_section (tree decl, int reloc)
3895 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3896 && ix86_in_large_data_p (decl))
3898 const char *prefix = NULL;
3899 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
3900 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
/* Pick the ".l"-prefixed section family for the decl's category.  */
3902 switch (categorize_decl_for_section (decl, reloc))
3905 case SECCAT_DATA_REL:
3906 case SECCAT_DATA_REL_LOCAL:
3907 case SECCAT_DATA_REL_RO:
3908 case SECCAT_DATA_REL_RO_LOCAL:
3909 prefix = one_only ? ".ld" : ".ldata";
3912 prefix = one_only ? ".lb" : ".lbss";
3915 case SECCAT_RODATA_MERGE_STR:
3916 case SECCAT_RODATA_MERGE_STR_INIT:
3917 case SECCAT_RODATA_MERGE_CONST:
3918 prefix = one_only ? ".lr" : ".lrodata";
3920 case SECCAT_SRODATA:
3927 /* We don't split these for medium model. Place them into
3928 default sections and hope for best. */
3930 case SECCAT_EMUTLS_VAR:
3931 prefix = targetm.emutls.var_section;
3933 case SECCAT_EMUTLS_TMPL:
3934 prefix = targetm.emutls.tmpl_section;
3939 const char *name, *linkonce;
/* Build "<linkonce><prefix>.<mangled-name>" and attach it to the decl.  */
3942 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
3943 name = targetm.strip_name_encoding (name);
3945 /* If we're using one_only, then there needs to be a .gnu.linkonce
3946 prefix to the section name. */
3947 linkonce = one_only ? ".gnu.linkonce" : "";
3949 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
3951 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
/* Not large data: use the generic unique-section logic.  */
3955 default_unique_section (decl, reloc);
3958 #ifdef COMMON_ASM_OP
3959 /* This says how to output assembler code to declare an
3960 uninitialized external linkage data object.
3962 For medium model x86-64 we need to use .largecomm opcode for
/* NOTE(review): the return type, align parameter and else branch are
   missing from this listing.  */
3965 x86_elf_aligned_common (FILE *file,
3966 const char *name, unsigned HOST_WIDE_INT size,
/* Objects over the large-data threshold use .largecomm instead of the
   normal COMMON_ASM_OP directive.  */
3969 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3970 && size > (unsigned int)ix86_section_threshold)
3971 fprintf (file, ".largecomm\t");
3973 fprintf (file, "%s", COMMON_ASM_OP);
3974 assemble_name (file, name);
/* Emit ",<size>,<byte alignment>" after the symbol name.  */
3975 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
3976 size, align / BITS_PER_UNIT);
3980 /* Utility function for targets to use in implementing
3981 ASM_OUTPUT_ALIGNED_BSS. */
/* NOTE(review): the return type, align parameter and else keyword are
   missing from this listing.  */
3984 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
3985 const char *name, unsigned HOST_WIDE_INT size,
/* Large BSS objects under the medium models go to .lbss.  */
3988 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3989 && size > (unsigned int)ix86_section_threshold)
3990 switch_to_section (get_named_section (decl, ".lbss", 0));
3992 switch_to_section (bss_section);
3993 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
3994 #ifdef ASM_DECLARE_OBJECT_NAME
3995 last_assemble_variable_decl = decl;
3996 ASM_DECLARE_OBJECT_NAME (file, name, decl);
3998 /* Standard thing is just output label for the object. */
3999 ASM_OUTPUT_LABEL (file, name);
4000 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve SIZE bytes; at least one so the label refers to real storage.  */
4001 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set default optimization-related flags for the given -O LEVEL, before
   command-line options are processed.  NOTE(review): the return type and
   some conditional lines are missing from this listing.  */
4005 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4007 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4008 make the problem with not enough registers even worse. */
4009 #ifdef INSN_SCHEDULING
4011 flag_schedule_insns = 0;
4015 /* The Darwin libraries never set errno, so we might as well
4016 avoid calling them when that's the only reason we would. */
4017 flag_errno_math = 0;
4019 /* The default values of these switches depend on the TARGET_64BIT
4020 that is not known at this moment. Mark these values with 2 and
4021 let the user override these. In case there is no command line option
4022 specifying them, we will set the defaults in override_options. */
4024 flag_omit_frame_pointer = 2;
4025 flag_pcc_struct_return = 2;
4026 flag_asynchronous_unwind_tables = 2;
4027 flag_vect_cost_model = 1;
4028 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4029 SUBTARGET_OPTIMIZATION_OPTIONS;
4033 /* Decide whether we can make a sibling call to a function. DECL is the
4034 declaration of the function being targeted by the call and EXP is the
4035 CALL_EXPR representing the call. */
/* NOTE(review): the return type, local declarations and the return
   statements for each rejection are missing from this listing.  */
4038 ix86_function_ok_for_sibcall (tree decl, tree exp)
4043 /* If we are generating position-independent code, we cannot sibcall
4044 optimize any indirect call, or a direct call to a global function,
4045 as the PLT requires %ebx be live. */
4046 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Get the called function's type, looking through a pointer type.  */
4053 func = TREE_TYPE (CALL_EXPR_FN (exp));
4054 if (POINTER_TYPE_P (func))
4055 func = TREE_TYPE (func);
4058 /* Check that the return value locations are the same. Like
4059 if we are returning floats on the 80387 register stack, we cannot
4060 make a sibcall from a function that doesn't return a float to a
4061 function that does or, conversely, from a function that does return
4062 a float to a function that doesn't; the necessary stack adjustment
4063 would not be executed. This is also the place we notice
4064 differences in the return value ABI. Note that it is ok for one
4065 of the functions to have void return type as long as the return
4066 value of the other is passed in a register. */
4067 a = ix86_function_value (TREE_TYPE (exp), func, false);
4068 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4070 if (STACK_REG_P (a) || STACK_REG_P (b))
4072 if (!rtx_equal_p (a, b))
4075 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4077 else if (!rtx_equal_p (a, b))
4080 /* If this call is indirect, we'll need to be able to use a call-clobbered
4081 register for the address of the target function. Make sure that all
4082 such registers are not used for passing parameters. */
4083 if (!decl && !TARGET_64BIT)
4087 /* We're looking at the CALL_EXPR, we need the type of the function. */
4088 type = CALL_EXPR_FN (exp); /* pointer expression */
4089 type = TREE_TYPE (type); /* pointer type */
4090 type = TREE_TYPE (type); /* function type */
/* With 3+ regparm arguments, no call-clobbered register is left over
   to hold the target address of an indirect sibcall.  */
4092 if (ix86_function_regparm (type, NULL) >= 3)
4094 /* ??? Need to count the actual number of registers to be used,
4095 not the possible number of registers. Fix later. */
4100 /* Dllimport'd functions are also called indirectly. */
4101 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4102 && decl && DECL_DLLIMPORT_P (decl)
4103 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4106 /* Otherwise okay. That also includes certain types of indirect calls. */
4110 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4111 calling convention attributes;
4112 arguments as in struct attribute_spec.handler. */
4115 ix86_handle_cconv_attribute (tree *node, tree name,
4117 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function types (or declarations
   that carry one); warn and drop the attribute otherwise.  */
4120 if (TREE_CODE (*node) != FUNCTION_TYPE
4121 && TREE_CODE (*node) != METHOD_TYPE
4122 && TREE_CODE (*node) != FIELD_DECL
4123 && TREE_CODE (*node) != TYPE_DECL)
4125 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4126 IDENTIFIER_POINTER (name));
4127 *no_add_attrs = true;
4131 /* Can combine regparm with all attributes but fastcall. */
4132 if (is_attribute_p ("regparm", name))
4136 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4138 error ("fastcall and regparm attributes are not compatible");
/* Validate the regparm argument: it must be an integer constant no
   larger than REGPARM_MAX.  */
4141 cst = TREE_VALUE (args);
4142 if (TREE_CODE (cst) != INTEGER_CST)
4144 warning (OPT_Wattributes,
4145 "%qs attribute requires an integer constant argument",
4146 IDENTIFIER_POINTER (name));
4147 *no_add_attrs = true;
4149 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4151 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4152 IDENTIFIER_POINTER (name), REGPARM_MAX);
4153 *no_add_attrs = true;
4161 /* Do not warn when emulating the MS ABI. */
4162 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4163 warning (OPT_Wattributes, "%qs attribute ignored",
4164 IDENTIFIER_POINTER (name));
4165 *no_add_attrs = true;
4169 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4170 if (is_attribute_p ("fastcall", name))
4172 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4174 error ("fastcall and cdecl attributes are not compatible");
4176 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4178 error ("fastcall and stdcall attributes are not compatible");
4180 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4182 error ("fastcall and regparm attributes are not compatible");
4186 /* Can combine stdcall with fastcall (redundant), regparm and
4188 else if (is_attribute_p ("stdcall", name))
4190 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4192 error ("stdcall and cdecl attributes are not compatible");
4194 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4196 error ("stdcall and fastcall attributes are not compatible");
4200 /* Can combine cdecl with regparm and sseregparm. */
4201 else if (is_attribute_p ("cdecl", name))
4203 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4205 error ("stdcall and cdecl attributes are not compatible");
4207 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4209 error ("fastcall and cdecl attributes are not compatible");
4213 /* Can combine sseregparm with all attributes. */
4218 /* Return 0 if the attributes for two types are incompatible, 1 if they
4219 are compatible, and 2 if they are nearly compatible (which causes a
4220 warning to be generated). */
4223 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4225 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default convention is stdcall, so the attribute
   naming the non-default convention is "cdecl"; otherwise "stdcall".  */
4226 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4228 if (TREE_CODE (type1) != FUNCTION_TYPE
4229 && TREE_CODE (type1) != METHOD_TYPE)
4232 /* Check for mismatched fastcall/regparm types. */
4233 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4234 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4235 || (ix86_function_regparm (type1, NULL)
4236 != ix86_function_regparm (type2, NULL)))
4239 /* Check for mismatched sseregparm types. */
4240 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4241 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4244 /* Check for mismatched return types (cdecl vs stdcall). */
4245 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4246 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4252 /* Return the regparm value for a function with the indicated TYPE and DECL.
4253 DECL may be NULL when calling function indirectly
4254 or considering a libcall. */
4257 ix86_function_regparm (const_tree type, const_tree decl)
4260 int regparm = ix86_regparm;
/* Ensure the nested-function regparm(3) error below is emitted at most
   once per compilation.  */
4262 static bool error_issued;
4266 if (ix86_function_type_abi (type) == DEFAULT_ABI)
4268 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
4271 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type))
4275 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4277 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4279 /* We can't use regparm(3) for nested functions because
4280 these pass static chain pointer in %ecx register. */
4281 if (!error_issued && regparm == 3
4282 && decl_function_context (decl)
4283 && !DECL_NO_STATIC_CHAIN (decl))
4285 error ("nested functions are limited to 2 register parameters");
4286 error_issued = true;
4294 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4297 /* Use register calling convention for local functions when possible. */
4298 if (decl && TREE_CODE (decl) == FUNCTION_DECL
4301 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4302 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4305 int local_regparm, globals = 0, regno;
4308 /* Make sure no regparm register is taken by a
4309 fixed register variable. */
4310 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4311 if (fixed_regs[local_regparm])
4314 /* We can't use regparm(3) for nested functions as these use
4315 static chain pointer in third argument. */
4316 if (local_regparm == 3
4317 && decl_function_context (decl)
4318 && !DECL_NO_STATIC_CHAIN (decl))
4321 /* If the function realigns its stackpointer, the prologue will
4322 clobber %ecx. If we've already generated code for the callee,
4323 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4324 scanning the attributes for the self-realigning property. */
4325 f = DECL_STRUCT_FUNCTION (decl);
4326 /* Since current internal arg pointer won't conflict with
4327 parameter passing regs, so no need to change stack
4328 realignment and adjust regparm number.
4330 Each fixed register usage increases register pressure,
4331 so fewer registers should be used for argument passing.
4332 This functionality can be overridden by an explicit
4334 for (regno = 0; regno <= DI_REG; regno++)
4335 if (fixed_regs[regno])
4339 = globals < local_regparm ? local_regparm - globals : 0;
4341 if (local_regparm > regparm)
4342 regparm = local_regparm;
4349 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4350 DFmode (2) arguments in SSE registers for a function with the
4351 indicated TYPE and DECL. DECL may be NULL when calling function
4352 indirectly or considering a libcall. Otherwise return 0. */
4355 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* This query is only meaningful for the 32-bit ABI; 64-bit argument
   passing is handled by the x86-64 classification code.  */
4357 gcc_assert (!TARGET_64BIT);
4359 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4360 by the sseregparm attribute. */
4361 if (TARGET_SSEREGPARM
4362 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4369 error ("Calling %qD with attribute sseregparm without "
4370 "SSE/SSE2 enabled", decl);
4372 error ("Calling %qT with attribute sseregparm without "
4373 "SSE/SSE2 enabled", type);
4381 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4382 (and DFmode for SSE2) arguments in SSE registers. */
4383 if (decl && TARGET_SSE_MATH && !profile_flag)
4385 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4386 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4388 return TARGET_SSE2 ? 2 : 1;
4394 /* Return true if EAX is live at the start of the function. Used by
4395 ix86_expand_prologue to determine if we need special help before
4396 calling allocate_stack_worker. */
4399 ix86_eax_live_at_start_p (void)
4401 /* Cheat. Don't bother working forward from ix86_function_regparm
4402 to the function type to whether an actual argument is located in
4403 eax. Instead just look at cfg info, which is still close enough
4404 to correct at this point. This gives false positives for broken
4405 functions that might use uninitialized data that happens to be
4406 allocated in eax, but who cares? */
/* Hard register number 0 is %eax (AX_REG).  */
4407 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4410 /* Value is the number of bytes of arguments automatically
4411 popped when returning from a subroutine call.
4412 FUNDECL is the declaration node of the function (as a tree),
4413 FUNTYPE is the data type of the function (as a tree),
4414 or for a library call it is an identifier node for the subroutine name.
4415 SIZE is the number of bytes of arguments passed on the stack.
4417 On the 80386, the RTD insn may be used to pop them if the number
4418 of args is fixed, but if the number is variable then the caller
4419 must pop them all. RTD can't be used for library calls now
4420 because the library is compiled with the Unix compiler.
4421 Use of RTD is a selectable option, since it is incompatible with
4422 standard Unix calling sequences. If the option is not selected,
4423 the caller must always pop the args.
4425 The attribute stdcall is equivalent to RTD on a per module basis. */
4428 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4432 /* None of the 64-bit ABIs pop arguments. */
/* -mrtd never applies to library calls, which are identified by an
   IDENTIFIER_NODE rather than a FUNCTION_DECL.  */
4436 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4438 /* Cdecl functions override -mrtd, and never pop the stack. */
4439 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4441 /* Stdcall and fastcall functions will pop the stack if not
4443 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4444 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4447 if (rtd && ! stdarg_p (funtype))
4451 /* Lose any fake structure return argument if it is passed on the stack. */
4452 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4453 && !KEEP_AGGREGATE_RETURN_POINTER)
4455 int nregs = ix86_function_regparm (funtype, fundecl);
4457 return GET_MODE_SIZE (Pmode);
4463 /* Argument support functions. */
4465 /* Return true when REGNO may be used to pass function parameters. */
4467 ix86_function_arg_regno_p (int regno)
4470 const int *parm_regs;
4475 return (regno < REGPARM_MAX
4476 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4478 return (regno < REGPARM_MAX
4479 || (TARGET_MMX && MMX_REGNO_P (regno)
4480 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4481 || (TARGET_SSE && SSE_REGNO_P (regno)
4482 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4487 if (SSE_REGNO_P (regno) && TARGET_SSE)
4492 if (TARGET_SSE && SSE_REGNO_P (regno)
4493 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4497 /* TODO: The function should depend on current function ABI but
4498 builtins.c would need updating then. Therefore we use the
4501 /* RAX is used as hidden argument to va_arg functions. */
4502 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
/* Select the integer parameter-register table for the default ABI.  */
4505 if (DEFAULT_ABI == MS_ABI)
4506 parm_regs = x86_64_ms_abi_int_parameter_registers;
4508 parm_regs = x86_64_int_parameter_registers;
4509 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
4510 : X86_64_REGPARM_MAX); i++)
4511 if (regno == parm_regs[i])
4516 /* Return true if we do not know how to pass TYPE solely in registers. */
4519 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4521 if (must_pass_in_stack_var_size_or_pad (mode, type))
4524 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4525 The layout_type routine is crafty and tries to trick us into passing
4526 currently unsupported vector types on the stack by using TImode. */
4527 return (!TARGET_64BIT && mode == TImode
4528 && type && TREE_CODE (type) != VECTOR_TYPE);
4531 /* Return the size, in bytes, of the area reserved for arguments passed
4532 in registers for the function represented by FNDECL, depending on the
4535 ix86_reg_parm_stack_space (const_tree fndecl)
4538 /* For libcalls it is possible that there is no fndecl at hand.
4539 Therefore assume for this case the default abi of the target. */
4541 call_abi = DEFAULT_ABI;
4543 call_abi = ix86_function_abi (fndecl);
4549 /* Returns SYSV_ABI or MS_ABI, dependent on FNTYPE, specifying the
4552 ix86_function_type_abi (const_tree fntype)
4554 if (TARGET_64BIT && fntype != NULL)
/* An explicit ms_abi/sysv_abi attribute overrides the target default.  */
4557 if (DEFAULT_ABI == SYSV_ABI)
4558 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
4560 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
/* Return the calling ABI (SYSV_ABI or MS_ABI) for the function FNDECL,
   derived from its function type.  */
4568 ix86_function_abi (const_tree fndecl)
4572 return ix86_function_type_abi (TREE_TYPE (fndecl));
4575 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4578 ix86_cfun_abi (void)
/* NOTE(review): with no current function or in 32-bit mode this guard
   presumably falls back to the default ABI — confirm against full source.  */
4580 if (! cfun || ! TARGET_64BIT)
4582 return cfun->machine->call_abi;
4586 extern void init_regs (void);
4588 /* Implementation of call abi switching target hook. Specific to FNDECL
4589 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4591 To prevent redundant calls of costly function init_regs (), it checks not to
4592 reset register usage for default abi. */
4594 ix86_call_abi_override (const_tree fndecl)
4596 if (fndecl == NULL_TREE)
4597 cfun->machine->call_abi = DEFAULT_ABI;
4599 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
/* MS ABI treats %rsi/%rdi as call-saved; the SYSV ABI treats them as
   call-clobbered.  Flip call_used_regs only when it actually changes.  */
4600 if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
4602 if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
4604 call_used_regs[4 /*RSI*/] = 0;
4605 call_used_regs[5 /*RDI*/] = 0;
4609 else if (TARGET_64BIT)
4611 if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
4613 call_used_regs[4 /*RSI*/] = 1;
4614 call_used_regs[5 /*RDI*/] = 1;
4620 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4621 for a call to a function whose data type is FNTYPE.
4622 For a library call, FNTYPE is 0. */
4625 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4626 tree fntype, /* tree ptr for function decl */
4627 rtx libname, /* SYMBOL_REF of library name or 0 */
4630 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4631 memset (cum, 0, sizeof (*cum));
4633 cum->call_abi = ix86_function_type_abi (fntype);
4634 /* Set up the number of registers to use for passing arguments. */
4635 cum->nregs = ix86_regparm;
4638 if (cum->call_abi != DEFAULT_ABI)
4639 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
4644 cum->sse_nregs = SSE_REGPARM_MAX;
4647 if (cum->call_abi != DEFAULT_ABI)
4648 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4649 : X64_SSE_REGPARM_MAX;
4653 cum->mmx_nregs = MMX_REGPARM_MAX;
/* ABI-mismatch warnings start enabled; consumers may clear them.  */
4654 cum->warn_avx = true;
4655 cum->warn_sse = true;
4656 cum->warn_mmx = true;
4658 /* Because type might mismatch in between caller and callee, we need to
4659 use actual type of function for local calls.
4660 FIXME: cgraph_analyze can be told to actually record if function uses
4661 va_start so for local functions maybe_vaarg can be made aggressive
4663 FIXME: once the type system is fixed, we won't need this code anymore. */
4665 fntype = TREE_TYPE (fndecl);
4666 cum->maybe_vaarg = (fntype
4667 ? (!prototype_p (fntype) || stdarg_p (fntype))
4672 /* If there are variable arguments, then we won't pass anything
4673 in registers in 32-bit mode. */
4674 if (stdarg_p (fntype))
4685 /* Use ecx and edx registers if function has fastcall attribute,
4686 else look for regparm information. */
4689 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4695 cum->nregs = ix86_function_regparm (fntype, fndecl);
4698 /* Set up the number of SSE registers used for passing SFmode
4699 and DFmode arguments. Warn for mismatching ABI. */
4700 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4704 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4705 But in the case of vector types, it is some vector mode.
4707 When we have only some of our vector isa extensions enabled, then there
4708 are some modes for which vector_mode_supported_p is false. For these
4709 modes, the generic vector support in gcc will choose some non-vector mode
4710 in order to implement the type. By computing the natural mode, we'll
4711 select the proper ABI location for the operand and not depend on whatever
4712 the middle-end decides to do with these vector types. */
4714 static enum machine_mode
4715 type_natural_mode (const_tree type)
4717 enum machine_mode mode = TYPE_MODE (type);
4719 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4721 HOST_WIDE_INT size = int_size_in_bytes (type);
4722 if ((size == 8 || size == 16)
4723 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4724 && TYPE_VECTOR_SUBPARTS (type) > 1)
4726 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4728 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4729 mode = MIN_MODE_VECTOR_FLOAT;
4731 mode = MIN_MODE_VECTOR_INT;
4733 /* Get the mode which has this inner mode and number of units. */
/* Walk successively wider vector modes until both the element count
   and the inner mode match TYPE.  */
4734 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4735 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4736 && GET_MODE_INNER (mode) == innermode)
4746 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4747 this may not agree with the mode that the type system has chosen for the
4748 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4749 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4752 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4757 if (orig_mode != BLKmode)
4758 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the natural-mode register in a one-element PARALLEL
   at byte offset 0.  */
4761 tmp = gen_rtx_REG (mode, regno);
4762 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4763 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4769 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4770 of this code is to classify each 8bytes of incoming argument by the register
4771 class and assign registers accordingly. */
4773 /* Return the union class of CLASS1 and CLASS2.
4774 See the x86-64 PS ABI for details. */
4776 static enum x86_64_reg_class
4777 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4779 /* Rule #1: If both classes are equal, this is the resulting class. */
4780 if (class1 == class2)
4783 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4785 if (class1 == X86_64_NO_CLASS)
4787 if (class2 == X86_64_NO_CLASS)
4790 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4791 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4792 return X86_64_MEMORY_CLASS;
4794 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* An INTEGERSI merged with an SSESF still fits in 32 bits, so the
   narrower INTEGERSI class is preserved.  */
4795 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4796 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4797 return X86_64_INTEGERSI_CLASS;
4798 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4799 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4800 return X86_64_INTEGER_CLASS;
4802 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4804 if (class1 == X86_64_X87_CLASS
4805 || class1 == X86_64_X87UP_CLASS
4806 || class1 == X86_64_COMPLEX_X87_CLASS
4807 || class2 == X86_64_X87_CLASS
4808 || class2 == X86_64_X87UP_CLASS
4809 || class2 == X86_64_COMPLEX_X87_CLASS)
4810 return X86_64_MEMORY_CLASS;
4812 /* Rule #6: Otherwise class SSE is used. */
4813 return X86_64_SSE_CLASS;
4816 /* Classify the argument of type TYPE and mode MODE.
4817 CLASSES will be filled by the register class used to pass each word
4818 of the operand. The number of words is returned. In case the parameter
4819 should be passed in memory, 0 is returned. As a special case for zero
4820 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4822 BIT_OFFSET is used internally for handling records and specifies offset
4823 of the offset in bits modulo 256 to avoid overflow cases.
4825 See the x86-64 PS ABI for details.
4829 classify_argument (enum machine_mode mode, const_tree type,
4830 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4832 HOST_WIDE_INT bytes =
4833 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* BIT_OFFSET is taken modulo 64 when computing 8-byte word positions.  */
4834 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4836 /* Variable sized entities are always passed/returned in memory. */
4840 if (mode != VOIDmode
4841 && targetm.calls.must_pass_in_stack (mode, type))
4844 if (type && AGGREGATE_TYPE_P (type))
4848 enum x86_64_reg_class subclasses[MAX_CLASSES];
4850 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
4854 for (i = 0; i < words; i++)
4855 classes[i] = X86_64_NO_CLASS;
4857 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
4858 signalize memory class, so handle it as special case. */
4861 classes[0] = X86_64_NO_CLASS;
4865 /* Classify each field of record and merge classes. */
4866 switch (TREE_CODE (type))
4869 /* And now merge the fields of structure. */
4870 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4872 if (TREE_CODE (field) == FIELD_DECL)
4876 if (TREE_TYPE (field) == error_mark_node)
4879 /* Bitfields are always classified as integer. Handle them
4880 early, since later code would consider them to be
4881 misaligned integers. */
4882 if (DECL_BIT_FIELD (field))
4884 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4885 i < ((int_bit_position (field) + (bit_offset % 64))
4886 + tree_low_cst (DECL_SIZE (field), 0)
4889 merge_classes (X86_64_INTEGER_CLASS,
4894 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4895 TREE_TYPE (field), subclasses,
4896 (int_bit_position (field)
4897 + bit_offset) % 256);
4900 for (i = 0; i < num; i++)
4903 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4905 merge_classes (subclasses[i], classes[i + pos]);
4913 /* Arrays are handled as small records. */
4916 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
4917 TREE_TYPE (type), subclasses, bit_offset);
4921 /* The partial classes are now full classes. */
4922 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
4923 subclasses[0] = X86_64_SSE_CLASS;
4924 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
4925 subclasses[0] = X86_64_INTEGER_CLASS;
4927 for (i = 0; i < words; i++)
4928 classes[i] = subclasses[i % num];
4933 case QUAL_UNION_TYPE:
4934 /* Unions are similar to RECORD_TYPE but offset is always 0.
4936 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4938 if (TREE_CODE (field) == FIELD_DECL)
4942 if (TREE_TYPE (field) == error_mark_node)
4945 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4946 TREE_TYPE (field), subclasses,
4950 for (i = 0; i < num; i++)
4951 classes[i] = merge_classes (subclasses[i], classes[i]);
4960 /* Final merger cleanup. */
4961 for (i = 0; i < words; i++)
4963 /* If one class is MEMORY, everything should be passed in
4965 if (classes[i] == X86_64_MEMORY_CLASS)
4968 /* The X86_64_SSEUP_CLASS should be always preceded by
4969 X86_64_SSE_CLASS. */
4970 if (classes[i] == X86_64_SSEUP_CLASS
4971 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
4972 classes[i] = X86_64_SSE_CLASS;
4974 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
4975 if (classes[i] == X86_64_X87UP_CLASS
4976 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
4977 classes[i] = X86_64_SSE_CLASS;
4982 /* Compute alignment needed. We align all types to natural boundaries with
4983 exception of XFmode that is aligned to 64bits. */
4984 if (mode != VOIDmode && mode != BLKmode)
4986 int mode_alignment = GET_MODE_BITSIZE (mode);
4989 mode_alignment = 128;
4990 else if (mode == XCmode)
4991 mode_alignment = 256;
4992 if (COMPLEX_MODE_P (mode))
4993 mode_alignment /= 2;
4994 /* Misaligned fields are always returned in memory. */
4995 if (bit_offset % mode_alignment)
4999 /* for V1xx modes, just use the base mode */
5000 if (VECTOR_MODE_P (mode) && mode != V1DImode
5001 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5002 mode = GET_MODE_INNER (mode);
5004 /* Classification of atomic types. */
5009 classes[0] = X86_64_SSE_CLASS;
5012 classes[0] = X86_64_SSE_CLASS;
5013 classes[1] = X86_64_SSEUP_CLASS;
5022 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5023 classes[0] = X86_64_INTEGERSI_CLASS;
5025 classes[0] = X86_64_INTEGER_CLASS;
5029 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5036 if (!(bit_offset % 64))
5037 classes[0] = X86_64_SSESF_CLASS;
5039 classes[0] = X86_64_SSE_CLASS;
5042 classes[0] = X86_64_SSEDF_CLASS;
5045 classes[0] = X86_64_X87_CLASS;
5046 classes[1] = X86_64_X87UP_CLASS;
5049 classes[0] = X86_64_SSE_CLASS;
5050 classes[1] = X86_64_SSEUP_CLASS;
5053 classes[0] = X86_64_SSE_CLASS;
5056 classes[0] = X86_64_SSEDF_CLASS;
5057 classes[1] = X86_64_SSEDF_CLASS;
5060 classes[0] = X86_64_COMPLEX_X87_CLASS;
5063 /* This modes is larger than 16 bytes. */
5071 classes[0] = X86_64_AVX_CLASS;
5079 classes[0] = X86_64_SSE_CLASS;
5080 classes[1] = X86_64_SSEUP_CLASS;
5087 classes[0] = X86_64_SSE_CLASS;
5093 gcc_assert (VECTOR_MODE_P (mode));
5098 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5100 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5101 classes[0] = X86_64_INTEGERSI_CLASS;
5103 classes[0] = X86_64_INTEGER_CLASS;
5104 classes[1] = X86_64_INTEGER_CLASS;
5105 return 1 + (bytes > 8);
5109 /* Examine the argument and return set number of register required in each
5110 class. Return 0 iff parameter should be passed in memory. */
5112 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5113 int *int_nregs, int *sse_nregs)
5115 enum x86_64_reg_class regclass[MAX_CLASSES];
5116 int n = classify_argument (mode, type, regclass, 0);
5122 for (n--; n >= 0; n--)
5123 switch (regclass[n])
5125 case X86_64_INTEGER_CLASS:
5126 case X86_64_INTEGERSI_CLASS:
5129 case X86_64_AVX_CLASS:
5130 case X86_64_SSE_CLASS:
5131 case X86_64_SSESF_CLASS:
5132 case X86_64_SSEDF_CLASS:
5135 case X86_64_NO_CLASS:
5136 case X86_64_SSEUP_CLASS:
5138 case X86_64_X87_CLASS:
5139 case X86_64_X87UP_CLASS:
5143 case X86_64_COMPLEX_X87_CLASS:
/* Valid only for return values; as an argument this class forces
   memory passing (a 0 result).  */
5144 return in_return ? 2 : 0;
5145 case X86_64_MEMORY_CLASS:
5151 /* Construct container for the argument used by GCC interface. See
5152 FUNCTION_ARG for the detailed description. */
5155 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5156 const_tree type, int in_return, int nintregs, int nsseregs,
5157 const int *intreg, int sse_regno)
5159 /* The following variables hold the static issued_error state. */
5160 static bool issued_sse_arg_error;
5161 static bool issued_sse_ret_error;
5162 static bool issued_x87_ret_error;
5164 enum machine_mode tmpmode;
5166 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5167 enum x86_64_reg_class regclass[MAX_CLASSES];
5171 int needed_sseregs, needed_intregs;
5172 rtx exp[MAX_CLASSES];
5175 n = classify_argument (mode, type, regclass, 0);
5178 if (!examine_argument (mode, type, in_return, &needed_intregs,
5181 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5184 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5185 some less clueful developer tries to use floating-point anyway. */
5186 if (needed_sseregs && !TARGET_SSE)
5190 if (!issued_sse_ret_error)
5192 error ("SSE register return with SSE disabled");
5193 issued_sse_ret_error = true;
5196 else if (!issued_sse_arg_error)
5198 error ("SSE register argument with SSE disabled");
5199 issued_sse_arg_error = true;
5204 /* Likewise, error if the ABI requires us to return values in the
5205 x87 registers and the user specified -mno-80387. */
5206 if (!TARGET_80387 && in_return)
5207 for (i = 0; i < n; i++)
5208 if (regclass[i] == X86_64_X87_CLASS
5209 || regclass[i] == X86_64_X87UP_CLASS
5210 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5212 if (!issued_x87_ret_error)
5214 error ("x87 register return with x87 disabled")
5215 issued_x87_ret_error = true;
5220 /* First construct simple cases. Avoid SCmode, since we want to use
5221 single register to pass this type. */
5222 if (n == 1 && mode != SCmode)
5223 switch (regclass[0])
5225 case X86_64_INTEGER_CLASS:
5226 case X86_64_INTEGERSI_CLASS:
5227 return gen_rtx_REG (mode, intreg[0]);
5228 case X86_64_AVX_CLASS:
5229 case X86_64_SSE_CLASS:
5230 case X86_64_SSESF_CLASS:
5231 case X86_64_SSEDF_CLASS:
5232 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
5233 case X86_64_X87_CLASS:
5234 case X86_64_COMPLEX_X87_CLASS:
5235 return gen_rtx_REG (mode, FIRST_STACK_REG);
5236 case X86_64_NO_CLASS:
5237 /* Zero sized array, struct or class. */
/* A 16-byte SSE+SSEUP pair occupies a single XMM register.  */
5242 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5243 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5244 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5247 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5248 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5249 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5250 && regclass[1] == X86_64_INTEGER_CLASS
5251 && (mode == CDImode || mode == TImode || mode == TFmode)
5252 && intreg[0] + 1 == intreg[1])
5253 return gen_rtx_REG (mode, intreg[0]);
5255 /* Otherwise figure out the entries of the PARALLEL. */
5256 for (i = 0; i < n; i++)
5258 switch (regclass[i])
5260 case X86_64_NO_CLASS:
5262 case X86_64_INTEGER_CLASS:
5263 case X86_64_INTEGERSI_CLASS:
5264 /* Merge TImodes on aligned occasions here too. */
5265 if (i * 8 + 8 > bytes)
5266 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5267 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5271 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5272 if (tmpmode == BLKmode)
5274 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5275 gen_rtx_REG (tmpmode, *intreg),
5279 case X86_64_SSESF_CLASS:
5280 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5281 gen_rtx_REG (SFmode,
5282 SSE_REGNO (sse_regno)),
5286 case X86_64_SSEDF_CLASS:
5287 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5288 gen_rtx_REG (DFmode,
5289 SSE_REGNO (sse_regno)),
5293 case X86_64_SSE_CLASS:
5294 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
5298 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5299 gen_rtx_REG (tmpmode,
5300 SSE_REGNO (sse_regno)),
5302 if (tmpmode == TImode)
5311 /* Empty aligned struct, union or class. */
5315 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5316 for (i = 0; i < nexps; i++)
5317 XVECEXP (ret, 0, i) = exp [i];
5321 /* Update the data in CUM to advance over an argument of mode MODE
5322 and data type TYPE. (TYPE is null for libcalls where that information
5323 may not be available.) */
5326 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5327 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-class arguments consume WORDS general registers.  */
5343 cum->words += words;
5344 cum->nregs -= words;
5345 cum->regno += words;
5347 if (cum->nregs <= 0)
/* Scalar FP arguments go in SSE registers only when cum->float_in_sse
   permits the mode (see ix86_function_sseregparm).  */
5355 if (cum->float_in_sse < 2)
5358 if (cum->float_in_sse < 1)
/* Advance one SSE register for non-aggregate vector/FP values.  */
5376 if (!type || !AGGREGATE_TYPE_P (type))
5378 cum->sse_words += words;
5379 cum->sse_nregs -= 1;
5380 cum->sse_regno += 1;
5381 if (cum->sse_nregs <= 0)
/* Advance one MMX register for non-aggregate MMX-mode values.  */
5394 if (!type || !AGGREGATE_TYPE_P (type))
5396 cum->mmx_words += words;
5397 cum->mmx_nregs -= 1;
5398 cum->mmx_regno += 1;
5399 if (cum->mmx_nregs <= 0)
5410 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5411 tree type, HOST_WIDE_INT words, int named)
5413 int int_nregs, sse_nregs;
5415 /* Unnamed 256bit vector mode parameters are passed on stack. */
5416 if (!named && VALID_AVX256_REG_MODE (mode))
5419 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5420 cum->words += words;
5421 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5423 cum->nregs -= int_nregs;
5424 cum->sse_nregs -= sse_nregs;
5425 cum->regno += int_nregs;
5426 cum->sse_regno += sse_nregs;
5429 cum->words += words;
5433 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5434 HOST_WIDE_INT words)
5436 /* Otherwise, this should be passed indirect. */
5437 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5439 cum->words += words;
5448 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5449 tree type, int named)
5451 HOST_WIDE_INT bytes, words;
5453 if (mode == BLKmode)
5454 bytes = int_size_in_bytes (type);
5456 bytes = GET_MODE_SIZE (mode);
5457 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5460 mode = type_natural_mode (type);
5462 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5463 function_arg_advance_ms_64 (cum, bytes, words);
5464 else if (TARGET_64BIT)
5465 function_arg_advance_64 (cum, mode, type, words, named);
5467 function_arg_advance_32 (cum, mode, type, bytes, words);
5470 /* Define where to put the arguments to a function.
5471 Value is zero to push the argument on the stack,
5472 or a hard register in which to store the argument.
5474 MODE is the argument's machine mode.
5475 TYPE is the data type of the argument (as a tree).
5476 This is null for libcalls where that information may
5478 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5479 the preceding args and about the function being called.
5480 NAMED is nonzero if this argument is a named parameter
5481 (otherwise it is an extra parameter matching an ellipsis). */
5484 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5485 enum machine_mode orig_mode, tree type,
5486 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5488 static bool warnedavx, warnedsse, warnedmmx;
5490 /* Avoid the AL settings for the Unix64 ABI. */
5491 if (mode == VOIDmode)
5507 if (words <= cum->nregs)
5509 int regno = cum->regno;
5511 /* Fastcall allocates the first two DWORD (SImode) or
5512 smaller arguments to ECX and EDX if it isn't an
5518 || (type && AGGREGATE_TYPE_P (type)))
5521 /* ECX not EAX is the first allocated register. */
5522 if (regno == AX_REG)
5525 return gen_rtx_REG (mode, regno);
5530 if (cum->float_in_sse < 2)
5533 if (cum->float_in_sse < 1)
5537 /* In 32bit, we pass TImode in xmm registers. */
5544 if (!type || !AGGREGATE_TYPE_P (type))
5546 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5549 warning (0, "SSE vector argument without SSE enabled "
5553 return gen_reg_or_parallel (mode, orig_mode,
5554 cum->sse_regno + FIRST_SSE_REG);
5559 /* In 32bit, we pass OImode in ymm registers. */
5566 if (!type || !AGGREGATE_TYPE_P (type))
5568 if (!TARGET_AVX && !warnedavx && cum->warn_avx)
5571 warning (0, "AVX vector argument without AVX enabled "
5575 return gen_reg_or_parallel (mode, orig_mode,
5576 cum->sse_regno + FIRST_SSE_REG);
5585 if (!type || !AGGREGATE_TYPE_P (type))
5587 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5590 warning (0, "MMX vector argument without MMX enabled "
5594 return gen_reg_or_parallel (mode, orig_mode,
5595 cum->mmx_regno + FIRST_MMX_REG);
5604 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5605 enum machine_mode orig_mode, tree type, int named)
5607 static bool warnedavx;
5609 /* Handle a hidden AL argument containing number of registers
5610 for varargs x86-64 functions. */
5611 if (mode == VOIDmode)
5612 return GEN_INT (cum->maybe_vaarg
5613 ? (cum->sse_nregs < 0
5614 ? (cum->call_abi == DEFAULT_ABI
5616 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5617 : X64_SSE_REGPARM_MAX))
5632 /* In 64bit, we pass TImode in interger registers and OImode on
5634 if (!type || !AGGREGATE_TYPE_P (type))
5636 if (!TARGET_AVX && !warnedavx && cum->warn_avx)
5639 warning (0, "AVX vector argument without AVX enabled "
5644 /* Unnamed 256bit vector mode parameters are passed on stack. */
5650 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5652 &x86_64_int_parameter_registers [cum->regno],
5657 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5658 enum machine_mode orig_mode, int named,
5659 HOST_WIDE_INT bytes)
5663 /* Avoid the AL settings for the Unix64 ABI. */
5664 if (mode == VOIDmode)
5667 /* If we've run out of registers, it goes on the stack. */
5668 if (cum->nregs == 0)
5671 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5673 /* Only floating point modes are passed in anything but integer regs. */
5674 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5677 regno = cum->regno + FIRST_SSE_REG;
5682 /* Unnamed floating parameters are passed in both the
5683 SSE and integer registers. */
5684 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5685 t2 = gen_rtx_REG (mode, regno);
5686 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5687 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5688 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5691 /* Handle aggregated types passed in register. */
5692 if (orig_mode == BLKmode)
5694 if (bytes > 0 && bytes <= 8)
5695 mode = (bytes > 4 ? DImode : SImode);
5696 if (mode == BLKmode)
5700 return gen_reg_or_parallel (mode, orig_mode, regno);
5704 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5705 tree type, int named)
5707 enum machine_mode mode = omode;
5708 HOST_WIDE_INT bytes, words;
5710 if (mode == BLKmode)
5711 bytes = int_size_in_bytes (type);
5713 bytes = GET_MODE_SIZE (mode);
5714 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5716 /* To simplify the code below, represent vector types with a vector mode
5717 even if MMX/SSE are not active. */
5718 if (type && TREE_CODE (type) == VECTOR_TYPE)
5719 mode = type_natural_mode (type);
5721 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5722 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5723 else if (TARGET_64BIT)
5724 return function_arg_64 (cum, mode, omode, type, named);
5726 return function_arg_32 (cum, mode, omode, type, bytes, words);
5729 /* A C expression that indicates when an argument must be passed by
5730 reference. If nonzero for an argument, a copy of that argument is
5731 made in memory and a pointer to the argument is passed instead of
5732 the argument itself. The pointer is passed in whatever way is
5733 appropriate for passing a pointer to that type. */
5736 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5737 enum machine_mode mode ATTRIBUTE_UNUSED,
5738 const_tree type, bool named ATTRIBUTE_UNUSED)
5740 /* See Windows x64 Software Convention. */
5741 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5743 int msize = (int) GET_MODE_SIZE (mode);
5746 /* Arrays are passed by reference. */
5747 if (TREE_CODE (type) == ARRAY_TYPE)
5750 if (AGGREGATE_TYPE_P (type))
5752 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
5753 are passed by reference. */
5754 msize = int_size_in_bytes (type);
5758 /* __m128 is passed by reference. */
5760 case 1: case 2: case 4: case 8:
5766 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
5772 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
5775 contains_aligned_value_p (tree type)
5777 enum machine_mode mode = TYPE_MODE (type);
5778 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
5782 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
5784 if (TYPE_ALIGN (type) < 128)
5787 if (AGGREGATE_TYPE_P (type))
5789 /* Walk the aggregates recursively. */
5790 switch (TREE_CODE (type))
5794 case QUAL_UNION_TYPE:
5798 /* Walk all the structure fields. */
5799 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5801 if (TREE_CODE (field) == FIELD_DECL
5802 && contains_aligned_value_p (TREE_TYPE (field)))
5809 /* Just for use if some languages passes arrays by value. */
5810 if (contains_aligned_value_p (TREE_TYPE (type)))
5821 /* Gives the alignment boundary, in bits, of an argument with the
5822 specified mode and type. */
5825 ix86_function_arg_boundary (enum machine_mode mode, tree type)
5830 /* Since canonical type is used for call, we convert it to
5831 canonical type if needed. */
5832 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
5833 type = TYPE_CANONICAL (type);
5834 align = TYPE_ALIGN (type);
5837 align = GET_MODE_ALIGNMENT (mode);
5838 if (align < PARM_BOUNDARY)
5839 align = PARM_BOUNDARY;
5840 /* In 32bit, only _Decimal128 and __float128 are aligned to their
5841 natural boundaries. */
5842 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
5844 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
5845 make an exception for SSE modes since these require 128bit
5848 The handling here differs from field_alignment. ICC aligns MMX
5849 arguments to 4 byte boundaries, while structure fields are aligned
5850 to 8 byte boundaries. */
5853 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
5854 align = PARM_BOUNDARY;
5858 if (!contains_aligned_value_p (type))
5859 align = PARM_BOUNDARY;
5862 if (align > BIGGEST_ALIGNMENT)
5863 align = BIGGEST_ALIGNMENT;
5867 /* Return true if N is a possible register number of function value. */
5870 ix86_function_value_regno_p (int regno)
5877 case FIRST_FLOAT_REG:
5878 /* TODO: The function should depend on current function ABI but
5879 builtins.c would need updating then. Therefore we use the
5881 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
5883 return TARGET_FLOAT_RETURNS_IN_80387;
5889 if (TARGET_MACHO || TARGET_64BIT)
5897 /* Define how to find the value returned by a function.
5898 VALTYPE is the data type of the value (as a tree).
5899 If the precise function being called is known, FUNC is its FUNCTION_DECL;
5900 otherwise, FUNC is 0. */
5903 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
5904 const_tree fntype, const_tree fn)
5908 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
5909 we normally prevent this case when mmx is not available. However
5910 some ABIs may require the result to be returned like DImode. */
5911 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5912 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
5914 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
5915 we prevent this case when sse is not available. However some ABIs
5916 may require the result to be returned like integer TImode. */
5917 else if (mode == TImode
5918 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
5919 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
5921 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
5922 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
5923 regno = FIRST_FLOAT_REG;
5925 /* Most things go in %eax. */
5928 /* Override FP return register with %xmm0 for local functions when
5929 SSE math is enabled or for functions with sseregparm attribute. */
5930 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
5932 int sse_level = ix86_function_sseregparm (fntype, fn, false);
5933 if ((sse_level >= 1 && mode == SFmode)
5934 || (sse_level == 2 && mode == DFmode))
5935 regno = FIRST_SSE_REG;
5938 return gen_rtx_REG (orig_mode, regno);
5942 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
5947 /* Handle libcalls, which don't provide a type node. */
5948 if (valtype == NULL)
5960 return gen_rtx_REG (mode, FIRST_SSE_REG);
5963 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
5967 return gen_rtx_REG (mode, AX_REG);
5971 ret = construct_container (mode, orig_mode, valtype, 1,
5972 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
5973 x86_64_int_return_registers, 0);
5975 /* For zero sized structures, construct_container returns NULL, but we
5976 need to keep rest of compiler happy by returning meaningful value. */
5978 ret = gen_rtx_REG (orig_mode, AX_REG);
5984 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
5986 unsigned int regno = AX_REG;
5990 switch (GET_MODE_SIZE (mode))
5993 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
5994 && !COMPLEX_MODE_P (mode))
5995 regno = FIRST_SSE_REG;
5999 if (mode == SFmode || mode == DFmode)
6000 regno = FIRST_SSE_REG;
6006 return gen_rtx_REG (orig_mode, regno);
6010 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6011 enum machine_mode orig_mode, enum machine_mode mode)
6013 const_tree fn, fntype;
6016 if (fntype_or_decl && DECL_P (fntype_or_decl))
6017 fn = fntype_or_decl;
6018 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6020 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6021 return function_value_ms_64 (orig_mode, mode);
6022 else if (TARGET_64BIT)
6023 return function_value_64 (orig_mode, mode, valtype);
6025 return function_value_32 (orig_mode, mode, fntype, fn);
6029 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6030 bool outgoing ATTRIBUTE_UNUSED)
6032 enum machine_mode mode, orig_mode;
6034 orig_mode = TYPE_MODE (valtype);
6035 mode = type_natural_mode (valtype);
6036 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6040 ix86_libcall_value (enum machine_mode mode)
6042 return ix86_function_value_1 (NULL, NULL, mode, mode);
6045 /* Return true iff type is returned in memory. */
6047 static int ATTRIBUTE_UNUSED
6048 return_in_memory_32 (const_tree type, enum machine_mode mode)
6052 if (mode == BLKmode)
6055 size = int_size_in_bytes (type);
6057 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6060 if (VECTOR_MODE_P (mode) || mode == TImode)
6062 /* User-created vectors small enough to fit in EAX. */
6066 /* MMX/3dNow values are returned in MM0,
6067 except when it doesn't exits. */
6069 return (TARGET_MMX ? 0 : 1);
6071 /* SSE values are returned in XMM0, except when it doesn't exist. */
6073 return (TARGET_SSE ? 0 : 1);
6084 static int ATTRIBUTE_UNUSED
6085 return_in_memory_64 (const_tree type, enum machine_mode mode)
6087 int needed_intregs, needed_sseregs;
6088 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6091 static int ATTRIBUTE_UNUSED
6092 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6094 HOST_WIDE_INT size = int_size_in_bytes (type);
6096 /* __m128 is returned in xmm0. */
6097 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6098 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6101 /* Otherwise, the size must be exactly in [1248]. */
6102 return (size != 1 && size != 2 && size != 4 && size != 8);
6106 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6108 #ifdef SUBTARGET_RETURN_IN_MEMORY
6109 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6111 const enum machine_mode mode = type_natural_mode (type);
6113 if (TARGET_64BIT_MS_ABI)
6114 return return_in_memory_ms_64 (type, mode);
6115 else if (TARGET_64BIT)
6116 return return_in_memory_64 (type, mode);
6118 return return_in_memory_32 (type, mode);
6122 /* Return false iff TYPE is returned in memory. This version is used
6123 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6124 but differs notably in that when MMX is available, 8-byte vectors
6125 are returned in memory, rather than in MMX registers. */
6128 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6131 enum machine_mode mode = type_natural_mode (type);
6134 return return_in_memory_64 (type, mode);
6136 if (mode == BLKmode)
6139 size = int_size_in_bytes (type);
6141 if (VECTOR_MODE_P (mode))
6143 /* Return in memory only if MMX registers *are* available. This
6144 seems backwards, but it is consistent with the existing
6151 else if (mode == TImode)
6153 else if (mode == XFmode)
6159 /* When returning SSE vector types, we have a choice of either
6160 (1) being abi incompatible with a -march switch, or
6161 (2) generating an error.
6162 Given no good solution, I think the safest thing is one warning.
6163 The user won't be able to use -Werror, but....
6165 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6166 called in response to actually generating a caller or callee that
6167 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6168 via aggregate_value_p for general type probing from tree-ssa. */
6171 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6173 static bool warnedsse, warnedmmx;
6175 if (!TARGET_64BIT && type)
6177 /* Look at the return type of the function, not the function type. */
6178 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6180 if (!TARGET_SSE && !warnedsse)
6183 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6186 warning (0, "SSE vector return without SSE enabled "
6191 if (!TARGET_MMX && !warnedmmx)
6193 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6196 warning (0, "MMX vector return without MMX enabled "
6206 /* Create the va_list data type. */
6208 /* Returns the calling convention specific va_list data type.
6209 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6212 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6214 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6216 /* For i386 we use plain pointer to argument area. */
6217 if (!TARGET_64BIT || abi == MS_ABI)
6218 return build_pointer_type (char_type_node);
6220 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6221 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6223 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6224 unsigned_type_node);
6225 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6226 unsigned_type_node);
6227 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6229 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
6232 va_list_gpr_counter_field = f_gpr;
6233 va_list_fpr_counter_field = f_fpr;
6235 DECL_FIELD_CONTEXT (f_gpr) = record;
6236 DECL_FIELD_CONTEXT (f_fpr) = record;
6237 DECL_FIELD_CONTEXT (f_ovf) = record;
6238 DECL_FIELD_CONTEXT (f_sav) = record;
6240 TREE_CHAIN (record) = type_decl;
6241 TYPE_NAME (record) = type_decl;
6242 TYPE_FIELDS (record) = f_gpr;
6243 TREE_CHAIN (f_gpr) = f_fpr;
6244 TREE_CHAIN (f_fpr) = f_ovf;
6245 TREE_CHAIN (f_ovf) = f_sav;
6247 layout_type (record);
6249 /* The correct type is an array type of one element. */
6250 return build_array_type (record, build_index_type (size_zero_node));
6253 /* Setup the builtin va_list data type and for 64-bit the additional
6254 calling convention specific va_list data types. */
6257 ix86_build_builtin_va_list (void)
6259 tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
6261 /* Initialize abi specific va_list builtin types. */
6265 if (DEFAULT_ABI == MS_ABI)
6267 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6268 if (TREE_CODE (t) != RECORD_TYPE)
6269 t = build_variant_type_copy (t);
6270 sysv_va_list_type_node = t;
6275 if (TREE_CODE (t) != RECORD_TYPE)
6276 t = build_variant_type_copy (t);
6277 sysv_va_list_type_node = t;
6279 if (DEFAULT_ABI != MS_ABI)
6281 t = ix86_build_builtin_va_list_abi (MS_ABI);
6282 if (TREE_CODE (t) != RECORD_TYPE)
6283 t = build_variant_type_copy (t);
6284 ms_va_list_type_node = t;
6289 if (TREE_CODE (t) != RECORD_TYPE)
6290 t = build_variant_type_copy (t);
6291 ms_va_list_type_node = t;
6298 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6301 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6310 int regparm = ix86_regparm;
6312 if (cum->call_abi != DEFAULT_ABI)
6313 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6315 /* GPR size of varargs save area. */
6316 if (cfun->va_list_gpr_size)
6317 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6319 ix86_varargs_gpr_size = 0;
6321 /* FPR size of varargs save area. We don't need it if we don't pass
6322 anything in SSE registers. */
6323 if (cum->sse_nregs && cfun->va_list_fpr_size)
6324 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6326 ix86_varargs_fpr_size = 0;
6328 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6331 save_area = frame_pointer_rtx;
6332 set = get_varargs_alias_set ();
6334 for (i = cum->regno;
6336 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6339 mem = gen_rtx_MEM (Pmode,
6340 plus_constant (save_area, i * UNITS_PER_WORD));
6341 MEM_NOTRAP_P (mem) = 1;
6342 set_mem_alias_set (mem, set);
6343 emit_move_insn (mem, gen_rtx_REG (Pmode,
6344 x86_64_int_parameter_registers[i]));
6347 if (ix86_varargs_fpr_size)
6349 /* Now emit code to save SSE registers. The AX parameter contains number
6350 of SSE parameter registers used to call this function. We use
6351 sse_prologue_save insn template that produces computed jump across
6352 SSE saves. We need some preparation work to get this working. */
6354 label = gen_label_rtx ();
6355 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6357 /* Compute address to jump to :
6358 label - eax*4 + nnamed_sse_arguments*4 Or
6359 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6360 tmp_reg = gen_reg_rtx (Pmode);
6361 nsse_reg = gen_reg_rtx (Pmode);
6362 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6363 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6364 gen_rtx_MULT (Pmode, nsse_reg,
6367 /* vmovaps is one byte longer than movaps. */
6369 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6370 gen_rtx_PLUS (Pmode, tmp_reg,
6376 gen_rtx_CONST (DImode,
6377 gen_rtx_PLUS (DImode,
6379 GEN_INT (cum->sse_regno
6380 * (TARGET_AVX ? 5 : 4)))));
6382 emit_move_insn (nsse_reg, label_ref);
6383 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6385 /* Compute address of memory block we save into. We always use pointer
6386 pointing 127 bytes after first byte to store - this is needed to keep
6387 instruction size limited by 4 bytes (5 bytes for AVX) with one
6388 byte displacement. */
6389 tmp_reg = gen_reg_rtx (Pmode);
6390 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6391 plus_constant (save_area,
6392 ix86_varargs_gpr_size + 127)));
6393 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6394 MEM_NOTRAP_P (mem) = 1;
6395 set_mem_alias_set (mem, set);
6396 set_mem_align (mem, BITS_PER_WORD);
6398 /* And finally do the dirty job! */
6399 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6400 GEN_INT (cum->sse_regno), label));
6405 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6407 alias_set_type set = get_varargs_alias_set ();
6410 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6414 mem = gen_rtx_MEM (Pmode,
6415 plus_constant (virtual_incoming_args_rtx,
6416 i * UNITS_PER_WORD));
6417 MEM_NOTRAP_P (mem) = 1;
6418 set_mem_alias_set (mem, set);
6420 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6421 emit_move_insn (mem, reg);
6426 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6427 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6430 CUMULATIVE_ARGS next_cum;
6433 /* This argument doesn't appear to be used anymore. Which is good,
6434 because the old code here didn't suppress rtl generation. */
6435 gcc_assert (!no_rtl);
6440 fntype = TREE_TYPE (current_function_decl);
6442 /* For varargs, we do not want to skip the dummy va_dcl argument.
6443 For stdargs, we do want to skip the last named argument. */
6445 if (stdarg_p (fntype))
6446 function_arg_advance (&next_cum, mode, type, 1);
6448 if (cum->call_abi == MS_ABI)
6449 setup_incoming_varargs_ms_64 (&next_cum);
6451 setup_incoming_varargs_64 (&next_cum);
6454 /* Checks if TYPE is of kind va_list char *. */
6457 is_va_list_char_pointer (tree type)
6461 /* For 32-bit it is always true. */
6464 canonic = ix86_canonical_va_list_type (type);
6465 return (canonic == ms_va_list_type_node
6466 || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
6469 /* Implement va_start. */
6472 ix86_va_start (tree valist, rtx nextarg)
6474 HOST_WIDE_INT words, n_gpr, n_fpr;
6475 tree f_gpr, f_fpr, f_ovf, f_sav;
6476 tree gpr, fpr, ovf, sav, t;
6479 /* Only 64bit target needs something special. */
6480 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6482 std_expand_builtin_va_start (valist, nextarg);
6486 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6487 f_fpr = TREE_CHAIN (f_gpr);
6488 f_ovf = TREE_CHAIN (f_fpr);
6489 f_sav = TREE_CHAIN (f_ovf);
6491 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6492 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6493 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6494 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6495 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6497 /* Count number of gp and fp argument registers used. */
6498 words = crtl->args.info.words;
6499 n_gpr = crtl->args.info.regno;
6500 n_fpr = crtl->args.info.sse_regno;
6502 if (cfun->va_list_gpr_size)
6504 type = TREE_TYPE (gpr);
6505 t = build2 (MODIFY_EXPR, type,
6506 gpr, build_int_cst (type, n_gpr * 8));
6507 TREE_SIDE_EFFECTS (t) = 1;
6508 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6511 if (TARGET_SSE && cfun->va_list_fpr_size)
6513 type = TREE_TYPE (fpr);
6514 t = build2 (MODIFY_EXPR, type, fpr,
6515 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6516 TREE_SIDE_EFFECTS (t) = 1;
6517 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6520 /* Find the overflow area. */
6521 type = TREE_TYPE (ovf);
6522 t = make_tree (type, crtl->args.internal_arg_pointer);
6524 t = build2 (POINTER_PLUS_EXPR, type, t,
6525 size_int (words * UNITS_PER_WORD));
6526 t = build2 (MODIFY_EXPR, type, ovf, t);
6527 TREE_SIDE_EFFECTS (t) = 1;
6528 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6530 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6532 /* Find the register save area.
6533 Prologue of the function save it right above stack frame. */
6534 type = TREE_TYPE (sav);
6535 t = make_tree (type, frame_pointer_rtx);
6536 if (!ix86_varargs_gpr_size)
6537 t = build2 (POINTER_PLUS_EXPR, type, t,
6538 size_int (-8 * X86_64_REGPARM_MAX));
6539 t = build2 (MODIFY_EXPR, type, sav, t);
6540 TREE_SIDE_EFFECTS (t) = 1;
6541 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6545 /* Implement va_arg. */
6548 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6551 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6552 tree f_gpr, f_fpr, f_ovf, f_sav;
6553 tree gpr, fpr, ovf, sav, t;
6555 tree lab_false, lab_over = NULL_TREE;
6560 enum machine_mode nat_mode;
6563 /* Only 64bit target needs something special. */
6564 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6565 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6567 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6568 f_fpr = TREE_CHAIN (f_gpr);
6569 f_ovf = TREE_CHAIN (f_fpr);
6570 f_sav = TREE_CHAIN (f_ovf);
6572 valist = build_va_arg_indirect_ref (valist);
6573 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6574 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6575 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6576 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6578 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6580 type = build_pointer_type (type);
6581 size = int_size_in_bytes (type);
6582 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6584 nat_mode = type_natural_mode (type);
6593 /* Unnamed 256bit vector mode parameters are passed on stack. */
6594 if (ix86_cfun_abi () == SYSV_ABI)
6601 container = construct_container (nat_mode, TYPE_MODE (type),
6602 type, 0, X86_64_REGPARM_MAX,
6603 X86_64_SSE_REGPARM_MAX, intreg,
6608 /* Pull the value out of the saved registers. */
6610 addr = create_tmp_var (ptr_type_node, "addr");
6611 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6615 int needed_intregs, needed_sseregs;
6617 tree int_addr, sse_addr;
6619 lab_false = create_artificial_label ();
6620 lab_over = create_artificial_label ();
6622 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6624 need_temp = (!REG_P (container)
6625 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6626 || TYPE_ALIGN (type) > 128));
6628 /* In case we are passing structure, verify that it is consecutive block
6629 on the register save area. If not we need to do moves. */
6630 if (!need_temp && !REG_P (container))
6632 /* Verify that all registers are strictly consecutive */
6633 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6637 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6639 rtx slot = XVECEXP (container, 0, i);
6640 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6641 || INTVAL (XEXP (slot, 1)) != i * 16)
6649 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6651 rtx slot = XVECEXP (container, 0, i);
6652 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6653 || INTVAL (XEXP (slot, 1)) != i * 8)
6665 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6666 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6667 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6668 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6671 /* First ensure that we fit completely in registers. */
6674 t = build_int_cst (TREE_TYPE (gpr),
6675 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6676 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6677 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6678 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6679 gimplify_and_add (t, pre_p);
6683 t = build_int_cst (TREE_TYPE (fpr),
6684 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6685 + X86_64_REGPARM_MAX * 8);
6686 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6687 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6688 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6689 gimplify_and_add (t, pre_p);
6692 /* Compute index to start of area used for integer regs. */
6695 /* int_addr = gpr + sav; */
6696 t = fold_convert (sizetype, gpr);
6697 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6698 gimplify_assign (int_addr, t, pre_p);
6702 /* sse_addr = fpr + sav; */
6703 t = fold_convert (sizetype, fpr);
6704 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6705 gimplify_assign (sse_addr, t, pre_p);
6710 tree temp = create_tmp_var (type, "va_arg_tmp");
6713 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6714 gimplify_assign (addr, t, pre_p);
6716 for (i = 0; i < XVECLEN (container, 0); i++)
6718 rtx slot = XVECEXP (container, 0, i);
6719 rtx reg = XEXP (slot, 0);
6720 enum machine_mode mode = GET_MODE (reg);
6721 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6722 tree addr_type = build_pointer_type (piece_type);
6725 tree dest_addr, dest;
6727 if (SSE_REGNO_P (REGNO (reg)))
6729 src_addr = sse_addr;
6730 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
6734 src_addr = int_addr;
6735 src_offset = REGNO (reg) * 8;
6737 src_addr = fold_convert (addr_type, src_addr);
6738 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
6739 size_int (src_offset));
6740 src = build_va_arg_indirect_ref (src_addr);
6742 dest_addr = fold_convert (addr_type, addr);
6743 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
6744 size_int (INTVAL (XEXP (slot, 1))));
6745 dest = build_va_arg_indirect_ref (dest_addr);
6747 gimplify_assign (dest, src, pre_p);
6753 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
6754 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
6755 gimplify_assign (gpr, t, pre_p);
6760 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
6761 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
6762 gimplify_assign (fpr, t, pre_p);
6765 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
6767 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
6770 /* ... otherwise out of the overflow area. */
6772 /* When we align parameter on stack for caller, if the parameter
6773 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
6774 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
6775 here with caller. */
6776 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
6777 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
6778 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
6780 /* Care for on-stack alignment if needed. */
6781 if (arg_boundary <= 64
6782 || integer_zerop (TYPE_SIZE (type)))
6786 HOST_WIDE_INT align = arg_boundary / 8;
6787 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
6788 size_int (align - 1));
6789 t = fold_convert (sizetype, t);
6790 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6792 t = fold_convert (TREE_TYPE (ovf), t);
6794 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
6795 gimplify_assign (addr, t, pre_p);
6797 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
6798 size_int (rsize * UNITS_PER_WORD));
6799 gimplify_assign (unshare_expr (ovf), t, pre_p);
6802 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
6804 ptrtype = build_pointer_type (type);
6805 addr = fold_convert (ptrtype, addr);
6808 addr = build_va_arg_indirect_ref (addr);
6809 return build_va_arg_indirect_ref (addr);
6812 /* Return nonzero if OPNUM's MEM should be matched
6813 in movabs* patterns. */
/* NOTE(review): excerpt — the return type, the declarations of "set" and
   "mem", the opening brace and some lines are elided in this listing.  */
6816 ix86_check_movabs (rtx insn, int opnum)
6820 set = PATTERN (insn);
/* If the pattern is a PARALLEL, the SET of interest is its first element.  */
6821 if (GET_CODE (set) == PARALLEL)
6822 set = XVECEXP (set, 0, 0);
6823 gcc_assert (GET_CODE (set) == SET);
6824 mem = XEXP (set, opnum);
/* Look through any SUBREGs wrapping the operand to reach the MEM itself.  */
6825 while (GET_CODE (mem) == SUBREG)
6826 mem = SUBREG_REG (mem);
6827 gcc_assert (MEM_P (mem));
/* A volatile MEM only matches when volatile operands are allowed.  */
6828 return (volatile_ok || !MEM_VOLATILE_P (mem));
6831 /* Initialize the table of extra 80387 mathematical constants. */
/* NOTE(review): excerpt — the return type, braces, the loop-index
   declaration and a few other lines are elided in this listing.  */
6834 init_ext_80387_constants (void)
6836 static const char * cst[5] =
/* Decimal strings for the constants loadable by the special x87
   instructions fldlg2, fldln2, fldl2e, fldl2t and fldpi.  */
6838 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
6839 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
6840 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
6841 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
6842 "3.1415926535897932385128089594061862044", /* 4: fldpi */
6846 for (i = 0; i < 5; i++)
6848 real_from_string (&ext_80387_constants_table[i], cst[i]);
6849 /* Ensure each constant is rounded to XFmode precision. */
6850 real_convert (&ext_80387_constants_table[i],
6851 XFmode, &ext_80387_constants_table[i]);
/* Mark the table as built so callers can skip re-initialization.  */
6854 ext_80387_constants_init = 1;
6857 /* Return true if the constant is something that can be loaded with
6858 a special instruction. */
/* NOTE(review): excerpt — the return type, braces, the declarations of
   "r" and "i", and the individual return statements are elided here.  */
6861 standard_80387_constant_p (rtx x)
6863 enum machine_mode mode = GET_MODE (x);
/* Only x87 floating-point CONST_DOUBLE constants qualify.  */
6867 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
/* 0.0 and 1.0 are handled first; they map to fldz/fld1 (elided returns).  */
6870 if (x == CONST0_RTX (mode))
6872 if (x == CONST1_RTX (mode))
6875 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6877 /* For XFmode constants, try to find a special 80387 instruction when
6878 optimizing for size or on those CPUs that benefit from them. */
6880 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
/* Table is built lazily by init_ext_80387_constants.  */
6884 if (! ext_80387_constants_init)
6885 init_ext_80387_constants ();
6887 for (i = 0; i < 5; i++)
6888 if (real_identical (&r, &ext_80387_constants_table[i]))
6892 /* Load of the constant -0.0 or -1.0 will be split as
6893 fldz;fchs or fld1;fchs sequence. */
6894 if (real_isnegzero (&r))
6896 if (real_identical (&r, &dconstm1))
6902 /* Return the opcode of the special instruction to be used to load
/* NOTE(review): excerpt — the switch cases mapping each
   standard_80387_constant_p result to its opcode string are elided.  */
6906 standard_80387_constant_opcode (rtx x)
6908 switch (standard_80387_constant_p (x))
6932 /* Return the CONST_DOUBLE representing the 80387 constant that is
6933 loaded by the specified special instruction. The argument IDX
6934 matches the return value from standard_80387_constant_p. */
6937 standard_80387_constant_rtx (int idx)
/* Lazily build the extended-constant table on first use.  */
6941 if (! ext_80387_constants_init)
6942 init_ext_80387_constants ();
/* NOTE(review): the mapping from IDX to the table index "i" (and the
   mode argument) is on elided lines; only the lookup survives here.  */
6958 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
6962 /* Return 1 if mode is a valid mode for sse. */
/* NOTE(review): excerpt — the function body is entirely elided.  */
6964 standard_sse_mode_p (enum machine_mode mode)
6981 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
6982 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
6983 modes and AVX is enabled. */
6986 standard_sse_constant_p (rtx x)
6988 enum machine_mode mode = GET_MODE (x);
/* All-zero vectors are always loadable; return value elided here.  */
6990 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
/* All-ones vectors: the negative values presumably flag "would match
   were SSE2/AVX enabled" — TODO confirm against callers.  */
6992 if (vector_all_ones_operand (x, mode))
6994 if (standard_sse_mode_p (mode))
6995 return TARGET_SSE2 ? 2 : -2;
6996 else if (VALID_AVX256_REG_MODE (mode))
6997 return TARGET_AVX ? 3 : -3;
7003 /* Return the opcode of the special instruction to be used to load
/* NOTE(review): excerpt — the outer switch's case labels and the inner
   switches' mode-attribute labels are elided.  The xor idioms appear to
   belong to the all-zeros case and pcmpeqd to the all-ones case, per the
   standard_sse_constant_p contract above — confirm against full source.  */
7007 standard_sse_constant_opcode (rtx insn, rtx x)
7009 switch (standard_sse_constant_p (x))
7012 switch (get_attr_mode (insn))
/* AVX uses the non-destructive three-operand v-prefixed forms.  */
7015 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7017 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7019 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7021 return "vxorps\t%x0, %x0, %x0";
7023 return "vxorpd\t%x0, %x0, %x0";
7025 return "vpxor\t%x0, %x0, %x0";
7031 switch (get_attr_mode (insn))
/* All-ones is materialized by comparing a register with itself.  */
7036 return "vpcmpeqd\t%0, %0, %0";
7042 return "pcmpeqd\t%0, %0";
7047 /* Returns 1 if OP contains a symbol reference */
/* NOTE(review): excerpt — return type, braces, declarations of "fmt",
   "i", "j" and the return statements are on elided lines.  */
7050 symbolic_reference_mentioned_p (rtx op)
/* Direct hit: OP itself is a symbol or label reference.  */
7055 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
/* Otherwise recurse over OP's operands, using the RTX format string to
   distinguish vector operands ('E') from single subexpressions ('e').  */
7058 fmt = GET_RTX_FORMAT (GET_CODE (op));
7059 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7065 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7066 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7070 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7077 /* Return 1 if it is appropriate to emit `ret' instructions in the
7078 body of a function. Do this only if the epilogue is simple, needing a
7079 couple of insns. Prior to reloading, we can't tell how many registers
7080 must be saved, so return 0 then. Return 0 if there is no frame
7081 marker to de-allocate. */
7084 ix86_can_use_return_insn_p (void)
7086 struct ix86_frame frame;
/* Before reload the layout is unknown; with a frame pointer the epilogue
   must tear it down, so a bare "ret" cannot be used.  */
7088 if (! reload_completed || frame_pointer_needed)
7091 /* Don't allow more than 32 pop, since that's all we can do
7092 with one instruction. */
7093 if (crtl->args.pops_args
7094 && crtl->args.size >= 32768)
/* Usable only when nothing was allocated and no registers were saved.  */
7097 ix86_compute_frame_layout (&frame);
7098 return frame.to_allocate == 0 && frame.nregs == 0;
7101 /* Value should be nonzero if functions must have frame pointers.
7102 Zero means the frame pointer need not be set up (and parms may
7103 be accessed via the stack pointer) in functions that seem suitable. */
/* NOTE(review): excerpt — return type, braces and the individual return
   statements after each condition are on elided lines.  */
7106 ix86_frame_pointer_required (void)
7108 /* If we accessed previous frames, then the generated code expects
7109 to be able to access the saved ebp value in our frame. */
7110 if (cfun->machine->accesses_prev_frame)
7113 /* Several x86 os'es need a frame pointer for other reasons,
7114 usually pertaining to setjmp. */
7115 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7118 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7119 the frame pointer by default. Turn it back on now if we've not
7120 got a leaf function. */
7121 if (TARGET_OMIT_LEAF_FRAME_POINTER
7122 && (!current_function_is_leaf
7123 || ix86_current_function_calls_tls_descriptor))
7132 /* Record that the current function accesses previous call frames. */
7135 ix86_setup_frame_addresses (void)
/* This flag later forces ix86_frame_pointer_required to keep the frame
   pointer (see the cfun->machine->accesses_prev_frame test above).  */
7137 cfun->machine->accesses_prev_frame = 1;
7140 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7141 # define USE_HIDDEN_LINKONCE 1
7143 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of register numbers for which a PC-load thunk has been
   requested; bit N is set by output_set_got for register N.  */
7146 static int pic_labels_used;
7148 /* Fills in the label name that should be used for a pc thunk for
7149 the given register. */
7152 get_pc_thunk_name (char name[32], unsigned int regno)
/* PC thunks are an ia32-only device; 64-bit code addresses the GOT
   RIP-relatively instead.  */
7154 gcc_assert (!TARGET_64BIT)
/* Use the public hidden/linkonce name when the assembler supports it,
   otherwise a compiler-local label keyed by the register number.  */
7156 if (USE_HIDDEN_LINKONCE)
7157 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7159 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7163 /* This function generates code for -fpic that loads %ebx with
7164 the return address of the caller and then returns. */
/* NOTE(review): excerpt — return type, braces, local declarations
   ("name", "decl", "xops") and several conditional lines (including the
   TARGET_MACHO guard around the weak_definition output) are elided.  */
7167 ix86_file_end (void)
/* Emit one thunk per ia32 register that output_set_got recorded in
   pic_labels_used.  */
7172 for (regno = 0; regno < 8; ++regno)
7176 if (! ((pic_labels_used >> regno) & 1))
7179 get_pc_thunk_name (name, regno);
/* Darwin branch: coalesced text section with weak/private-extern name.  */
7184 switch_to_section (darwin_sections[text_coal_section]);
7185 fputs ("\t.weak_definition\t", asm_out_file);
7186 assemble_name (asm_out_file, name);
7187 fputs ("\n\t.private_extern\t", asm_out_file);
7188 assemble_name (asm_out_file, name);
7189 fputs ("\n", asm_out_file);
7190 ASM_OUTPUT_LABEL (asm_out_file, name);
/* ELF branch: emit the thunk as a public, one-only, hidden function so
   duplicate copies across objects are merged by the linker.  */
7194 if (USE_HIDDEN_LINKONCE)
7198 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7200 TREE_PUBLIC (decl) = 1;
7201 TREE_STATIC (decl) = 1;
7202 DECL_ONE_ONLY (decl) = 1;
7204 (*targetm.asm_out.unique_section) (decl, 0);
7205 switch_to_section (get_named_section (decl, NULL, 0));
7207 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7208 fputs ("\t.hidden\t", asm_out_file);
7209 assemble_name (asm_out_file, name);
7210 fputc ('\n', asm_out_file);
7211 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7215 switch_to_section (text_section);
7216 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address (at the top of the stack) into
   the target register, then return.  */
7219 xops[0] = gen_rtx_REG (Pmode, regno);
7220 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7221 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7222 output_asm_insn ("ret", xops);
7225 if (NEED_INDICATE_EXEC_STACK)
7226 file_end_indicate_exec_stack ();
7229 /* Emit code for the SET_GOT patterns. */
/* NOTE(review): excerpt — return type, braces, the xops[0]/xops[1]
   setup, the TARGET_MACHO guards and several return statements are on
   elided lines in this listing.  */
7232 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
/* VxWorks RTP: the GOT address is published via two indirections rather
   than a PC-relative thunk.  */
7238 if (TARGET_VXWORKS_RTP && flag_pic)
7240 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7241 xops[2] = gen_rtx_MEM (Pmode,
7242 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7243 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7245 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7246 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7247 an unadorned address. */
7248 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7249 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7250 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7254 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Without deep branch prediction (or non-PIC), get the PC with a local
   call/pop pair instead of a shared thunk.  */
7256 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7258 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ())
7261 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7263 output_asm_insn ("call\t%a2", xops);
7266 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7267 is what will be referenced by the Mach-O PIC subsystem. */
7269 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
7272 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7273 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7276 output_asm_insn ("pop%z0\t%0", xops);
/* Deep-branch-prediction path: call a per-register thunk (emitted later
   by ix86_file_end) so the CPU's return stack stays balanced.  */
7281 get_pc_thunk_name (name, REGNO (dest));
7282 pic_labels_used |= 1 << REGNO (dest);
7284 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7285 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7286 output_asm_insn ("call\t%X2", xops);
7287 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7288 is what will be referenced by the Mach-O PIC subsystem. */
7291 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
7293 targetm.asm_out.internal_label (asm_out_file, "L",
7294 CODE_LABEL_NUMBER (label));
/* Finally add the GOT symbol (adjusted by the pic label offset when the
   call/pop sequence was used) to the PC value now in DEST.  */
7301 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7302 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7304 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7309 /* Generate an "push" pattern for input ARG. */
/* NOTE(review): excerpt — the signature and the MEM wrapped around the
   PRE_DEC stack address are on elided lines; the SET stores ARG at the
   pre-decremented stack pointer.  */
7314 return gen_rtx_SET (VOIDmode,
7316 gen_rtx_PRE_DEC (Pmode,
7317 stack_pointer_rtx)),
7321 /* Return >= 0 if there is an unused call-clobbered register available
7322 for the entire function. */
/* NOTE(review): excerpt — return type, braces and the declarations of
   "i" and "drap" are on elided lines.  */
7325 ix86_select_alt_pic_regnum (void)
/* Only worthwhile in a leaf, unprofiled function with no TLS-descriptor
   calls, where a call-clobbered register stays dead across the body.  */
7327 if (current_function_is_leaf && !crtl->profile
7328 && !ix86_current_function_calls_tls_descriptor)
7331 /* Can't use the same register for both PIC and DRAP. */
7333 drap = REGNO (crtl->drap_reg);
/* Scan eax/ecx/edx (regnos 2..0) for one never live in this function.  */
7336 for (i = 2; i >= 0; --i)
7337 if (i != drap && !df_regs_ever_live_p (i))
7341 return INVALID_REGNUM;
7344 /* Return 1 if we need to save REGNO. */
/* NOTE(review): excerpt — return type, braces, the EH loop header and
   several return/condition lines are elided in this listing.  */
7346 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved when it is live (or when EH return or
   the constant pool may need it) and no alternate register was found.  */
7348 if (pic_offset_table_rtx
7349 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7350 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7352 || crtl->calls_eh_return
7353 || crtl->uses_const_pool))
7355 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* EH data registers must be preserved when the function may take the
   eh_return path.  */
7360 if (crtl->calls_eh_return && maybe_eh_return)
7365 unsigned test = EH_RETURN_DATA_REGNO (i);
7366 if (test == INVALID_REGNUM)
/* The DRAP register is handled separately by the prologue.  */
7374 && regno == REGNO (crtl->drap_reg))
/* General case: save call-saved, non-fixed registers that are live,
   except the hard frame pointer when it is set up by the prologue.  */
7377 return (df_regs_ever_live_p (regno)
7378 && !call_used_regs[regno]
7379 && !fixed_regs[regno]
7380 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7383 /* Return number of registers to be saved on the stack. */
/* NOTE(review): excerpt — return type, braces, the counter declaration
   and the increment/return lines are elided.  */
7386 ix86_nsaved_regs (void)
/* Count every hard register that ix86_save_reg says must be saved.  */
7391 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7392 if (ix86_save_reg (regno, true))
7397 /* Given FROM and TO register numbers, say whether this elimination is
7398 allowed. If stack alignment is needed, we can only replace argument
7399 pointer with hard frame pointer, or replace frame pointer with stack
7400 pointer. Otherwise, frame pointer elimination is automatically
7401 handled and all other eliminations are valid. */
7404 ix86_can_eliminate (int from, int to)
/* With frame-pointer-based realignment only the two listed eliminations
   are meaningful; anything else would bypass the realigned frame.  */
7406 if (stack_realign_fp)
7407 return ((from == ARG_POINTER_REGNUM
7408 && to == HARD_FRAME_POINTER_REGNUM)
7409 || (from == FRAME_POINTER_REGNUM
7410 && to == STACK_POINTER_REGNUM));
7412 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7415 /* Return the offset between two registers, one to be eliminated, and the other
7416 its replacement, at the start of a routine. */
7419 ix86_initial_elimination_offset (int from, int to)
7421 struct ix86_frame frame;
7422 ix86_compute_frame_layout (&frame);
/* Offsets come straight from the computed frame layout; each case is the
   difference between the two registers' positions in the frame.  */
7424 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7425 return frame.hard_frame_pointer_offset;
7426 else if (from == FRAME_POINTER_REGNUM
7427 && to == HARD_FRAME_POINTER_REGNUM)
7428 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7431 gcc_assert (to == STACK_POINTER_REGNUM);
7433 if (from == ARG_POINTER_REGNUM)
7434 return frame.stack_pointer_offset;
7436 gcc_assert (from == FRAME_POINTER_REGNUM);
7437 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7441 /* Fill structure ix86_frame about frame of currently computed function. */
/* NOTE(review): excerpt — return type, braces, some else-branches and
   the guard around the debug fprintf dump at the end are elided.  */
7444 ix86_compute_frame_layout (struct ix86_frame *frame)
7446 HOST_WIDE_INT total_size;
7447 unsigned int stack_alignment_needed;
7448 HOST_WIDE_INT offset;
7449 unsigned int preferred_alignment;
7450 HOST_WIDE_INT size = get_frame_size ();
7452 frame->nregs = ix86_nsaved_regs ();
/* Alignment values are kept in bytes from here on.  */
7455 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7456 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7458 gcc_assert (!size || stack_alignment_needed);
7459 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7460 gcc_assert (preferred_alignment <= stack_alignment_needed);
7462 /* During reload iteration the amount of registers saved can change.
7463 Recompute the value as needed. Do not recompute when amount of registers
7464 didn't change as reload does multiple calls to the function and does not
7465 expect the decision to change within single iteration. */
7466 if (!optimize_function_for_size_p (cfun)
7467 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7469 int count = frame->nregs;
7471 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7472 /* The fast prologue uses move instead of push to save registers. This
7473 is significantly longer, but also executes faster as modern hardware
7474 can execute the moves in parallel, but can't do that for push/pop.
7476 Be careful about choosing what prologue to emit: When function takes
7477 many instructions to execute we may use slow version as well as in
7478 case function is known to be outside hot spot (this is known with
7479 feedback only). Weight the size of function by number of registers
7480 to save as it is cheap to use one or two push instructions but very
7481 slow to use many of them. */
7483 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7484 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7485 || (flag_branch_probabilities
7486 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7487 cfun->machine->use_fast_prologue_epilogue = false;
7489 cfun->machine->use_fast_prologue_epilogue
7490 = !expensive_function_p (count);
7492 if (TARGET_PROLOGUE_USING_MOVE
7493 && cfun->machine->use_fast_prologue_epilogue)
7494 frame->save_regs_using_mov = true;
7496 frame->save_regs_using_mov = false;
/* Frame layout proper: walk "offset" downward through the frame,
   recording each area's position as it is passed.  */
7499 /* Skip return address and saved base pointer. */
7500 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7502 frame->hard_frame_pointer_offset = offset;
7504 /* Set offset to aligned because the realigned frame starts from
7506 if (stack_realign_fp)
7507 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7509 /* Register save area */
7510 offset += frame->nregs * UNITS_PER_WORD;
/* Area reserved for spilled varargs registers (GPR + SSE).  */
7513 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7514 offset += frame->va_arg_size;
7516 /* Align start of frame for local function. */
7517 frame->padding1 = ((offset + stack_alignment_needed - 1)
7518 & -stack_alignment_needed) - offset;
7520 offset += frame->padding1;
7522 /* Frame pointer points here. */
7523 frame->frame_pointer_offset = offset;
7527 /* Add outgoing arguments area. Can be skipped if we eliminated
7528 all the function calls as dead code.
7529 Skipping is however impossible when function calls alloca. Alloca
7530 expander assumes that last crtl->outgoing_args_size
7531 of stack frame are unused. */
7532 if (ACCUMULATE_OUTGOING_ARGS
7533 && (!current_function_is_leaf || cfun->calls_alloca
7534 || ix86_current_function_calls_tls_descriptor))
7536 offset += crtl->outgoing_args_size;
7537 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7540 frame->outgoing_arguments_size = 0;
7542 /* Align stack boundary. Only needed if we're calling another function
7544 if (!current_function_is_leaf || cfun->calls_alloca
7545 || ix86_current_function_calls_tls_descriptor)
7546 frame->padding2 = ((offset + preferred_alignment - 1)
7547 & -preferred_alignment) - offset;
7549 frame->padding2 = 0;
7551 offset += frame->padding2;
7553 /* We've reached end of stack frame. */
7554 frame->stack_pointer_offset = offset;
7556 /* Size prologue needs to allocate. */
7557 frame->to_allocate =
7558 (size + frame->padding1 + frame->padding2
7559 + frame->outgoing_arguments_size + frame->va_arg_size);
/* With a trivial frame (or a 64-bit frame too large for a 32-bit
   displacement) fall back to push-based saves.  */
7561 if ((!frame->to_allocate && frame->nregs <= 1)
7562 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7563 frame->save_regs_using_mov = false;
/* Leaf functions on red-zone ABIs may use the 128-byte area below the
   stack pointer instead of allocating, up to the reserved limit.  */
7565 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7566 && current_function_is_leaf
7567 && !ix86_current_function_calls_tls_descriptor)
7569 frame->red_zone_size = frame->to_allocate;
7570 if (frame->save_regs_using_mov)
7571 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7572 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7573 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7576 frame->red_zone_size = 0;
7577 frame->to_allocate -= frame->red_zone_size;
7578 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (guard line elided in this view).  */
7580 fprintf (stderr, "\n");
7581 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7582 fprintf (stderr, "size: %ld\n", (long)size);
7583 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7584 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7585 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7586 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7587 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7588 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7589 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7590 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7591 (long)frame->hard_frame_pointer_offset);
7592 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7593 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7594 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7595 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7599 /* Emit code to save registers in the prologue. */
7602 ix86_emit_save_regs (void)
/* Push each register that must be saved, highest regno first, and mark
   each push as frame-related for DWARF CFI generation.  */
7607 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
7608 if (ix86_save_reg (regno, true))
7610 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7611 RTX_FRAME_RELATED_P (insn) = 1;
7615 /* Emit code to save registers using MOV insns. First register
7616 is saved into POINTER + OFFSET.  (Original comment said "restored
7617 from" — this routine stores, it does not restore.) */
7618 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
/* Store each to-be-saved register at successive word slots starting at
   POINTER + OFFSET, marking each store as frame-related for CFI.  */
7623 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7624 if (ix86_save_reg (regno, true))
7626 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7628 gen_rtx_REG (Pmode, regno));
7629 RTX_FRAME_RELATED_P (insn) = 1;
7630 offset += UNITS_PER_WORD;
7634 /* Expand prologue or epilogue stack adjustment.
7635 The pattern exist to put a dependency on all ebp-based memory accesses.
7636 STYLE should be negative if instructions should be marked as frame related,
7637 zero if %r11 register is live and cannot be freely used and positive
/* NOTE(review): excerpt — return type, braces, the declarations of
   "insn"/"r11" and the !TARGET_64BIT guard line are elided.  */
7641 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
/* 32-bit, or a 64-bit offset that fits an immediate: single add.  */
7646 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7647 else if (x86_64_immediate_operand (offset, DImode))
7648 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
/* Huge 64-bit offset: stage it through %r11 first.  */
7652 /* r11 is used by indirect sibcall return as well, set before the
7653 epilogue and used after the epilogue. ATM indirect sibcall
7654 shouldn't be used together with huge frame sizes in one
7655 function because of the frame_size check in sibcall.c. */
7657 r11 = gen_rtx_REG (DImode, R11_REG);
7658 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
7660 RTX_FRAME_RELATED_P (insn) = 1;
7661 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
/* Negative STYLE requests frame-related marking (guard line elided).  */
7665 RTX_FRAME_RELATED_P (insn) = 1;
7668 /* Find an available register to be used as dynamic realign argument
7669 pointer register. Such a register will be written in prologue and
7670 used in begin of body, so it must not be
7671 1. parameter passing register.
7673 We reuse static-chain register if it is available. Otherwise, we
7674 use DI for i386 and R13 for x86-64. We chose R13 since it has
7677 Return: the regno of chosen register. */
/* NOTE(review): excerpt — return type, braces, the TARGET_64BIT split
   and the actual return statements (R13/DI/static-chain regnos) are on
   elided lines in this listing.  */
7680 find_drap_reg (void)
7682 tree decl = cfun->decl;
7686 /* Use R13 for nested function or function need static chain.
7687 Since function with tail call may use any caller-saved
7688 registers in epilogue, DRAP must not use caller-saved
7689 register in such case. */
7690 if ((decl_function_context (decl)
7691 && !DECL_NO_STATIC_CHAIN (decl))
7692 || crtl->tail_call_emit)
7699 /* Use DI for nested function or function need static chain.
7700 Since function with tail call may use any caller-saved
7701 registers in epilogue, DRAP must not use caller-saved
7702 register in such case. */
7703 if ((decl_function_context (decl)
7704 && !DECL_NO_STATIC_CHAIN (decl))
7705 || crtl->tail_call_emit)
7708 /* Reuse static chain register if it isn't used for parameter
/* The static-chain register is free only when regparm passing does not
   claim it and the function is not fastcall.  */
7710 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
7711 && !lookup_attribute ("fastcall",
7712 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
7719 /* Update incoming stack boundary and estimated stack alignment. */
7722 ix86_update_stack_boundary (void)
7724 /* Prefer the one specified at command line. */
7725 ix86_incoming_stack_boundary
7726 = (ix86_user_incoming_stack_boundary
7727 ? ix86_user_incoming_stack_boundary
7728 : ix86_default_incoming_stack_boundary);
7730 /* Incoming stack alignment can be changed on individual functions
7731 via force_align_arg_pointer attribute. We use the smallest
7732 incoming stack boundary. */
7733 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
7734 && lookup_attribute (ix86_force_align_arg_pointer_string,
7735 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7736 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
7738 /* Stack at entrance of main is aligned by runtime. We use the
7739 smallest incoming stack boundary. */
7740 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
7741 && DECL_NAME (current_function_decl)
7742 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7743 && DECL_FILE_SCOPE_P (current_function_decl))
7744 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7746 /* x86_64 vararg needs 16byte stack alignment for register save
/* (Condition line elided.)  Bump the estimate to 128 bits so the SSE
   register save area in the varargs prologue stays 16-byte aligned.  */
7750 && crtl->stack_alignment_estimated < 128)
7751 crtl->stack_alignment_estimated = 128;
7754 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7755 needed or an rtx for DRAP otherwise. */
/* NOTE(review): excerpt — return type, braces, local declarations
   (arg_ptr, drap_vreg, seq, insn) and start/end_sequence calls are on
   elided lines in this listing.  */
7758 ix86_get_drap_rtx (void)
/* Force DRAP when requested or when outgoing args are pushed, so
   cfa-based unwinding stays valid.  */
7760 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
7761 crtl->need_drap = true;
7763 if (stack_realign_drap)
7765 /* Assign DRAP to vDRAP and returns vDRAP */
7766 unsigned int regno = find_drap_reg ();
7771 arg_ptr = gen_rtx_REG (Pmode, regno);
7772 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP into a pseudo right after the function entry so
   the body references the pseudo, not the prologue-clobbered hard reg.  */
7775 drap_vreg = copy_to_reg (arg_ptr);
7779 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7780 RTX_FRAME_RELATED_P (insn) = 1;
7787 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7790 ix86_internal_arg_pointer (void)
/* The default virtual incoming-args pointer is sufficient here.  */
7792 return virtual_incoming_args_rtx;
7795 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
7796 This is called from dwarf2out.c to emit call frame instructions
7797 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
/* NOTE(review): excerpt — return type, braces, the switch header and
   the default case are on elided lines in this listing.  */
7799 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
7801 rtx unspec = SET_SRC (pattern);
7802 gcc_assert (GET_CODE (unspec) == UNSPEC);
/* Map each recognized UNSPEC to its dwarf2out CFI emission routine.  */
7806 case UNSPEC_REG_SAVE:
7807 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
7808 SET_DEST (pattern));
7810 case UNSPEC_DEF_CFA:
7811 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
7812 INTVAL (XVECEXP (unspec, 0, 0)));
7819 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
7820 to be generated in correct form. */
7822 ix86_finalize_stack_realign_flags (void)
7824 /* Check if stack realign is really needed after reload, and
7825 stores result in cfun */
7826 unsigned int incoming_stack_boundary
7827 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7828 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
/* Leaf functions compare against the largest stack-slot alignment
   actually used; non-leaf ones against the required alignment.  */
7829 unsigned int stack_realign = (incoming_stack_boundary
7830 < (current_function_is_leaf
7831 ? crtl->max_used_stack_slot_alignment
7832 : crtl->stack_alignment_needed));
7834 if (crtl->stack_realign_finalized)
7836 /* After stack_realign_needed is finalized, we can no longer
7838 gcc_assert (crtl->stack_realign_needed == stack_realign);
7842 crtl->stack_realign_needed = stack_realign;
7843 crtl->stack_realign_finalized = true;
7847 /* Expand the prologue into a bunch of separate insns. */
7850 ix86_expand_prologue (void)
7854 struct ix86_frame frame;
7855 HOST_WIDE_INT allocate;
7857 ix86_finalize_stack_realign_flags ();
7859 /* DRAP should not coexist with stack_realign_fp */
7860 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
7862 ix86_compute_frame_layout (&frame);
7864 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
7865 of DRAP is needed and stack realignment is really needed after reload */
7866 if (crtl->drap_reg && crtl->stack_realign_needed)
7869 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7870 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
7871 ? 0 : UNITS_PER_WORD);
7873 gcc_assert (stack_realign_drap);
7875 /* Grab the argument pointer. */
7876 x = plus_constant (stack_pointer_rtx,
7877 (UNITS_PER_WORD + param_ptr_offset));
7880 /* Only need to push parameter pointer reg if it is caller
7882 if (!call_used_regs[REGNO (crtl->drap_reg)])
7884 /* Push arg pointer reg */
7885 insn = emit_insn (gen_push (y));
7886 RTX_FRAME_RELATED_P (insn) = 1;
7889 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
7890 RTX_FRAME_RELATED_P (insn) = 1;
7892 /* Align the stack. */
7893 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
7895 GEN_INT (-align_bytes)));
7896 RTX_FRAME_RELATED_P (insn) = 1;
7898 /* Replicate the return address on the stack so that return
7899 address can be reached via (argp - 1) slot. This is needed
7900 to implement macro RETURN_ADDR_RTX and intrinsic function
7901 expand_builtin_return_addr etc. */
7903 x = gen_frame_mem (Pmode,
7904 plus_constant (x, -UNITS_PER_WORD));
7905 insn = emit_insn (gen_push (x));
7906 RTX_FRAME_RELATED_P (insn) = 1;
7909 /* Note: AT&T enter does NOT have reversed args. Enter is probably
7910 slower on all targets. Also sdb doesn't like it. */
7912 if (frame_pointer_needed)
7914 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
7915 RTX_FRAME_RELATED_P (insn) = 1;
7917 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
7918 RTX_FRAME_RELATED_P (insn) = 1;
7921 if (stack_realign_fp)
7923 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7924 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
7926 /* Align the stack. */
7927 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
7929 GEN_INT (-align_bytes)));
7930 RTX_FRAME_RELATED_P (insn) = 1;
7933 allocate = frame.to_allocate;
7935 if (!frame.save_regs_using_mov)
7936 ix86_emit_save_regs ();
7938 allocate += frame.nregs * UNITS_PER_WORD;
7940 /* When using red zone we may start register saving before allocating
7941 the stack frame saving one cycle of the prologue. However I will
7942 avoid doing this if I am going to have to probe the stack since
7943 at least on x86_64 the stack probe can turn into a call that clobbers
7944 a red zone location */
7945 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
7946 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
7947 ix86_emit_save_regs_using_mov ((frame_pointer_needed
7948 && !crtl->stack_realign_needed)
7949 ? hard_frame_pointer_rtx
7950 : stack_pointer_rtx,
7951 -frame.nregs * UNITS_PER_WORD);
7955 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
7956 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7957 GEN_INT (-allocate), -1);
7960 /* Only valid for Win32. */
7961 rtx eax = gen_rtx_REG (Pmode, AX_REG);
7965 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
7967 if (cfun->machine->call_abi == MS_ABI)
7970 eax_live = ix86_eax_live_at_start_p ();
7974 emit_insn (gen_push (eax));
7975 allocate -= UNITS_PER_WORD;
7978 emit_move_insn (eax, GEN_INT (allocate));
7981 insn = gen_allocate_stack_worker_64 (eax);
7983 insn = gen_allocate_stack_worker_32 (eax);
7984 insn = emit_insn (insn);
7985 RTX_FRAME_RELATED_P (insn) = 1;
7986 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
7987 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
7988 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
7989 t, REG_NOTES (insn));
7993 if (frame_pointer_needed)
7994 t = plus_constant (hard_frame_pointer_rtx,
7997 - frame.nregs * UNITS_PER_WORD);
7999 t = plus_constant (stack_pointer_rtx, allocate);
8000 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8004 if (frame.save_regs_using_mov
8005 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8006 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8008 if (!frame_pointer_needed
8009 || !frame.to_allocate
8010 || crtl->stack_realign_needed)
8011 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8014 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8015 -frame.nregs * UNITS_PER_WORD);
8018 pic_reg_used = false;
8019 if (pic_offset_table_rtx
8020 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8023 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8025 if (alt_pic_reg_used != INVALID_REGNUM)
8026 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8028 pic_reg_used = true;
8035 if (ix86_cmodel == CM_LARGE_PIC)
8037 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8038 rtx label = gen_label_rtx ();
8040 LABEL_PRESERVE_P (label) = 1;
8041 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8042 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8043 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8044 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8045 pic_offset_table_rtx, tmp_reg));
8048 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8051 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8054 /* Prevent function calls from being scheduled before the call to mcount.
8055 In the pic_reg_used case, make sure that the got load isn't deleted. */
8059 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8060 emit_insn (gen_blockage ());
8063 if (crtl->drap_reg && !crtl->stack_realign_needed)
8065 /* vDRAP is setup but after reload it turns out stack realign
8066 isn't necessary, here we will emit prologue to setup DRAP
8067 without stack realign adjustment */
8068 int drap_bp_offset = UNITS_PER_WORD * 2;
8069 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8070 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8073 /* Emit cld instruction if stringops are used in the function. */
8074 if (TARGET_CLD && ix86_current_function_needs_cld)
8075 emit_insn (gen_cld ());
8078 /* Emit code to restore saved registers using MOV insns. First register
8079 is restored from POINTER + OFFSET. */
/* MAYBE_EH_RETURN is forwarded to ix86_save_reg so the extra registers
   saved for an eh_return function are also reloaded on that path.  */
8081 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8082 int maybe_eh_return)
8085 rtx base_address = gen_rtx_MEM (Pmode, pointer);
/* Reload every call-saved register the prologue stored; each occupies
   one word-sized slot at increasing offsets.  */
8087 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8088 if (ix86_save_reg (regno, maybe_eh_return))
8090 /* Ensure that adjust_address won't be forced to produce pointer
8091 out of range allowed by x86-64 instruction set. */
/* Displacements wider than 32 bits cannot be encoded in an x86-64
   memory operand, so materialize POINTER + OFFSET in r11 first.  */
8092 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8096 r11 = gen_rtx_REG (DImode, R11_REG);
8097 emit_move_insn (r11, GEN_INT (offset));
8098 emit_insn (gen_adddi3 (r11, r11, pointer));
8099 base_address = gen_rtx_MEM (Pmode, r11);
8102 emit_move_insn (gen_rtx_REG (Pmode, regno),
8103 adjust_address (base_address, Pmode, offset));
/* Advance to the next saved-register slot.  */
8104 offset += UNITS_PER_WORD;
8108 /* Restore function stack, frame, and registers. */
/* STYLE selects the epilogue flavor: the code below treats style == 2 as
   the eh_return path (it is passed as maybe_eh_return and excluded from
   the calls_eh_return adjustment).  NOTE(review): the exact encoding of
   the other STYLE values (normal vs. sibcall) is not visible here —
   confirm against the callers.  */
8111 ix86_expand_epilogue (int style)
8115 struct ix86_frame frame;
8116 HOST_WIDE_INT offset;
8118 ix86_finalize_stack_realign_flags ();
8120 /* When stack is realigned, SP must be valid. */
8121 sp_valid = (!frame_pointer_needed
8122 || current_function_sp_is_unchanging
8123 || stack_realign_fp);
8125 ix86_compute_frame_layout (&frame);
8127 /* Calculate start of saved registers relative to ebp. Special care
8128 must be taken for the normal return case of a function using
8129 eh_return: the eax and edx registers are marked as saved, but not
8130 restored along this path. */
8131 offset = frame.nregs;
8132 if (crtl->calls_eh_return && style != 2)
8134 offset *= -UNITS_PER_WORD;
8136 /* If we're only restoring one register and sp is not valid then
8137 using a move instruction to restore the register since it's
8138 less work than reloading sp and popping the register.
8140 The default code result in stack adjustment using add/lea instruction,
8141 while this code results in LEAVE instruction (or discrete equivalent),
8142 so it is profitable in some other cases as well. Especially when there
8143 are no registers to restore. We also use this code when TARGET_USE_LEAVE
8144 and there is exactly one register to pop. This heuristic may need some
8145 tuning in future. */
/* Branch 1: restore registers with MOVs, then unwind the frame.  */
8146 if ((!sp_valid && frame.nregs <= 1)
8147 || (TARGET_EPILOGUE_USING_MOVE
8148 && cfun->machine->use_fast_prologue_epilogue
8149 && (frame.nregs > 1 || frame.to_allocate))
8150 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
8151 || (frame_pointer_needed && TARGET_USE_LEAVE
8152 && cfun->machine->use_fast_prologue_epilogue
8153 && frame.nregs == 1)
8154 || crtl->calls_eh_return)
8156 /* Restore registers. We can use ebp or esp to address the memory
8157 locations. If both are available, default to ebp, since offsets
8158 are known to be small. Only exception is esp pointing directly
8159 to the end of block of saved registers, where we may simplify
8162 If we are realigning stack with bp and sp, regs restore can't
8163 be addressed by bp. sp must be used instead. */
8165 if (!frame_pointer_needed
8166 || (sp_valid && !frame.to_allocate)
8167 || stack_realign_fp)
8168 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8169 frame.to_allocate, style == 2)
8171 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8172 offset, style == 2);
8174 /* eh_return epilogues need %ecx added to the stack pointer. */
8177 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8179 /* Stack align doesn't work with eh_return. */
8180 gcc_assert (!crtl->stack_realign_needed);
8182 if (frame_pointer_needed)
/* With a frame pointer: compute the new SP as ebp + stackadj + one
   word (for the saved ebp itself), reload ebp, then switch SP.  */
8184 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8185 tmp = plus_constant (tmp, UNITS_PER_WORD);
8186 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8188 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8189 emit_move_insn (hard_frame_pointer_rtx, tmp);
8191 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
/* Without a frame pointer: fold the stack adjustment, local frame
   and register-save area into a single SP update.  */
8196 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8197 tmp = plus_constant (tmp, (frame.to_allocate
8198 + frame.nregs * UNITS_PER_WORD));
8199 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8202 else if (!frame_pointer_needed)
8203 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8204 GEN_INT (frame.to_allocate
8205 + frame.nregs * UNITS_PER_WORD),
8207 /* If not an i386, mov & pop is faster than "leave". */
8208 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8209 || !cfun->machine->use_fast_prologue_epilogue)
8210 emit_insn ((*ix86_gen_leave) ());
8213 pro_epilogue_adjust_stack (stack_pointer_rtx,
8214 hard_frame_pointer_rtx,
8217 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Branch 2: restore registers with POPs after deallocating the frame.  */
8222 /* First step is to deallocate the stack frame so that we can
8225 If we realign stack with frame pointer, then stack pointer
8226 won't be able to recover via lea $offset(%bp), %sp, because
8227 there is a padding area between bp and sp for realign.
8228 "add $to_allocate, %sp" must be used instead. */
8231 gcc_assert (frame_pointer_needed);
8232 gcc_assert (!stack_realign_fp);
8233 pro_epilogue_adjust_stack (stack_pointer_rtx,
8234 hard_frame_pointer_rtx,
8235 GEN_INT (offset), style);
8237 else if (frame.to_allocate)
8238 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8239 GEN_INT (frame.to_allocate), style);
8241 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8242 if (ix86_save_reg (regno, false))
8243 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8244 if (frame_pointer_needed)
8246 /* Leave results in shorter dependency chains on CPUs that are
8247 able to grok it fast. */
8248 if (TARGET_USE_LEAVE)
8249 emit_insn ((*ix86_gen_leave) ());
8252 /* For stack realigned really happens, recover stack
8253 pointer to hard frame pointer is a must, if not using
8255 if (stack_realign_fp)
8256 pro_epilogue_adjust_stack (stack_pointer_rtx,
8257 hard_frame_pointer_rtx,
8259 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Undo the DRAP (dynamic realign argument pointer) setup: pop back to
   the caller frame, restoring the DRAP register if it was call-saved.  */
8264 if (crtl->drap_reg && crtl->stack_realign_needed)
8266 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8267 ? 0 : UNITS_PER_WORD);
8268 gcc_assert (stack_realign_drap);
8269 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8271 GEN_INT (-(UNITS_PER_WORD
8272 + param_ptr_offset))));
8273 if (!call_used_regs[REGNO (crtl->drap_reg)])
8274 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8278 /* Sibcall epilogues don't want a return instruction. */
/* Callee-pops ("stdcall"/"pascal" style) returns: use ret $N, or for
   pop counts >= 64K an indirect jump through %ecx.  */
8282 if (crtl->args.pops_args && crtl->args.size)
8284 rtx popc = GEN_INT (crtl->args.pops_args);
8286 /* i386 can only pop 64K bytes. If asked to pop more, pop
8287 return address, do explicit add, and jump indirectly to the
8290 if (crtl->args.pops_args >= 65536)
8292 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8294 /* There is no "pascal" calling convention in any 64bit ABI. */
8295 gcc_assert (!TARGET_64BIT);
8297 emit_insn (gen_popsi1 (ecx));
8298 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8299 emit_jump_insn (gen_return_indirect_internal (ecx));
8302 emit_jump_insn (gen_return_pop_internal (popc));
8305 emit_jump_insn (gen_return_internal ());
8308 /* Reset from the function's potential modifications. */
/* TARGET_ASM_FUNCTION_EPILOGUE hook: restore the PIC register number
   (an alternate regnum may have been selected in the prologue) and
   emit a trailing NOP when Mach-O needs it.  */
8311 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8312 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8314 if (pic_offset_table_rtx)
8315 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8317 /* Mach-O doesn't support labels at the end of objects, so if
8318 it looks like we might want one, insert a NOP. */
8320 rtx insn = get_last_insn ();
/* Skip trailing notes (other than deleted labels) to find the last
   real insn; a deleted label at the end forces the NOP.  */
8323 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8324 insn = PREV_INSN (insn);
8328 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8329 fputs ("\tnop\n", file);
8335 /* Extract the parts of an RTL expression that is a valid memory address
8336 for an instruction. Return 0 if the structure of the address is
8337 grossly off. Return -1 if the address contains ASHIFT, so it is not
8338 strictly valid, but still used for computing length of lea instruction. */
/* On success the decomposition (base, index, scale, disp, seg) is stored
   into *OUT, matching the x86 base + index*scale + disp [seg] form.  */
8341 ix86_decompose_address (rtx addr, struct ix86_address *out)
8343 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8344 rtx base_reg, index_reg;
8345 HOST_WIDE_INT scale = 1;
8346 rtx scale_rtx = NULL_RTX;
8348 enum ix86_address_seg seg = SEG_DEFAULT;
/* Classify the top-level RTX: plain register, a PLUS tree of addends,
   a MULT (index*scale), an ASHIFT (lea-only), or a bare displacement.  */
8350 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8352 else if (GET_CODE (addr) == PLUS)
/* Flatten the (possibly nested) PLUS chain into addends[].  */
8362 addends[n++] = XEXP (op, 1);
8365 while (GET_CODE (op) == PLUS);
8370 for (i = n; i >= 0; --i)
8373 switch (GET_CODE (op))
8378 index = XEXP (op, 0);
8379 scale_rtx = XEXP (op, 1);
/* An UNSPEC_TP addend selects the thread-pointer segment register
   (%fs on 64-bit, %gs on 32-bit) for direct TLS references.  */
8383 if (XINT (op, 1) == UNSPEC_TP
8384 && TARGET_TLS_DIRECT_SEG_REFS
8385 && seg == SEG_DEFAULT)
8386 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8415 else if (GET_CODE (addr) == MULT)
8417 index = XEXP (addr, 0); /* index*scale */
8418 scale_rtx = XEXP (addr, 1);
8420 else if (GET_CODE (addr) == ASHIFT)
8424 /* We're called for lea too, which implements ashift on occasion. */
8425 index = XEXP (addr, 0);
8426 tmp = XEXP (addr, 1);
8427 if (!CONST_INT_P (tmp))
/* Shift count 0..3 corresponds to scale 1/2/4/8.  */
8429 scale = INTVAL (tmp);
8430 if ((unsigned HOST_WIDE_INT) scale > 3)
8436 disp = addr; /* displacement */
8438 /* Extract the integral value of scale. */
8441 if (!CONST_INT_P (scale_rtx))
8443 scale = INTVAL (scale_rtx);
8446 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8447 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8449 /* Allow arg pointer and stack pointer as index if there is not scaling. */
/* %esp can never be an index in the encoding; with scale 1 we can simply
   swap base and index to make the address encodable.  */
8450 if (base_reg && index_reg && scale == 1
8451 && (index_reg == arg_pointer_rtx
8452 || index_reg == frame_pointer_rtx
8453 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8456 tmp = base, base = index, index = tmp;
8457 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8460 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8461 if ((base_reg == hard_frame_pointer_rtx
8462 || base_reg == frame_pointer_rtx
8463 || base_reg == arg_pointer_rtx) && !disp
8466 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8467 Avoid this by transforming to [%esi+0].
8468 Reload calls address legitimization without cfun defined, so we need
8469 to test cfun for being non-NULL. */
8470 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8471 && base_reg && !index_reg && !disp
8473 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8476 /* Special case: encode reg+reg instead of reg*2. */
8477 if (!base && index && scale && scale == 2)
8478 base = index, base_reg = index_reg, scale = 1;
8480 /* Special case: scaling cannot be encoded without base or displacement. */
8481 if (!base && !disp && index && scale != 1)
8493 /* Return cost of the memory address x.
8494 For i386, it is better to use a complex address than let gcc copy
8495 the address into a reg and make a new pseudo. But not if the address
8496 requires to two regs - that would mean more pseudos with longer
/* TARGET_ADDRESS_COST hook; SPEED is unused on this target.  */
8499 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8501 struct ix86_address parts;
8503 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so the register checks below see the inner hard reg
   or pseudo directly.  */
8507 if (parts.base && GET_CODE (parts.base) == SUBREG)
8508 parts.base = SUBREG_REG (parts.base);
8509 if (parts.index && GET_CODE (parts.index) == SUBREG)
8510 parts.index = SUBREG_REG (parts.index);
8512 /* Attempt to minimize number of registers in the address. */
8514 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8516 && (!REG_P (parts.index)
8517 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8521 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8523 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8524 && parts.base != parts.index)
8527 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
8528 since it's predecode logic can't detect the length of instructions
8529 and it degenerates to vector decoded. Increase cost of such
8530 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
8531 to split such addresses or even refuse such addresses at all.
8533 Following addressing modes are affected:
8538 The first and last case may be avoidable by explicitly coding the zero in
8539 memory address, but I don't have AMD-K6 machine handy to check this
8543 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8544 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8545 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8551 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8552 this is used for to form addresses to local data when -fPIC is in
/* Recognizer: true iff DISP is a (minus (label/symbol) "<pic base>")
   expression, the Mach-O way of addressing local data PIC-relative.  */
8556 darwin_local_data_pic (rtx disp)
8558 if (GET_CODE (disp) == MINUS)
8560 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
8561 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF
8562 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
/* The Mach-O PIC base is represented by this magic symbol name.  */
8564 const char *sym_name = XSTR (XEXP (disp, 1), 0);
8565 if (! strcmp (sym_name, "<pic base>"))
8573 /* Determine if a given RTX is a valid constant. We already know this
8574 satisfies CONSTANT_P. */
/* Rejects TLS and dllimport symbols, most UNSPECs, and (on 64-bit)
   non-zero TImode constants; everything else is handled by the move
   patterns.  */
8577 legitimate_constant_p (rtx x)
8579 switch (GET_CODE (x))
8584 if (GET_CODE (x) == PLUS)
/* Offsets folded into a CONST must be literal integers.  */
8586 if (!CONST_INT_P (XEXP (x, 1)))
8591 if (TARGET_MACHO && darwin_local_data_pic (x))
8594 /* Only some unspecs are valid as "constants". */
8595 if (GET_CODE (x) == UNSPEC)
8596 switch (XINT (x, 1))
8601 return TARGET_64BIT;
/* TLS offset unspecs are constant only for the matching TLS model.  */
8604 x = XVECEXP (x, 0, 0);
8605 return (GET_CODE (x) == SYMBOL_REF
8606 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8608 x = XVECEXP (x, 0, 0);
8609 return (GET_CODE (x) == SYMBOL_REF
8610 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
8615 /* We must have drilled down to a symbol. */
8616 if (GET_CODE (x) == LABEL_REF)
8618 if (GET_CODE (x) != SYMBOL_REF)
8623 /* TLS symbols are never valid. */
8624 if (SYMBOL_REF_TLS_MODEL (x))
8627 /* DLLIMPORT symbols are never valid. */
8628 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
8629 && SYMBOL_REF_DLLIMPORT_P (x))
8634 if (GET_MODE (x) == TImode
8635 && x != CONST0_RTX (TImode)
8641 if (x == CONST0_RTX (GET_MODE (x)))
8649 /* Otherwise we handle everything else in the move patterns. */
8653 /* Determine if it's legal to put X into the constant pool. This
8654 is not possible for the address of thread-local symbols, which
8655 is checked above. */
/* TARGET_CANNOT_FORCE_CONST_MEM hook: anything legitimate_constant_p
   rejects (TLS, dllimport, bad unspecs) must stay out of the pool.  */
8658 ix86_cannot_force_const_mem (rtx x)
8660 /* We can always put integral constants and vectors in memory. */
8661 switch (GET_CODE (x))
8671 return !legitimate_constant_p (x);
8674 /* Determine if a given RTX is a valid constant address. */
/* Strict form: delegates to legitimate_address_p with strict = 1.  */
8677 constant_address_p (rtx x)
8679 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
8682 /* Nonzero if the constant value X is a legitimate general operand
8683 when generating PIC code. It is given that flag_pic is on and
8684 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
8687 legitimate_pic_operand_p (rtx x)
8691 switch (GET_CODE (x))
/* For a CONST, peel a (plus inner const_int) wrapper and inspect the
   inner expression.  */
8694 inner = XEXP (x, 0);
8695 if (GET_CODE (inner) == PLUS
8696 && CONST_INT_P (XEXP (inner, 1)))
8697 inner = XEXP (inner, 0);
8699 /* Only some unspecs are valid as "constants". */
8700 if (GET_CODE (inner) == UNSPEC)
8701 switch (XINT (inner, 1))
8706 return TARGET_64BIT;
/* TPOFF-style unspecs are valid only for local-exec TLS symbols.  */
8708 x = XVECEXP (inner, 0, 0);
8709 return (GET_CODE (x) == SYMBOL_REF
8710 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
/* Symbols/labels fall through to the displacement legality check.  */
8718 return legitimate_pic_address_disp_p (x);
8725 /* Determine if a given CONST RTX is a valid memory displacement
/* Validates PIC displacements: direct symbol/label refs (64-bit,
   non-dynamic, offset within +/-16MB), or GOT-related UNSPECs with the
   TLS model matching the unspec kind.  */
8729 legitimate_pic_address_disp_p (rtx disp)
8733 /* In 64bit mode we can allow direct addresses of symbols and labels
8734 when they are not dynamic symbols. */
8737 rtx op0 = disp, op1;
8739 switch (GET_CODE (disp))
8745 if (GET_CODE (XEXP (disp, 0)) != PLUS)
8747 op0 = XEXP (XEXP (disp, 0), 0);
8748 op1 = XEXP (XEXP (disp, 0), 1);
/* Offsets must stay within +/-16MB so the RIP-relative access cannot
   leave the symbol's object by a large margin.  */
8749 if (!CONST_INT_P (op1)
8750 || INTVAL (op1) >= 16*1024*1024
8751 || INTVAL (op1) < -16*1024*1024)
8753 if (GET_CODE (op0) == LABEL_REF)
8755 if (GET_CODE (op0) != SYMBOL_REF)
8760 /* TLS references should always be enclosed in UNSPEC. */
8761 if (SYMBOL_REF_TLS_MODEL (op0))
/* Direct references are fine only for binding-local symbols outside
   the large PIC model.  */
8763 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
8764 && ix86_cmodel != CM_LARGE_PIC)
8772 if (GET_CODE (disp) != CONST)
8774 disp = XEXP (disp, 0);
8778 /* We are unsafe to allow PLUS expressions. This limit allowed distance
8779 of GOT tables. We should not need these anyway. */
8780 if (GET_CODE (disp) != UNSPEC
8781 || (XINT (disp, 1) != UNSPEC_GOTPCREL
8782 && XINT (disp, 1) != UNSPEC_GOTOFF
8783 && XINT (disp, 1) != UNSPEC_PLTOFF)
8786 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
8787 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip an optional constant offset, then require a
   recognized UNSPEC (or the Mach-O local-data form).  */
8793 if (GET_CODE (disp) == PLUS)
8795 if (!CONST_INT_P (XEXP (disp, 1)))
8797 disp = XEXP (disp, 0);
8801 if (TARGET_MACHO && darwin_local_data_pic (disp))
8804 if (GET_CODE (disp) != UNSPEC)
8807 switch (XINT (disp, 1))
8812 /* We need to check for both symbols and labels because VxWorks loads
8813 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
8815 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8816 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
8818 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
8819 While ABI specify also 32bit relocation but we don't produce it in
8820 small PIC model at all. */
8821 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8822 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
8824 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
8826 case UNSPEC_GOTTPOFF:
8827 case UNSPEC_GOTNTPOFF:
8828 case UNSPEC_INDNTPOFF:
/* Each TLS unspec kind is valid only for its matching TLS model.  */
8831 disp = XVECEXP (disp, 0, 0);
8832 return (GET_CODE (disp) == SYMBOL_REF
8833 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
8835 disp = XVECEXP (disp, 0, 0);
8836 return (GET_CODE (disp) == SYMBOL_REF
8837 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
8839 disp = XVECEXP (disp, 0, 0);
8840 return (GET_CODE (disp) == SYMBOL_REF
8841 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
8847 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
8848 memory address for an instruction. The MODE argument is the machine mode
8849 for the MEM expression that wants to use this address.
8851 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
8852 convert common non-canonical forms to canonical form so that they will
/* STRICT selects between REG_OK_FOR_*_STRICT_P (after reload, only hard
   regs) and the nonstrict variants (pseudos allowed).  On failure REASON
   / REASON_RTX record why, presumably for a debug-dump path that is
   elided from this view.  */
8856 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
8857 rtx addr, int strict)
8859 struct ix86_address parts;
8860 rtx base, index, disp;
8861 HOST_WIDE_INT scale;
8862 const char *reason = NULL;
8863 rtx reason_rtx = NULL_RTX;
8865 if (ix86_decompose_address (addr, &parts) <= 0)
8867 reason = "decomposition failed";
8872 index = parts.index;
8874 scale = parts.scale;
8876 /* Validate base register.
8878 Don't allow SUBREG's that span more than a word here. It can lead to spill
8879 failures when the base is one word out of a two word structure, which is
8880 represented internally as a DImode int. */
8889 else if (GET_CODE (base) == SUBREG
8890 && REG_P (SUBREG_REG (base))
8891 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
8893 reg = SUBREG_REG (base);
8896 reason = "base is not a register";
8900 if (GET_MODE (base) != Pmode)
8902 reason = "base is not in Pmode";
8906 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
8907 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
8909 reason = "base is not valid";
8914 /* Validate index register.
8916 Don't allow SUBREG's that span more than a word here -- same as above. */
8925 else if (GET_CODE (index) == SUBREG
8926 && REG_P (SUBREG_REG (index))
8927 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
8929 reg = SUBREG_REG (index);
8932 reason = "index is not a register";
8936 if (GET_MODE (index) != Pmode)
8938 reason = "index is not in Pmode";
8942 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
8943 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
8945 reason = "index is not valid";
8950 /* Validate scale factor. */
/* Hardware encodes only scale 1, 2, 4 or 8, and a scale requires an
   index register.  */
8953 reason_rtx = GEN_INT (scale);
8956 reason = "scale without index";
8960 if (scale != 2 && scale != 4 && scale != 8)
8962 reason = "scale is not a valid multiplier";
8967 /* Validate displacement. */
8972 if (GET_CODE (disp) == CONST
8973 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
8974 switch (XINT (XEXP (disp, 0), 1))
8976 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
8977 used. While ABI specify also 32bit relocations, we don't produce
8978 them at all and use IP relative instead. */
8981 gcc_assert (flag_pic);
8983 goto is_legitimate_pic;
8984 reason = "64bit address unspec";
8987 case UNSPEC_GOTPCREL:
8988 gcc_assert (flag_pic);
8989 goto is_legitimate_pic;
8991 case UNSPEC_GOTTPOFF:
8992 case UNSPEC_GOTNTPOFF:
8993 case UNSPEC_INDNTPOFF:
8999 reason = "invalid address unspec";
9003 else if (SYMBOLIC_CONST (disp)
9007 && MACHOPIC_INDIRECT
9008 && !machopic_operand_p (disp)
/* PIC path: symbolic displacements must be valid PIC constructs.  */
9014 if (TARGET_64BIT && (index || base))
9016 /* foo@dtpoff(%rX) is ok. */
9017 if (GET_CODE (disp) != CONST
9018 || GET_CODE (XEXP (disp, 0)) != PLUS
9019 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9020 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9021 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9022 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9024 reason = "non-constant pic memory reference";
9028 else if (! legitimate_pic_address_disp_p (disp))
9030 reason = "displacement is an invalid pic construct";
9034 /* This code used to verify that a symbolic pic displacement
9035 includes the pic_offset_table_rtx register.
9037 While this is good idea, unfortunately these constructs may
9038 be created by "adds using lea" optimization for incorrect
9047 This code is nonsensical, but results in addressing
9048 GOT table with pic_offset_table_rtx base. We can't
9049 just refuse it easily, since it gets matched by
9050 "addsi3" pattern, that later gets split to lea in the
9051 case output register differs from input. While this
9052 can be handled by separate addsi pattern for this case
9053 that never results in lea, this seems to be easier and
9054 correct fix for crash to disable this test. */
/* Non-PIC path: displacement must be a plain constant, and on 64-bit
   it must fit a sign-extended 32-bit immediate.  */
9056 else if (GET_CODE (disp) != LABEL_REF
9057 && !CONST_INT_P (disp)
9058 && (GET_CODE (disp) != CONST
9059 || !legitimate_constant_p (disp))
9060 && (GET_CODE (disp) != SYMBOL_REF
9061 || !legitimate_constant_p (disp)))
9063 reason = "displacement is not constant";
9066 else if (TARGET_64BIT
9067 && !x86_64_immediate_operand (disp, VOIDmode))
9069 reason = "displacement is out of range";
9074 /* Everything looks valid. */
9081 /* Return a unique alias set for the GOT. */
/* Lazily created on first use and cached in a function-local static
   (initialized to -1 as the "not yet allocated" sentinel).  */
9083 static alias_set_type
9084 ix86_GOT_alias_set (void)
9086 static alias_set_type set = -1;
9088 set = new_alias_set ();
9092 /* Return a legitimate reference for ORIG (an address) using the
9093 register REG. If REG is 0, a new pseudo is generated.
9095 There are two types of references that must be handled:
9097 1. Global data references must load the address from the GOT, via
9098 the PIC reg. An insn is emitted to do this load, and the reg is
9101 2. Static data references, constant pool addresses, and code labels
9102 compute the address as an offset from the GOT, whose base is in
9103 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9104 differentiate them from global data objects. The returned
9105 address is the PIC reg + an unspec constant.
9107 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9108 reg also appears in the address. */
9111 legitimize_pic_address (rtx orig, rtx reg)
9118 if (TARGET_MACHO && !TARGET_64BIT)
9121 reg = gen_reg_rtx (Pmode);
9122 /* Use the generic Mach-O PIC machinery. */
9123 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* 64-bit: already-legitimate displacements need no transformation.  */
9127 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
/* 64-bit medium/large models: form PIC-base + @GOTOFF.  */
9129 else if (TARGET_64BIT
9130 && ix86_cmodel != CM_SMALL_PIC
9131 && gotoff_operand (addr, Pmode))
9134 /* This symbol may be referenced via a displacement from the PIC
9135 base address (@GOTOFF). */
/* During reload the PIC register must be marked live by hand since
   df is not updated.  */
9137 if (reload_in_progress)
9138 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9139 if (GET_CODE (addr) == CONST)
9140 addr = XEXP (addr, 0);
9141 if (GET_CODE (addr) == PLUS)
9143 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9145 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9148 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9149 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9151 tmpreg = gen_reg_rtx (Pmode);
9154 emit_move_insn (tmpreg, new_rtx);
9158 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9159 tmpreg, 1, OPTAB_DIRECT);
9162 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
/* 32-bit local data: PIC-base + @GOTOFF, no memory load needed.  */
9164 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9166 /* This symbol may be referenced via a displacement from the PIC
9167 base address (@GOTOFF). */
9169 if (reload_in_progress)
9170 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9171 if (GET_CODE (addr) == CONST)
9172 addr = XEXP (addr, 0);
9173 if (GET_CODE (addr) == PLUS)
9175 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9177 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9180 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9181 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9182 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9186 emit_move_insn (reg, new_rtx);
/* Global symbols (and VxWorks text labels): load through the GOT.  */
9190 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9191 /* We can't use @GOTOFF for text labels on VxWorks;
9192 see gotoff_operand. */
9193 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)
9195 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9197 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9198 return legitimize_dllimport_symbol (addr, true);
9199 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9200 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9201 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9203 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9204 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* 64-bit small/medium model: RIP-relative @GOTPCREL load.  */
9208 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9210 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9211 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9212 new_rtx = gen_const_mem (Pmode, new_rtx);
9213 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9216 reg = gen_reg_rtx (Pmode);
9217 /* Use directly gen_movsi, otherwise the address is loaded
9218 into register for CSE. We don't want to CSE this addresses,
9219 instead we CSE addresses from the GOT table, so skip this. */
9220 emit_insn (gen_movsi (reg, new_rtx));
9225 /* This symbol must be referenced via a load from the
9226 Global Offset Table (@GOT). */
9228 if (reload_in_progress)
9229 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9230 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9231 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9233 new_rtx = force_reg (Pmode, new_rtx);
9234 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9235 new_rtx = gen_const_mem (Pmode, new_rtx);
9236 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9239 reg = gen_reg_rtx (Pmode);
9240 emit_move_insn (reg, new_rtx);
/* Remaining cases: large constants, CONST wrappers, PLUS trees.  */
9246 if (CONST_INT_P (addr)
9247 && !x86_64_immediate_operand (addr, VOIDmode))
9251 emit_move_insn (reg, addr);
9255 new_rtx = force_reg (Pmode, addr);
9257 else if (GET_CODE (addr) == CONST)
9259 addr = XEXP (addr, 0);
9261 /* We must match stuff we generate before. Assume the only
9262 unspecs that can get here are ours. Not that we could do
9263 anything with them anyway.... */
9264 if (GET_CODE (addr) == UNSPEC
9265 || (GET_CODE (addr) == PLUS
9266 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9268 gcc_assert (GET_CODE (addr) == PLUS);
9270 if (GET_CODE (addr) == PLUS)
9272 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9274 /* Check first to see if this is a constant offset from a @GOTOFF
9275 symbol reference. */
9276 if (gotoff_operand (op0, Pmode)
9277 && CONST_INT_P (op1))
9281 if (reload_in_progress)
9282 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9283 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9285 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9286 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9287 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9291 emit_move_insn (reg, new_rtx);
/* Offsets outside +/-16MB cannot ride along as immediates.  */
9297 if (INTVAL (op1) < -16*1024*1024
9298 || INTVAL (op1) >= 16*1024*1024)
9300 if (!x86_64_immediate_operand (op1, Pmode))
9301 op1 = force_reg (Pmode, op1);
9302 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both operands recursively and recombine.  */
9308 base = legitimize_pic_address (XEXP (addr, 0), reg);
9309 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9310 base == reg ? NULL_RTX : reg);
9312 if (CONST_INT_P (new_rtx))
9313 new_rtx = plus_constant (base, INTVAL (new_rtx));
9316 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9318 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9319 new_rtx = XEXP (new_rtx, 1);
9321 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9329 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* The thread pointer is represented as (unspec [const0] UNSPEC_TP);
   when TO_REG, an insn copying it into a fresh pseudo is emitted.  */
9332 get_thread_pointer (int to_reg)
9336 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9340 reg = gen_reg_rtx (Pmode);
9341 insn = gen_rtx_SET (VOIDmode, reg, tp);
9342 insn = emit_insn (insn);
9347 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
9348 false if we expect this to be used for a memory address and true if
9349 we expect to load the address into a register. */
/* Lower a TLS symbol reference X into target RTL according to MODEL
   (global-dynamic / local-dynamic / initial-exec / local-exec) and
   return an rtx for the resulting address.  */
9352 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9354 rtx dest, base, off, pic, tp;
/* General-dynamic: call __tls_get_addr (directly or via the GNU2/TLSDESC
   scheme) to obtain the symbol's address at runtime.  */
9359 case TLS_MODEL_GLOBAL_DYNAMIC:
9360 dest = gen_reg_rtx (Pmode);
/* Under GNU2 TLS the descriptor result is an offset from the thread
   pointer, so fetch the thread pointer up front.  */
9361 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9363 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9365 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
/* 64-bit traditional GD: the tls_global_dynamic_64 pattern returns the
   address in %rax; wrap the call sequence as a const libcall block.  */
9368 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9369 insns = get_insns ();
9372 RTL_CONST_CALL_P (insns) = 1;
9373 emit_libcall_block (insns, dest, rax, x);
9375 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9376 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9378 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9380 if (TARGET_GNU2_TLS)
/* GNU2: final address = thread pointer + descriptor result; record
   a REG_EQUIV so later passes can re-materialize it.  */
9382 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9384 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Local-dynamic: one call gets the module base, then each symbol is
   addressed as base + @DTPOFF constant offset.  */
9388 case TLS_MODEL_LOCAL_DYNAMIC:
9389 base = gen_reg_rtx (Pmode);
9390 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9392 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9394 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9397 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9398 insns = get_insns ();
/* Build an EQUAL note describing the call as __tls_get_addr of the
   module base so the libcall block can be CSEd.  */
9401 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9402 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9403 RTL_CONST_CALL_P (insns) = 1;
9404 emit_libcall_block (insns, base, rax, note);
9406 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9407 emit_insn (gen_tls_local_dynamic_base_64 (base));
9409 emit_insn (gen_tls_local_dynamic_base_32 (base));
9411 if (TARGET_GNU2_TLS)
/* NOTE: this inner X deliberately shadows the parameter — it is the
   module base, used only to form the REG_EQUIV note below.  */
9413 rtx x = ix86_tls_module_base ();
9415 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9416 gen_rtx_MINUS (Pmode, x, tp));
/* Symbol offset within the module: @DTPOFF relocation.  */
9419 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9420 off = gen_rtx_CONST (Pmode, off);
9422 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9424 if (TARGET_GNU2_TLS)
9426 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9428 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Initial-exec: load the TP offset from the GOT, then add the thread
   pointer.  The UNSPEC type selects the relocation flavor.  */
9433 case TLS_MODEL_INITIAL_EXEC:
9437 type = UNSPEC_GOTNTPOFF;
/* During reload we must not create new pseudos; reuse the PIC register
   and mark it live.  */
9441 if (reload_in_progress)
9442 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9443 pic = pic_offset_table_rtx;
9444 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9446 else if (!TARGET_ANY_GNU_TLS)
9448 pic = gen_reg_rtx (Pmode);
9449 emit_insn (gen_set_got (pic));
9450 type = UNSPEC_GOTTPOFF;
9455 type = UNSPEC_INDNTPOFF;
9458 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9459 off = gen_rtx_CONST (Pmode, off);
9461 off = gen_rtx_PLUS (Pmode, pic, off);
/* The GOT slot is constant after startup; use a const MEM in the GOT
   alias set so the load can be moved/CSEd freely.  */
9462 off = gen_const_mem (Pmode, off);
9463 set_mem_alias_set (off, ix86_GOT_alias_set ());
9465 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
/* GNU/64-bit TLS: address is TP + offset.  Keep the thread pointer as
   a segment reference when it can fold into the address (FOR_MOV
   false and direct seg refs allowed).  */
9467 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9468 off = force_reg (Pmode, off);
9469 return gen_rtx_PLUS (Pmode, base, off);
/* Sun TLS flavor: offsets are positive, so subtract from TP instead.  */
9473 base = get_thread_pointer (true);
9474 dest = gen_reg_rtx (Pmode);
9475 emit_insn (gen_subsi3 (dest, base, off));
/* Local-exec: offset is a link-time constant (@NTPOFF/@TPOFF); no GOT
   access is needed at all.  */
9479 case TLS_MODEL_LOCAL_EXEC:
9480 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9481 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9482 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9483 off = gen_rtx_CONST (Pmode, off);
9485 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9487 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9488 return gen_rtx_PLUS (Pmode, base, off);
9492 base = get_thread_pointer (true);
9493 dest = gen_reg_rtx (Pmode);
9494 emit_insn (gen_subsi3 (dest, base, off));
9505 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* GC-protected hash table mapping a decl to its cached __imp_ VAR_DECL,
   so each dllimport symbol is created exactly once.  */
9508 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9509 htab_t dllimport_map;
9512 get_dllimport_decl (tree decl)
9514 struct tree_map *h, in;
9518 size_t namelen, prefixlen;
/* Lazily create the map on first use.  */
9524 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
/* Look up DECL; return the cached entry if we already built one.  */
9526 in.hash = htab_hash_pointer (decl);
9527 in.base.from = decl;
9528 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9529 h = (struct tree_map *) *loc;
/* Cache miss: build an artificial external read-only VAR_DECL that
   stands for the import-table pointer slot.  */
9533 *loc = h = GGC_NEW (struct tree_map);
9535 h->base.from = decl;
9536 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9537 DECL_ARTIFICIAL (to) = 1;
9538 DECL_IGNORED_P (to) = 1;
9539 DECL_EXTERNAL (to) = 1;
9540 TREE_READONLY (to) = 1;
9542 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9543 name = targetm.strip_name_encoding (name);
/* "*__imp_" vs "*__imp__": the extra underscore matches the user label
   prefix; fastcall names already carry their own prefix.  */
9544 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9545 ? "*__imp_" : "*__imp__";
9546 namelen = strlen (name);
9547 prefixlen = strlen (prefix);
9548 imp_name = (char *) alloca (namelen + prefixlen + 1);
9549 memcpy (imp_name, prefix, prefixlen);
9550 memcpy (imp_name + prefixlen, name, namelen + 1);
/* Intern the name in GC storage, then wrap the symbol in a const MEM:
   dereferencing __imp_X yields the address of X.  */
9552 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9553 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9554 SET_SYMBOL_REF_DECL (rtl, to);
9555 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9557 rtl = gen_const_mem (Pmode, rtl);
9558 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9560 SET_DECL_RTL (to, rtl);
9561 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9566 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9567 true if we require the result be a register. */
9570 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
/* Only symbols with an attached decl can be dllimported.  */
9575 gcc_assert (SYMBOL_REF_DECL (symbol));
9576 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
/* The decl's RTL is a MEM load of the __imp_ slot; optionally copy it
   into a register for callers that need one.  */
9578 x = DECL_RTL (imp_decl);
9580 x = force_reg (Pmode, x);
9584 /* Try machine-dependent ways of modifying an illegitimate address
9585 to be legitimate. If we find one, return the new, valid address.
9586 This macro is used in only one place: `memory_address' in explow.c.
9588 OLDX is the address as it was before break_out_memory_refs was called.
9589 In some cases it is useful to look at this to decide what needs to be done.
9591 MODE and WIN are passed so that this macro can use
9592 GO_IF_LEGITIMATE_ADDRESS.
9594 It is always safe for this macro to do nothing. It exists to recognize
9595 opportunities to optimize the output.
9597 For the 80386, we handle X+REG by loading X into a register R and
9598 using R+REG. R will go in a general reg and indexing will be used.
9599 However, if REG is a broken-out memory address or multiplication,
9600 nothing needs to be done because REG can certainly go in a general reg.
9602 When -fpic is used, special handling is needed for symbolic references.
9603 See comments by legitimize_pic_address in i386.c for details. */
9606 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
/* TLS symbols are handled first, before any PIC rewriting.  */
9611 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
9613 return legitimize_tls_address (x, (enum tls_model) log, false);
/* (const (plus TLS-symbol offset)) — legitimize the symbol part and
   re-add the constant offset.  */
9614 if (GET_CODE (x) == CONST
9615 && GET_CODE (XEXP (x, 0)) == PLUS
9616 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9617 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
9619 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
9620 (enum tls_model) log, false);
9621 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* Same two shapes for dllimport symbols on Windows targets.  */
9624 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9626 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
9627 return legitimize_dllimport_symbol (x, true);
9628 if (GET_CODE (x) == CONST
9629 && GET_CODE (XEXP (x, 0)) == PLUS
9630 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9631 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
9633 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
9634 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
9638 if (flag_pic && SYMBOLIC_CONST (x))
9639 return legitimize_pic_address (x, 0);
9641 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
9642 if (GET_CODE (x) == ASHIFT
9643 && CONST_INT_P (XEXP (x, 1))
9644 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
9647 log = INTVAL (XEXP (x, 1));
/* (ashift r n) -> (mult r 2^n): MULT is the canonical index form
   inside x86 addresses.  */
9648 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
9649 GEN_INT (1 << log));
9652 if (GET_CODE (x) == PLUS)
9654 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
9656 if (GET_CODE (XEXP (x, 0)) == ASHIFT
9657 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9658 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
9661 log = INTVAL (XEXP (XEXP (x, 0), 1));
9662 XEXP (x, 0) = gen_rtx_MULT (Pmode,
9663 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
9664 GEN_INT (1 << log));
9667 if (GET_CODE (XEXP (x, 1)) == ASHIFT
9668 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9669 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
9672 log = INTVAL (XEXP (XEXP (x, 1), 1));
9673 XEXP (x, 1) = gen_rtx_MULT (Pmode,
9674 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
9675 GEN_INT (1 << log));
9678 /* Put multiply first if it isn't already. */
9679 if (GET_CODE (XEXP (x, 1)) == MULT)
9681 rtx tmp = XEXP (x, 0);
9682 XEXP (x, 0) = XEXP (x, 1);
9687 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
9688 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
9689 created by virtual register instantiation, register elimination, and
9690 similar optimizations. */
9691 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
9694 x = gen_rtx_PLUS (Pmode,
9695 gen_rtx_PLUS (Pmode, XEXP (x, 0),
9696 XEXP (XEXP (x, 1), 0)),
9697 XEXP (XEXP (x, 1), 1));
9701 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
9702 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
9703 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
9704 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9705 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
9706 && CONSTANT_P (XEXP (x, 1)))
9709 rtx other = NULL_RTX;
/* Pick whichever of the two trailing operands is the CONST_INT; the
   remaining one is folded via plus_constant below.  */
9711 if (CONST_INT_P (XEXP (x, 1)))
9713 constant = XEXP (x, 1);
9714 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
9716 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
9718 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
9719 other = XEXP (x, 1);
9727 x = gen_rtx_PLUS (Pmode,
9728 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
9729 XEXP (XEXP (XEXP (x, 0), 1), 0)),
9730 plus_constant (other, INTVAL (constant)));
/* After each canonicalization round, stop as soon as the address has
   become legitimate.  */
9734 if (changed && legitimate_address_p (mode, x, FALSE))
/* Force any remaining MULT sub-expressions into registers.  */
9737 if (GET_CODE (XEXP (x, 0)) == MULT)
9740 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
9743 if (GET_CODE (XEXP (x, 1)) == MULT)
9746 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
9750 && REG_P (XEXP (x, 1))
9751 && REG_P (XEXP (x, 0)))
9754 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
9757 x = legitimize_pic_address (x, 0);
9760 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: load one side into a temporary register so the sum
   becomes a simple base+index form.  */
9763 if (REG_P (XEXP (x, 0)))
9765 rtx temp = gen_reg_rtx (Pmode);
9766 rtx val = force_operand (XEXP (x, 1), temp);
9768 emit_move_insn (temp, val);
9774 else if (REG_P (XEXP (x, 1)))
9776 rtx temp = gen_reg_rtx (Pmode);
9777 rtx val = force_operand (XEXP (x, 0), temp);
9779 emit_move_insn (temp, val);
9789 /* Print an integer constant expression in assembler syntax. Addition
9790 and subtraction are the only arithmetic that may appear in these
9791 expressions. FILE is the stdio stream to write to, X is the rtx, and
9792 CODE is the operand print code from the output string. */
9795 output_pic_addr_const (FILE *file, rtx x, int code)
9799 switch (GET_CODE (x))
/* PC-relative references only make sense when compiling PIC.  */
9802 gcc_assert (flag_pic);
9807 if (! TARGET_MACHO || TARGET_64BIT)
9808 output_addr_const (file, x);
9811 const char *name = XSTR (x, 0);
9813 /* Mark the decl as referenced so that cgraph will
9814 output the function. */
9815 if (SYMBOL_REF_DECL (x))
9816 mark_decl_referenced (SYMBOL_REF_DECL (x));
/* Darwin: undefined functions must go through their indirection stub.  */
9819 if (MACHOPIC_INDIRECT
9820 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
9821 name = machopic_indirection_name (x, /*stub_p=*/true);
9823 assemble_name (file, name);
/* 'P' requests a PLT call suffix for non-local symbols (ELF only).  */
9825 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
9826 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
9827 fputs ("@PLT", file);
9834 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
9835 assemble_name (asm_out_file, buf);
9839 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9843 /* This used to output parentheses around the expression,
9844 but that does not work on the 386 (either ATT or BSD assembler). */
9845 output_pic_addr_const (file, XEXP (x, 0), code);
9849 if (GET_MODE (x) == VOIDmode)
9851 /* We can use %d if the number is <32 bits and positive. */
9852 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
9853 fprintf (file, "0x%lx%08lx",
9854 (unsigned long) CONST_DOUBLE_HIGH (x),
9855 (unsigned long) CONST_DOUBLE_LOW (x));
9857 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
9860 /* We can't handle floating point constants;
9861 PRINT_OPERAND must handle them. */
9862 output_operand_lossage ("floating constant misused");
9866 /* Some assemblers need integer constants to appear first. */
9867 if (CONST_INT_P (XEXP (x, 0)))
9869 output_pic_addr_const (file, XEXP (x, 0), code);
9871 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: only symbol-minus-constant shapes are expected here.  */
9875 gcc_assert (CONST_INT_P (XEXP (x, 1)));
9876 output_pic_addr_const (file, XEXP (x, 1), code);
9878 output_pic_addr_const (file, XEXP (x, 0), code);
/* Grouping brackets differ per assembler dialect: () for Intel, [] for
   the AT&T/BSD assemblers.  */
9884 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
9885 output_pic_addr_const (file, XEXP (x, 0), code);
9887 output_pic_addr_const (file, XEXP (x, 1), code);
9889 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand followed by the relocation
   modifier that matches the unspec code.  */
9893 gcc_assert (XVECLEN (x, 0) == 1);
9894 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
9895 switch (XINT (x, 1))
9898 fputs ("@GOT", file);
9901 fputs ("@GOTOFF", file);
9904 fputs ("@PLTOFF", file);
9906 case UNSPEC_GOTPCREL:
9907 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9908 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
9910 case UNSPEC_GOTTPOFF:
9911 /* FIXME: This might be @TPOFF in Sun ld too. */
9912 fputs ("@GOTTPOFF", file);
9915 fputs ("@TPOFF", file);
9919 fputs ("@TPOFF", file);
9921 fputs ("@NTPOFF", file);
9924 fputs ("@DTPOFF", file);
9926 case UNSPEC_GOTNTPOFF:
9928 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9929 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
9931 fputs ("@GOTNTPOFF", file);
9933 case UNSPEC_INDNTPOFF:
9934 fputs ("@INDNTPOFF", file);
9937 output_operand_lossage ("invalid UNSPEC as operand");
9943 output_operand_lossage ("invalid expression as operand");
9947 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9948 We need to emit DTP-relative relocations. */
9950 static void ATTRIBUTE_UNUSED
9951 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
/* Emit the address constant with an @DTPOFF relocation suffix; the
   directive chosen depends on SIZE (fragment elided here).  */
9953 fputs (ASM_LONG, file);
9954 output_addr_const (file, x);
9955 fputs ("@DTPOFF", file);
/* Pad the upper half with zero — presumably the 8-byte case emitting
   two 4-byte words; TODO confirm against full source.  */
9961 fputs (", 0", file);
9968 /* In the name of slightly smaller debug output, and to cater to
9969 general assembler lossage, recognize PIC+GOTOFF and turn it back
9970 into a direct symbol reference.
9972 On Darwin, this is necessary to avoid a crash, because Darwin
9973 has a different PIC label for each routine but the DWARF debugging
9974 information is not associated with any particular routine, so it's
9975 necessary to remove references to the PIC label from RTL stored by
9976 the DWARF output code. */
9979 ix86_delegitimize_address (rtx orig_x)
9982 /* reg_addend is NULL or a multiple of some register. */
9983 rtx reg_addend = NULL_RTX;
9984 /* const_addend is NULL or a const_int. */
9985 rtx const_addend = NULL_RTX;
9986 /* This is the result, or NULL. */
9987 rtx result = NULL_RTX;
/* 64-bit: undo (const (unspec [sym] GOTPCREL)) back to the symbol.  */
9994 if (GET_CODE (x) != CONST
9995 || GET_CODE (XEXP (x, 0)) != UNSPEC
9996 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
9999 return XVECEXP (XEXP (x, 0), 0, 0);
/* 32-bit: expect (plus PIC-reg (const ...)), possibly with an extra
   scaled-index term alongside the PIC register.  */
10002 if (GET_CODE (x) != PLUS
10003 || GET_CODE (XEXP (x, 1)) != CONST)
10006 if (REG_P (XEXP (x, 0))
10007 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
10008 /* %ebx + GOT/GOTOFF */
10010 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10012 /* %ebx + %reg * scale + GOT/GOTOFF */
10013 reg_addend = XEXP (x, 0)
10014 if (REG_P (XEXP (reg_addend, 0))
10015 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
10016 reg_addend = XEXP (reg_addend, 1);
10017 else if (REG_P (XEXP (reg_addend, 1))
10018 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
10019 reg_addend = XEXP (reg_addend, 0);
/* The leftover addend must itself be a register or scaled index.  */
10022 if (!REG_P (reg_addend)
10023 && GET_CODE (reg_addend) != MULT
10024 && GET_CODE (reg_addend) != ASHIFT)
/* Peel a constant offset off the CONST wrapper, if present.  */
10030 x = XEXP (XEXP (x, 1), 0);
10031 if (GET_CODE (x) == PLUS
10032 && CONST_INT_P (XEXP (x, 1)))
10034 const_addend = XEXP (x, 1);
/* GOT references appear through a MEM; GOTOFF references do not — the
   MEM_P checks distinguish the two.  */
10038 if (GET_CODE (x) == UNSPEC
10039 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10040 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10041 result = XVECEXP (x, 0, 0);
10043 if (TARGET_MACHO && darwin_local_data_pic (x)
10044 && !MEM_P (orig_x))
10045 result = XEXP (x, 0);
/* Re-attach the stripped constant and register addends.  */
10051 result = gen_rtx_PLUS (Pmode, result, const_addend);
10053 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10057 /* If X is a machine specific address (i.e. a symbol or label being
10058 referenced as a displacement from the GOT implemented using an
10059 UNSPEC), then return the base term. Otherwise return X. */
10062 ix86_find_base_term (rtx x)
/* 64-bit path: strip (const (plus (unspec [sym] GOTPCREL) offset)).  */
10068 if (GET_CODE (x) != CONST)
10070 term = XEXP (x, 0);
10071 if (GET_CODE (term) == PLUS
10072 && (CONST_INT_P (XEXP (term, 1))
10073 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10074 term = XEXP (term, 0);
10075 if (GET_CODE (term) != UNSPEC
10076 || XINT (term, 1) != UNSPEC_GOTPCREL)
10079 term = XVECEXP (term, 0, 0);
/* Only symbols and labels qualify as base terms.  */
10081 if (GET_CODE (term) != SYMBOL_REF
10082 && GET_CODE (term) != LABEL_REF)
/* 32-bit path: fall back to full delegitimization.  */
10088 term = ix86_delegitimize_address (x);
10090 if (GET_CODE (term) != SYMBOL_REF
10091 && GET_CODE (term) != LABEL_REF)
/* Emit the condition-code suffix (e.g. "e", "ne", "a") for CODE in MODE
   to FILE.  REVERSE inverts the condition; FP selects the fcmov-style
   spellings where they differ from the integer ones.  */
10098 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10099 int fp, FILE *file)
10101 const char *suffix;
10103 if (mode == CCFPmode || mode == CCFPUmode)
10105 enum rtx_code second_code, bypass_code;
/* FP compares must map 1:1 onto an integer condition here — multi-insn
   sequences are not representable as a single suffix.  */
10106 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10107 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10108 code = ix86_fp_compare_code_to_integer (code);
10112 code = reverse_condition (code);
10163 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10167 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10168 Those same assemblers have the same but opposite lossage on cmov. */
10169 if (mode == CCmode)
10170 suffix = fp ? "nbe" : "a";
10171 else if (mode == CCCmode)
10174 gcc_unreachable ();
10190 gcc_unreachable ();
10194 gcc_assert (mode == CCmode || mode == CCCmode);
10211 gcc_unreachable ();
10215 /* ??? As above. */
10216 gcc_assert (mode == CCmode || mode == CCCmode);
10217 suffix = fp ? "nb" : "ae";
10220 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10224 /* ??? As above. */
10225 if (mode == CCmode)
10227 else if (mode == CCCmode)
10228 suffix = fp ? "nb" : "ae";
10230 gcc_unreachable ();
/* Parity flag: unordered/ordered FP results.  */
10233 suffix = fp ? "u" : "p";
10236 suffix = fp ? "nu" : "np";
10239 gcc_unreachable ();
10241 fputs (suffix, file);
10244 /* Print the name of register X to FILE based on its machine mode and number.
10245 If CODE is 'w', pretend the mode is HImode.
10246 If CODE is 'b', pretend the mode is QImode.
10247 If CODE is 'k', pretend the mode is SImode.
10248 If CODE is 'q', pretend the mode is DImode.
10249 If CODE is 'x', pretend the mode is V4SFmode.
10250 If CODE is 't', pretend the mode is V8SFmode.
10251 If CODE is 'h', pretend the reg is the 'high' byte register.
10252 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10253 If CODE is 'd', duplicate the operand for AVX instruction.
10257 print_reg (rtx x, int code, FILE *file)
10260 bool duplicated = code == 'd' && TARGET_AVX;
/* Internal-only registers must never reach assembly output.  */
10262 gcc_assert (x == pc_rtx
10263 || (REGNO (x) != ARG_POINTER_REGNUM
10264 && REGNO (x) != FRAME_POINTER_REGNUM
10265 && REGNO (x) != FLAGS_REG
10266 && REGNO (x) != FPSR_REG
10267 && REGNO (x) != FPCR_REG));
10269 if (ASSEMBLER_DIALECT == ASM_ATT)
/* pc_rtx prints as %rip — valid only in 64-bit mode.  */
10274 gcc_assert (TARGET_64BIT);
10275 fputs ("rip", file);
/* Translate the size-override letter into a byte size in CODE.  */
10279 if (code == 'w' || MMX_REG_P (x))
10281 else if (code == 'b')
10283 else if (code == 'k')
10285 else if (code == 'q')
10287 else if (code == 'y')
10289 else if (code == 'h')
10291 else if (code == 'x')
10293 else if (code == 't')
10296 code = GET_MODE_SIZE (GET_MODE (x));
10298 /* Irritatingly, AMD extended registers use different naming convention
10299 from the normal registers. */
10300 if (REX_INT_REG_P (x))
10302 gcc_assert (TARGET_64BIT);
/* r8..r15 have no high-byte halves (ah/bh-style).  */
10306 error ("extended registers have no high halves");
10309 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10312 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10315 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10318 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10321 error ("unsupported operand size for extended register");
10331 if (STACK_TOP_P (x))
/* 4/8-byte integer regs take an 'e'/'r' prefix (eax/rax); FP and
   vector regs do not.  */
10340 if (! ANY_FP_REG_P (x))
10341 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10346 reg = hi_reg_name[REGNO (x)];
10349 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10351 reg = qi_reg_name[REGNO (x)];
10354 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10356 reg = qi_high_reg_name[REGNO (x)];
/* 2-byte: strip the leading 'e' from the 32-bit name (ax from eax).  */
10361 gcc_assert (!duplicated);
10363 fputs (hi_reg_name[REGNO (x)] + 1, file);
10368 gcc_unreachable ();
/* AVX 'd': emit the register a second time as a duplicated source.  */
10374 if (ASSEMBLER_DIALECT == ASM_ATT)
10375 fprintf (file, ", %%%s", reg);
10377 fprintf (file, ", %s", reg);
10381 /* Locate some local-dynamic symbol still in use by this function
10382 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: record the first local-dynamic TLS symbol
   found into cfun->machine->some_ld_name.  */
10386 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10390 if (GET_CODE (x) == SYMBOL_REF
10391 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10393 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return the name of some local-dynamic TLS symbol used in the current
   function, caching the result in cfun->machine.  Aborts if none is
   found — callers only ask when one must exist.  */
10400 static const char *
10401 get_some_local_dynamic_name (void)
10405 if (cfun->machine->some_ld_name)
10406 return cfun->machine->some_ld_name;
/* Scan every insn pattern until the callback records a name.  */
10408 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10410 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10411 return cfun->machine->some_ld_name;
10413 gcc_unreachable ();
10416 /* Meaning of CODE:
10417 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10418 C -- print opcode suffix for set/cmov insn.
10419 c -- like C, but print reversed condition
10420 E,e -- likewise, but for compare-and-branch fused insn.
10421 F,f -- likewise, but for floating-point.
10422 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10424 R -- print the prefix for register names.
10425 z -- print the opcode suffix for the size of the current operand.
10426 * -- print a star (in certain assembler syntax)
10427 A -- print an absolute memory reference.
10428 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10429 s -- print a shift double count, followed by the assemblers argument
10431 b -- print the QImode name of the register for the indicated operand.
10432 %b0 would print %al if operands[0] is reg 0.
10433 w -- likewise, print the HImode name of the register.
10434 k -- likewise, print the SImode name of the register.
10435 q -- likewise, print the DImode name of the register.
10436 x -- likewise, print the V4SFmode name of the register.
10437 t -- likewise, print the V8SFmode name of the register.
10438 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10439 y -- print "st(0)" instead of "st" as a register.
10440 d -- print duplicated register operand for AVX instruction.
10441 D -- print condition for SSE cmp instruction.
10442 P -- if PIC, print an @PLT suffix.
10443 X -- don't print any sort of PIC '@' suffix for a symbol.
10444 & -- print some in-use local-dynamic symbol name.
10445 H -- print a memory address offset by 8; used for sse high-parts
10446 Y -- print condition for SSE5 com* instruction.
10447 + -- print a branch hint as 'cs' or 'ds' prefix
10448 ; -- print a semicolon (after prefixes due to bug in older gas).
/* Print operand X to FILE, modified by the letter CODE per the table
   above.  Dispatches on CODE first, then on the rtx class of X.  */
10452 print_operand (FILE *file, rtx x, int code)
10459 if (ASSEMBLER_DIALECT == ASM_ATT)
10464 assemble_name (file, get_some_local_dynamic_name ());
10468 switch (ASSEMBLER_DIALECT)
10475 /* Intel syntax. For absolute addresses, registers should not
10476 be surrounded by braces. */
10480 PRINT_OPERAND (file, x, 0);
10487 gcc_unreachable ();
10490 PRINT_OPERAND (file, x, 0);
/* Size-suffix letters are only emitted in AT&T syntax; Intel syntax
   encodes operand size in the "PTR" prefix instead.  */
10495 if (ASSEMBLER_DIALECT == ASM_ATT)
10500 if (ASSEMBLER_DIALECT == ASM_ATT)
10505 if (ASSEMBLER_DIALECT == ASM_ATT)
10510 if (ASSEMBLER_DIALECT == ASM_ATT)
10515 if (ASSEMBLER_DIALECT == ASM_ATT)
10520 if (ASSEMBLER_DIALECT == ASM_ATT)
10525 /* 387 opcodes don't get size suffixes if the operands are
10527 if (STACK_REG_P (x))
10530 /* Likewise if using Intel opcodes. */
10531 if (ASSEMBLER_DIALECT == ASM_INTEL)
10534 /* This is the size of op from size of operand. */
10535 switch (GET_MODE_SIZE (GET_MODE (x)))
10544 #ifdef HAVE_GAS_FILDS_FISTS
10554 if (GET_MODE (x) == SFmode)
10569 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10573 #ifdef GAS_MNEMONICS
10588 gcc_unreachable ();
/* 's': shift-double count; omitted entirely on assemblers where
   SHIFT_DOUBLE_OMITS_COUNT holds and the count is in %cl.  */
10605 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
10607 PRINT_OPERAND (file, x, 0);
10608 fputs (", ", file);
10613 /* Little bit of braindamage here. The SSE compare instructions
10614 does use completely different names for the comparisons that the
10615 fp conditional moves. */
10618 switch (GET_CODE (x))
10621 fputs ("eq", file);
10624 fputs ("eq_us", file);
10627 fputs ("lt", file);
10630 fputs ("nge", file);
10633 fputs ("le", file);
10636 fputs ("ngt", file);
10639 fputs ("unord", file);
10642 fputs ("neq", file);
10645 fputs ("neq_oq", file);
10648 fputs ("ge", file);
10651 fputs ("nlt", file);
10654 fputs ("gt", file);
10657 fputs ("nle", file);
10660 fputs ("ord", file);
10663 gcc_unreachable ();
10668 switch (GET_CODE (x))
10672 fputs ("eq", file);
10676 fputs ("lt", file);
10680 fputs ("le", file);
10683 fputs ("unord", file);
10687 fputs ("neq", file);
10691 fputs ("nlt", file);
10695 fputs ("nle", file);
10698 fputs ("ord", file);
10701 gcc_unreachable ();
/* 'O': Sun assembler cmov mnemonics carry an explicit size suffix.  */
10706 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10707 if (ASSEMBLER_DIALECT == ASM_ATT)
10709 switch (GET_MODE (x))
10711 case HImode: putc ('w', file); break;
10713 case SFmode: putc ('l', file); break;
10715 case DFmode: putc ('q', file); break;
10716 default: gcc_unreachable ();
/* 'C'/'F': condition suffix from the comparison's own mode; 'F' uses
   the floating-point (fcmov) spellings.  */
10723 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
10726 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10727 if (ASSEMBLER_DIALECT == ASM_ATT)
10730 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
10733 /* Like above, but reverse condition */
10735 /* Check to see if argument to %c is really a constant
10736 and not a condition code which needs to be reversed. */
10737 if (!COMPARISON_P (x))
10739 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
10742 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
10745 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10746 if (ASSEMBLER_DIALECT == ASM_ATT)
10749 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'E'/'e': fused compare-and-branch always uses CCmode.  */
10753 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
10757 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
10761 /* It doesn't actually matter what mode we use here, as we're
10762 only going to use this for printing. */
10763 x = adjust_address_nv (x, DImode, 8);
/* '+': emit a ds/cs branch-hint prefix from the REG_BR_PROB note, but
   only when the static prediction disagrees with the CPU's default
   (backward-taken/forward-not-taken) heuristic.  */
10771 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
10774 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
10777 int pred_val = INTVAL (XEXP (x, 0));
/* Only hint clearly-biased branches (outside the 45%..55% band).  */
10779 if (pred_val < REG_BR_PROB_BASE * 45 / 100
10780 || pred_val > REG_BR_PROB_BASE * 55 / 100)
10782 int taken = pred_val > REG_BR_PROB_BASE / 2;
10783 int cputaken = final_forward_branch_p (current_output_insn) == 0;
10785 /* Emit hints only in the case default branch prediction
10786 heuristics would fail. */
10787 if (taken != cputaken)
10789 /* We use 3e (DS) prefix for taken branches and
10790 2e (CS) prefix for not taken branches. */
10792 fputs ("ds ; ", file);
10794 fputs ("cs ; ", file);
/* 'Y': SSE5 com* comparison names.  */
10802 switch (GET_CODE (x))
10805 fputs ("neq", file);
10808 fputs ("eq", file);
10812 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
10816 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
10820 fputs ("le", file);
10824 fputs ("lt", file);
10827 fputs ("unord", file);
10830 fputs ("ord", file);
10833 fputs ("ueq", file);
10836 fputs ("nlt", file);
10839 fputs ("nle", file);
10842 fputs ("ule", file);
10845 fputs ("ult", file);
10848 fputs ("une", file);
10851 gcc_unreachable ();
10857 fputs (" ; ", file);
10864 output_operand_lossage ("invalid operand code '%c'", code);
/* No (more) CODE handling: print X itself by rtx class.  */
10869 print_reg (x, code, file);
10871 else if (MEM_P (x))
10873 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
10874 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
10875 && GET_MODE (x) != BLKmode)
10878 switch (GET_MODE_SIZE (GET_MODE (x)))
10880 case 1: size = "BYTE"; break;
10881 case 2: size = "WORD"; break;
10882 case 4: size = "DWORD"; break;
10883 case 8: size = "QWORD"; break;
10884 case 12: size = "XWORD"; break;
10886 if (GET_MODE (x) == XFmode)
10892 gcc_unreachable ();
10895 /* Check for explicit size override (codes 'b', 'w' and 'k') */
10898 else if (code == 'w')
10900 else if (code == 'k')
10903 fputs (size, file);
10904 fputs (" PTR ", file);
10908 /* Avoid (%rip) for call operands. */
10909 if (CONSTANT_ADDRESS_P (x) && code == 'P'
10910 && !CONST_INT_P (x))
10911 output_addr_const (file, x);
10912 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
10913 output_operand_lossage ("invalid constraints for operand");
10915 output_address (x);
/* SFmode immediates are emitted as their 32-bit bit pattern.  */
10918 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
10923 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10924 REAL_VALUE_TO_TARGET_SINGLE (r, l);
10926 if (ASSEMBLER_DIALECT == ASM_ATT)
10928 fprintf (file, "0x%08lx", (long unsigned int) l);
10931 /* These float cases don't actually occur as immediate operands. */
10932 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
10936 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
10937 fprintf (file, "%s", dstr);
10940 else if (GET_CODE (x) == CONST_DOUBLE
10941 && GET_MODE (x) == XFmode)
10945 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
10946 fprintf (file, "%s", dstr);
10951 /* We have patterns that allow zero sets of memory, for instance.
10952 In 64-bit mode, we should probably support all 8-byte vectors,
10953 since we can in fact encode that into an immediate. */
10954 if (GET_CODE (x) == CONST_VECTOR)
10956 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediates get a '$' in AT&T syntax; symbolic constants in Intel
   syntax need "OFFSET FLAT:".  */
10962 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
10964 if (ASSEMBLER_DIALECT == ASM_ATT)
10967 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
10968 || GET_CODE (x) == LABEL_REF)
10970 if (ASSEMBLER_DIALECT == ASM_ATT)
10973 fputs ("OFFSET FLAT:", file);
10976 if (CONST_INT_P (x))
10977 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10979 output_pic_addr_const (file, x, code);
10981 output_addr_const (file, x);
10985 /* Print a memory operand whose address is ADDR. */
10988 print_operand_address (FILE *file, rtx addr)
10990 struct ix86_address parts;
10991 rtx base, index, disp;
/* Decompose ADDR into base/index/displacement/scale/segment.  */
10993 int ok = ix86_decompose_address (addr, &parts);
10998 index = parts.index;
11000 scale = parts.scale;
/* Non-default segment override (%fs:/%gs:).  */
11008 if (ASSEMBLER_DIALECT == ASM_ATT)
11010 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11013 gcc_unreachable ();
11016 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11017 if (TARGET_64BIT && !base && !index)
/* Strip a constant offset to find the underlying symbol; RIP-relative
   form is used only for labels and non-TLS symbols.  */
11021 if (GET_CODE (disp) == CONST
11022 && GET_CODE (XEXP (disp, 0)) == PLUS
11023 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11024 symbol = XEXP (XEXP (disp, 0), 0);
11026 if (GET_CODE (symbol) == LABEL_REF
11027 || (GET_CODE (symbol) == SYMBOL_REF
11028 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11031 if (!base && !index)
11033 /* Displacement only requires special attention. */
11035 if (CONST_INT_P (disp))
/* Intel syntax needs an explicit ds: on bare constant addresses.  */
11037 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11038 fputs ("ds:", file);
11039 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11042 output_pic_addr_const (file, disp, 0);
11044 output_addr_const (file, disp);
/* General case, AT&T form: disp(base,index,scale).  */
11048 if (ASSEMBLER_DIALECT == ASM_ATT)
11053 output_pic_addr_const (file, disp, 0);
11054 else if (GET_CODE (disp) == LABEL_REF)
11055 output_asm_label (disp);
11057 output_addr_const (file, disp);
11062 print_reg (base, 0, file);
11066 print_reg (index, 0, file);
11068 fprintf (file, ",%d", scale);
/* Intel form: [base+index*scale+disp], with the displacement printed
   last and signed explicitly.  */
11074 rtx offset = NULL_RTX;
11078 /* Pull out the offset of a symbol; print any symbol itself. */
11079 if (GET_CODE (disp) == CONST
11080 && GET_CODE (XEXP (disp, 0)) == PLUS
11081 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11083 offset = XEXP (XEXP (disp, 0), 1);
11084 disp = gen_rtx_CONST (VOIDmode,
11085 XEXP (XEXP (disp, 0), 0));
11089 output_pic_addr_const (file, disp, 0);
11090 else if (GET_CODE (disp) == LABEL_REF)
11091 output_asm_label (disp);
11092 else if (CONST_INT_P (disp))
11095 output_addr_const (file, disp);
11101 print_reg (base, 0, file);
11104 if (INTVAL (offset) >= 0)
11106 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11110 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11117 print_reg (index, 0, file);
11119 fprintf (file, "*%d", scale);
/* NOTE(review): target hook that prints TLS-related UNSPEC address
   constants (@GOTTPOFF, @TPOFF, @NTPOFF, @DTPOFF, @GOTNTPOFF, @INDNTPOFF
   relocations).  Returns false for anything that is not a recognized
   UNSPEC — elided return statements presumed; confirm in full file. */
11127 output_addr_const_extra (FILE *file, rtx x)
11131 if (GET_CODE (x) != UNSPEC)
11134 op = XVECEXP (x, 0, 0);
11135 switch (XINT (x, 1))
11137 case UNSPEC_GOTTPOFF:
11138 output_addr_const (file, op);
11139 /* FIXME: This might be @TPOFF in Sun ld. */
11140 fputs ("@GOTTPOFF", file);
11143 output_addr_const (file, op);
11144 fputs ("@TPOFF", file);
11146 case UNSPEC_NTPOFF:
11147 output_addr_const (file, op);
/* NOTE(review): the two fputs below are presumably the 64-bit (@TPOFF)
   and 32-bit (@NTPOFF) arms of an elided TARGET_64BIT test — verify. */
11149 fputs ("@TPOFF", file);
11151 fputs ("@NTPOFF", file);
11153 case UNSPEC_DTPOFF:
11154 output_addr_const (file, op);
11155 fputs ("@DTPOFF", file);
11157 case UNSPEC_GOTNTPOFF:
11158 output_addr_const (file, op);
11160 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11161 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11163 fputs ("@GOTNTPOFF", file);
11165 case UNSPEC_INDNTPOFF:
11166 output_addr_const (file, op);
11167 fputs ("@INDNTPOFF", file);
11177 /* Split one or more DImode RTL references into pairs of SImode
11178 references. The RTL can be REG, offsettable MEM, integer constant, or
11179 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11180 split and "num" is its length. lo_half and hi_half are output arrays
11181 that parallel "operands". */
11184 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11188 rtx op = operands[num];
11190 /* simplify_subreg refuse to split volatile memory addresses,
11191 but we still have to handle it. */
/* MEM case: take SImode subwords at byte offsets 0 and 4. */
11194 lo_half[num] = adjust_address (op, SImode, 0);
11195 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM case: VOIDmode constants are treated as DImode. */
11199 lo_half[num] = simplify_gen_subreg (SImode, op,
11200 GET_MODE (op) == VOIDmode
11201 ? DImode : GET_MODE (op), 0);
11202 hi_half[num] = simplify_gen_subreg (SImode, op,
11203 GET_MODE (op) == VOIDmode
11204 ? DImode : GET_MODE (op), 4);
11208 /* Split one or more TImode RTL references into pairs of DImode
11209 references. The RTL can be REG, offsettable MEM, integer constant, or
11210 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11211 split and "num" is its length. lo_half and hi_half are output arrays
11212 that parallel "operands". */
11215 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11219 rtx op = operands[num];
11221 /* simplify_subreg refuse to split volatile memory addresses, but we
11222 still have to handle it. */
/* MEM case: DImode subwords at byte offsets 0 and 8. */
11225 lo_half[num] = adjust_address (op, DImode, 0);
11226 hi_half[num] = adjust_address (op, DImode, 8);
/* Non-MEM case: subreg extraction from the TImode value. */
11230 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11231 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11236 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11237 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11238 is the expression of the binary operation. The output may either be
11239 emitted here, or returned to the caller, like all output_* functions.
11241 There is no guarantee that the operands are the same mode, as they
11242 might be within FLOAT or FLOAT_EXTEND expressions. */
11244 #ifndef SYSV386_COMPAT
11245 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11246 wants to fix the assemblers because that causes incompatibility
11247 with gcc. No-one wants to fix gcc because that causes
11248 incompatibility with assemblers... You can use the option of
11249 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11250 #define SYSV386_COMPAT 1
11254 output_387_binary_op (rtx insn, rtx *operands)
11256 static char buf[40];
11259 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11261 #ifdef ENABLE_CHECKING
11262 /* Even if we do not want to check the inputs, this documents input
11263 constraints. Which helps in understanding the following code. */
11264 if (STACK_REG_P (operands[0])
11265 && ((REG_P (operands[1])
11266 && REGNO (operands[0]) == REGNO (operands[1])
11267 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11268 || (REG_P (operands[2])
11269 && REGNO (operands[0]) == REGNO (operands[2])
11270 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11271 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11274 gcc_assert (is_sse);
/* NOTE(review): each arm of this switch presumably selects the mnemonic
   stem (add/sub/mul/div and its integer "fi" variant) based on whether an
   operand is MODE_INT — the assignments are elided; verify in full file. */
11277 switch (GET_CODE (operands[3]))
11280 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11281 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11289 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11290 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11298 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11299 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11307 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11308 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11316 gcc_unreachable ();
/* SSE case: emit a scalar ss/sd instruction; the three-operand template
   is presumably the AVX form (elided condition — verify). */
11323 strcpy (buf, ssep);
11324 if (GET_MODE (operands[0]) == SFmode)
11325 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11327 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11331 strcpy (buf, ssep + 1);
11332 if (GET_MODE (operands[0]) == SFmode)
11333 strcat (buf, "ss\t{%2, %0|%0, %2}");
11335 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 case: pick the operand-order / popping suffix for the stem. */
11341 switch (GET_CODE (operands[3]))
11345 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
/* Commutative ops: canonicalize so operands[0] == operands[1]. */
11347 rtx temp = operands[2];
11348 operands[2] = operands[1];
11349 operands[1] = temp;
11352 /* know operands[0] == operands[1]. */
11354 if (MEM_P (operands[2]))
11360 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11362 if (STACK_TOP_P (operands[0]))
11363 /* How is it that we are storing to a dead operand[2]?
11364 Well, presumably operands[1] is dead too. We can't
11365 store the result to st(0) as st(0) gets popped on this
11366 instruction. Instead store to operands[2] (which I
11367 think has to be st(1)). st(1) will be popped later.
11368 gcc <= 2.8.1 didn't have this check and generated
11369 assembly code that the Unixware assembler rejected. */
11370 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11372 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11376 if (STACK_TOP_P (operands[0]))
11377 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11379 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): operand order matters. */
11384 if (MEM_P (operands[1]))
11390 if (MEM_P (operands[2]))
11396 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11399 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11400 derived assemblers, confusingly reverse the direction of
11401 the operation for fsub{r} and fdiv{r} when the
11402 destination register is not st(0). The Intel assembler
11403 doesn't have this brain damage. Read !SYSV386_COMPAT to
11404 figure out what the hardware really does. */
11405 if (STACK_TOP_P (operands[0]))
11406 p = "{p\t%0, %2|rp\t%2, %0}";
11408 p = "{rp\t%2, %0|p\t%0, %2}";
11410 if (STACK_TOP_P (operands[0]))
11411 /* As above for fmul/fadd, we can't store to st(0). */
11412 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11414 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11419 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11422 if (STACK_TOP_P (operands[0]))
11423 p = "{rp\t%0, %1|p\t%1, %0}";
11425 p = "{p\t%1, %0|rp\t%0, %1}";
11427 if (STACK_TOP_P (operands[0]))
11428 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11430 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11435 if (STACK_TOP_P (operands[0]))
11437 if (STACK_TOP_P (operands[1]))
11438 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11440 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11443 else if (STACK_TOP_P (operands[1]))
11446 p = "{\t%1, %0|r\t%0, %1}";
11448 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11454 p = "{r\t%2, %0|\t%0, %2}";
11456 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11462 gcc_unreachable ();
11469 /* Return needed mode for entity in optimize_mode_switching pass. */
11472 ix86_mode_needed (int entity, rtx insn)
11474 enum attr_i387_cw mode;
11476 /* The mode UNINITIALIZED is used to store control word after a
11477 function call or ASM pattern. The mode ANY specify that function
11478 has no requirements on the control word and make no changes in the
11479 bits we are interested in. */
/* NOTE(review): elided condition presumably also covers CALL_P (insn) —
   verify against full file. */
11482 || (NONJUMP_INSN_P (insn)
11483 && (asm_noperands (PATTERN (insn)) >= 0
11484 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11485 return I387_CW_UNINITIALIZED;
11487 if (recog_memoized (insn) < 0)
11488 return I387_CW_ANY;
11490 mode = get_attr_i387_cw (insn);
/* Map the insn's i387_cw attribute to the requested entity's mode;
   the elided switch arms presumably return MODE for the matching entity. */
11495 if (mode == I387_CW_TRUNC)
11500 if (mode == I387_CW_FLOOR)
11505 if (mode == I387_CW_CEIL)
11510 if (mode == I387_CW_MASK_PM)
11515 gcc_unreachable ();
11518 return I387_CW_ANY;
11521 /* Output code to initialize control word copies used by trunc?f?i and
11522 rounding patterns. CURRENT_MODE is set to current control word,
11523 while NEW_MODE is set to new control word. */
11526 emit_i387_cw_initialization (int mode)
11528 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
11531 enum ix86_stack_slot slot;
11533 rtx reg = gen_reg_rtx (HImode);
/* Save the current x87 control word and load it into a pseudo. */
11535 emit_insn (gen_x86_fnstcw_1 (stored_mode));
11536 emit_move_insn (reg, copy_rtx (stored_mode));
/* Slow path (or size-optimized): modify the rounding-control bits
   (bits 10-11 of the CW) with explicit and/or. */
11538 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
11539 || optimize_function_for_size_p (cfun))
11543 case I387_CW_TRUNC:
11544 /* round toward zero (truncate) */
11545 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
11546 slot = SLOT_CW_TRUNC;
11549 case I387_CW_FLOOR:
11550 /* round down toward -oo */
11551 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11552 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
11553 slot = SLOT_CW_FLOOR;
11557 /* round up toward +oo */
11558 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11559 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
11560 slot = SLOT_CW_CEIL;
11563 case I387_CW_MASK_PM:
11564 /* mask precision exception for nearbyint() */
11565 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11566 slot = SLOT_CW_MASK_PM;
11570 gcc_unreachable ();
/* Fast path: insert the 2-bit rounding mode directly via movsi_insv_1
   (writes the high byte in one insn, avoiding partial-register stalls). */
11577 case I387_CW_TRUNC:
11578 /* round toward zero (truncate) */
11579 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
11580 slot = SLOT_CW_TRUNC;
11583 case I387_CW_FLOOR:
11584 /* round down toward -oo */
11585 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
11586 slot = SLOT_CW_FLOOR;
11590 /* round up toward +oo */
11591 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
11592 slot = SLOT_CW_CEIL;
11595 case I387_CW_MASK_PM:
11596 /* mask precision exception for nearbyint() */
11597 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11598 slot = SLOT_CW_MASK_PM;
11602 gcc_unreachable ();
/* Store the adjusted control word into its dedicated stack slot. */
11606 gcc_assert (slot < MAX_386_STACK_LOCALS);
11608 new_mode = assign_386_stack_local (HImode, slot);
11609 emit_move_insn (new_mode, reg);
11612 /* Output code for INSN to convert a float to a signed int. OPERANDS
11613 are the insn operands. The output may be [HSD]Imode and the input
11614 operand may be [SDX]Fmode. */
11617 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
11619 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11620 int dimode_p = GET_MODE (operands[0]) == DImode;
11621 int round_mode = get_attr_i387_cw (insn);
11623 /* Jump through a hoop or two for DImode, since the hardware has no
11624 non-popping instruction. We used to do this a different way, but
11625 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate st(0) so the popping fistp/fisttp leaves the value live. */
11626 if ((dimode_p || fisttp) && !stack_top_dies)
11627 output_asm_insn ("fld\t%y1", operands);
11629 gcc_assert (STACK_TOP_P (operands[1]));
11630 gcc_assert (MEM_P (operands[0]));
11631 gcc_assert (GET_MODE (operands[1]) != TFmode);
/* fisttp (SSE3) always truncates, so no control-word dance is needed. */
11634 output_asm_insn ("fisttp%z0\t%0", operands);
/* Classic path: temporarily switch rounding mode via fldcw around the
   store, then restore the saved control word. */
11637 if (round_mode != I387_CW_ANY)
11638 output_asm_insn ("fldcw\t%3", operands);
11639 if (stack_top_dies || dimode_p)
11640 output_asm_insn ("fistp%z0\t%0", operands);
11642 output_asm_insn ("fist%z0\t%0", operands);
11643 if (round_mode != I387_CW_ANY)
11644 output_asm_insn ("fldcw\t%2", operands);
11650 /* Output code for x87 ffreep insn. The OPNO argument, which may only
11651 have the values zero or one, indicates the ffreep insn's operand
11652 from the OPERANDS array. */
11654 static const char *
11655 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
11657 if (TARGET_USE_FFREEP)
11658 #if HAVE_AS_IX86_FFREEP
11659 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit the raw opcode bytes (0xdf 0xc0+i) via
   .word, patching the register digit into the template string. */
11662 static char retval[] = ".word\t0xc_df";
11663 int regno = REGNO (operands[opno]);
11665 gcc_assert (FP_REGNO_P (regno));
/* Index 9 is the '_' placeholder in ".word\t0xc_df". */
11667 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not desirable: pop with fstp instead. */
11672 return opno ? "fstp\t%y1" : "fstp\t%y0";
11676 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
11677 should be used. UNORDERED_P is true when fucom should be used. */
11680 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
11682 int stack_top_dies;
11683 rtx cmp_op0, cmp_op1;
11684 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Select which operands hold the actual comparison (elided condition —
   presumably depends on eflags_p; verify). */
11688 cmp_op0 = operands[0];
11689 cmp_op1 = operands[1];
11693 cmp_op0 = operands[1];
11694 cmp_op1 = operands[2];
/* SSE comparisons: [v]ucomis[sd]/[v]comis[sd]; skip the leading 'v'
   when AVX is unavailable. */
11699 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
11700 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
11701 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
11702 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
11704 if (GET_MODE (operands[0]) == SFmode)
11706 return &ucomiss[TARGET_AVX ? 0 : 1];
11708 return &comiss[TARGET_AVX ? 0 : 1];
11711 return &ucomisd[TARGET_AVX ? 0 : 1];
11713 return &comisd[TARGET_AVX ? 0 : 1];
/* x87 path: first operand must be on top of the register stack. */
11716 gcc_assert (STACK_TOP_P (cmp_op0));
11718 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero uses ftst; free st(0) afterwards if it dies. */
11720 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
11722 if (stack_top_dies)
11724 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
11725 return output_387_ffreep (operands, 1);
11728 return "ftst\n\tfnstsw\t%0";
11731 if (STACK_REG_P (cmp_op1)
11733 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
11734 && REGNO (cmp_op1) != FIRST_STACK_REG)
11736 /* If both the top of the 387 stack dies, and the other operand
11737 is also a stack register that dies, then this must be a
11738 `fcompp' float compare */
11742 /* There is no double popping fcomi variant. Fortunately,
11743 eflags is immune from the fstp's cc clobbering. */
11745 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
11747 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
11748 return output_387_ffreep (operands, 0);
11753 return "fucompp\n\tfnstsw\t%0";
11755 return "fcompp\n\tfnstsw\t%0";
11760 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
11762 static const char * const alt[16] =
11764 "fcom%z2\t%y2\n\tfnstsw\t%0",
11765 "fcomp%z2\t%y2\n\tfnstsw\t%0",
11766 "fucom%z2\t%y2\n\tfnstsw\t%0",
11767 "fucomp%z2\t%y2\n\tfnstsw\t%0",
11769 "ficom%z2\t%y2\n\tfnstsw\t%0",
11770 "ficomp%z2\t%y2\n\tfnstsw\t%0",
11774 "fcomi\t{%y1, %0|%0, %y1}",
11775 "fcomip\t{%y1, %0|%0, %y1}",
11776 "fucomi\t{%y1, %0|%0, %y1}",
11777 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into ALT from the flag combination above. */
11788 mask = eflags_p << 3;
11789 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
11790 mask |= unordered_p << 1;
11791 mask |= stack_top_dies;
11793 gcc_assert (mask < 16);
/* NOTE(review): emits one absolute jump-table entry ("Lnn" label
   reference) as .long, or .quad in 64-bit mode. */
11802 ix86_output_addr_vec_elt (FILE *file, int value)
11804 const char *directive = ASM_LONG;
11808 directive = ASM_QUAD;
11810 gcc_assert (!TARGET_64BIT);
11813 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* NOTE(review): emits one relative (PIC-friendly) jump-table entry:
   either a label difference, a @GOTOFF reference, or a GOT-relative
   expression, depending on target/assembler capabilities. */
11817 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
11819 const char *directive = ASM_LONG;
11822 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
11823 directive = ASM_QUAD;
11825 gcc_assert (!TARGET_64BIT);
11827 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
11828 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
11829 fprintf (file, "%s%s%d-%s%d\n",
11830 directive, LPREFIX, value, LPREFIX, rel);
11831 else if (HAVE_AS_GOTOFF_IN_DATA)
11832 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
11834 else if (TARGET_MACHO)
11836 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
11837 machopic_output_function_base_name (file);
11838 fprintf(file, "\n");
/* Fallback: express the entry relative to the GOT symbol. */
11842 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
11843 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
11846 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
11850 ix86_expand_clear (rtx dest)
11854 /* We play register width games, which are only valid after reload. */
11855 gcc_assert (reload_completed);
11857 /* Avoid HImode and its attendant prefix byte. */
11858 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
11859 dest = gen_rtx_REG (SImode, REGNO (dest));
11860 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
11862 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
11863 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
/* xor clobbers the flags, so wrap the set in a PARALLEL with a
   FLAGS_REG clobber. */
11865 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11866 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
11872 /* X is an unchanging MEM. If it is a constant pool reference, return
11873 the constant pool rtx, else NULL. */
11876 maybe_get_pool_constant (rtx x)
/* Strip PIC/GOT wrapping first so pool symbols are recognizable. */
11878 x = ix86_delegitimize_address (XEXP (x, 0));
11880 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
11881 return get_pool_constant (x);
/* NOTE(review): expands a scalar move operands[0] := operands[1],
   legitimizing TLS symbols, dllimport references, and PIC addresses,
   and forcing awkward constants into registers/memory as required. */
11887 ix86_expand_move (enum machine_mode mode, rtx operands[])
11890 enum tls_model model;
/* Plain SYMBOL_REF source: handle TLS and dllimport forms. */
11895 if (GET_CODE (op1) == SYMBOL_REF)
11897 model = SYMBOL_REF_TLS_MODEL (op1);
11900 op1 = legitimize_tls_address (op1, model, true);
11901 op1 = force_operand (op1, op0);
11905 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11906 && SYMBOL_REF_DLLIMPORT_P (op1))
11907 op1 = legitimize_dllimport_symbol (op1, false);
/* CONST (PLUS symbol addend): legitimize the symbol, then re-add
   the addend. */
11909 else if (GET_CODE (op1) == CONST
11910 && GET_CODE (XEXP (op1, 0)) == PLUS
11911 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
11913 rtx addend = XEXP (XEXP (op1, 0), 1);
11914 rtx symbol = XEXP (XEXP (op1, 0), 0);
11917 model = SYMBOL_REF_TLS_MODEL (symbol);
11919 tmp = legitimize_tls_address (symbol, model, true);
11920 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11921 && SYMBOL_REF_DLLIMPORT_P (symbol))
11922 tmp = legitimize_dllimport_symbol (symbol, true);
11926 tmp = force_operand (tmp, NULL);
11927 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
11928 op0, 1, OPTAB_DIRECT);
/* PIC: symbolic operands must go through the PIC machinery. */
11934 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
11936 if (TARGET_MACHO && !TARGET_64BIT)
11941 rtx temp = ((reload_in_progress
11942 || ((op0 && REG_P (op0))
11944 ? op0 : gen_reg_rtx (Pmode));
11945 op1 = machopic_indirect_data_reference (op1, temp);
11946 op1 = machopic_legitimize_pic_address (op1, mode,
11947 temp == op1 ? 0 : temp);
11949 else if (MACHOPIC_INDIRECT)
11950 op1 = machopic_indirect_data_reference (op1, 0);
11958 op1 = force_reg (Pmode, op1);
11959 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
11961 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
11962 op1 = legitimize_pic_address (op1, reg);
/* Non-PIC: reject mem-to-mem and non-pushable immediates. */
11971 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
11972 || !push_operand (op0, mode))
11974 op1 = force_reg (mode, op1);
11976 if (push_operand (op0, mode)
11977 && ! general_no_elim_operand (op1, mode))
11978 op1 = copy_to_mode_reg (mode, op1);
11980 /* Force large constants in 64bit compilation into register
11981 to get them CSEed. */
11982 if (can_create_pseudo_p ()
11983 && (mode == DImode) && TARGET_64BIT
11984 && immediate_operand (op1, mode)
11985 && !x86_64_zext_immediate_operand (op1, VOIDmode)
11986 && !register_operand (op0, mode)
11988 op1 = copy_to_mode_reg (mode, op1);
11990 if (can_create_pseudo_p ()
11991 && FLOAT_MODE_P (mode)
11992 && GET_CODE (op1) == CONST_DOUBLE)
11994 /* If we are loading a floating point constant to a register,
11995 force the value to memory now, since we'll get better code
11996 out the back end. */
11998 op1 = validize_mem (force_const_mem (mode, op1));
11999 if (!register_operand (op0, mode))
12001 rtx temp = gen_reg_rtx (mode);
12002 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12003 emit_move_insn (op0, temp);
/* Finally emit the (possibly rewritten) move. */
12009 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* NOTE(review): expands a vector-mode move, forcing non-trivial vector
   constants to the constant pool and routing insufficiently aligned SSE
   operands through the misaligned-move expander. */
12013 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12015 rtx op0 = operands[0], op1 = operands[1];
12016 unsigned int align = GET_MODE_ALIGNMENT (mode);
12018 /* Force constants other than zero into memory. We do not know how
12019 the instructions used to build constants modify the upper 64 bits
12020 of the register, once we have that information we may be able
12021 to handle some of them more efficiently. */
12022 if (can_create_pseudo_p ()
12023 && register_operand (op0, mode)
12024 && (CONSTANT_P (op1)
12025 || (GET_CODE (op1) == SUBREG
12026 && CONSTANT_P (SUBREG_REG (op1))))
12027 && standard_sse_constant_p (op1) <= 0)
12028 op1 = validize_mem (force_const_mem (mode, op1));
12030 /* We need to check memory alignment for SSE mode since attribute
12031 can make operands unaligned. */
12032 if (can_create_pseudo_p ()
12033 && SSE_REG_MODE_P (mode)
12034 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12035 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12039 /* ix86_expand_vector_move_misalign() does not like constants ... */
12040 if (CONSTANT_P (op1)
12041 || (GET_CODE (op1) == SUBREG
12042 && CONSTANT_P (SUBREG_REG (op1))))
12043 op1 = validize_mem (force_const_mem (mode, op1));
12045 /* ... nor both arguments in memory. */
12046 if (!register_operand (op0, mode)
12047 && !register_operand (op1, mode))
12048 op1 = force_reg (mode, op1);
12050 tmp[0] = op0; tmp[1] = op1;
12051 ix86_expand_vector_move_misalign (mode, tmp);
12055 /* Make operand1 a register if it isn't already. */
12056 if (can_create_pseudo_p ()
12057 && !register_operand (op0, mode)
12058 && !register_operand (op1, mode))
12060 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12064 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12067 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12068 straight to ix86_expand_vector_move. */
12069 /* Code generation for scalar reg-reg moves of single and double precision data:
12070 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12074 if (x86_sse_partial_reg_dependency == true)
12079 Code generation for scalar loads of double precision data:
12080 if (x86_sse_split_regs == true)
12081 movlpd mem, reg (gas syntax)
12085 Code generation for unaligned packed loads of single precision data
12086 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12087 if (x86_sse_unaligned_move_optimal)
12090 if (x86_sse_partial_reg_dependency == true)
12102 Code generation for unaligned packed loads of double precision data
12103 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12104 if (x86_sse_unaligned_move_optimal)
12107 if (x86_sse_split_regs == true)
12120 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* AVX path (elided condition presumably TARGET_AVX — verify): a plain
   vmovdqu/vmovup[sd] handles any misalignment. */
12129 switch (GET_MODE_CLASS (mode))
12131 case MODE_VECTOR_INT:
12133 switch (GET_MODE_SIZE (mode))
12136 op0 = gen_lowpart (V16QImode, op0);
12137 op1 = gen_lowpart (V16QImode, op1);
12138 emit_insn (gen_avx_movdqu (op0, op1));
12141 op0 = gen_lowpart (V32QImode, op0);
12142 op1 = gen_lowpart (V32QImode, op1);
12143 emit_insn (gen_avx_movdqu256 (op0, op1));
12146 gcc_unreachable ();
12149 case MODE_VECTOR_FLOAT:
12150 op0 = gen_lowpart (mode, op0);
12151 op1 = gen_lowpart (mode, op1);
12156 emit_insn (gen_avx_movups (op0, op1));
12159 emit_insn (gen_avx_movups256 (op0, op1));
12162 emit_insn (gen_avx_movupd (op0, op1));
12165 emit_insn (gen_avx_movupd256 (op0, op1));
12168 gcc_unreachable ();
12173 gcc_unreachable ();
/* Non-AVX load path (op1 in memory). */
12181 /* If we're optimizing for size, movups is the smallest. */
12182 if (optimize_insn_for_size_p ())
12184 op0 = gen_lowpart (V4SFmode, op0);
12185 op1 = gen_lowpart (V4SFmode, op1);
12186 emit_insn (gen_sse_movups (op0, op1));
12190 /* ??? If we have typed data, then it would appear that using
12191 movdqu is the only way to get unaligned data loaded with
12193 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12195 op0 = gen_lowpart (V16QImode, op0);
12196 op1 = gen_lowpart (V16QImode, op1);
12197 emit_insn (gen_sse2_movdqu (op0, op1));
12201 if (TARGET_SSE2 && mode == V2DFmode)
12205 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12207 op0 = gen_lowpart (V2DFmode, op0);
12208 op1 = gen_lowpart (V2DFmode, op1);
12209 emit_insn (gen_sse2_movupd (op0, op1));
12213 /* When SSE registers are split into halves, we can avoid
12214 writing to the top half twice. */
12215 if (TARGET_SSE_SPLIT_REGS)
12217 emit_clobber (op0);
12222 /* ??? Not sure about the best option for the Intel chips.
12223 The following would seem to satisfy; the register is
12224 entirely cleared, breaking the dependency chain. We
12225 then store to the upper half, with a dependency depth
12226 of one. A rumor has it that Intel recommends two movsd
12227 followed by an unpacklpd, but this is unconfirmed. And
12228 given that the dependency depth of the unpacklpd would
12229 still be one, I'm not sure why this would be better. */
12230 zero = CONST0_RTX (V2DFmode);
12233 m = adjust_address (op1, DFmode, 0);
12234 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12235 m = adjust_address (op1, DFmode, 8);
12236 emit_insn (gen_sse2_loadhpd (op0, op0, m));
/* Single-precision unaligned load. */
12240 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12242 op0 = gen_lowpart (V4SFmode, op0);
12243 op1 = gen_lowpart (V4SFmode, op1);
12244 emit_insn (gen_sse_movups (op0, op1));
12248 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12249 emit_move_insn (op0, CONST0_RTX (mode));
12251 emit_clobber (op0);
12253 if (mode != V4SFmode)
12254 op0 = gen_lowpart (V4SFmode, op0);
12255 m = adjust_address (op1, V2SFmode, 0);
12256 emit_insn (gen_sse_loadlps (op0, op0, m));
12257 m = adjust_address (op1, V2SFmode, 8);
12258 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Non-AVX store path (op0 in memory). */
12261 else if (MEM_P (op0))
12263 /* If we're optimizing for size, movups is the smallest. */
12264 if (optimize_insn_for_size_p ())
12266 op0 = gen_lowpart (V4SFmode, op0);
12267 op1 = gen_lowpart (V4SFmode, op1);
12268 emit_insn (gen_sse_movups (op0, op1));
12272 /* ??? Similar to above, only less clear because of quote
12273 typeless stores unquote. */
12274 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12275 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12277 op0 = gen_lowpart (V16QImode, op0);
12278 op1 = gen_lowpart (V16QImode, op1);
12279 emit_insn (gen_sse2_movdqu (op0, op1));
12283 if (TARGET_SSE2 && mode == V2DFmode)
12285 m = adjust_address (op0, DFmode, 0);
12286 emit_insn (gen_sse2_storelpd (m, op1));
12287 m = adjust_address (op0, DFmode, 8);
12288 emit_insn (gen_sse2_storehpd (m, op1));
12292 if (mode != V4SFmode)
12293 op1 = gen_lowpart (V4SFmode, op1);
12294 m = adjust_address (op0, V2SFmode, 0);
12295 emit_insn (gen_sse_storelps (m, op1));
12296 m = adjust_address (op0, V2SFmode, 8);
12297 emit_insn (gen_sse_storehps (m, op1));
12301 gcc_unreachable ();
12304 /* Expand a push in MODE. This is some mode for which we do not support
12305 proper push instructions, at least from the registers that we expect
12306 the value to live in. */
12309 ix86_expand_push (enum machine_mode mode, rtx x)
/* Manually decrement the stack pointer, then store X at the new top. */
12313 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12314 GEN_INT (-GET_MODE_SIZE (mode)),
12315 stack_pointer_rtx, 1, OPTAB_DIRECT);
12316 if (tmp != stack_pointer_rtx)
12317 emit_move_insn (stack_pointer_rtx, tmp);
12319 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12320 emit_move_insn (tmp, x);
12323 /* Helper function of ix86_fixup_binary_operands to canonicalize
12324 operand order. Returns true if the operands should be swapped. */
12327 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12330 rtx dst = operands[0];
12331 rtx src1 = operands[1];
12332 rtx src2 = operands[2];
12334 /* If the operation is not commutative, we can't do anything. */
12335 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12338 /* Highest priority is that src1 should match dst. */
12339 if (rtx_equal_p (dst, src1))
12341 if (rtx_equal_p (dst, src2))
12344 /* Next highest priority is that immediate constants come second. */
12345 if (immediate_operand (src2, mode))
12347 if (immediate_operand (src1, mode))
12350 /* Lowest priority is that memory references should come second. */
/* NOTE(review): return statements for each priority tier are elided;
   the visible conditions establish the documented ordering. */
12360 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12361 destination to use for the operation. If different from the true
12362 destination in operands[0], a copy operation will be required. */
12365 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12368 rtx dst = operands[0];
12369 rtx src1 = operands[1];
12370 rtx src2 = operands[2];
12372 /* Canonicalize operand order. */
12373 if (ix86_swap_binary_operands_p (code, mode, operands))
12377 /* It is invalid to swap operands of different modes. */
12378 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12385 /* Both source operands cannot be in memory. */
12386 if (MEM_P (src1) && MEM_P (src2))
12388 /* Optimization: Only read from memory once. */
12389 if (rtx_equal_p (src1, src2))
12391 src2 = force_reg (mode, src2);
12395 src2 = force_reg (mode, src2);
12398 /* If the destination is memory, and we do not have matching source
12399 operands, do things in registers. */
12400 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12401 dst = gen_reg_rtx (mode);
12403 /* Source 1 cannot be a constant. */
12404 if (CONSTANT_P (src1))
12405 src1 = force_reg (mode, src1);
12407 /* Source 1 cannot be a non-matching memory. */
12408 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12409 src1 = force_reg (mode, src1);
/* Write back the possibly-rewritten sources; DST is returned to the
   caller (elided return — verify). */
12411 operands[1] = src1;
12412 operands[2] = src2;
12416 /* Similarly, but assume that the destination has already been
12417 set up properly. */
12420 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12421 enum machine_mode mode, rtx operands[])
/* The caller guarantees no copy is needed, so fixup must not have
   substituted a fresh destination register. */
12423 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12424 gcc_assert (dst == operands[0]);
12427 /* Attempt to expand a binary operator. Make the expansion closer to the
12428 actual machine, then just general_operand, which will allow 3 separate
12429 memory references (one output, two input) in a single insn. */
12432 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12435 rtx src1, src2, dst, op, clob;
12437 dst = ix86_fixup_binary_operands (code, mode, operands);
12438 src1 = operands[1];
12439 src2 = operands[2];
12441 /* Emit the instruction. */
12443 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12444 if (reload_in_progress)
12446 /* Reload doesn't know about the flags register, and doesn't know that
12447 it doesn't want to clobber it. We can only do this with PLUS. */
12448 gcc_assert (code == PLUS);
/* Normal path: most x86 arithmetic clobbers EFLAGS, so attach a
   FLAGS_REG clobber in a PARALLEL. */
12453 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12454 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12457 /* Fix up the destination if needed. */
12458 if (dst != operands[0])
12459 emit_move_insn (operands[0], dst);
12462 /* Return TRUE or FALSE depending on whether the binary operator meets the
12463 appropriate constraints. */
12466 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12469 rtx dst = operands[0];
12470 rtx src1 = operands[1];
12471 rtx src2 = operands[2];
12473 /* Both source operands cannot be in memory. */
12474 if (MEM_P (src1) && MEM_P (src2))
12477 /* Canonicalize operand order for commutative operators. */
12478 if (ix86_swap_binary_operands_p (code, mode, operands))
12485 /* If the destination is memory, we must have a matching source operand. */
12486 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12489 /* Source 1 cannot be a constant. */
12490 if (CONSTANT_P (src1))
12493 /* Source 1 cannot be a non-matching memory. */
12494 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12500 /* Attempt to expand a unary operator. Make the expansion closer to the
12501 actual machine, then just general_operand, which will allow 2 separate
12502 memory references (one output, one input) in a single insn. */
12505 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
12508 int matching_memory;
12509 rtx src, dst, op, clob;
12514 /* If the destination is memory, and we do not have matching source
12515 operands, do things in registers. */
12516 matching_memory = 0;
12519 if (rtx_equal_p (dst, src))
12520 matching_memory = 1;
/* Non-matching memory destination: compute into a fresh pseudo and
   move to operands[0] at the end.  */
12522 dst = gen_reg_rtx (mode);
12525 /* When source operand is memory, destination must match. */
12526 if (MEM_P (src) && !matching_memory)
12527 src = force_reg (mode, src);
12529 /* Emit the instruction. */
12531 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
12532 if (reload_in_progress || code == NOT)
12534 /* Reload doesn't know about the flags register, and doesn't know that
12535 it doesn't want to clobber it. */
12536 gcc_assert (code == NOT);
/* Codes other than NOT (e.g. NEG) clobber the flags, so attach the
   FLAGS_REG clobber to the SET.  */
12541 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12542 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12545 /* Fix up the destination if needed. */
12546 if (dst != operands[0])
12547 emit_move_insn (operands[0], dst);
12550 /* Return TRUE or FALSE depending on whether the unary operator meets the
12551 appropriate constraints. */
/* All parameters are unused except the operands themselves; the check is
   purely structural.  */
12554 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
12555 enum machine_mode mode ATTRIBUTE_UNUSED,
12556 rtx operands[2] ATTRIBUTE_UNUSED)
12558 /* If one of operands is memory, source and destination must match. */
12559 if ((MEM_P (operands[0])
12560 || MEM_P (operands[1]))
12561 && ! rtx_equal_p (operands[0], operands[1]))
12566 /* Post-reload splitter for converting an SF or DFmode value in an
12567 SSE register into an unsigned SImode. */
12570 ix86_split_convert_uns_si_sse (rtx operands[])
12572 enum machine_mode vecmode;
12573 rtx value, large, zero_or_two31, input, two31, x;
12575 large = operands[1];
12576 zero_or_two31 = operands[2];
12577 input = operands[3];
12578 two31 = operands[4];
12579 vecmode = GET_MODE (large);
12580 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
12582 /* Load up the value into the low element. We must ensure that the other
12583 elements are valid floats -- zero is the easiest such value. */
12586 if (vecmode == V4SFmode)
12587 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
12589 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Alternate path (condition elided in this listing): input already in
   an SSE register — zero VALUE and merge the low element via movss/movsd.  */
12593 input = gen_rtx_REG (vecmode, REGNO (input));
12594 emit_move_insn (value, CONST0_RTX (vecmode));
12595 if (vecmode == V4SFmode)
12596 emit_insn (gen_sse_movss (value, value, input));
12598 emit_insn (gen_sse2_movsd (value, value, input));
/* Build LARGE = (2**31 <= value) as an all-ones/all-zeros FP compare mask.  */
12601 emit_move_insn (large, two31);
12602 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
12604 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
12605 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* zero_or_two31 = mask ? 2**31 : 0, then subtract it so the value fits
   in a signed conversion.  */
12607 x = gen_rtx_AND (vecmode, zero_or_two31, large);
12608 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
12610 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
12611 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Shift the compare mask into the integer sign-bit position...  */
12613 large = gen_rtx_REG (V4SImode, REGNO (large));
12614 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
/* ...truncate to integer...  */
12616 x = gen_rtx_REG (V4SImode, REGNO (value));
12617 if (vecmode == V4SFmode)
12618 emit_insn (gen_sse2_cvttps2dq (x, value));
12620 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* ...and XOR the 2**31 bit back in where the input was >= 2**31.  */
12623 emit_insn (gen_xorv4si3 (value, value, large))/* sic: see original */;
12626 /* Convert an unsigned DImode value into a DFmode, using only SSE.
12627 Expects the 64-bit DImode to be supplied in a pair of integral
12628 registers. Requires SSE2; will use SSE3 if available. For x86_32,
12629 -mfpmath=sse, !optimize_size only. */
12632 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
12634 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
12635 rtx int_xmm, fp_xmm;
12636 rtx biases, exponents;
/* Get the 64-bit input into the low half of an XMM register, choosing
   the cheapest path the target supports.  */
12639 int_xmm = gen_reg_rtx (V4SImode);
12640 if (TARGET_INTER_UNIT_MOVES)
12641 emit_insn (gen_movdi_to_sse (int_xmm, input));
12642 else if (TARGET_SSE_SPLIT_REGS)
12644 emit_clobber (int_xmm);
12645 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
/* Fallback (else branch elided in listing): build a V2DI vector with the
   input in element 0, then view it as V4SI.  */
12649 x = gen_reg_rtx (V2DImode);
12650 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
12651 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Exponent words for 2**52 and 2**84 double-precision biases.  */
12654 x = gen_rtx_CONST_VECTOR (V4SImode,
12655 gen_rtvec (4, GEN_INT (0x43300000UL),
12656 GEN_INT (0x45300000UL),
12657 const0_rtx, const0_rtx));
12658 exponents = validize_mem (force_const_mem (V4SImode, x));
12660 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
12661 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
12663 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
12664 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
12665 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
12666 (0x1.0p84 + double(fp_value_hi_xmm)).
12667 Note these exponents differ by 32. */
12669 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
12671 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
12672 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
12673 real_ldexp (&bias_lo_rvt, &dconst1, 52);
12674 real_ldexp (&bias_hi_rvt, &dconst1, 84);
12675 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
12676 x = const_double_from_real_value (bias_hi_rvt, DFmode);
12677 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
12678 biases = validize_mem (force_const_mem (V2DFmode, biases));
12679 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
12681 /* Add the upper and lower DFmode values together. */
/* SSE3 haddpd does it in one insn; otherwise unpack-high and add
   (the SSE3/else condition is elided in this listing).  */
12683 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
12686 x = copy_to_mode_reg (V2DFmode, fp_xmm);
12687 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
12688 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
/* Extract the scalar DF result into TARGET.  */
12691 ix86_expand_vector_extract (false, target, fp_xmm, 0);
12694 /* Not used, but eases macroization of patterns. */
/* Stub kept only so the floatunssi<mode>2 pattern macroizes over XFmode;
   it must never actually be expanded.  */
12696 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
12697 rtx input ATTRIBUTE_UNUSED)
12699 gcc_unreachable ();
12702 /* Convert an unsigned SImode value into a DFmode. Only currently used
12703 for SSE, but applicable anywhere. */
12706 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
12708 REAL_VALUE_TYPE TWO31r;
/* Flip the sign bit (add -2**31), convert the now-signed value, then
   add 2**31.0 back as a double — exact, since DFmode holds 32 bits.  */
12711 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
12712 NULL, 1, OPTAB_DIRECT);
12714 fp = gen_reg_rtx (DFmode);
12715 emit_insn (gen_floatsidf2 (fp, x));
12717 real_ldexp (&TWO31r, &dconst1, 31);
12718 x = const_double_from_real_value (TWO31r, DFmode);
12720 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
/* expand_simple_binop may not have used TARGET directly.  */
12722 emit_move_insn (target, x);
12725 /* Convert a signed DImode value into a DFmode. Only used for SSE in
12726 32-bit mode; otherwise we have a direct convert instruction. */
12729 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
12731 REAL_VALUE_TYPE TWO32r;
12732 rtx fp_lo, fp_hi, x;
12734 fp_lo = gen_reg_rtx (DFmode);
12735 fp_hi = gen_reg_rtx (DFmode);
/* result = (double) hi32 * 2**32 + (double) (unsigned) lo32.  */
12737 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)))/* signed high word */;
12739 real_ldexp (&TWO32r, &dconst1, 32);
12740 x = const_double_from_real_value (TWO32r, DFmode);
12741 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* Low word is converted as unsigned.  */
12743 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
12745 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
12748 emit_move_insn (target, x);
12751 /* Convert an unsigned SImode value into a SFmode, using only SSE.
12752 For x86_32, -mfpmath=sse, !optimize_size only. */
12754 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
12756 REAL_VALUE_TYPE ONE16r;
12757 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* Split into 16-bit halves so each converts exactly, then combine:
   result = (float) (input >> 16) * 2**16 + (float) (input & 0xffff).  */
12759 real_ldexp (&ONE16r, &dconst1, 16);
12760 x = const_double_from_real_value (ONE16r, SFmode);
12761 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
12762 NULL, 0, OPTAB_DIRECT);
12763 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
12764 NULL, 0, OPTAB_DIRECT);
12765 fp_hi = gen_reg_rtx (SFmode);
12766 fp_lo = gen_reg_rtx (SFmode);
12767 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
12768 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
12769 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
12771 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
12773 if (!rtx_equal_p (target, fp_hi))
12774 emit_move_insn (target, fp_hi);
12777 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
12778 then replicate the value for all elements of the vector
/* Returns a CONST_VECTOR for MODE containing VALUE; when VECT is false,
   the scalar goes in element 0 and remaining elements are zero.
   (The switch's case labels for the element modes are elided in this
   listing; branches below cover V4SI, V2DI, V4SF and V2DF.)  */
12782 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
12789 v = gen_rtvec (4, value, value, value, value);
12790 return gen_rtx_CONST_VECTOR (V4SImode, v);
12794 v = gen_rtvec (2, value, value);
12795 return gen_rtx_CONST_VECTOR (V2DImode, v);
12799 v = gen_rtvec (4, value, value, value, value);
12801 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
12802 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
12803 return gen_rtx_CONST_VECTOR (V4SFmode, v);
12807 v = gen_rtvec (2, value, value);
12809 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
12810 return gen_rtx_CONST_VECTOR (V2DFmode, v);
/* Unhandled mode.  */
12813 gcc_unreachable ();
12817 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
12818 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
12819 for an SSE register. If VECT is true, then replicate the mask for
12820 all elements of the vector register. If INVERT is true, then create
12821 a mask excluding the sign bit. */
12824 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
12826 enum machine_mode vec_mode, imode;
/* LO/HI together form the sign-bit constant as a double-HOST_WIDE_INT
   pair; which half holds the bit depends on HOST_BITS_PER_WIDE_INT.  */
12827 HOST_WIDE_INT hi, lo;
12832 /* Find the sign bit, sign extended to 2*HWI. */
/* 32-bit case: bit 31.  */
12838 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
12839 lo = 0x80000000, hi = lo < 0;
/* 64-bit case: bit 63 (split across LO/HI on 32-bit hosts).  */
12845 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
12846 if (HOST_BITS_PER_WIDE_INT >= 64)
12847 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
12849 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* Wider (TF/XF-style) case: no vector mode available.  */
12854 vec_mode = VOIDmode;
12855 if (HOST_BITS_PER_WIDE_INT >= 64)
12858 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
12865 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* INVERT: complement to mask everything but the sign bit.  */
12869 lo = ~lo, hi = ~hi;
12875 mask = immed_double_const (lo, hi, imode);
12877 vec = gen_rtvec (2, v, mask);
12878 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
12879 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
12886 gcc_unreachable ();
12890 lo = ~lo, hi = ~hi;
12892 /* Force this value into the low part of a fp vector constant. */
12893 mask = immed_double_const (lo, hi, imode);
12894 mask = gen_lowpart (mode, mask);
12896 if (vec_mode == VOIDmode)
12897 return force_reg (mode, mask);
12899 v = ix86_build_const_vector (mode, vect, mask);
12900 return force_reg (vec_mode, v);
12903 /* Generate code for floating point ABS or NEG. */
12906 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
12909 rtx mask, set, use, clob, dst, src;
12910 bool use_sse = false;
12911 bool vector_mode = VECTOR_MODE_P (mode);
12912 enum machine_mode elt_mode = mode;
/* For vector modes the mask is built per-element.  */
12916 elt_mode = GET_MODE_INNER (mode);
12919 else if (mode == TFmode)
12921 else if (TARGET_SSE_MATH)
12922 use_sse = SSE_FLOAT_MODE_P (mode);
12924 /* NEG and ABS performed with SSE use bitwise mask operations.
12925 Create the appropriate mask now. */
/* ABS clears the sign bit (AND with inverted mask); NEG flips it (XOR).  */
12927 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
12936 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
12937 set = gen_rtx_SET (VOIDmode, dst, set);
/* x87 path (branch structure elided in listing): emit the plain unary op
   with a USE of the mask and a flags clobber so later splitters can pick
   either x87 or SSE forms.  */
12942 set = gen_rtx_fmt_e (code, mode, src);
12943 set = gen_rtx_SET (VOIDmode, dst, set);
12946 use = gen_rtx_USE (VOIDmode, mask);
12947 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12948 emit_insn (gen_rtx_PARALLEL (VOIDmode,
12949 gen_rtvec (3, set, use, clob)));
12956 /* Expand a copysign operation. Special case operand 0 being a constant. */
12959 ix86_expand_copysign (rtx operands[])
12961 enum machine_mode mode;
12962 rtx dest, op0, op1, mask, nmask;
12964 dest = operands[0];
12968 mode = GET_MODE (dest);
/* Constant magnitude: we can strip its sign bit at compile time and use
   the cheaper single-mask insn.  */
12970 if (GET_CODE (op0) == CONST_DOUBLE)
12972 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
12974 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
12975 op0 = simplify_unary_operation (ABS, mode, op0, mode);
12977 if (mode == SFmode || mode == DFmode)
12979 enum machine_mode vmode;
12981 vmode = mode == SFmode ? V4SFmode : V2DFmode;
12983 if (op0 == CONST0_RTX (mode))
12984 op0 = CONST0_RTX (vmode);
/* Non-zero constant: widen it into element 0 of a vector constant.  */
12989 if (mode == SFmode)
12990 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
12991 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
12993 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
12995 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
12998 else if (op0 != CONST0_RTX (mode))
12999 op0 = force_reg (mode, op0);
13001 mask = ix86_build_signbit_mask (mode, 0, 0);
13003 if (mode == SFmode)
13004 copysign_insn = gen_copysignsf3_const;
13005 else if (mode == DFmode)
13006 copysign_insn = gen_copysigndf3_const;
13008 copysign_insn = gen_copysigntf3_const;
13010 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: need both the sign mask and its complement.  */
13014 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13016 nmask = ix86_build_signbit_mask (mode, 0, 1);
13017 mask = ix86_build_signbit_mask (mode, 0, 0);
13019 if (mode == SFmode)
13020 copysign_insn = gen_copysignsf3_var;
13021 else if (mode == DFmode)
13022 copysign_insn = gen_copysigndf3_var;
13024 copysign_insn = gen_copysigntf3_var;
13026 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13030 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13031 be a constant, and so has already been expanded into a vector constant. */
13034 ix86_split_copysign_const (rtx operands[])
13036 enum machine_mode mode, vmode;
13037 rtx dest, op0, op1, mask, x;
13039 dest = operands[0];
13042 mask = operands[3];
13044 mode = GET_MODE (dest);
13045 vmode = GET_MODE (mask);
/* dest = (op1 & signbit-mask) | |op0| — the AND keeps only op1's sign,
   the IOR merges in the (already sign-stripped) constant magnitude.  */
13047 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13048 x = gen_rtx_AND (vmode, dest, mask);
13049 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* IOR with zero would be a no-op; skip it.  */
13051 if (op0 != CONST0_RTX (vmode))
13053 x = gen_rtx_IOR (vmode, dest, op0);
13054 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13058 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13059 so we have to do two masks. */
13062 ix86_split_copysign_var (rtx operands[])
13064 enum machine_mode mode, vmode;
13065 rtx dest, scratch, op0, op1, mask, nmask, x;
13067 dest = operands[0];
13068 scratch = operands[1];
13071 nmask = operands[4];
13072 mask = operands[5];
13074 mode = GET_MODE (dest);
13075 vmode = GET_MODE (mask);
13077 if (rtx_equal_p (op0, op1))
13079 /* Shouldn't happen often (it's useless, obviously), but when it does
13080 we'd generate incorrect code if we continue below. */
13081 emit_move_insn (dest, op0);
/* The remaining cases mirror the insn pattern's register-tying
   alternatives; each computes dest = (op0 & ~signbit) | (op1 & signbit)
   using whichever of dest/scratch is tied to which input.  */
13085 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13087 gcc_assert (REGNO (op1) == REGNO (scratch));
13089 x = gen_rtx_AND (vmode, scratch, mask);
13090 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest currently holds the mask: ~mask & op0 keeps op0's magnitude.  */
13093 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13094 x = gen_rtx_NOT (vmode, dest);
13095 x = gen_rtx_AND (vmode, x, op0);
13096 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13100 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13102 x = gen_rtx_AND (vmode, scratch, mask);
13104 else /* alternative 2,4 */
13106 gcc_assert (REGNO (mask) == REGNO (scratch));
13107 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13108 x = gen_rtx_AND (vmode, scratch, op1);
13110 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13112 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13114 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13115 x = gen_rtx_AND (vmode, dest, nmask);
13117 else /* alternative 3,4 */
13119 gcc_assert (REGNO (nmask) == REGNO (dest));
13121 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13122 x = gen_rtx_AND (vmode, dest, op0);
13124 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine the two masked halves.  */
13127 x = gen_rtx_IOR (vmode, dest, scratch);
13128 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13131 /* Return TRUE or FALSE depending on whether the first SET in INSN
13132 has source and destination with matching CC modes, and that the
13133 CC mode is at least as constrained as REQ_MODE. */
13136 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13139 enum machine_mode set_mode;
13141 set = PATTERN (insn);
13142 if (GET_CODE (set) == PARALLEL)
13143 set = XVECEXP (set, 0, 0);
13144 gcc_assert (GET_CODE (set) == SET);
13145 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13147 set_mode = GET_MODE (SET_DEST (set));
/* Dispatch on SET_MODE (the switch's case labels are elided in this
   listing); each branch checks whether REQ_MODE is compatible.  */
13151 if (req_mode != CCNOmode
13152 && (req_mode != CCmode
13153 || XEXP (SET_SRC (set), 1) != const0_rtx))
13157 if (req_mode == CCGCmode)
13161 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13165 if (req_mode == CCZmode)
13176 gcc_unreachable ();
/* Finally the COMPARE itself must carry the same CC mode.  */
13179 return (GET_MODE (SET_SRC (set)) == set_mode);
13182 /* Generate insn patterns to do an integer compare of OPERANDS. */
13185 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13187 enum machine_mode cmpmode;
13190 cmpmode = SELECT_CC_MODE (code, op0, op1);
13191 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13193 /* This is very simple, but making the interface the same as in the
13194 FP case makes the rest of the code easier. */
13195 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13196 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13198 /* Return the test that should be put into the flags user, i.e.
13199 the bcc, scc, or cmov instruction. */
13200 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13203 /* Figure out whether to use ordered or unordered fp comparisons.
13204 Return the appropriate mode to use. */
13207 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13209 /* ??? In order to make all comparisons reversible, we do all comparisons
13210 non-trapping when compiling for IEEE. Once gcc is able to distinguish
13211 all forms trapping and nontrapping comparisons, we can make inequality
13212 comparisons trapping again, since it results in better code when using
13213 FCOM based compares. */
13214 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Return the condition-code mode to use for comparing OP0 and OP1 with
   comparison CODE.  Picks the least-constrained CC mode that still
   captures every flag the comparison needs.  */
13218 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13220 enum machine_mode mode = GET_MODE (op0);
13222 if (SCALAR_FLOAT_MODE_P (mode))
13224 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13225 return ix86_fp_compare_mode (code);
/* Integer comparisons: switch on CODE (case bodies partially elided
   in this listing — returns like CCZmode/CCmode etc. are between the
   numbered lines in the original source).  */
13230 /* Only zero flag is needed. */
13231 case EQ: /* ZF=0 */
13232 case NE: /* ZF!=0 */
13234 /* Codes needing carry flag. */
13235 case GEU: /* CF=0 */
13236 case LTU: /* CF=1 */
13237 /* Detect overflow checks. They need just the carry flag. */
13238 if (GET_CODE (op0) == PLUS
13239 && rtx_equal_p (op1, XEXP (op0, 0)))
13243 case GTU: /* CF=0 & ZF=0 */
13244 case LEU: /* CF=1 | ZF=1 */
13245 /* Detect overflow checks. They need just the carry flag. */
13246 if (GET_CODE (op0) == MINUS
13247 && rtx_equal_p (op1, XEXP (op0, 0)))
13251 /* Codes possibly doable only with sign flag when
13252 comparing against zero. */
13253 case GE: /* SF=OF or SF=0 */
13254 case LT: /* SF<>OF or SF=1 */
13255 if (op1 == const0_rtx)
13258 /* For other cases Carry flag is not required. */
13260 /* Codes doable only with sign flag when comparing
13261 against zero, but we miss jump instruction for it
13262 so we need to use relational tests against overflow
13263 that thus needs to be zero. */
13264 case GT: /* ZF=0 & SF=OF */
13265 case LE: /* ZF=1 | SF<>OF */
13266 if (op1 == const0_rtx)
13270 /* strcmp pattern do (use flags) and combine may ask us for proper
13275 gcc_unreachable ();
13279 /* Return the fixed registers used for condition codes. */
/* Implements TARGET_FIXED_CONDITION_CODE_REGS; body (assignments to *p1
   and *p2) is elided in this listing.  */
13282 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13289 /* If two condition code modes are compatible, return a condition code
13290 mode which is compatible with both. Otherwise, return
13293 static enum machine_mode
13294 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Non-CC modes cannot be merged.  */
13299 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC combine into the stricter of the two.  */
13302 if ((m1 == CCGCmode && m2 == CCGOCmode)
13303 || (m1 == CCGOCmode && m2 == CCGCmode))
13309 gcc_unreachable ();
13339 /* These are only compatible with themselves, which we already
13345 /* Split comparison code CODE into comparisons we can do using branch
13346 instructions. BYPASS_CODE is comparison code for branch that will
13347 branch around FIRST_CODE and SECOND_CODE. If some of branches
13348 is not required, set value to UNKNOWN.
13349 We never require more than two branches. */
13352 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
13353 enum rtx_code *first_code,
13354 enum rtx_code *second_code)
13356 *first_code = code;
13357 *bypass_code = UNKNOWN;
13358 *second_code = UNKNOWN;
13360 /* The fcomi comparison sets flags as follows:
/* These codes map directly to a single flags test after fcomi/sahf.  */
13370 case GT: /* GTU - CF=0 & ZF=0 */
13371 case GE: /* GEU - CF=0 */
13372 case ORDERED: /* PF=0 */
13373 case UNORDERED: /* PF=1 */
13374 case UNEQ: /* EQ - ZF=1 */
13375 case UNLT: /* LTU - CF=1 */
13376 case UNLE: /* LEU - CF=1 | ZF=1 */
13377 case LTGT: /* EQ - ZF=0 */
/* Ordered comparisons that would mis-fire on NaN get an UNORDERED
   bypass branch around the real test.  */
13379 case LT: /* LTU - CF=1 - fails on unordered */
13380 *first_code = UNLT;
13381 *bypass_code = UNORDERED;
13383 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
13384 *first_code = UNLE;
13385 *bypass_code = UNORDERED;
13387 case EQ: /* EQ - ZF=1 - fails on unordered */
13388 *first_code = UNEQ;
13389 *bypass_code = UNORDERED;
/* These instead need a second branch taken when unordered.  */
13391 case NE: /* NE - ZF=0 - fails on unordered */
13392 *first_code = LTGT;
13393 *second_code = UNORDERED;
13395 case UNGE: /* GEU - CF=0 - fails on unordered */
13397 *second_code = UNORDERED;
13399 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
13401 *second_code = UNORDERED;
13404 gcc_unreachable ();
/* Without IEEE conformance we may ignore NaNs entirely.  */
13406 if (!TARGET_IEEE_FP)
13408 *second_code = UNKNOWN;
13409 *bypass_code = UNKNOWN;
13413 /* Return cost of comparison done fcom + arithmetics operations on AX.
13414 All following functions do use number of instructions as a cost metrics.
13415 In future this should be tweaked to compute bytes for optimize_size and
13416 take into account performance of various instructions on various CPUs. */
13418 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
/* Without IEEE math, all codes collapse to the cheap form (returned
   value elided in this listing).  */
13420 if (!TARGET_IEEE_FP)
13422 /* The cost of code output by ix86_expand_fp_compare. */
/* Per-code switch elided in this listing.  */
13446 gcc_unreachable ();
13450 /* Return cost of comparison done using fcomi operation.
13451 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13453 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
13455 enum rtx_code bypass_code, first_code, second_code;
13456 /* Return arbitrarily high cost when instruction is not supported - this
13457 prevents gcc from using it. */
/* (The !TARGET_CMOVE early-return is elided in this listing.)  */
13460 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* fcomi + branch = 2, plus one more insn if a second/bypass branch
   is needed.  */
13461 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
13464 /* Return cost of comparison done using sahf operation.
13465 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13467 ix86_fp_comparison_sahf_cost (enum rtx_code code)
13469 enum rtx_code bypass_code, first_code, second_code;
13470 /* Return arbitrarily high cost when instruction is not preferred - this
13471 avoids gcc from using it. */
13472 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
13474 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* fnstsw + sahf + branch = 3, plus one for an extra branch if needed.  */
13475 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
13478 /* Compute cost of the comparison done using any method.
13479 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13481 ix86_fp_comparison_cost (enum rtx_code code)
13483 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
13486 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
13487 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum of the three strategies.  */
13489 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
13490 if (min > sahf_cost)
13492 if (min > fcomi_cost)
13497 /* Return true if we should use an FCOMI instruction for this
/* True when fcomi is the cheapest strategy for CODE or its swapped
   form (the comparison may be swapped later by the preparers).  */
13501 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
13503 enum rtx_code swapped_code = swap_condition (code);
13505 return ((ix86_fp_comparison_cost (code)
13506 == ix86_fp_comparison_fcomi_cost (code))
13507 || (ix86_fp_comparison_cost (swapped_code)
13508 == ix86_fp_comparison_fcomi_cost (swapped_code)));
13511 /* Swap, force into registers, or otherwise massage the two operands
13512 to a fp comparison. The operands are updated in place; the new
13513 comparison code is returned. */
13515 static enum rtx_code
13516 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
13518 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
13519 rtx op0 = *pop0, op1 = *pop1;
13520 enum machine_mode op_mode = GET_MODE (op0);
13521 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
13523 /* All of the unordered compare instructions only work on registers.
13524 The same is true of the fcomi compare instructions. The XFmode
13525 compare instructions require registers except when comparing
13526 against zero or when converting operand 1 from fixed point to
/* (Guard's first condition elided in listing — applies on the x87
   path only.)  */
13530 && (fpcmp_mode == CCFPUmode
13531 || (op_mode == XFmode
13532 && ! (standard_80387_constant_p (op0) == 1
13533 || standard_80387_constant_p (op1) == 1)
13534 && GET_CODE (op1) != FLOAT)
13535 || ix86_use_fcomi_compare (code)))
13537 op0 = force_reg (op_mode, op0);
13538 op1 = force_reg (op_mode, op1);
13542 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
13543 things around if they appear profitable, otherwise force op0
13544 into a register. */
/* Swap when op0 is a non-standard constant and op1 isn't.  */
13546 if (standard_80387_constant_p (op0) == 0
13548 && ! (standard_80387_constant_p (op1) == 0
13552 tmp = op0, op0 = op1, op1 = tmp;
13553 code = swap_condition (code);
13557 op0 = force_reg (op_mode, op0);
13559 if (CONSTANT_P (op1))
/* Standard x87 constants (0.0/1.0) can stay; others go to memory.  */
13561 int tmp = standard_80387_constant_p (op1);
13563 op1 = validize_mem (force_const_mem (op_mode, op1));
13567 op1 = force_reg (op_mode, op1);
13570 op1 = force_reg (op_mode, op1);
13574 /* Try to rearrange the comparison to make it cheaper. */
13575 if (ix86_fp_comparison_cost (code)
13576 > ix86_fp_comparison_cost (swap_condition (code))
13577 && (REG_P (op1) || can_create_pseudo_p ()))
13580 tmp = op0, op0 = op1, op1 = tmp;
13581 code = swap_condition (code);
13583 op0 = force_reg (op_mode, op0);
13591 /* Convert comparison codes we use to represent FP comparison to integer
13592 code that will result in proper branch. Return UNKNOWN if no such code
/* (Mapping switch is elided in this listing.)  */
13596 ix86_fp_compare_code_to_integer (enum rtx_code code)
13625 /* Generate insn patterns to do a floating point compare of OPERANDS. */
13628 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
13629 rtx *second_test, rtx *bypass_test)
13631 enum machine_mode fpcmp_mode, intcmp_mode;
13633 int cost = ix86_fp_comparison_cost (code);
13634 enum rtx_code bypass_code, first_code, second_code;
13636 fpcmp_mode = ix86_fp_compare_mode (code);
13637 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
13640 *second_test = NULL_RTX;
13642 *bypass_test = NULL_RTX;
13644 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13646 /* Do fcomi/sahf based test when profitable. */
13647 if (ix86_fp_comparison_arithmetics_cost (code) > cost
13648 && (bypass_code == UNKNOWN || bypass_test)
13649 && (second_code == UNKNOWN || second_test))
13651 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13652 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: needs a scratch HImode register for fnstsw.  */
13658 gcc_assert (TARGET_SAHF);
13661 scratch = gen_reg_rtx (HImode);
13662 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
13664 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
13667 /* The FP codes work out to act like unsigned. */
13668 intcmp_mode = fpcmp_mode;
13670 if (bypass_code != UNKNOWN)
13671 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
13672 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13674 if (second_code != UNKNOWN)
13675 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
13676 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Arithmetic fallback: fnstsw into AX, then test/and/cmp bits of AH.  */
13681 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
13682 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13683 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
13685 scratch = gen_reg_rtx (HImode);
13686 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
13688 /* In the unordered case, we have to check C2 for NaN's, which
13689 doesn't happen to work out to anything nice combination-wise.
13690 So do some bit twiddling on the value we've got in AH to come
13691 up with an appropriate set of condition codes. */
/* Status-word bits tested below: 0x01=C0(carry), 0x04=C2, 0x40=C3(zero),
   0x45 = C0|C2|C3.  Per-code switch labels are elided in this listing.  */
13693 intcmp_mode = CCNOmode;
13698 if (code == GT || !TARGET_IEEE_FP)
13700 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13705 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13706 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13707 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
13708 intcmp_mode = CCmode;
13714 if (code == LT && TARGET_IEEE_FP)
13716 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13717 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
13718 intcmp_mode = CCmode;
13723 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
13729 if (code == GE || !TARGET_IEEE_FP)
13731 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
13736 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13737 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13744 if (code == LE && TARGET_IEEE_FP)
13746 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13747 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13748 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13749 intcmp_mode = CCmode;
13754 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13760 if (code == EQ && TARGET_IEEE_FP)
13762 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13763 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13764 intcmp_mode = CCmode;
13769 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13776 if (code == NE && TARGET_IEEE_FP)
13778 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13779 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13785 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13791 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13795 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13800 gcc_unreachable ();
13804 /* Return the test that should be put into the flags user, i.e.
13805 the bcc, scc, or cmov instruction. */
13806 return gen_rtx_fmt_ee (code, VOIDmode,
13807 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of ix86_compare_op0/op1 with CODE, dispatching to
   the FP or integer expander; returns the flags-user test rtx.  */
13812 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
13815 op0 = ix86_compare_op0;
13816 op1 = ix86_compare_op1;
13819 *second_test = NULL_RTX;
13821 *bypass_test = NULL_RTX;
/* If a compare insn was already emitted, just build the test against
   its flags result.  */
13823 if (ix86_compare_emitted)
13825 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
13826 ix86_compare_emitted = NULL_RTX;
13828 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
13830 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
13831 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
13832 second_test, bypass_test);
13835 ret = ix86_expand_int_compare (code, op0, op1);
13840 /* Return true if the CODE will result in nontrivial jump sequence. */
13842 ix86_fp_jump_nontrivial_p (enum rtx_code code)
13844 enum rtx_code bypass_code, first_code, second_code;
/* Nontrivial when the comparison needs a bypass or second branch.  */
13847 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13848 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Emit a conditional branch to LABEL testing ix86_compare_op0 <CODE>
   ix86_compare_op1.  Integer modes emit the compare and jump directly;
   x87 FP may need a compound PARALLEL with clobbers; DImode (TImode on
   64-bit) is split into word-sized compares, possibly recursing.
   NOTE(review): many lines are elided in this excerpt.  */
13852 ix86_expand_branch (enum rtx_code code, rtx label)
13856 /* If we have emitted a compare insn, go straight to simple.
13857 ix86_expand_compare won't emit anything if ix86_compare_emitted
13859 if (ix86_compare_emitted)
13862 switch (GET_MODE (ix86_compare_op0))
/* Simple case: build IF_THEN_ELSE on the compare and jump.  */
13868 tmp = ix86_expand_compare (code, NULL, NULL);
13869 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13870 gen_rtx_LABEL_REF (VOIDmode, label),
13872 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Floating-point case: decide between a single-jump sequence and a
   delayed compound insn.  */
13881 enum rtx_code bypass_code, first_code, second_code;
13883 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
13884 &ix86_compare_op1);
13886 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13888 /* Check whether we will use the natural sequence with one jump. If
13889 so, we can expand jump early. Otherwise delay expansion by
13890 creating compound insn to not confuse optimizers. */
13891 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
13893 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
13894 gen_rtx_LABEL_REF (VOIDmode, label),
13895 pc_rtx, NULL_RTX, NULL_RTX);
13899 tmp = gen_rtx_fmt_ee (code, VOIDmode,
13900 ix86_compare_op0, ix86_compare_op1);
13901 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13902 gen_rtx_LABEL_REF (VOIDmode, label),
13904 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
/* Compound insn: jump plus FPSR/FLAGS clobbers; a HImode scratch is
   clobbered too when fcomi is not used.  */
13906 use_fcomi = ix86_use_fcomi_compare (code);
13907 vec = rtvec_alloc (3 + !use_fcomi);
13908 RTVEC_ELT (vec, 0) = tmp;
13910 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
13912 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
13915 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
13917 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
13926 /* Expand DImode branch into multiple compare+branch. */
13928 rtx lo[2], hi[2], label2;
13929 enum rtx_code code1, code2, code3;
13930 enum machine_mode submode;
/* Canonicalize: constant operand goes second, adjusting CODE.  */
13932 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
13934 tmp = ix86_compare_op0;
13935 ix86_compare_op0 = ix86_compare_op1;
13936 ix86_compare_op1 = tmp;
13937 code = swap_condition (code);
13939 if (GET_MODE (ix86_compare_op0) == DImode)
13941 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
13942 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
13947 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
13948 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
13952 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
13953 avoid two branches. This costs one extra insn, so disable when
13954 optimizing for size. */
13956 if ((code == EQ || code == NE)
13957 && (!optimize_insn_for_size_p ()
13958 || hi[1] == const0_rtx || lo[1] == const0_rtx))
13963 if (hi[1] != const0_rtx)
13964 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
13965 NULL_RTX, 0, OPTAB_WIDEN);
13968 if (lo[1] != const0_rtx)
13969 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
13970 NULL_RTX, 0, OPTAB_WIDEN);
13972 tmp = expand_binop (submode, ior_optab, xor1, xor0,
13973 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: branch on the OR-of-XORs against zero.  */
13975 ix86_compare_op0 = tmp;
13976 ix86_compare_op1 = const0_rtx;
13977 ix86_expand_branch (code, label);
13981 /* Otherwise, if we are doing less-than or greater-or-equal-than,
13982 op1 is a constant and the low word is zero, then we can just
13983 examine the high word. Similarly for low word -1 and
13984 less-or-equal-than or greater-than. */
13986 if (CONST_INT_P (hi[1]))
13989 case LT: case LTU: case GE: case GEU:
13990 if (lo[1] == const0_rtx)
13992 ix86_compare_op0 = hi[0];
13993 ix86_compare_op1 = hi[1];
13994 ix86_expand_branch (code, label);
13998 case LE: case LEU: case GT: case GTU:
13999 if (lo[1] == constm1_rtx)
14001 ix86_compare_op0 = hi[0];
14002 ix86_compare_op1 = hi[1];
14003 ix86_expand_branch (code, label);
14011 /* Otherwise, we need two or three jumps. */
14013 label2 = gen_label_rtx ();
14016 code2 = swap_condition (code);
14017 code3 = unsigned_condition (code);
/* Pick the high-word taken/fallthrough codes; EQ/NE drop one jump.  */
14021 case LT: case GT: case LTU: case GTU:
14024 case LE: code1 = LT; code2 = GT; break;
14025 case GE: code1 = GT; code2 = LT; break;
14026 case LEU: code1 = LTU; code2 = GTU; break;
14027 case GEU: code1 = GTU; code2 = LTU; break;
14029 case EQ: code1 = UNKNOWN; code2 = NE; break;
14030 case NE: code2 = UNKNOWN; break;
14033 gcc_unreachable ();
14038 * if (hi(a) < hi(b)) goto true;
14039 * if (hi(a) > hi(b)) goto false;
14040 * if (lo(a) < lo(b)) goto true;
14044 ix86_compare_op0 = hi[0];
14045 ix86_compare_op1 = hi[1];
14047 if (code1 != UNKNOWN)
14048 ix86_expand_branch (code1, label);
14049 if (code2 != UNKNOWN)
14050 ix86_expand_branch (code2, label2);
/* Low words decide ties, always with the unsigned condition.  */
14052 ix86_compare_op0 = lo[0];
14053 ix86_compare_op1 = lo[1];
14054 ix86_expand_branch (code3, label);
14056 if (code2 != UNKNOWN)
14057 emit_label (label2);
14062 gcc_unreachable ();
14066 /* Split branch based on floating point condition. */
/* CODE compares OP1 with OP2; TARGET1/TARGET2 are the taken/fallthrough
   targets (one of them pc_rtx); TMP is an optional scratch passed down
   to the FP compare expander; PUSHED, when set, is a stack temporary to
   release afterwards.  Up to three conditional jumps are emitted
   (bypass, main, second), each tagged with a REG_BR_PROB note when
   split_branch_probability is known.  */
14068 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14069 rtx target1, rtx target2, rtx tmp, rtx pushed)
14071 rtx second, bypass;
14072 rtx label = NULL_RTX;
14074 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the taken target is TARGET1, reversing CODE.  */
14077 if (target2 != pc_rtx)
14080 code = reverse_condition_maybe_unordered (code);
14085 condition = ix86_expand_fp_compare (code, op1, op2,
14086 tmp, &second, &bypass);
14088 /* Remove pushed operand from stack. */
14090 ix86_free_from_memory (GET_MODE (pushed));
14092 if (split_branch_probability >= 0)
14094 /* Distribute the probabilities across the jumps.
14095 Assume the BYPASS and SECOND to be always test
14097 probability = split_branch_probability;
14099 /* Value of 1 is low enough to make no need for probability
14100 to be updated. Later we may run some experiments and see
14101 if unordered values are more frequent in practice. */
14103 bypass_probability = 1;
14105 second_probability = 1;
/* Bypass jump skips over the main conditional jump.  */
14107 if (bypass != NULL_RTX)
14109 label = gen_label_rtx ();
14110 i = emit_jump_insn (gen_rtx_SET
14112 gen_rtx_IF_THEN_ELSE (VOIDmode,
14114 gen_rtx_LABEL_REF (VOIDmode,
14117 if (bypass_probability >= 0)
14119 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14120 GEN_INT (bypass_probability),
/* Main conditional jump.  */
14123 i = emit_jump_insn (gen_rtx_SET
14125 gen_rtx_IF_THEN_ELSE (VOIDmode,
14126 condition, target1, target2)));
14127 if (probability >= 0)
14129 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14130 GEN_INT (probability),
/* Optional second jump for comparisons needing two flag tests.  */
14132 if (second != NULL_RTX)
14134 i = emit_jump_insn (gen_rtx_SET
14136 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14138 if (second_probability >= 0)
14140 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14141 GEN_INT (second_probability),
14144 if (label != NULL_RTX)
14145 emit_label (label);
/* Expand a setcc of the global compare operands into QImode DEST.
   Returns 1 (DONE) on success, 0 (FAIL) for doubleword modes that the
   caller must handle differently.  Extra FP tests are combined into
   DEST with and/or of a second QImode flag.  */
14149 ix86_expand_setcc (enum rtx_code code, rtx dest)
14151 rtx ret, tmp, tmpreg, equiv;
14152 rtx second_test, bypass_test;
14154 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14155 return 0; /* FAIL */
14157 gcc_assert (GET_MODE (dest) == QImode);
14159 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14160 PUT_MODE (ret, QImode);
14165 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* FP compares may need a second flag test; materialize it in a second
   QImode pseudo and combine.  A bypass test is used inverted.  */
14166 if (bypass_test || second_test)
14168 rtx test = second_test;
14170 rtx tmp2 = gen_reg_rtx (QImode);
14173 gcc_assert (!second_test);
14174 test = bypass_test;
14176 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14178 PUT_MODE (test, QImode);
14179 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14182 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14184 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14187 /* Attach a REG_EQUAL note describing the comparison result. */
14188 if (ix86_compare_op0 && ix86_compare_op1)
14190 equiv = simplify_gen_relational (code, QImode,
14191 GET_MODE (ix86_compare_op0),
14192 ix86_compare_op0, ix86_compare_op1);
14193 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14196 return 1; /* DONE */
14199 /* Expand comparison setting or clearing carry flag. Return true when
14200 successful and set pop for the operation. */
/* Only LTU/GEU map directly onto the carry flag; other codes are
   massaged (operand swaps, +/-1 constant adjustments, sign-bit masks)
   until they do, or the function fails.  */
14202 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14204 enum machine_mode mode =
14205 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14207 /* Do not handle DImode compares that go through special path. */
14208 if (mode == (TARGET_64BIT ? TImode : DImode))
14211 if (SCALAR_FLOAT_MODE_P (mode))
14213 rtx second_test = NULL, bypass_test = NULL;
14214 rtx compare_op, compare_seq;
14216 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14218 /* Shortcut: following common codes never translate
14219 into carry flag compares. */
14220 if (code == EQ || code == NE || code == UNEQ || code == LTGT
14221 || code == ORDERED || code == UNORDERED)
14224 /* These comparisons require zero flag; swap operands so they won't. */
14225 if ((code == GT || code == UNLE || code == LE || code == UNGT)
14226 && !TARGET_IEEE_FP)
14231 code = swap_condition (code);
14234 /* Try to expand the comparison and verify that we end up with
14235 carry flag based comparison. This fails to be true only when
14236 we decide to expand comparison using arithmetic that is not
14237 too common scenario. */
14239 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14240 &second_test, &bypass_test);
14241 compare_seq = get_insns ();
14244 if (second_test || bypass_test)
14247 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14248 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14249 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
14251 code = GET_CODE (compare_op);
14253 if (code != LTU && code != GEU)
14256 emit_insn (compare_seq);
14261 if (!INTEGRAL_MODE_P (mode))
14270 /* Convert a==0 into (unsigned)a<1. */
14273 if (op1 != const0_rtx)
14276 code = (code == EQ ? LTU : GEU);
14279 /* Convert a>b into b<a or a>=b-1. */
14282 if (CONST_INT_P (op1))
14284 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
14285 /* Bail out on overflow. We still can swap operands but that
14286 would force loading of the constant into register. */
14287 if (op1 == const0_rtx
14288 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
14290 code = (code == GTU ? GEU : LTU);
14297 code = (code == GTU ? LTU : GEU);
14301 /* Convert a>=0 into (unsigned)a<0x80000000. */
14304 if (mode == DImode || op1 != const0_rtx)
14306 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14307 code = (code == LT ? GEU : LTU);
14311 if (mode == DImode || op1 != constm1_rtx)
14313 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14314 code = (code == LE ? GEU : LTU);
14320 /* Swapping operands may cause constant to appear as first operand. */
14321 if (!nonimmediate_operand (op0, VOIDmode))
14323 if (!can_create_pseudo_p ())
14325 op0 = force_reg (mode, op0);
14327 ix86_compare_op0 = op0;
14328 ix86_compare_op1 = op1;
14329 *pop = ix86_expand_compare (code, NULL, NULL);
14330 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move: operands[0] = operands[1]
   (a comparison of the global compare operands) ? operands[2]
   : operands[3].  Returns 1 (DONE) on success, 0 (FAIL) to let the
   caller fall back.  Tries, in order: sbb/setcc arithmetic for
   two-constant arms, lea-based forms, setcc+and+add sequences when
   cmov is unavailable, constant+variable masking, and finally real
   cmov insns.  */
14335 ix86_expand_int_movcc (rtx operands[])
14337 enum rtx_code code = GET_CODE (operands[1]), compare_code;
14338 rtx compare_seq, compare_op;
14339 rtx second_test, bypass_test;
14340 enum machine_mode mode = GET_MODE (operands[0]);
14341 bool sign_bit_compare_p = false;
14344 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14345 compare_seq = get_insns ();
14348 compare_code = GET_CODE (compare_op);
/* x < 0 / x >= 0 (and the -1 variants) can be done with shifts/sbb.  */
14350 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
14351 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
14352 sign_bit_compare_p = true;
14354 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
14355 HImode insns, we'd be swallowed in word prefix ops. */
14357 if ((mode != HImode || TARGET_FAST_PREFIX)
14358 && (mode != (TARGET_64BIT ? TImode : DImode))
14359 && CONST_INT_P (operands[2])
14360 && CONST_INT_P (operands[3]))
14362 rtx out = operands[0];
14363 HOST_WIDE_INT ct = INTVAL (operands[2]);
14364 HOST_WIDE_INT cf = INTVAL (operands[3]);
14365 HOST_WIDE_INT diff;
14368 /* Sign bit compares are better done using shifts than we do by using
14370 if (sign_bit_compare_p
14371 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14372 ix86_compare_op1, &compare_op))
14374 /* Detect overlap between destination and compare sources. */
14377 if (!sign_bit_compare_p)
14379 bool fpcmp = false;
14381 compare_code = GET_CODE (compare_op);
14383 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14384 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14387 compare_code = ix86_fp_compare_code_to_integer (compare_code);
14390 /* To simplify rest of code, restrict to the GEU case. */
14391 if (compare_code == LTU)
14393 HOST_WIDE_INT tmp = ct;
14396 compare_code = reverse_condition (compare_code);
14397 code = reverse_condition (code);
14402 PUT_CODE (compare_op,
14403 reverse_condition_maybe_unordered
14404 (GET_CODE (compare_op)));
14406 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14410 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
14411 || reg_overlap_mentioned_p (out, ix86_compare_op1))
14412 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag via sbb.  */
14414 if (mode == DImode)
14415 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
14417 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
14421 if (code == GT || code == GE)
14422 code = reverse_condition (code);
14425 HOST_WIDE_INT tmp = ct;
14430 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
14431 ix86_compare_op1, VOIDmode, 0, -1);
/* Turn the 0/-1 mask into the requested ct/cf pair by add, or,
   not+add, or not/and+add depending on their relationship.  */
14444 tmp = expand_simple_binop (mode, PLUS,
14446 copy_rtx (tmp), 1, OPTAB_DIRECT);
14457 tmp = expand_simple_binop (mode, IOR,
14459 copy_rtx (tmp), 1, OPTAB_DIRECT);
14461 else if (diff == -1 && ct)
14471 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14473 tmp = expand_simple_binop (mode, PLUS,
14474 copy_rtx (tmp), GEN_INT (cf),
14475 copy_rtx (tmp), 1, OPTAB_DIRECT);
14483 * andl cf - ct, dest
14493 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14496 tmp = expand_simple_binop (mode, AND,
14498 gen_int_mode (cf - ct, mode),
14499 copy_rtx (tmp), 1, OPTAB_DIRECT);
14501 tmp = expand_simple_binop (mode, PLUS,
14502 copy_rtx (tmp), GEN_INT (ct),
14503 copy_rtx (tmp), 1, OPTAB_DIRECT);
14506 if (!rtx_equal_p (tmp, out))
14507 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
14509 return 1; /* DONE */
14514 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14517 tmp = ct, ct = cf, cf = tmp;
14520 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14522 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14524 /* We may be reversing unordered compare to normal compare, that
14525 is not valid in general (we may convert non-trapping condition
14526 to trapping one), however on i386 we currently emit all
14527 comparisons unordered. */
14528 compare_code = reverse_condition_maybe_unordered (compare_code);
14529 code = reverse_condition_maybe_unordered (code);
14533 compare_code = reverse_condition (compare_code);
14534 code = reverse_condition (code);
14538 compare_code = UNKNOWN;
14539 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
14540 && CONST_INT_P (ix86_compare_op1))
14542 if (ix86_compare_op1 == const0_rtx
14543 && (code == LT || code == GE))
14544 compare_code = code;
14545 else if (ix86_compare_op1 == constm1_rtx)
14549 else if (code == GT)
14554 /* Optimize dest = (op0 < 0) ? -1 : cf. */
14555 if (compare_code != UNKNOWN
14556 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
14557 && (cf == -1 || ct == -1))
14559 /* If lea code below could be used, only optimize
14560 if it results in a 2 insn sequence. */
14562 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
14563 || diff == 3 || diff == 5 || diff == 9)
14564 || (compare_code == LT && ct == -1)
14565 || (compare_code == GE && cf == -1))
14568 * notl op1 (if necessary)
14576 code = reverse_condition (code);
14579 out = emit_store_flag (out, code, ix86_compare_op0,
14580 ix86_compare_op1, VOIDmode, 0, -1);
14582 out = expand_simple_binop (mode, IOR,
14584 out, 1, OPTAB_DIRECT);
14585 if (out != operands[0])
14586 emit_move_insn (operands[0], out);
14588 return 1; /* DONE */
/* lea-form: dest = cf + setcc * (ct - cf) when diff is a valid
   lea scale/scale+1.  */
14593 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
14594 || diff == 3 || diff == 5 || diff == 9)
14595 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
14597 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
14603 * lea cf(dest*(ct-cf)),dest
14607 * This also catches the degenerate setcc-only case.
14613 out = emit_store_flag (out, code, ix86_compare_op0,
14614 ix86_compare_op1, VOIDmode, 0, 1);
14617 /* On x86_64 the lea instruction operates on Pmode, so we need
14618 to get arithmetics done in proper mode to match. */
14620 tmp = copy_rtx (out);
14624 out1 = copy_rtx (out);
14625 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
14629 tmp = gen_rtx_PLUS (mode, tmp, out1);
14635 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
14638 if (!rtx_equal_p (tmp, out))
14641 out = force_operand (tmp, copy_rtx (out));
14643 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
14645 if (!rtx_equal_p (out, operands[0]))
14646 emit_move_insn (operands[0], copy_rtx (out));
14648 return 1; /* DONE */
14652 * General case: Jumpful:
14653 * xorl dest,dest cmpl op1, op2
14654 * cmpl op1, op2 movl ct, dest
14655 * setcc dest jcc 1f
14656 * decl dest movl cf, dest
14657 * andl (cf-ct),dest 1:
14660 * Size 20. Size 14.
14662 * This is reasonably steep, but branch mispredict costs are
14663 * high on modern cpus, so consider failing only if optimizing
14667 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14668 && BRANCH_COST (optimize_insn_for_speed_p (),
14673 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14678 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14680 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14682 /* We may be reversing unordered compare to normal compare,
14683 that is not valid in general (we may convert non-trapping
14684 condition to trapping one), however on i386 we currently
14685 emit all comparisons unordered. */
14686 code = reverse_condition_maybe_unordered (code);
14690 code = reverse_condition (code);
14691 if (compare_code != UNKNOWN)
14692 compare_code = reverse_condition (compare_code);
14696 if (compare_code != UNKNOWN)
14698 /* notl op1 (if needed)
14703 For x < 0 (resp. x <= -1) there will be no notl,
14704 so if possible swap the constants to get rid of the
14706 True/false will be -1/0 while code below (store flag
14707 followed by decrement) is 0/-1, so the constants need
14708 to be exchanged once more. */
14710 if (compare_code == GE || !cf)
14712 code = reverse_condition (code);
14717 HOST_WIDE_INT tmp = cf;
14722 out = emit_store_flag (out, code, ix86_compare_op0,
14723 ix86_compare_op1, VOIDmode, 0, -1);
14727 out = emit_store_flag (out, code, ix86_compare_op0,
14728 ix86_compare_op1, VOIDmode, 0, 1);
14730 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
14731 copy_rtx (out), 1, OPTAB_DIRECT);
14734 out = expand_simple_binop (mode, AND, copy_rtx (out),
14735 gen_int_mode (cf - ct, mode),
14736 copy_rtx (out), 1, OPTAB_DIRECT);
14738 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
14739 copy_rtx (out), 1, OPTAB_DIRECT);
14740 if (!rtx_equal_p (out, operands[0]))
14741 emit_move_insn (operands[0], copy_rtx (out));
14743 return 1; /* DONE */
14747 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14749 /* Try a few things more with specific constants and a variable. */
14752 rtx var, orig_out, out, tmp;
14754 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
14755 return 0; /* FAIL */
14757 /* If one of the two operands is an interesting constant, load a
14758 constant with the above and mask it in with a logical operation. */
14760 if (CONST_INT_P (operands[2]))
14763 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
14764 operands[3] = constm1_rtx, op = and_optab;
14765 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
14766 operands[3] = const0_rtx, op = ior_optab;
14768 return 0; /* FAIL */
14770 else if (CONST_INT_P (operands[3]))
14773 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
14774 operands[2] = constm1_rtx, op = and_optab;
/* Guard must test the variable operand (operands[2]), mirroring the
   CONST_INT_P (operands[2]) branch above; testing operands[3] here was
   vacuously true since INTVAL (operands[3]) == -1.  */
14775 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
14776 operands[2] = const0_rtx, op = ior_optab;
14778 return 0; /* FAIL */
14781 return 0; /* FAIL */
14783 orig_out = operands[0];
14784 tmp = gen_reg_rtx (mode);
14787 /* Recurse to get the constant loaded. */
14788 if (ix86_expand_int_movcc (operands) == 0)
14789 return 0; /* FAIL */
14791 /* Mask in the interesting variable. */
14792 out = expand_binop (mode, op, var, tmp, orig_out, 0,
14794 if (!rtx_equal_p (out, orig_out))
14795 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
14797 return 1; /* DONE */
14801 * For comparison with above,
/* cmov path: force operands into registers as required, avoiding
   overlap with the destination when extra tests are needed.  */
14811 if (! nonimmediate_operand (operands[2], mode))
14812 operands[2] = force_reg (mode, operands[2]);
14813 if (! nonimmediate_operand (operands[3], mode))
14814 operands[3] = force_reg (mode, operands[3]);
14816 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
14818 rtx tmp = gen_reg_rtx (mode);
14819 emit_move_insn (tmp, operands[3]);
14822 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
14824 rtx tmp = gen_reg_rtx (mode);
14825 emit_move_insn (tmp, operands[2]);
14829 if (! register_operand (operands[2], VOIDmode)
14831 || ! register_operand (operands[3], VOIDmode)))
14832 operands[2] = force_reg (mode, operands[2]);
14835 && ! register_operand (operands[3], VOIDmode))
14836 operands[3] = force_reg (mode, operands[3]);
14838 emit_insn (compare_seq);
14839 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14840 gen_rtx_IF_THEN_ELSE (mode,
14841 compare_op, operands[2],
14844 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
14845 gen_rtx_IF_THEN_ELSE (mode,
14847 copy_rtx (operands[3]),
14848 copy_rtx (operands[0]))));
14850 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
14851 gen_rtx_IF_THEN_ELSE (mode,
14853 copy_rtx (operands[2]),
14854 copy_rtx (operands[0]))));
14856 return 1; /* DONE */
14859 /* Swap, force into registers, or otherwise massage the two operands
14860 to an sse comparison with a mask result. Thus we differ a bit from
14861 ix86_prepare_fp_compare_args which expects to produce a flags result.
14863 The DEST operand exists to help determine whether to commute commutative
14864 operators. The POP0/POP1 operands are updated in place. The new
14865 comparison code is returned, or UNKNOWN if not implementable. */
14867 static enum rtx_code
14868 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
14869 rtx *pop0, rtx *pop1)
14877 /* We have no LTGT as an operator. We could implement it with
14878 NE & ORDERED, but this requires an extra temporary. It's
14879 not clear that it's worth it. */
14886 /* These are supported directly. */
14893 /* For commutative operators, try to canonicalize the destination
14894 operand to be first in the comparison - this helps reload to
14895 avoid extra moves. */
14896 if (!dest || !rtx_equal_p (dest, *pop1))
14904 /* These are not supported directly. Swap the comparison operands
14905 to transform into something that is supported. */
14909 code = swap_condition (code);
14913 gcc_unreachable ();
14919 /* Detect conditional moves that exactly match min/max operational
14920 semantics. Note that this is IEEE safe, as long as we don't
14921 interchange the operands.
14923 Returns FALSE if this conditional move doesn't match a MIN/MAX,
14924 and TRUE if the operation is successful and instructions are emitted. */
14927 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
14928 rtx cmp_op1, rtx if_true, rtx if_false)
14930 enum machine_mode mode;
14936 else if (code == UNGE)
14939 if_true = if_false;
/* The cmov matches min/max only if the compare operands equal the
   chosen arms (in either order; the reversed order flips min/max).  */
14945 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
14947 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
14952 mode = GET_MODE (dest);
14954 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
14955 but MODE may be a vector mode and thus not appropriate. */
14956 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Strict-IEEE case: use the operand-order-preserving unspec forms.  */
14958 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
14961 if_true = force_reg (mode, if_true);
14962 v = gen_rtvec (2, if_true, if_false);
14963 tmp = gen_rtx_UNSPEC (mode, v, u);
14967 code = is_min ? SMIN : SMAX;
14968 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
14971 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
14975 /* Expand an sse vector comparison. Return the register with the result. */
14978 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
14979 rtx op_true, rtx op_false)
14981 enum machine_mode mode = GET_MODE (dest);
14984 cmp_op0 = force_reg (mode, cmp_op0);
14985 if (!nonimmediate_operand (cmp_op1, mode))
14986 cmp_op1 = force_reg (mode, cmp_op1);
/* Use a fresh register when DEST overlaps any input, so the mask
   does not clobber values still needed by the caller.  */
14989 || reg_overlap_mentioned_p (dest, op_true)
14990 || reg_overlap_mentioned_p (dest, op_false))
14991 dest = gen_reg_rtx (mode);
14993 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
14994 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14999 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15000 operations. This is used for both scalar and vector conditional moves. */
15003 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15005 enum machine_mode mode = GET_MODE (dest);
/* One arm zero: a single and (or andnot) with the mask suffices.  */
15008 if (op_false == CONST0_RTX (mode))
15010 op_true = force_reg (mode, op_true);
15011 x = gen_rtx_AND (mode, cmp, op_true);
15012 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15014 else if (op_true == CONST0_RTX (mode))
15016 op_false = force_reg (mode, op_false);
15017 x = gen_rtx_NOT (mode, cmp);
15018 x = gen_rtx_AND (mode, x, op_false);
15019 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a real conditional-move (pcmov) pattern.  */
15021 else if (TARGET_SSE5)
15023 rtx pcmov = gen_rtx_SET (mode, dest,
15024 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* Generic case: (op_true & cmp) | (op_false & ~cmp).  */
15031 op_true = force_reg (mode, op_true);
15032 op_false = force_reg (mode, op_false);
15034 t2 = gen_reg_rtx (mode);
15036 t3 = gen_reg_rtx (mode);
15040 x = gen_rtx_AND (mode, op_true, cmp);
15041 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15043 x = gen_rtx_NOT (mode, cmp);
15044 x = gen_rtx_AND (mode, x, op_false);
15045 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15047 x = gen_rtx_IOR (mode, t3, t2);
15048 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15052 /* Expand a floating-point conditional move. Return true if successful. */
15055 ix86_expand_fp_movcc (rtx operands[])
15057 enum machine_mode mode = GET_MODE (operands[0]);
15058 enum rtx_code code = GET_CODE (operands[1]);
15059 rtx tmp, compare_op, second_test, bypass_test;
/* SSE math path: compile to mask + logical ops (or min/max) rather
   than fcmov.  */
15061 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15063 enum machine_mode cmode;
15065 /* Since we've no cmove for sse registers, don't force bad register
15066 allocation just to gain access to it. Deny movcc when the
15067 comparison mode doesn't match the move mode. */
15068 cmode = GET_MODE (ix86_compare_op0);
15069 if (cmode == VOIDmode)
15070 cmode = GET_MODE (ix86_compare_op1);
15074 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15076 &ix86_compare_op1);
15077 if (code == UNKNOWN)
15080 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15081 ix86_compare_op1, operands[2],
15085 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15086 ix86_compare_op1, operands[2], operands[3]);
15087 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15091 /* The floating point conditional move instructions don't directly
15092 support conditions resulting from a signed integer comparison. */
15094 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15096 /* The floating point conditional move instructions don't directly
15097 support signed integer comparisons. */
/* Fall back: compute the condition into a QImode flag and re-compare
   that flag against zero, which fcmov can use.  */
15099 if (!fcmov_comparison_operator (compare_op, VOIDmode))
15101 gcc_assert (!second_test && !bypass_test);
15102 tmp = gen_reg_rtx (QImode);
15103 ix86_expand_setcc (code, tmp);
15105 ix86_compare_op0 = tmp;
15106 ix86_compare_op1 = const0_rtx;
15107 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15109 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15111 tmp = gen_reg_rtx (mode);
15112 emit_move_insn (tmp, operands[3]);
15115 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15117 tmp = gen_reg_rtx (mode);
15118 emit_move_insn (tmp, operands[2]);
/* Main cmov, then optional bypass/second cmovs refining the result.  */
15122 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15123 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15124 operands[2], operands[3])));
15126 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15127 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15128 operands[3], operands[0])));
15130 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15131 gen_rtx_IF_THEN_ELSE (mode, second_test,
15132 operands[2], operands[0])));
15137 /* Expand a floating-point vector conditional move; a vcond operation
15138 rather than a movcc operation. */
/* operands[0] = operands[4] <op3> operands[5] ? operands[1]
   : operands[2]; tries min/max first, then mask + blend.  */
15141 ix86_expand_fp_vcond (rtx operands[])
15143 enum rtx_code code = GET_CODE (operands[3]);
15146 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15147 &operands[4], &operands[5]);
15148 if (code == UNKNOWN)
15151 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15152 operands[5], operands[1], operands[2]))
15155 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15156 operands[1], operands[2]);
15157 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15161 /* Expand a signed/unsigned integral vector conditional move. */
15164 ix86_expand_int_vcond (rtx operands[])
15166 enum machine_mode mode = GET_MODE (operands[0]);
15167 enum rtx_code code = GET_CODE (operands[3]);
/* NEGATE records when canonicalization inverted the condition, so the
   true/false arms are swapped at the end via 1+negate / 2-negate.  */
15168 bool negate = false;
15171 cop0 = operands[4];
15172 cop1 = operands[5];
15174 /* SSE5 supports all of the comparisons on all vector int types. */
15177 /* Canonicalize the comparison to EQ, GT, GTU. */
15188 code = reverse_condition (code);
15194 code = reverse_condition (code);
15200 code = swap_condition (code);
15201 x = cop0, cop0 = cop1, cop1 = x;
15205 gcc_unreachable ();
15208 /* Only SSE4.1/SSE4.2 supports V2DImode. */
15209 if (mode == V2DImode)
15214 /* SSE4.1 supports EQ. */
15215 if (!TARGET_SSE4_1)
15221 /* SSE4.2 supports GT/GTU. */
15222 if (!TARGET_SSE4_2)
15227 gcc_unreachable ();
15231 /* Unsigned parallel compare is not supported by the hardware. Play some
15232 tricks to turn this into a signed comparison against 0. */
15235 cop0 = force_reg (mode, cop0);
15244 /* Perform a parallel modulo subtraction. */
15245 t1 = gen_reg_rtx (mode);
15246 emit_insn ((mode == V4SImode
15248 : gen_subv2di3) (t1, cop0, cop1));
15250 /* Extract the original sign bit of op0. */
15251 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
15253 t2 = gen_reg_rtx (mode);
15254 emit_insn ((mode == V4SImode
15256 : gen_andv2di3) (t2, cop0, mask));
15258 /* XOR it back into the result of the subtraction. This results
15259 in the sign bit set iff we saw unsigned underflow. */
15260 x = gen_reg_rtx (mode);
15261 emit_insn ((mode == V4SImode
15263 : gen_xorv2di3) (x, t1, t2));
15271 /* Perform a parallel unsigned saturating subtraction. */
15272 x = gen_reg_rtx (mode);
15273 emit_insn (gen_rtx_SET (VOIDmode, x,
15274 gen_rtx_US_MINUS (mode, cop0, cop1)));
15281 gcc_unreachable ();
/* After the subtraction tricks, compare the result against zero.  */
15285 cop1 = CONST0_RTX (mode);
15289 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
15290 operands[1+negate], operands[2-negate]);
15292 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
15293 operands[2-negate]);
15297 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
15298 true if we should do zero extension, else sign extension. HIGH_P is
15299 true if we want the N/2 high elements, else the low elements. */
15302 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15304 enum machine_mode imode = GET_MODE (operands[1]);
15305 rtx (*unpack)(rtx, rtx, rtx);
/* Select the interleave pattern for the element width and half.  */
15312 unpack = gen_vec_interleave_highv16qi;
15314 unpack = gen_vec_interleave_lowv16qi;
15318 unpack = gen_vec_interleave_highv8hi;
15320 unpack = gen_vec_interleave_lowv8hi;
15324 unpack = gen_vec_interleave_highv4si;
15326 unpack = gen_vec_interleave_lowv4si;
15329 gcc_unreachable ();
15332 dest = gen_lowpart (imode, operands[0]);
/* SE supplies the extension bits interleaved with the data: zero for
   zero-extension, otherwise a (0 > x) mask replicating the sign.  */
15335 se = force_reg (imode, CONST0_RTX (imode));
15337 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
15338 operands[1], pc_rtx, pc_rtx);
15340 emit_insn (unpack (dest, operands[1], se));
15343 /* This function performs the same task as ix86_expand_sse_unpack,
15344 but with SSE4.1 instructions. */
15347 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15349 enum machine_mode imode = GET_MODE (operands[1]);
15350 rtx (*unpack)(rtx, rtx);
/* SSE4.1 provides direct extension insns (pmovsx/pmovzx); pick the
   expander by element width and signedness.  */
15357 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
15359 unpack = gen_sse4_1_extendv8qiv8hi2;
15363 unpack = gen_sse4_1_zero_extendv4hiv4si2;
15365 unpack = gen_sse4_1_extendv4hiv4si2;
15369 unpack = gen_sse4_1_zero_extendv2siv2di2;
15371 unpack = gen_sse4_1_extendv2siv2di2;
15374 gcc_unreachable ();
15377 dest = operands[0];
/* The extension insns operate on the low half of the source, so for
   HIGH_P move the upper 8 bytes down first with a TImode shift.  */
15380 /* Shift higher 8 bytes to lower 8 bytes. */
15381 src = gen_reg_rtx (imode);
15382 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
15383 gen_lowpart (TImode, operands[1]),
15389 emit_insn (unpack (dest, src));
15392 /* This function performs the same task as ix86_expand_sse_unpack,
15393 but with sse5 instructions. */
15396 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15398 enum machine_mode imode = GET_MODE (operands[1]);
15399 int pperm_bytes[16];
/* H is the byte offset of the half being widened (8 = high half).  */
15401 int h = (high_p) ? 8 : 0;
15404 rtvec v = rtvec_alloc (16);
15407 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: each result halfword is a source byte followed by
   either zero (zero-extend pattern) or its replicated sign byte.  */
15412 vs = rtvec_alloc (8);
15413 h2 = (high_p) ? 8 : 0;
15414 for (i = 0; i < 8; i++)
15416 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
15417 pperm_bytes[2*i+1] = ((unsigned_p)
15419 : PPERM_SIGN | PPERM_SRC2 | i | h);
15422 for (i = 0; i < 16; i++)
15423 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15425 for (i = 0; i < 8; i++)
15426 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15428 p = gen_rtx_PARALLEL (VOIDmode, vs);
15429 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15431 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
15433 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two source bytes followed by two extension bytes.  */
15437 vs = rtvec_alloc (4);
15438 h2 = (high_p) ? 4 : 0;
15439 for (i = 0; i < 4; i++)
15441 sign_extend = ((unsigned_p)
15443 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
15444 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
15445 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
15446 pperm_bytes[4*i+2] = sign_extend;
15447 pperm_bytes[4*i+3] = sign_extend;
15450 for (i = 0; i < 16; i++)
15451 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15453 for (i = 0; i < 4; i++)
15454 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15456 p = gen_rtx_PARALLEL (VOIDmode, vs);
15457 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15459 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
15461 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four source bytes followed by four extension bytes.  */
15465 vs = rtvec_alloc (2);
15466 h2 = (high_p) ? 2 : 0;
15467 for (i = 0; i < 2; i++)
15469 sign_extend = ((unsigned_p)
15471 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
15472 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
15473 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
15474 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
15475 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
15476 pperm_bytes[8*i+4] = sign_extend;
15477 pperm_bytes[8*i+5] = sign_extend;
15478 pperm_bytes[8*i+6] = sign_extend;
15479 pperm_bytes[8*i+7] = sign_extend;
15482 for (i = 0; i < 16; i++)
15483 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15485 for (i = 0; i < 2; i++)
15486 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15488 p = gen_rtx_PARALLEL (VOIDmode, vs);
15489 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15491 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
15493 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
15497 gcc_unreachable ();
15503 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
15504 next narrower integer vector type.  Implemented with an SSE5 pperm whose
15505 control vector selects the low bytes of each element from both sources. */
15506 ix86_expand_sse5_pack (rtx operands[3])
15508 enum machine_mode imode = GET_MODE (operands[0]);
15509 int pperm_bytes[16];
15511 rtvec v = rtvec_alloc (16);
15513 rtx op0 = operands[0];
15514 rtx op1 = operands[1];
15515 rtx op2 = operands[2];
/* V8HI pair -> V16QI: take byte 0 of every halfword of each source.  */
15520 for (i = 0; i < 8; i++)
15522 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
15523 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
15526 for (i = 0; i < 16; i++)
15527 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15529 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15530 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V4SI pair -> V8HI: take the low two bytes of every word.  */
15534 for (i = 0; i < 4; i++)
15536 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
15537 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
15538 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
15539 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
15542 for (i = 0; i < 16; i++)
15543 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15545 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15546 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V2DI pair -> V4SI: take the low four bytes of every doubleword.  */
15550 for (i = 0; i < 2; i++)
15552 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
15553 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
15554 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
15555 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
15556 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
15557 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
15558 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
15559 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
15562 for (i = 0; i < 16; i++)
15563 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15565 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15566 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
15570 gcc_unreachable ();
15576 /* Expand conditional increment or decrement using adc/sbb instructions.
15577 The default case using setcc followed by the conditional move can be
15578 done by generic code. */
15580 ix86_expand_int_addcc (rtx operands[])
15582 enum rtx_code code = GET_CODE (operands[1]);
15584 rtx val = const0_rtx;
15585 bool fpcmp = false;
15586 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +1 / -1 adjustments can be folded into the carry flag.  */
15588 if (operands[3] != const1_rtx
15589 && operands[3] != constm1_rtx)
/* Give up unless the comparison can be expressed via the carry flag.  */
15591 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15592 ix86_compare_op1, &compare_op))
15594 code = GET_CODE (compare_op);
15596 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15597 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15600 code = ix86_fp_compare_code_to_integer (code);
/* FP comparisons must preserve unordered semantics when reversed.  */
15607 PUT_CODE (compare_op,
15608 reverse_condition_maybe_unordered
15609 (GET_CODE (compare_op)));
15611 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15613 PUT_MODE (compare_op, mode);
15615 /* Construct either adc or sbb insn. */
15616 if ((code == LTU) == (operands[3] == constm1_rtx))
15618 switch (GET_MODE (operands[0]))
15621 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
15624 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
15627 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
15630 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15633 gcc_unreachable ();
15638 switch (GET_MODE (operands[0]))
15641 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
15644 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
15647 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
15650 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15653 gcc_unreachable ();
15656 return 1; /* DONE */
15660 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
15661 works for floating point parameters and nonoffsetable memories.
15662 For pushes, it returns just stack offsets; the values will be saved
15663 in the right order. Maximally four parts are generated. */
15666 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Number of parts: 32-bit pieces on 32-bit targets, 8-byte pieces on
   64-bit targets (NOTE(review): the TARGET_64BIT guard between these
   two assignments appears elided in this extraction).  */
15671 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
15673 size = (GET_MODE_SIZE (mode) + 4) / 8;
15675 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
15676 gcc_assert (size >= 2 && size <= 4);
15678 /* Optimize constant pool reference to immediates. This is used by fp
15679 moves, that force all constants to memory to allow combining. */
15680 if (MEM_P (operand) && MEM_READONLY_P (operand))
15682 rtx tmp = maybe_get_pool_constant (operand);
15687 if (MEM_P (operand) && !offsettable_memref_p (operand))
15689 /* The only non-offsetable memories we handle are pushes. */
15690 int ok = push_operand (operand, VOIDmode);
15694 operand = copy_rtx (operand);
15695 PUT_MODE (operand, Pmode);
/* For a push every part is the same auto-modifying stack reference.  */
15696 parts[0] = parts[1] = parts[2] = parts[3] = operand;
15700 if (GET_CODE (operand) == CONST_VECTOR)
15702 enum machine_mode imode = int_mode_for_mode (mode);
15703 /* Caution: if we looked through a constant pool memory above,
15704 the operand may actually have a different mode now. That's
15705 ok, since we want to pun this all the way back to an integer. */
15706 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
15707 gcc_assert (operand != NULL);
15713 if (mode == DImode)
15714 split_di (&operand, 1, &parts[0], &parts[1]);
15719 if (REG_P (operand))
15721 gcc_assert (reload_completed);
15722 for (i = 0; i < size; i++)
15723 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i)
15725 else if (offsettable_memref_p (operand))
15727 operand = adjust_address (operand, SImode, 0);
15728 parts[0] = operand;
15729 for (i = 1; i < size; i++)
15730 parts[i] = adjust_address (operand, SImode, 4 * i);
15732 else if (GET_CODE (operand) == CONST_DOUBLE)
15737 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* Convert the FP constant to its target image and emit each 32-bit
   word as an immediate.  */
15741 real_to_target (l, &r, mode);
15742 parts[3] = gen_int_mode (l[3], SImode);
15743 parts[2] = gen_int_mode (l[2], SImode);
15746 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
15747 parts[2] = gen_int_mode (l[2], SImode);
15750 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15753 gcc_unreachable ();
15755 parts[1] = gen_int_mode (l[1], SImode);
15756 parts[0] = gen_int_mode (l[0], SImode);
15759 gcc_unreachable ();
15764 if (mode == TImode)
15765 split_ti (&operand, 1, &parts[0], &parts[1]);
15766 if (mode == XFmode || mode == TFmode)
15768 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
15769 if (REG_P (operand))
15771 gcc_assert (reload_completed);
15772 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
15773 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
15775 else if (offsettable_memref_p (operand))
15777 operand = adjust_address (operand, DImode, 0);
15778 parts[0] = operand;
15779 parts[1] = adjust_address (operand, upper_mode, 8);
15781 else if (GET_CODE (operand) == CONST_DOUBLE)
15786 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
15787 real_to_target (l, &r, mode);
15789 /* Do not use shift by 32 to avoid warning on 32bit systems. */
15790 if (HOST_BITS_PER_WIDE_INT >= 64)
15793 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
15794 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
15797 parts[0] = immed_double_const (l[0], l[1], DImode);
15799 if (upper_mode == SImode)
15800 parts[1] = gen_int_mode (l[2], SImode);
15801 else if (HOST_BITS_PER_WIDE_INT >= 64)
15804 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
15805 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
15808 parts[1] = immed_double_const (l[2], l[3], DImode);
15811 gcc_unreachable ();
15818 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
15819 Return false when normal moves are needed; true when all required
15820 insns have been emitted. Operands 2-5 receive the destination parts
15821 in the correct order; operands 6-9 receive the source parts. */
15824 ix86_split_long_move (rtx operands[])
15829 int collisions = 0;
15830 enum machine_mode mode = GET_MODE (operands[0]);
15831 bool collisionparts[4];
15833 /* The DFmode expanders may ask us to move double.
15834 For 64bit target this is single move. By hiding the fact
15835 here we simplify i386.md splitters. */
15836 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
15838 /* Optimize constant pool reference to immediates. This is used by
15839 fp moves, that force all constants to memory to allow combining. */
15841 if (MEM_P (operands[1])
15842 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
15843 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
15844 operands[1] = get_pool_constant (XEXP (operands[1], 0));
15845 if (push_operand (operands[0], VOIDmode))
15847 operands[0] = copy_rtx (operands[0]);
15848 PUT_MODE (operands[0], Pmode);
15851 operands[0] = gen_lowpart (DImode, operands[0]);
15852 operands[1] = gen_lowpart (DImode, operands[1]);
15853 emit_move_insn (operands[0], operands[1]);
15857 /* The only non-offsettable memory we handle is push. */
15858 if (push_operand (operands[0], VOIDmode))
15861 gcc_assert (!MEM_P (operands[0])
15862 || offsettable_memref_p (operands[0]));
15864 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
15865 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
15867 /* When emitting push, take care for source operands on the stack. */
15868 if (push && MEM_P (operands[1])
15869 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
15870 for (i = 0; i < nparts - 1; i++)
15871 part[1][i] = change_address (part[1][i],
15872 GET_MODE (part[1][i]),
15873 XEXP (part[1][i + 1], 0));
15875 /* We need to do copy in the right order in case an address register
15876 of the source overlaps the destination. */
15877 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
15881 for (i = 0; i < nparts; i++)
15884 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
15885 if (collisionparts[i])
15889 /* Collision in the middle part can be handled by reordering. */
15890 if (collisions == 1 && nparts == 3 && collisionparts [1])
15892 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
15893 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
15895 else if (collisions == 1
15897 && (collisionparts [1] || collisionparts [2]))
15899 if (collisionparts [1])
15901 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
15902 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
15906 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
15907 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
15911 /* If there are more collisions, we can't handle it by reordering.
15912 Do an lea to the last part and use only one colliding move. */
15913 else if (collisions > 1)
15919 base = part[0][nparts - 1];
15921 /* Handle the case when the last part isn't valid for lea.
15922 Happens in 64-bit mode storing the 12-byte XFmode. */
15923 if (GET_MODE (base) != Pmode)
15924 base = gen_rtx_REG (Pmode, REGNO (base));
15926 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
15927 part[1][0] = replace_equiv_address (part[1][0], base);
15928 for (i = 1; i < nparts; i++)
15930 tmp = plus_constant (base, UNITS_PER_WORD * i);
15931 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push of an XFmode value on 32-bit needs 4 extra stack bytes so the
   12-byte value occupies a 16-byte slot.  */
15942 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
15943 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
15944 emit_move_insn (part[0][2], part[1][2]);
15946 else if (nparts == 4)
15948 emit_move_insn (part[0][3], part[1][3]);
15949 emit_move_insn (part[0][2], part[1][2]);
15954 /* In 64bit mode we don't have 32bit push available. In case this is
15955 register, it is OK - we will just use larger counterpart. We also
15956 retype memory - these come from an attempt to avoid REX prefix on
15957 moving of second half of TFmode value. */
15958 if (GET_MODE (part[1][1]) == SImode)
15960 switch (GET_CODE (part[1][1]))
15963 part[1][1] = adjust_address (part[1][1], DImode, 0);
15967 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
15971 gcc_unreachable ();
15974 if (GET_MODE (part[1][0]) == SImode)
15975 part[1][0] = part[1][1];
15978 emit_move_insn (part[0][1], part[1][1]);
15979 emit_move_insn (part[0][0], part[1][0]);
15983 /* Choose correct order to not overwrite the source before it is copied. */
15984 if ((REG_P (part[0][0])
15985 && REG_P (part[1][1])
15986 && (REGNO (part[0][0]) == REGNO (part[1][1])
15988 && REGNO (part[0][0]) == REGNO (part[1][2]))
15990 && REGNO (part[0][0]) == REGNO (part[1][3]))))
15992 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Copy highest-numbered part first when the first destination register
   overlaps a later source part.  */
15994 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
15996 operands[2 + i] = part[0][j];
15997 operands[6 + i] = part[1][j];
16002 for (i = 0; i < nparts; i++)
16004 operands[2 + i] = part[0][i];
16005 operands[6 + i] = part[1][i];
16009 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16010 if (optimize_insn_for_size_p ())
16012 for (j = 0; j < nparts - 1; j++)
16013 if (CONST_INT_P (operands[6 + j])
16014 && operands[6 + j] != const0_rtx
16015 && REG_P (operands[2 + j]))
16016 for (i = j; i < nparts - 1; i++)
16017 if (CONST_INT_P (operands[7 + i])
16018 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16019 operands[7 + i] = operands[2 + j];
16022 for (i = 0; i < nparts; i++)
16023 emit_move_insn (operands[2 + i], operands[6 + i]);
16028 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16029 left shift by a constant, either using a single shift or
16030 a sequence of add instructions. */
16033 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* Shift by 1 is always just a self-add.  */
16037 emit_insn ((mode == DImode
16039 : gen_adddi3) (operand, operand, operand));
/* Otherwise use COUNT self-adds only when the cost model says they are
   no more expensive than one constant shift (never when sizing).  */
16041 else if (!optimize_insn_for_size_p ()
16042 && count * ix86_cost->add <= ix86_cost->shift_const)
16045 for (i=0; i<count; i++)
16047 emit_insn ((mode == DImode
16049 : gen_adddi3) (operand, operand, operand));
16053 emit_insn ((mode == DImode
16055 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit targets, TImode on
   64-bit) into single-word operations on the low/high halves.
   SCRATCH, if available with cmove, is used for the variable-count
   adjustment; otherwise a branchy adjustment pattern is emitted.  */
16059 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16061 rtx low[2], high[2];
16063 const int single_width = mode == DImode ? 32 : 64;
16065 if (CONST_INT_P (operands[2]))
16067 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16068 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= word size: high = low << (count - width), low = 0.  */
16070 if (count >= single_width)
16072 emit_move_insn (high[0], low[1]);
16073 emit_move_insn (low[0], const0_rtx);
16075 if (count > single_width)
16076 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Small constant count: shld the spill-over bits into the high half,
   then shift the low half.  */
16080 if (!rtx_equal_p (operands[0], operands[1]))
16081 emit_move_insn (operands[0], operands[1]);
16082 emit_insn ((mode == DImode
16084 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16085 ix86_expand_ashl_const (low[0], count, mode);
16090 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16092 if (operands[1] == const1_rtx)
16094 /* Assuming we've chosen a QImode capable registers, then 1 << N
16095 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16096 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16098 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16100 ix86_expand_clear (low[0]);
16101 ix86_expand_clear (high[0]);
16102 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16104 d = gen_lowpart (QImode, low[0]);
16105 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16106 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16107 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16109 d = gen_lowpart (QImode, high[0]);
16110 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16111 s = gen_rtx_NE (QImode, flags, const0_rtx);
16112 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16115 /* Otherwise, we can get the same results by manually performing
16116 a bit extract operation on bit 5/6, and then performing the two
16117 shifts. The two methods of getting 0/1 into low/high are exactly
16118 the same size. Avoiding the shift in the bit extract case helps
16119 pentium4 a bit; no one else seems to care much either way. */
16124 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16125 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16127 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16128 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) or 6 (TImode) of the count into high, then
   low = 1 - high.  */
16130 emit_insn ((mode == DImode
16132 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16133 emit_insn ((mode == DImode
16135 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16136 emit_move_insn (low[0], high[0]);
16137 emit_insn ((mode == DImode
16139 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16142 emit_insn ((mode == DImode
16144 : gen_ashldi3) (low[0], low[0], operands[2]));
16145 emit_insn ((mode == DImode
16147 : gen_ashldi3) (high[0], high[0], operands[2]));
16151 if (operands[1] == constm1_rtx)
16153 /* For -1 << N, we can avoid the shld instruction, because we
16154 know that we're shifting 0...31/63 ones into a -1. */
16155 emit_move_insn (low[0], constm1_rtx);
16156 if (optimize_insn_for_size_p ())
16157 emit_move_insn (high[0], low[0]);
16159 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld + shl, then fix up when the count
   is >= the word size.  */
16163 if (!rtx_equal_p (operands[0], operands[1]))
16164 emit_move_insn (operands[0], operands[1]);
16166 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16167 emit_insn ((mode == DImode
16169 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16172 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
16174 if (TARGET_CMOVE && scratch)
16176 ix86_expand_clear (scratch);
16177 emit_insn ((mode == DImode
16178 ? gen_x86_shift_adj_1
16179 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16183 emit_insn ((mode == DImode
16184 ? gen_x86_shift_adj_2
16185 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into single-word
   operations, mirroring ix86_split_ashl.  */
16189 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16191 rtx low[2], high[2];
16193 const int single_width = mode == DImode ? 32 : 64;
16195 if (CONST_INT_P (operands[2]))
16197 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16198 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Full shift: both halves become the replicated sign bit.  */
16200 if (count == single_width * 2 - 1)
16202 emit_move_insn (high[0], high[1]);
16203 emit_insn ((mode == DImode
16205 : gen_ashrdi3) (high[0], high[0],
16206 GEN_INT (single_width - 1)));
16207 emit_move_insn (low[0], high[0]);
/* Count >= word size: low = high >> (count - width), high = sign.  */
16210 else if (count >= single_width)
16212 emit_move_insn (low[0], high[1]);
16213 emit_move_insn (high[0], low[0]);
16214 emit_insn ((mode == DImode
16216 : gen_ashrdi3) (high[0], high[0],
16217 GEN_INT (single_width - 1)));
16218 if (count > single_width)
16219 emit_insn ((mode == DImode
16221 : gen_ashrdi3) (low[0], low[0],
16222 GEN_INT (count - single_width)));
/* Small constant count: shrd bits from high into low, then sar high.  */
16226 if (!rtx_equal_p (operands[0], operands[1]))
16227 emit_move_insn (operands[0], operands[1]);
16228 emit_insn ((mode == DImode
16230 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16231 emit_insn ((mode == DImode
16233 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + sar, then fix up for counts >= word size.  */
16238 if (!rtx_equal_p (operands[0], operands[1]))
16239 emit_move_insn (operands[0], operands[1]);
16241 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16243 emit_insn ((mode == DImode
16245 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16246 emit_insn ((mode == DImode
16248 : gen_ashrdi3) (high[0], high[0], operands[2]));
16250 if (TARGET_CMOVE && scratch)
/* Scratch holds the sign-replicated high word for the cmove fixup.  */
16252 emit_move_insn (scratch, high[0]);
16253 emit_insn ((mode == DImode
16255 : gen_ashrdi3) (scratch, scratch,
16256 GEN_INT (single_width - 1)));
16257 emit_insn ((mode == DImode
16258 ? gen_x86_shift_adj_1
16259 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16263 emit_insn ((mode == DImode
16264 ? gen_x86_shift_adj_3
16265 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into single-word operations,
   mirroring ix86_split_ashr but filling with zeros.  */
16270 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
16272 rtx low[2], high[2];
16274 const int single_width = mode == DImode ? 32 : 64;
16276 if (CONST_INT_P (operands[2]))
16278 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16279 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= word size: low = high >> (count - width), high = 0.  */
16281 if (count >= single_width)
16283 emit_move_insn (low[0], high[1]);
16284 ix86_expand_clear (high[0]);
16286 if (count > single_width)
16287 emit_insn ((mode == DImode
16289 : gen_lshrdi3) (low[0], low[0],
16290 GEN_INT (count - single_width)));
/* Small constant count: shrd bits from high into low, then shr high.  */
16294 if (!rtx_equal_p (operands[0], operands[1]))
16295 emit_move_insn (operands[0], operands[1]);
16296 emit_insn ((mode == DImode
16298 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16299 emit_insn ((mode == DImode
16301 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + shr, then fix up for counts >= word size.  */
16306 if (!rtx_equal_p (operands[0], operands[1]))
16307 emit_move_insn (operands[0], operands[1]);
16309 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16311 emit_insn ((mode == DImode
16313 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16314 emit_insn ((mode == DImode
16316 : gen_lshrdi3) (high[0], high[0], operands[2]));
16318 /* Heh. By reversing the arguments, we can reuse this pattern. */
16319 if (TARGET_CMOVE && scratch)
16321 ix86_expand_clear (scratch);
16322 emit_insn ((mode == DImode
16323 ? gen_x86_shift_adj_1
16324 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16328 emit_insn ((mode == DImode
16329 ? gen_x86_shift_adj_2
16330 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
16334 /* Predict just emitted jump instruction to be taken with probability PROB.
16335 Attaches a REG_BR_PROB note to the last emitted insn, which must be a
16336 jump (asserted below).  */
16336 predict_jump (int prob)
16338 rtx insn = get_last_insn ();
16339 gcc_assert (JUMP_P (insn));
16341 = gen_rtx_EXPR_LIST (REG_BR_PROB,
16346 /* Helper function for the string operations below. Test VARIABLE whether
16347 it is aligned to VALUE bytes. If true, jump to the label. */
16349 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
16351 rtx label = gen_label_rtx ();
16352 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* tmpcount = variable & value; jump when the masked bits are zero.  */
16353 if (GET_MODE (variable) == DImode)
16354 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
16356 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
16357 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
/* Epilogue tests are 50/50; prologue alignment is usually satisfied.  */
16360 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16362 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16366 /* Adjust COUNTER by the VALUE.  Emits countreg -= VALUE (the addend is
16367 negated below) in the counter's own mode.  */
16368 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
16370 if (GET_MODE (countreg) == DImode)
16371 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
16373 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
16376 /* Zero extend possibly SImode EXP to Pmode register.  Constants
16377 (VOIDmode) are simply forced into a Pmode register; values already in
16378 Pmode are copied; otherwise an explicit zero-extend insn is emitted. */
16378 ix86_zero_extend_to_Pmode (rtx exp)
16381 if (GET_MODE (exp) == VOIDmode)
16382 return force_reg (Pmode, exp);
16383 if (GET_MODE (exp) == Pmode)
16384 return copy_to_mode_reg (Pmode, exp);
16385 r = gen_reg_rtx (Pmode);
16386 emit_insn (gen_zero_extendsidi2 (r, exp));
16390 /* Divide COUNTREG by SCALE.  For a constant count the division is done
16391 at compile time; otherwise SCALE must be a power of two (exact_log2)
16392 and a logical shift right is emitted.  */
16392 scale_counter (rtx countreg, int scale)
16395 rtx piece_size_mask;
16399 if (CONST_INT_P (countreg))
16400 return GEN_INT (INTVAL (countreg) / scale);
16401 gcc_assert (REG_P (countreg));
16403 piece_size_mask = GEN_INT (scale - 1);
16404 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
16405 GEN_INT (exact_log2 (scale)),
16406 NULL, 1, OPTAB_DIRECT);
16410 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
16411 DImode for constant loop counts: on 64-bit targets DImode is used only
16412 when the constant does not fit in 32 bits.  */
16413 static enum machine_mode
16414 counter_mode (rtx count_exp)
16416 if (GET_MODE (count_exp) != VOIDmode)
16417 return GET_MODE (count_exp);
16418 if (GET_CODE (count_exp) != CONST_INT)
16420 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
16425 /* When SRCPTR is non-NULL, output simple loop to move memory
16426 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
16427 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
16428 equivalent loop to set memory by VALUE (supposed to be in MODE).
16430 The size is rounded down to whole number of chunk size moved at once.
16431 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
16435 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
16436 rtx destptr, rtx srcptr, rtx value,
16437 rtx count, enum machine_mode mode, int unroll,
16440 rtx out_label, top_label, iter, tmp;
16441 enum machine_mode iter_mode = counter_mode (count);
16442 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
/* Mask that rounds COUNT down to a multiple of the chunk size.  */
16443 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
16449 top_label = gen_label_rtx ();
16450 out_label = gen_label_rtx ();
16451 iter = gen_reg_rtx (iter_mode);
16453 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
16454 NULL, 1, OPTAB_DIRECT);
16455 /* Those two should combine. */
16456 if (piece_size == const1_rtx)
16458 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
16460 predict_jump (REG_BR_PROB_BASE * 10 / 100);
16462 emit_move_insn (iter, const0_rtx);
16464 emit_label (top_label);
/* Address each chunk as destptr/srcptr + iter.  */
16466 tmp = convert_modes (Pmode, iter_mode, iter, true);
16467 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
16468 destmem = change_address (destmem, mode, x_addr);
16472 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
16473 srcmem = change_address (srcmem, mode, y_addr);
16475 /* When unrolling for chips that reorder memory reads and writes,
16476 we can save registers by using single temporary.
16477 Also using 4 temporaries is overkill in 32bit mode. */
16478 if (!TARGET_64BIT && 0)
/* NOTE(review): this branch is deliberately disabled by the "&& 0".  */
16480 for (i = 0; i < unroll; i++)
16485 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16487 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16489 emit_move_insn (destmem, srcmem);
/* Enabled path: load all chunks into temporaries, then store them,
   so reads are not interleaved with writes.  */
16495 gcc_assert (unroll <= 4);
16496 for (i = 0; i < unroll; i++)
16498 tmpreg[i] = gen_reg_rtx (mode);
16502 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16504 emit_move_insn (tmpreg[i], srcmem);
16506 for (i = 0; i < unroll; i++)
16511 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16513 emit_move_insn (destmem, tmpreg[i]);
/* Memset variant: store VALUE into each chunk.  */
16518 for (i = 0; i < unroll; i++)
16522 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16523 emit_move_insn (destmem, value);
16526 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
16527 true, OPTAB_LIB_WIDEN);
16529 emit_move_insn (iter, tmp);
16531 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive the backward-branch probability from the expected trip count
   when the caller supplied one.  */
16533 if (expected_size != -1)
16535 expected_size /= GET_MODE_SIZE (mode) * unroll;
16536 if (expected_size == 0)
16538 else if (expected_size > REG_BR_PROB_BASE)
16539 predict_jump (REG_BR_PROB_BASE - 1);
16541 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
16544 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the pointers past the copied region for the caller.  */
16545 iter = ix86_zero_extend_to_Pmode (iter);
16546 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
16547 true, OPTAB_LIB_WIDEN);
16548 if (tmp != destptr)
16549 emit_move_insn (destptr, tmp);
16552 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
16553 true, OPTAB_LIB_WIDEN);
16555 emit_move_insn (srcptr, tmp);
16557 emit_label (out_label);
16560 /* Output "rep; mov" instruction.
16561 Arguments have same meaning as for previous function. */
16563 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
16564 rtx destptr, rtx srcptr,
16566 enum machine_mode mode)
16572 /* If the size is known, it is shorter to use rep movs. */
16573 if (mode == QImode && CONST_INT_P (count)
16574 && !(INTVAL (count) & 3))
/* Make sure both MEMs are BLKmode references based on the pointers.  */
16577 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16578 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16579 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
16580 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
16581 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Build the final-pointer expressions ptr + countreg * piece_size that
   the rep_mov pattern exposes to the dataflow.  */
16582 if (mode != QImode)
16584 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16585 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16586 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16587 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
16588 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16589 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
16593 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16594 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
16596 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
16600 /* Output "rep; stos" instruction.
16601 Arguments have same meaning as for previous function */
/* Emit a block set as a single "rep; stos{b,l,q}".  DESTMEM is the
   destination MEM, DESTPTR its address register, VALUE the (promoted)
   fill value, COUNT the byte count, MODE the element size stored per
   iteration.  NOTE(review): some original lines (return type, locals,
   braces) are elided in this excerpt.  */
16603 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
16605 enum machine_mode mode)
/* Normalize the destination to (mem:BLK (destptr)) for the pattern.  */
16610 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16611 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* rep stos requires the value in a register (eax/rax) in MODE.  */
16612 value = force_reg (mode, gen_lowpart (mode, value));
/* Convert the byte count into an element count, zero-extended to Pmode.  */
16613 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
16614 if (mode != QImode)
/* DESTEXP is the final pointer value: destptr + count << log2(size).  */
16616 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16617 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16618 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16621 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16622 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit one movs-style move of MODE-sized data from SRCMEM+OFFSET to
   DESTMEM+OFFSET, auto-incrementing DESTPTR and SRCPTR via the strmov
   pattern.  Helper for the constant-count epilogue below.  */
16626 emit_strmov (rtx destmem, rtx srcmem,
16627 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
16629 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
16630 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
16631 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16634 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
/* Epilogue of a block move: copy the残remaining tail.  With a constant
   COUNT, test each bit of the residue (16, 8, 4, 2, 1) and emit straight-
   line moves; otherwise emit conditional per-size moves guarded by
   runtime alignment tests.  NOTE(review): braces/else arms and a few
   declarations are elided in this excerpt.  */
16636 expand_movmem_epilogue (rtx destmem, rtx srcmem,
16637 rtx destptr, rtx srcptr, rtx count, int max_size)
16640 if (CONST_INT_P (count))
16642 HOST_WIDE_INT countval = INTVAL (count);
/* Constant count: copy residue bits with unconditional moves.  */
16645 if ((countval & 0x10) && max_size > 16)
/* 16 bytes: two 8-byte moves (the 64-bit path; 32-bit arm elided).  */
16649 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
16650 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
16653 gcc_unreachable ();
16656 if ((countval & 0x08) && max_size > 8)
16659 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
/* 32-bit target: 8 bytes as two 4-byte moves.  */
16662 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16663 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
16667 if ((countval & 0x04) && max_size > 4)
16669 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16672 if ((countval & 0x02) && max_size > 2)
16674 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
16677 if ((countval & 0x01) && max_size > 1)
16679 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large variable residue: mask the count and fall back to a byte loop.  */
16686 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
16687 count, 1, OPTAB_DIRECT);
16688 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
16689 count, QImode, 1, 4);
16693 /* When there are stringops, we can cheaply increase dest and src pointers.
16694 Otherwise we save code size by maintaining offset (zero is readily
16695 available from preceding rep operation) and using x86 addressing modes.
16697 if (TARGET_SINGLE_STRINGOP)
/* Single-stringop path: each conditional move uses an auto-increment
   strmov, so the pointers advance for free.  */
16701 rtx label = ix86_expand_aligntest (count, 4, true);
16702 src = change_address (srcmem, SImode, srcptr);
16703 dest = change_address (destmem, SImode, destptr);
16704 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16705 emit_label (label);
16706 LABEL_NUSES (label) = 1;
16710 rtx label = ix86_expand_aligntest (count, 2, true);
16711 src = change_address (srcmem, HImode, srcptr);
16712 dest = change_address (destmem, HImode, destptr);
16713 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16714 emit_label (label);
16715 LABEL_NUSES (label) = 1;
16719 rtx label = ix86_expand_aligntest (count, 1, true);
16720 src = change_address (srcmem, QImode, srcptr);
16721 dest = change_address (destmem, QImode, destptr);
16722 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16723 emit_label (label);
16724 LABEL_NUSES (label) = 1;
/* No cheap stringops: keep a running OFFSET register (starts at 0) and
   address with ptr+offset instead of bumping both pointers.  */
16729 rtx offset = force_reg (Pmode, const0_rtx);
16734 rtx label = ix86_expand_aligntest (count, 4, true);
16735 src = change_address (srcmem, SImode, srcptr);
16736 dest = change_address (destmem, SImode, destptr);
16737 emit_move_insn (dest, src);
16738 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
16739 true, OPTAB_LIB_WIDEN);
16741 emit_move_insn (offset, tmp);
16742 emit_label (label);
16743 LABEL_NUSES (label) = 1;
16747 rtx label = ix86_expand_aligntest (count, 2, true);
16748 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16749 src = change_address (srcmem, HImode, tmp);
16750 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16751 dest = change_address (destmem, HImode, tmp);
16752 emit_move_insn (dest, src);
16753 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
16754 true, OPTAB_LIB_WIDEN);
16756 emit_move_insn (offset, tmp);
16757 emit_label (label);
16758 LABEL_NUSES (label) = 1;
/* Final odd byte: no offset update needed after the last move.  */
16762 rtx label = ix86_expand_aligntest (count, 1, true);
16763 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16764 src = change_address (srcmem, QImode, tmp);
16765 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16766 dest = change_address (destmem, QImode, tmp);
16767 emit_move_insn (dest, src);
16768 emit_label (label);
16769 LABEL_NUSES (label) = 1;
16774 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Setmem epilogue fallback: mask COUNT down to the residue and store it
   with a one-byte-at-a-time loop.  Used when VALUE was not promoted, so
   wider stores are unavailable.  */
16776 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
16777 rtx count, int max_size)
16780 expand_simple_binop (counter_mode (count), AND, count,
16781 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
16782 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
16783 gen_lowpart (QImode, value), count, QImode,
16787 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Epilogue of a block set: store the remaining tail.  Mirrors
   expand_movmem_epilogue — constant COUNT gets straight-line stores per
   residue bit, variable COUNT gets runtime-tested conditional stores.
   VALUE is assumed promoted (byte replicated) for stores wider than
   QImode.  NOTE(review): braces/else arms are elided in this excerpt.  */
16789 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
16793 if (CONST_INT_P (count))
16795 HOST_WIDE_INT countval = INTVAL (count);
16798 if ((countval & 0x10) && max_size > 16)
/* 16 bytes on 64-bit: two 8-byte strset stores (destptr auto-advances).  */
16802 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
16803 emit_insn (gen_strset (destptr, dest, value));
16804 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
16805 emit_insn (gen_strset (destptr, dest, value));
16808 gcc_unreachable ();
16811 if ((countval & 0x08) && max_size > 8)
16815 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
16816 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit target: 8 bytes as two 4-byte stores.  */
16820 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
16821 emit_insn (gen_strset (destptr, dest, value));
16822 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
16823 emit_insn (gen_strset (destptr, dest, value));
16827 if ((countval & 0x04) && max_size > 4)
16829 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
16830 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
16833 if ((countval & 0x02) && max_size > 2)
16835 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
16836 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
16839 if ((countval & 0x01) && max_size > 1)
16841 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
16842 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Large variable residue: fall back to the byte loop above.  */
16849 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable count: one alignment-test-guarded store per residue size.  */
16854 rtx label = ix86_expand_aligntest (count, 16, true);
16857 dest = change_address (destmem, DImode, destptr);
16858 emit_insn (gen_strset (destptr, dest, value));
16859 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit arm: 16 bytes as four 4-byte strset stores.  */
16863 dest = change_address (destmem, SImode, destptr);
16864 emit_insn (gen_strset (destptr, dest, value));
16865 emit_insn (gen_strset (destptr, dest, value));
16866 emit_insn (gen_strset (destptr, dest, value));
16867 emit_insn (gen_strset (destptr, dest, value));
16869 emit_label (label);
16870 LABEL_NUSES (label) = 1;
16874 rtx label = ix86_expand_aligntest (count, 8, true);
16877 dest = change_address (destmem, DImode, destptr);
16878 emit_insn (gen_strset (destptr, dest, value));
16882 dest = change_address (destmem, SImode, destptr);
16883 emit_insn (gen_strset (destptr, dest, value));
16884 emit_insn (gen_strset (destptr, dest, value));
16886 emit_label (label);
16887 LABEL_NUSES (label) = 1;
16891 rtx label = ix86_expand_aligntest (count, 4, true);
16892 dest = change_address (destmem, SImode, destptr);
16893 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
16894 emit_label (label);
16895 LABEL_NUSES (label) = 1;
16899 rtx label = ix86_expand_aligntest (count, 2, true);
16900 dest = change_address (destmem, HImode, destptr);
16901 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
16902 emit_label (label);
16903 LABEL_NUSES (label) = 1;
16907 rtx label = ix86_expand_aligntest (count, 1, true);
16908 dest = change_address (destmem, QImode, destptr);
16909 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
16910 emit_label (label);
16911 LABEL_NUSES (label) = 1;
16915 /* Copy enough bytes from SRC to DEST to align DEST, which is known to be
16916 aligned to ALIGN, up to DESIRED_ALIGNMENT. */
/* Each step copies one element of the next power-of-two size and bumps
   the alignment guarantee from ALIGN toward DESIRED_ALIGNMENT; the
   aligntest skips the copy when the pointer is already aligned at that
   level.  COUNT is decremented to account for the bytes consumed.  */
16918 expand_movmem_prologue (rtx destmem, rtx srcmem,
16919 rtx destptr, rtx srcptr, rtx count,
16920 int align, int desired_alignment)
16922 if (align <= 1 && desired_alignment > 1)
16924 rtx label = ix86_expand_aligntest (destptr, 1, false);
16925 srcmem = change_address (srcmem, QImode, srcptr);
16926 destmem = change_address (destmem, QImode, destptr);
16927 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16928 ix86_adjust_counter (count, 1);
16929 emit_label (label);
16930 LABEL_NUSES (label) = 1;
16932 if (align <= 2 && desired_alignment > 2)
16934 rtx label = ix86_expand_aligntest (destptr, 2, false);
16935 srcmem = change_address (srcmem, HImode, srcptr);
16936 destmem = change_address (destmem, HImode, destptr);
16937 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16938 ix86_adjust_counter (count, 2);
16939 emit_label (label);
16940 LABEL_NUSES (label) = 1;
16942 if (align <= 4 && desired_alignment > 4)
16944 rtx label = ix86_expand_aligntest (destptr, 4, false);
16945 srcmem = change_address (srcmem, SImode, srcptr);
16946 destmem = change_address (destmem, SImode, destptr);
16947 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16948 ix86_adjust_counter (count, 4);
16949 emit_label (label);
16950 LABEL_NUSES (label) = 1;
/* Only alignments up to 8 are supported by this ladder.  */
16952 gcc_assert (desired_alignment <= 8);
16955 /* Store enough bytes at DEST to align it, starting from known alignment
16956 ALIGN, up to DESIRED_ALIGNMENT. */
/* Setmem counterpart of expand_movmem_prologue: each aligntest-guarded
   strset store raises the destination alignment one power of two, and
   COUNT is decremented by the bytes written.  VALUE must already be
   promoted wide enough for the largest store emitted here.  */
16958 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
16959 int align, int desired_alignment)
16961 if (align <= 1 && desired_alignment > 1)
16963 rtx label = ix86_expand_aligntest (destptr, 1, false);
16964 destmem = change_address (destmem, QImode, destptr);
16965 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
16966 ix86_adjust_counter (count, 1);
16967 emit_label (label);
16968 LABEL_NUSES (label) = 1;
16970 if (align <= 2 && desired_alignment > 2)
16972 rtx label = ix86_expand_aligntest (destptr, 2, false);
16973 destmem = change_address (destmem, HImode, destptr);
16974 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
16975 ix86_adjust_counter (count, 2);
16976 emit_label (label);
16977 LABEL_NUSES (label) = 1;
16979 if (align <= 4 && desired_alignment > 4)
16981 rtx label = ix86_expand_aligntest (destptr, 4, false);
16982 destmem = change_address (destmem, SImode, destptr);
16983 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
16984 ix86_adjust_counter (count, 4);
16985 emit_label (label);
16986 LABEL_NUSES (label) = 1;
/* Only alignments up to 8 are supported by this ladder.  */
16988 gcc_assert (desired_alignment <= 8);
16991 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
16992 static enum stringop_alg
16993 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
16994 int *dynamic_check)
16996 const struct stringop_algs * algs;
16997 bool optimize_for_speed;
16998 /* Algorithms using the rep prefix want at least edi and ecx;
16999 additionally, memset wants eax and memcpy wants esi. Don't
17000 consider such algorithms if the user has appropriated those
17001 registers for their own purposes. */
17002 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17004 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17006 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17007 || (alg != rep_prefix_1_byte \
17008 && alg != rep_prefix_4_byte \
17009 && alg != rep_prefix_8_byte))
17010 const struct processor_costs *cost;
17012 /* Even if the string operation call is cold, we still might spend a lot
17013 of time processing large blocks. */
17014 if (optimize_function_for_size_p (cfun)
17015 || (optimize_insn_for_size_p ()
17016 && expected_size != -1 && expected_size < 256))
17017 optimize_for_speed = false;
17019 optimize_for_speed = true;
17021 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17023 *dynamic_check = -1;
17025 algs = &cost->memset[TARGET_64BIT != 0];
17027 algs = &cost->memcpy[TARGET_64BIT != 0];
17028 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17029 return stringop_alg;
17030 /* rep; movq or rep; movl is the smallest variant. */
17031 else if (!optimize_for_speed)
17033 if (!count || (count & 3))
17034 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17036 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17038 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
17040 else if (expected_size != -1 && expected_size < 4)
17041 return loop_1_byte;
17042 else if (expected_size != -1)
17045 enum stringop_alg alg = libcall;
17046 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
17048 /* We get here if the algorithms that were not libcall-based
17049 were rep-prefix based and we are unable to use rep prefixes
17050 based on global register usage. Break out of the loop and
17051 use the heuristic below. */
17052 if (algs->size[i].max == 0)
17054 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17056 enum stringop_alg candidate = algs->size[i].alg;
17058 if (candidate != libcall && ALG_USABLE_P (candidate))
17060 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17061 last non-libcall inline algorithm. */
17062 if (TARGET_INLINE_ALL_STRINGOPS)
17064 /* When the current size is best to be copied by a libcall,
17065 but we are still forced to inline, run the heuristic below
17066 that will pick code for medium sized blocks. */
17067 if (alg != libcall)
17071 else if (ALG_USABLE_P (candidate))
17075 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17077 /* When asked to inline the call anyway, try to pick meaningful choice.
17078 We look for maximal size of block that is faster to copy by hand and
17079 take blocks of at most of that size guessing that average size will
17080 be roughly half of the block.
17082 If this turns out to be bad, we might simply specify the preferred
17083 choice in ix86_costs. */
17084 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17085 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
17088 enum stringop_alg alg;
17090 bool any_alg_usable_p = true;
17092 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
17094 enum stringop_alg candidate = algs->size[i].alg;
17095 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
17097 if (candidate != libcall && candidate
17098 && ALG_USABLE_P (candidate))
17099 max = algs->size[i].max;
17101 /* If there aren't any usable algorithms, then recursing on
17102 smaller sizes isn't going to find anything. Just return the
17103 simple byte-at-a-time copy loop. */
17104 if (!any_alg_usable_p)
17106 /* Pick something reasonable. */
17107 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17108 *dynamic_check = 128;
17109 return loop_1_byte;
17113 alg = decide_alg (count, max / 2, memset, dynamic_check);
17114 gcc_assert (*dynamic_check == -1);
17115 gcc_assert (alg != libcall);
17116 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17117 *dynamic_check = max;
17120 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
17121 #undef ALG_USABLE_P
17124 /* Decide on alignment. We know that the operand is already aligned to ALIGN
17125 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* Returns the alignment the prologue should establish for ALG.
   NOTE(review): the switch skeleton (case labels, breaks, fallthroughs)
   is partially elided in this excerpt — structure below is incomplete.  */
17127 decide_alignment (int align,
17128 enum stringop_alg alg,
17131 int desired_align = 0;
17135 gcc_unreachable ();
/* Loop variants want word-aligned accesses.  */
17137 case unrolled_loop:
17138 desired_align = GET_MODE_SIZE (Pmode);
17140 case rep_prefix_8_byte:
17143 case rep_prefix_4_byte:
17144 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
17145 copying whole cacheline at once. */
17146 if (TARGET_PENTIUMPRO)
17151 case rep_prefix_1_byte:
17152 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
17153 copying whole cacheline at once. */
17154 if (TARGET_PENTIUMPRO)
/* Never ask for less than what we already have.  */
17168 if (desired_align < align)
17169 desired_align = align;
/* For tiny expected sizes an alignment prologue is not worth it.  */
17170 if (expected_size != -1 && expected_size < 4)
17171 desired_align = align;
17172 return desired_align;
/* Return the smallest power of 2 strictly greater than VAL.
   E.g. 0 -> 1, 1 -> 2, 4 -> 8, 5 -> 8.  VAL is expected to be a small
   non-negative size (epilogue residue), so the shift cannot overflow.
   (This excerpt had lost the function body; restored the canonical
   implementation.)  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
17185 /* Expand string move (memcpy) operation. Use i386 string operations when
17186 profitable. expand_setmem contains similar code. The code depends upon
17187 architecture, block size and alignment, but always has the same
17190 1) Prologue guard: Conditional that jumps up to epilogues for small
17191 blocks that can be handled by epilogue alone. This is faster but
17192 also needed for correctness, since prologue assume the block is larger
17193 than the desired alignment.
17195 Optional dynamic check for size and libcall for large
17196 blocks is emitted here too, with -minline-stringops-dynamically.
17198 2) Prologue: copy first few bytes in order to get destination aligned
17199 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
17200 DESIRED_ALIGN, and up to DESIRED_ALIGN - ALIGN bytes can be copied.
17201 We emit either a jump tree on power of two sized blocks, or a byte loop.
17203 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
17204 with specified algorithm.
17206 4) Epilogue: code copying tail of the block that is too small to be
17207 handled by main body (or up to size guarded by prologue guard). */
/* Expand a memcpy-style block move of COUNT_EXP bytes from SRC to DST.
   ALIGN_EXP is the known alignment, EXPECTED_ALIGN_EXP/EXPECTED_SIZE_EXP
   are profile-based hints.  Structure: step 0 picks algorithm and
   alignment, step 1 emits the small-block guard (and optional dynamic
   libcall check), step 2 the alignment prologue, step 3 the main loop,
   step 4 the epilogue.  NOTE(review): braces, case labels and several
   declarations are elided in this excerpt.  */
17210 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
17211 rtx expected_align_exp, rtx expected_size_exp)
17217 rtx jump_around_label = NULL;
17218 HOST_WIDE_INT align = 1;
17219 unsigned HOST_WIDE_INT count = 0;
17220 HOST_WIDE_INT expected_size = -1;
17221 int size_needed = 0, epilogue_size_needed;
17222 int desired_align = 0;
17223 enum stringop_alg alg;
17225 bool need_zero_guard = false;
17227 if (CONST_INT_P (align_exp))
17228 align = INTVAL (align_exp);
17229 /* i386 can do misaligned access on reasonably increased cost. */
17230 if (CONST_INT_P (expected_align_exp)
17231 && INTVAL (expected_align_exp) > align)
17232 align = INTVAL (expected_align_exp);
/* A constant count is also the exact expected size.  */
17233 if (CONST_INT_P (count_exp))
17234 count = expected_size = INTVAL (count_exp);
17235 if (CONST_INT_P (expected_size_exp) && count == 0)
17236 expected_size = INTVAL (expected_size_exp);
17238 /* Make sure we don't need to care about overflow later on. */
17239 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17242 /* Step 0: Decide on preferred algorithm, desired alignment and
17243 size of chunks to be copied by main loop. */
17245 alg = decide_alg (count, expected_size, false, &dynamic_check);
17246 desired_align = decide_alignment (align, alg, expected_size);
17248 if (!TARGET_ALIGN_STRINGOPS)
17249 align = desired_align;
17251 if (alg == libcall)
17253 gcc_assert (alg != no_stringop);
17255 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
17256 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17257 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Per-algorithm chunk size; need_zero_guard marks loop algorithms that
   must not be entered with fewer than size_needed bytes remaining.  */
17262 gcc_unreachable ();
17264 need_zero_guard = true;
17265 size_needed = GET_MODE_SIZE (Pmode);
17267 case unrolled_loop:
17268 need_zero_guard = true;
17269 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
17271 case rep_prefix_8_byte:
17274 case rep_prefix_4_byte:
17277 case rep_prefix_1_byte:
17281 need_zero_guard = true;
17286 epilogue_size_needed = size_needed;
17288 /* Step 1: Prologue guard. */
17290 /* Alignment code needs count to be in register. */
17291 if (CONST_INT_P (count_exp) && desired_align > align)
17292 count_exp = force_reg (counter_mode (count_exp), count_exp);
17293 gcc_assert (desired_align >= 1 && align >= 1);
17295 /* Ensure that alignment prologue won't copy past end of block. */
17296 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17298 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17299 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17300 Make sure it is power of 2. */
17301 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17303 if (CONST_INT_P (count_exp))
/* Whole block fits into the epilogue: skip prologue and main loop.  */
17305 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
/* Variable count: branch to the epilogue for small blocks.  */
17310 label = gen_label_rtx ();
17311 emit_cmp_and_jump_insns (count_exp,
17312 GEN_INT (epilogue_size_needed),
17313 LTU, 0, counter_mode (count_exp), 1, label);
17314 if (expected_size == -1 || expected_size < epilogue_size_needed)
17315 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17317 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17321 /* Emit code to decide on runtime whether library call or inline should be
17323 if (dynamic_check != -1)
17325 if (CONST_INT_P (count_exp))
/* Constant count above the threshold: just call the library.  */
17327 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
17329 emit_block_move_via_libcall (dst, src, count_exp, false);
17330 count_exp = const0_rtx;
/* Variable count: runtime test, libcall on the cold (large) path.  */
17336 rtx hot_label = gen_label_rtx ();
17337 jump_around_label = gen_label_rtx ();
17338 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17339 LEU, 0, GET_MODE (count_exp), 1, hot_label);
17340 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17341 emit_block_move_via_libcall (dst, src, count_exp, false);
17342 emit_jump (jump_around_label);
17343 emit_label (hot_label);
17347 /* Step 2: Alignment prologue. */
17349 if (desired_align > align)
17351 /* Except for the first move in epilogue, we no longer know
17352 constant offset in aliasing info. It doesn't seem worth
17353 the pain to maintain it for the first move, so throw away
17355 src = change_address (src, BLKmode, srcreg);
17356 dst = change_address (dst, BLKmode, destreg);
17357 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
/* The prologue may have consumed the whole (unknown) count; re-test so a
   zero-trip main loop is not entered.  */
17359 if (need_zero_guard && !count)
17361 /* It is possible that we copied enough so the main loop will not
17363 emit_cmp_and_jump_insns (count_exp,
17364 GEN_INT (size_needed),
17365 LTU, 0, counter_mode (count_exp), 1, label);
17366 if (expected_size == -1
17367 || expected_size < (desired_align - align) / 2 + size_needed)
17368 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17370 predict_jump (REG_BR_PROB_BASE * 60 / 100);
/* size_needed == 1 means the epilogue is empty; the small-block label
   can be emitted right here.  */
17373 if (label && size_needed == 1)
17375 emit_label (label);
17376 LABEL_NUSES (label) = 1;
17380 /* Step 3: Main loop. */
17386 gcc_unreachable ();
17388 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17389 count_exp, QImode, 1, expected_size);
17392 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17393 count_exp, Pmode, 1, expected_size);
17395 case unrolled_loop:
17396 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
17397 registers for 4 temporaries anyway. */
17398 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17399 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
17402 case rep_prefix_8_byte:
17403 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17406 case rep_prefix_4_byte:
17407 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17410 case rep_prefix_1_byte:
17411 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17415 /* Adjust properly the offset of src and dest memory for aliasing. */
17416 if (CONST_INT_P (count_exp))
17418 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
17419 (count / size_needed) * size_needed);
17420 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17421 (count / size_needed) * size_needed);
17425 src = change_address (src, BLKmode, srcreg);
17426 dst = change_address (dst, BLKmode, destreg);
17429 /* Step 4: Epilogue to copy the remaining bytes. */
17433 /* When the main loop is done, COUNT_EXP might hold original count,
17434 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
17435 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
17436 bytes. Compensate if needed. */
17438 if (size_needed < epilogue_size_needed)
17441 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17442 GEN_INT (size_needed - 1), count_exp, 1,
17444 if (tmp != count_exp)
17445 emit_move_insn (count_exp, tmp);
17447 emit_label (label);
17448 LABEL_NUSES (label) = 1;
17451 if (count_exp != const0_rtx && epilogue_size_needed > 1)
17452 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
17453 epilogue_size_needed);
17454 if (jump_around_label)
17455 emit_label (jump_around_label);
17459 /* Helper function for memcpy. For QImode value 0xXY produce
17460 0xXYXYXYXY of wide specified by MODE. This is essentially
17461 a * 0x10101010, but we can do slightly better than
17462 synth_mult by unwinding the sequence by hand on CPUs with
/* (continuation of comment elided in this excerpt)
   Replicate the low byte of VAL across all bytes of MODE (SImode or
   DImode) and return the result in a fresh register.  */
17465 promote_duplicated_reg (enum machine_mode mode, rtx val)
17467 enum machine_mode valmode = GET_MODE (val);
/* Number of shift+or steps for the by-hand expansion below.  */
17469 int nops = mode == DImode ? 3 : 2;
17471 gcc_assert (mode == SImode || mode == DImode);
17472 if (val == const0_rtx)
17473 return copy_to_mode_reg (mode, const0_rtx);
17474 if (CONST_INT_P (val))
/* Constant byte: compute the replicated constant at compile time.  */
17476 HOST_WIDE_INT v = INTVAL (val) & 255;
/* Two shifts avoid undefined behavior of a 32-bit shift on 32-bit
   HOST_WIDE_INT hosts.  */
17480 if (mode == DImode)
17481 v |= (v << 16) << 16;
17482 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
17485 if (valmode == VOIDmode)
17487 if (valmode != QImode)
17488 val = gen_lowpart (QImode, val);
17489 if (mode == QImode)
17491 if (!TARGET_PARTIAL_REG_STALL)
/* Cost check: multiply by 0x0101..01 vs. the shift/or sequence.  */
17493 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
17494 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
17495 <= (ix86_cost->shift_const + ix86_cost->add) * nops
17496 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
17498 rtx reg = convert_modes (mode, QImode, val, true);
/* Recursive call with const1_rtx yields the 0x0101..01 mask.  */
17499 tmp = promote_duplicated_reg (mode, const1_rtx);
17500 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
/* Shift/or expansion path.  */
17505 rtx reg = convert_modes (mode, QImode, val, true);
/* insv duplicates the low byte into bits 8..15 without the partial
   register stall of a shift+or on the low part.  */
17507 if (!TARGET_PARTIAL_REG_STALL)
17508 if (mode == SImode)
17509 emit_insn (gen_movsi_insv_1 (reg, reg));
17511 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
17514 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
17515 NULL, 1, OPTAB_DIRECT);
17517 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17519 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
17520 NULL, 1, OPTAB_DIRECT);
17521 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* SImode is done here (the early return on the elided next line);
   only DImode continues to the 32-bit shift below.  */
17522 if (mode == SImode)
17524 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
17525 NULL, 1, OPTAB_DIRECT);
17526 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17531 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
17532 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
17533 alignment from ALIGN to DESIRED_ALIGN. */
/* Pick the widest mode any store will need — either the main-loop chunk
   (SIZE_NEEDED) or the widest alignment-prologue store (DESIRED_ALIGN
   when it exceeds ALIGN) — and replicate VAL's low byte to that width.
   If only byte stores are needed, VAL is returned unchanged.  */
17535 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
17540 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
17541 promoted_val = promote_duplicated_reg (DImode, val);
17542 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
17543 promoted_val = promote_duplicated_reg (SImode, val);
17544 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
17545 promoted_val = promote_duplicated_reg (HImode, val);
17547 promoted_val = val;
17549 return promoted_val;
17552 /* Expand string clear operation (bzero). Use i386 string operations when
17553 profitable. See expand_movmem comment for explanation of individual
17554 steps performed. */
17556 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
17557 rtx expected_align_exp, rtx expected_size_exp)
17562 rtx jump_around_label = NULL;
17563 HOST_WIDE_INT align = 1;
17564 unsigned HOST_WIDE_INT count = 0;
17565 HOST_WIDE_INT expected_size = -1;
17566 int size_needed = 0, epilogue_size_needed;
17567 int desired_align = 0;
17568 enum stringop_alg alg;
17569 rtx promoted_val = NULL;
17570 bool force_loopy_epilogue = false;
17572 bool need_zero_guard = false;
17574 if (CONST_INT_P (align_exp))
17575 align = INTVAL (align_exp);
17576 /* i386 can do misaligned access on reasonably increased cost. */
17577 if (CONST_INT_P (expected_align_exp)
17578 && INTVAL (expected_align_exp) > align)
17579 align = INTVAL (expected_align_exp);
17580 if (CONST_INT_P (count_exp))
17581 count = expected_size = INTVAL (count_exp);
17582 if (CONST_INT_P (expected_size_exp) && count == 0)
17583 expected_size = INTVAL (expected_size_exp);
17585 /* Make sure we don't need to care about overflow later on. */
17586 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17589 /* Step 0: Decide on preferred algorithm, desired alignment and
17590 size of chunks to be copied by main loop. */
17592 alg = decide_alg (count, expected_size, true, &dynamic_check);
17593 desired_align = decide_alignment (align, alg, expected_size);
17595 if (!TARGET_ALIGN_STRINGOPS)
17596 align = desired_align;
17598 if (alg == libcall)
17600 gcc_assert (alg != no_stringop);
17602 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
17603 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17608 gcc_unreachable ();
17610 need_zero_guard = true;
17611 size_needed = GET_MODE_SIZE (Pmode);
17613 case unrolled_loop:
17614 need_zero_guard = true;
17615 size_needed = GET_MODE_SIZE (Pmode) * 4;
17617 case rep_prefix_8_byte:
17620 case rep_prefix_4_byte:
17623 case rep_prefix_1_byte:
17627 need_zero_guard = true;
17631 epilogue_size_needed = size_needed;
17633 /* Step 1: Prologue guard. */
17635 /* Alignment code needs count to be in register. */
17636 if (CONST_INT_P (count_exp) && desired_align > align)
17638 enum machine_mode mode = SImode;
17639 if (TARGET_64BIT && (count & ~0xffffffff))
17641 count_exp = force_reg (mode, count_exp);
17643 /* Do the cheap promotion to allow better CSE across the
17644 main loop and epilogue (ie one load of the big constant in the
17645 front of all code. */
17646 if (CONST_INT_P (val_exp))
17647 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
17648 desired_align, align);
17649 /* Ensure that alignment prologue won't copy past end of block. */
17650 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17652 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17653 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17654 Make sure it is power of 2. */
17655 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17657 /* To improve performance of small blocks, we jump around the VAL
17658 promoting mode. This mean that if the promoted VAL is not constant,
17659 we might not use it in the epilogue and have to use byte
17661 if (epilogue_size_needed > 2 && !promoted_val)
17662 force_loopy_epilogue = true;
17663 label = gen_label_rtx ();
17664 emit_cmp_and_jump_insns (count_exp,
17665 GEN_INT (epilogue_size_needed),
17666 LTU, 0, counter_mode (count_exp), 1, label);
17667 if (GET_CODE (count_exp) == CONST_INT)
17669 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
17670 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17672 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17674 if (dynamic_check != -1)
17676 rtx hot_label = gen_label_rtx ();
17677 jump_around_label = gen_label_rtx ();
17678 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17679 LEU, 0, counter_mode (count_exp), 1, hot_label);
17680 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17681 set_storage_via_libcall (dst, count_exp, val_exp, false);
17682 emit_jump (jump_around_label);
17683 emit_label (hot_label);
17686 /* Step 2: Alignment prologue. */
17688 /* Do the expensive promotion once we branched off the small blocks. */
17690 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
17691 desired_align, align);
17692 gcc_assert (desired_align >= 1 && align >= 1);
17694 if (desired_align > align)
17696 /* Except for the first move in epilogue, we no longer know
17697 constant offset in aliasing info. It don't seems to worth
17698 the pain to maintain it for the first move, so throw away
17700 dst = change_address (dst, BLKmode, destreg);
17701 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
17703 if (need_zero_guard && !count)
17705 /* It is possible that we copied enough so the main loop will not
17707 emit_cmp_and_jump_insns (count_exp,
17708 GEN_INT (size_needed),
17709 LTU, 0, counter_mode (count_exp), 1, label);
17710 if (expected_size == -1
17711 || expected_size < (desired_align - align) / 2 + size_needed)
17712 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17714 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17717 if (label && size_needed == 1)
17719 emit_label (label);
17720 LABEL_NUSES (label) = 1;
17724 /* Step 3: Main loop. */
17730 gcc_unreachable ();
17732 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17733 count_exp, QImode, 1, expected_size);
17736 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17737 count_exp, Pmode, 1, expected_size);
17739 case unrolled_loop:
17740 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17741 count_exp, Pmode, 4, expected_size);
17743 case rep_prefix_8_byte:
17744 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17747 case rep_prefix_4_byte:
17748 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17751 case rep_prefix_1_byte:
17752 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17756 /* Adjust properly the offset of src and dest memory for aliasing. */
17757 if (CONST_INT_P (count_exp))
17758 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17759 (count / size_needed) * size_needed);
17761 dst = change_address (dst, BLKmode, destreg);
17763 /* Step 4: Epilogue to copy the remaining bytes. */
17767 /* When the main loop is done, COUNT_EXP might hold original count,
17768 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
17769 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
17770 bytes. Compensate if needed. */
17772 if (size_needed < desired_align - align)
17775 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17776 GEN_INT (size_needed - 1), count_exp, 1,
17778 size_needed = desired_align - align + 1;
17779 if (tmp != count_exp)
17780 emit_move_insn (count_exp, tmp);
17782 emit_label (label);
17783 LABEL_NUSES (label) = 1;
17785 if (count_exp != const0_rtx && epilogue_size_needed > 1)
17787 if (force_loopy_epilogue)
17788 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
17791 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
17794 if (jump_around_label)
17795 emit_label (jump_around_label);
/* NOTE(review): this listing elides lines (embedded numbering jumps);
   braces/returns between the numbered lines are missing from view.
   Comments below describe only what the visible lines establish.  */
17799 /* Expand the appropriate insns for doing strlen if not just doing
17802 out = result, initialized with the start address
17803 align_rtx = alignment of the address.
17804 scratch = scratch register, initialized with the startaddress when
17805 not aligned, otherwise undefined
17807 This is just the body. It needs the initializations mentioned above and
17808 some address computing at the end. These things are done in i386.md. */
17811 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
17815 rtx align_2_label = NULL_RTX;
17816 rtx align_3_label = NULL_RTX;
17817 rtx align_4_label = gen_label_rtx ();
17818 rtx end_0_label = gen_label_rtx ();
17820 rtx tmpreg = gen_reg_rtx (SImode);
17821 rtx scratch = gen_reg_rtx (SImode);
/* If alignment is a compile-time constant, extract it; otherwise the
   byte-by-byte alignment prologue below must be emitted.  */
17825 if (CONST_INT_P (align_rtx))
17826 align = INTVAL (align_rtx);
17828 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
17830 /* Is there a known alignment and is it less than 4? */
17833 rtx scratch1 = gen_reg_rtx (Pmode);
17834 emit_move_insn (scratch1, out);
17835 /* Is there a known alignment and is it not 2? */
17838 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
17839 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
17841 /* Leave just the 3 lower bits. */
17842 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
17843 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (addr & 3): 0 -> already 4-aligned, 2 -> two bytes to
   check, >2 -> three bytes to check.  */
17845 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
17846 Pmode, 1, align_4_label);
17847 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
17848 Pmode, 1, align_2_label);
17849 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
17850 Pmode, 1, align_3_label);
17854 /* Since the alignment is 2, we have to check 2 or 0 bytes;
17855 check if is aligned to 4 - byte. */
17857 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
17858 NULL_RTX, 0, OPTAB_WIDEN);
17860 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
17861 Pmode, 1, align_4_label);
17864 mem = change_address (src, QImode, out);
17866 /* Now compare the bytes. */
17868 /* Compare the first n unaligned byte on a byte per byte basis. */
17869 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
17870 QImode, 1, end_0_label);
17872 /* Increment the address. */
17873 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17875 /* Not needed with an alignment of 2 */
17878 emit_label (align_2_label);
17880 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
17883 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17885 emit_label (align_3_label);
17888 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
17891 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17894 /* Generate loop to check 4 bytes at a time. It is not a good idea to
17895 align this loop. It gives only huge programs, but does not help to
17897 emit_label (align_4_label);
17899 mem = change_address (src, SImode, out);
17900 emit_move_insn (scratch, mem);
17901 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
17903 /* This formula yields a nonzero result iff one of the bytes is zero.
17904 This saves three branches inside loop and many cycles. */
/* Classic "has a zero byte" bit trick:
   (x - 0x01010101) & ~x & 0x80808080 != 0  iff some byte of x is 0.  */
17906 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
17907 emit_insn (gen_one_cmplsi2 (scratch, scratch));
17908 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
17909 emit_insn (gen_andsi3 (tmpreg, tmpreg,
17910 gen_int_mode (0x80808080, SImode)));
17911 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found in the last word; narrow down which byte it was.
   This arm appears to use conditional moves (cmov target).  */
17916 rtx reg = gen_reg_rtx (SImode);
17917 rtx reg2 = gen_reg_rtx (Pmode);
17918 emit_move_insn (reg, tmpreg);
17919 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
17921 /* If zero is not in the first two bytes, move two bytes forward. */
17922 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
17923 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17924 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17925 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
17926 gen_rtx_IF_THEN_ELSE (SImode, tmp,
17929 /* Emit lea manually to avoid clobbering of flags. */
17930 emit_insn (gen_rtx_SET (SImode, reg2,
17931 gen_rtx_PLUS (Pmode, out, const2_rtx)));
17933 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17934 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17935 emit_insn (gen_rtx_SET (VOIDmode, out,
17936 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Fallback arm (no cmov): branch around the two-byte advance.  */
17943 rtx end_2_label = gen_label_rtx ();
17944 /* Is zero in the first two bytes? */
17946 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
17947 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17948 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
17949 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17950 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
17952 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17953 JUMP_LABEL (tmp) = end_2_label;
17955 /* Not in the first two. Move two bytes forward. */
17956 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
17957 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
17959 emit_label (end_2_label);
17963 /* Avoid branch in fixing the byte. */
/* add tmpreg,tmpreg sets carry iff bit 7 was set; sbb-style correction
   then subtracts 3 or 4 from OUT without a branch.  */
17964 tmpreg = gen_lowpart (QImode, tmpreg);
17965 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
17966 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
17967 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
17969 emit_label (end_0_label);
/* NOTE(review): elided listing — intermediate lines (braces, returns)
   are missing from view; code kept byte-identical.  */
17972 /* Expand strlen. */
17975 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
17977 rtx addr, scratch1, scratch2, scratch3, scratch4;
17979 /* The generic case of strlen expander is long. Avoid it's
17980 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
/* Bail-out guard: only expand when unrolled strlen is enabled, the
   terminator is NUL, we're optimizing for speed, and alignment < 4
   (presumably because the unrolled path handles misalignment itself).  */
17982 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
17983 && !TARGET_INLINE_ALL_STRINGOPS
17984 && !optimize_insn_for_size_p ()
17985 && (!CONST_INT_P (align) || INTVAL (align) < 4))
17988 addr = force_reg (Pmode, XEXP (src, 0));
17989 scratch1 = gen_reg_rtx (Pmode);
17991 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
17992 && !optimize_insn_for_size_p ())
17994 /* Well it seems that some optimizer does not combine a call like
17995 foo(strlen(bar), strlen(bar));
17996 when the move and the subtraction is done here. It does calculate
17997 the length just once when these instructions are done inside of
17998 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
17999 often used and I use one fewer register for the lifetime of
18000 output_strlen_unroll() this is better. */
18002 emit_move_insn (out, addr);
18004 ix86_expand_strlensi_unroll_1 (out, src, align);
18006 /* strlensi_unroll_1 returns the address of the zero at the end of
18007 the string, like memchr(), so compute the length by subtracting
18008 the start address. */
18009 emit_insn ((*ix86_gen_sub3) (out, out, addr));
/* Fallback path: use repne scasb (SCAS unspec) — needs eax/ecx/edi.  */
18015 /* Can't use this if the user has appropriated eax, ecx, or edi. */
18016 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
18019 scratch2 = gen_reg_rtx (Pmode);
18020 scratch3 = gen_reg_rtx (Pmode);
18021 scratch4 = force_reg (Pmode, constm1_rtx);
18023 emit_move_insn (scratch3, addr);
18024 eoschar = force_reg (QImode, eoschar);
18026 src = replace_equiv_address_nv (src, scratch3);
18028 /* If .md starts supporting :P, this can be done in .md. */
18029 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
18030 scratch4), UNSPEC_SCAS);
18031 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* length = ~(end_ptr) - 1, i.e. -(end_ptr) - ... — scasb leaves the
   pointer one past the terminator; NOT + add -1 recovers the length.  */
18032 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
18033 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
18038 /* For given symbol (function) construct code to compute address of it's PLT
18039 entry in large x86-64 PIC model. */
/* Emits: tmp = CONST(UNSPEC_PLTOFF(symbol)); tmp += PIC register.
   Only valid in the large PIC code model (asserted below).  */
18041 construct_plt_address (rtx symbol)
18043 rtx tmp = gen_reg_rtx (Pmode);
18044 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
18046 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
18047 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
18049 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
18050 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Emit a call (or sibcall) to FNADDR.  RETVAL, if non-null, receives the
   result; POP is the callee-pop amount; CALLARG2 carries the AL value for
   x86-64 varargs (number of SSE regs used).
   NOTE(review): listing elides lines; kept byte-identical.  */
18055 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
18056 rtx callarg2 ATTRIBUTE_UNUSED,
18057 rtx pop, int sibcall)
18059 rtx use = NULL, call;
18061 if (pop == const0_rtx)
18063 gcc_assert (!TARGET_64BIT || !pop)
18065 if (TARGET_MACHO && !TARGET_64BIT)
18068 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
18069 fnaddr = machopic_indirect_call_target (fnaddr);
18074 /* Static functions and indirect calls don't need the pic register. */
18075 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
18076 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18077 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
18078 use_reg (&use, pic_offset_table_rtx);
/* x86-64 varargs ABI: AL holds the count of vector registers used.  */
18081 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
18083 rtx al = gen_rtx_REG (QImode, AX_REG);
18084 emit_move_insn (al, callarg2);
18085 use_reg (&use, al);
/* Large PIC model: route non-local symbol calls through the PLT stub.  */
18088 if (ix86_cmodel == CM_LARGE_PIC
18089 && GET_CODE (fnaddr) == MEM
18090 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18091 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
18092 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
18093 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
18095 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18096 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls must go through R11 (a call-clobbered, non-argument
   register) when the target is not a constant address.  */
18098 if (sibcall && TARGET_64BIT
18099 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
18102 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18103 fnaddr = gen_rtx_REG (Pmode, R11_REG);
18104 emit_move_insn (fnaddr, addr);
18105 fnaddr = gen_rtx_MEM (QImode, fnaddr);
18108 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
18110 call = gen_rtx_SET (VOIDmode, retval, call);
/* Callee-pop: wrap the call and the stack-pointer bump in a PARALLEL.  */
18113 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
18114 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
18115 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
18118 call = emit_call_insn (call);
18120 CALL_INSN_FUNCTION_USAGE (call) = use;
18124 /* Clear stack slot assignments remembered from previous functions.
18125 This is called from INIT_EXPANDERS once before RTL is emitted for each
18128 static struct machine_function *
18129 ix86_init_machine_status (void)
18131 struct machine_function *f;
/* GGC_CNEW zero-allocates in GC storage, so unmentioned fields are 0.  */
18133 f = GGC_CNEW (struct machine_function);
18134 f->use_fast_prologue_epilogue_nregs = -1;
18135 f->tls_descriptor_call_expanded_p = 0;
18136 f->call_abi = DEFAULT_ABI;
18141 /* Return a MEM corresponding to a stack slot with mode MODE.
18142 Allocate a new slot if necessary.
18144 The RTL for a function can have several slots available: N is
18145 which slot to use. */
18148 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
18150 struct stack_local_entry *s;
18152 gcc_assert (n < MAX_386_STACK_LOCALS);
18154 /* Virtual slot is valid only before vregs are instantiated. */
18155 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse a previously-assigned slot with the same (mode, n) key.
   copy_rtx so each caller gets an independent MEM.  */
18157 for (s = ix86_stack_locals; s; s = s->next)
18158 if (s->mode == mode && s->n == n)
18159 return copy_rtx (s->rtl);
/* Not found: allocate a fresh entry and push it on the list.  */
18161 s = (struct stack_local_entry *)
18162 ggc_alloc (sizeof (struct stack_local_entry));
18165 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
18167 s->next = ix86_stack_locals;
18168 ix86_stack_locals = s;
18172 /* Construct the SYMBOL_REF for the tls_get_addr function. */
18174 static GTY(()) rtx ix86_tls_symbol;
/* Lazily build and cache the SYMBOL_REF; GNU TLS uses the
   triple-underscore variant.  */
18176 ix86_tls_get_addr (void)
18179 if (!ix86_tls_symbol)
18181 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
18182 (TARGET_ANY_GNU_TLS
18184 ? "___tls_get_addr"
18185 : "__tls_get_addr");
18188 return ix86_tls_symbol;
18191 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
18193 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily create the symbol and mark it with the global-dynamic TLS
   model flag so later address legitimization treats it as TLS.  */
18195 ix86_tls_module_base (void)
18198 if (!ix86_tls_module_base_symbol)
18200 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
18201 "_TLS_MODULE_BASE_");
18202 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
18203 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
18206 return ix86_tls_module_base_symbol;
18209 /* Calculate the length of the memory address in the instruction
18210 encoding. Does not include the one-byte modrm, opcode, or prefix. */
18213 memory_address_length (rtx addr)
18215 struct ix86_address parts;
18216 rtx base, index, disp;
/* Autoinc/autodec addresses need no extra encoding bytes (the elided
   branch presumably returns 0 here — TODO confirm against full source).  */
18220 if (GET_CODE (addr) == PRE_DEC
18221 || GET_CODE (addr) == POST_INC
18222 || GET_CODE (addr) == PRE_MODIFY
18223 || GET_CODE (addr) == POST_MODIFY)
18226 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the register-identity checks below see hard regs.  */
18229 if (parts.base && GET_CODE (parts.base) == SUBREG)
18230 parts.base = SUBREG_REG (parts.base);
18231 if (parts.index && GET_CODE (parts.index) == SUBREG)
18232 parts.index = SUBREG_REG (parts.index);
18235 index = parts.index;
18240 - esp as the base always wants an index,
18241 - ebp as the base always wants a displacement. */
18243 /* Register Indirect. */
18244 if (base && !index && !disp)
18246 /* esp (for its index) and ebp (for its displacement) need
18247 the two-byte modrm form. */
18248 if (addr == stack_pointer_rtx
18249 || addr == arg_pointer_rtx
18250 || addr == frame_pointer_rtx
18251 || addr == hard_frame_pointer_rtx)
18255 /* Direct Addressing. */
18256 else if (disp && !base && !index)
18261 /* Find the length of the displacement constant. */
/* satisfies_constraint_K => 8-bit signed immediate => disp8 form.  */
18264 if (base && satisfies_constraint_K (disp))
18269 /* ebp always wants a displacement. */
18270 else if (base == hard_frame_pointer_rtx)
18273 /* An index requires the two-byte modrm form.... */
18275 /* ...like esp, which always wants an index. */
18276 || base == stack_pointer_rtx
18277 || base == arg_pointer_rtx
18278 || base == frame_pointer_rtx)
18285 /* Compute default value for "length_immediate" attribute. When SHORTFORM
18286 is set, expect that insn have 8bit immediate alternative. */
18288 ix86_attr_length_immediate_default (rtx insn, int shortform)
/* Scan operands for the (at most one counted) constant immediate.  */
18292 extract_insn_cached (insn);
18293 for (i = recog_data.n_operands - 1; i >= 0; --i)
18294 if (CONSTANT_P (recog_data.operand[i]))
/* 8-bit immediate form available and the constant fits in it.  */
18297 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
18301 switch (get_attr_mode (insn))
18312 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
18317 fatal_insn ("unknown insn mode", insn);
18323 /* Compute default value for "length_address" attribute. */
18325 ix86_attr_length_address_default (rtx insn)
/* LEA encodes its "address" in the SET_SRC, not in a MEM operand.  */
18329 if (get_attr_type (insn) == TYPE_LEA)
18331 rtx set = PATTERN (insn);
18333 if (GET_CODE (set) == PARALLEL)
18334 set = XVECEXP (set, 0, 0);
18336 gcc_assert (GET_CODE (set) == SET);
18338 return memory_address_length (SET_SRC (set));
/* Otherwise measure the first MEM operand's address, if any.  */
18341 extract_insn_cached (insn);
18342 for (i = recog_data.n_operands - 1; i >= 0; --i)
18343 if (MEM_P (recog_data.operand[i]))
18345 return memory_address_length (XEXP (recog_data.operand[i], 0));
18351 /* Compute default value for "length_vex" attribute. It includes
18352 2 or 3 byte VEX prefix and 1 opcode byte. */
18355 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
18360 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
18361 byte VEX prefix. */
18362 if (!has_0f_opcode || has_vex_w)
18365 /* We can always use 2 byte VEX prefix in 32bit. */
/* 64-bit: any operand requiring a REX.W/X/B-equivalent bit forces the
   3-byte VEX form.  */
18369 extract_insn_cached (insn);
18371 for (i = recog_data.n_operands - 1; i >= 0; --i)
18372 if (REG_P (recog_data.operand[i]))
18374 /* REX.W bit uses 3 byte VEX prefix. */
18375 if (GET_MODE (recog_data.operand[i]) == DImode)
18380 /* REX.X or REX.B bits use 3 byte VEX prefix. */
18381 if (MEM_P (recog_data.operand[i])
18382 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
18389 /* Return the maximum number of instructions a cpu can issue. */
/* Dispatches on ix86_tune; the per-case return values are elided in
   this listing.  */
18392 ix86_issue_rate (void)
18396 case PROCESSOR_PENTIUM:
18400 case PROCESSOR_PENTIUMPRO:
18401 case PROCESSOR_PENTIUM4:
18402 case PROCESSOR_ATHLON:
18404 case PROCESSOR_AMDFAM10:
18405 case PROCESSOR_NOCONA:
18406 case PROCESSOR_GENERIC32:
18407 case PROCESSOR_GENERIC64:
18410 case PROCESSOR_CORE2:
18418 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
18419 by DEP_INSN and nothing set by DEP_INSN. */
18422 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
18426 /* Simplify the test for uninteresting insns. */
18427 if (insn_type != TYPE_SETCC
18428 && insn_type != TYPE_ICMOV
18429 && insn_type != TYPE_FCMOV
18430 && insn_type != TYPE_IBR)
18433 if ((set = single_set (dep_insn)) != 0)
18435 set = SET_DEST (set);
18438 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
18439 && XVECLEN (PATTERN (dep_insn), 0) == 2
18440 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
18441 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
18443 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
/* BUGFIX: SET2 must come from the PARALLEL's second element; the guard
   above validated element 1, and taking element 0 again would merely
   duplicate SET, silently skipping the overlap check below.  */
18444 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
/* Only interesting when DEP_INSN writes the flags register.  */
18449 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
18452 /* This test is true if the dependent insn reads the flags but
18453 not any other potentially set register. */
18454 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
18457 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
18463 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
18464 address with operands set by DEP_INSN. */
18467 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
/* For LEA, the "address" is the SET_SRC of the pattern itself.  */
18471 if (insn_type == TYPE_LEA
18474 addr = PATTERN (insn);
18476 if (GET_CODE (addr) == PARALLEL)
18477 addr = XVECEXP (addr, 0, 0);
18479 gcc_assert (GET_CODE (addr) == SET);
18481 addr = SET_SRC (addr);
/* Otherwise find the first MEM operand's address.  */
18486 extract_insn_cached (insn);
18487 for (i = recog_data.n_operands - 1; i >= 0; --i)
18488 if (MEM_P (recog_data.operand[i]))
18490 addr = XEXP (recog_data.operand[i], 0);
/* AGI exists if DEP_INSN modifies anything the address mentions.  */
18497 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust the latency COST of the dependence
   DEP_INSN -> INSN for the tuned processor.
   NOTE(review): listing elides lines (returns, switch head, braces);
   code kept byte-identical.  */
18501 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
18503 enum attr_type insn_type, dep_insn_type;
18504 enum attr_memory memory;
18506 int dep_insn_code_number;
18508 /* Anti and output dependencies have zero cost on all CPUs. */
18509 if (REG_NOTE_KIND (link) != 0)
18512 dep_insn_code_number = recog_memoized (dep_insn);
18514 /* If we can't recognize the insns, we can't really do anything. */
18515 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
18518 insn_type = get_attr_type (insn);
18519 dep_insn_type = get_attr_type (dep_insn);
18523 case PROCESSOR_PENTIUM:
18524 /* Address Generation Interlock adds a cycle of latency. */
18525 if (ix86_agi_dependent (insn, dep_insn, insn_type))
18528 /* ??? Compares pair with jump/setcc. */
18529 if (ix86_flags_dependent (insn, dep_insn, insn_type))
18532 /* Floating point stores require value to be ready one cycle earlier. */
18533 if (insn_type == TYPE_FMOV
18534 && get_attr_memory (insn) == MEMORY_STORE
18535 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18539 case PROCESSOR_PENTIUMPRO:
18540 memory = get_attr_memory (insn);
18542 /* INT->FP conversion is expensive. */
18543 if (get_attr_fp_int_src (dep_insn))
18546 /* There is one cycle extra latency between an FP op and a store. */
18547 if (insn_type == TYPE_FMOV
18548 && (set = single_set (dep_insn)) != NULL_RTX
18549 && (set2 = single_set (insn)) != NULL_RTX
18550 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
18551 && MEM_P (SET_DEST (set2)))
18554 /* Show ability of reorder buffer to hide latency of load by executing
18555 in parallel with previous instruction in case
18556 previous instruction is not needed to compute the address. */
18557 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18558 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18560 /* Claim moves to take one cycle, as core can issue one load
18561 at time and the next load can start cycle later. */
18562 if (dep_insn_type == TYPE_IMOV
18563 || dep_insn_type == TYPE_FMOV)
/* (Elided case label here — another processor family.)  */
18571 memory = get_attr_memory (insn);
18573 /* The esp dependency is resolved before the instruction is really
18575 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
18576 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
18579 /* INT->FP conversion is expensive. */
18580 if (get_attr_fp_int_src (dep_insn))
18583 /* Show ability of reorder buffer to hide latency of load by executing
18584 in parallel with previous instruction in case
18585 previous instruction is not needed to compute the address. */
18586 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18587 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18589 /* Claim moves to take one cycle, as core can issue one load
18590 at time and the next load can start cycle later. */
18591 if (dep_insn_type == TYPE_IMOV
18592 || dep_insn_type == TYPE_FMOV)
18601 case PROCESSOR_ATHLON:
18603 case PROCESSOR_AMDFAM10:
18604 case PROCESSOR_GENERIC32:
18605 case PROCESSOR_GENERIC64:
18606 memory = get_attr_memory (insn);
18608 /* Show ability of reorder buffer to hide latency of load by executing
18609 in parallel with previous instruction in case
18610 previous instruction is not needed to compute the address. */
18611 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18612 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18614 enum attr_unit unit = get_attr_unit (insn);
18617 /* Because of the difference between the length of integer and
18618 floating unit pipeline preparation stages, the memory operands
18619 for floating point are cheaper.
18621 ??? For Athlon it the difference is most probably 2. */
18622 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
18625 loadcost = TARGET_ATHLON ? 2 : 0;
18627 if (cost >= loadcost)
18640 /* How many alternative schedules to try. This should be as wide as the
18641 scheduling freedom in the DFA, but no wider. Making this value too
18642 large results extra work for the scheduler. */
/* Per-processor lookahead; return values elided in this listing.  */
18645 ia32_multipass_dfa_lookahead (void)
18649 case PROCESSOR_PENTIUM:
18652 case PROCESSOR_PENTIUMPRO:
18662 /* Compute the alignment given to a constant that is being placed in memory.
18663 EXP is the constant and ALIGN is the alignment that the object would
18665 The value of this function is used instead of that alignment to align
18669 ix86_constant_alignment (tree exp, int align)
/* Boost doubles to 64-bit and 128-bit-mode constants to 128-bit
   alignment; long string constants get word alignment for faster
   block operations.  */
18671 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
18672 || TREE_CODE (exp) == INTEGER_CST)
18674 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
18676 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
18679 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
18680 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
18681 return BITS_PER_WORD;
18686 /* Compute the alignment for a static variable.
18687 TYPE is the data type, and ALIGN is the alignment that
18688 the object would ordinarily have. The value of this function is used
18689 instead of that alignment to align the object. */
18692 ix86_data_alignment (tree type, int align)
18694 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* Large aggregates get up to MAX_ALIGN so block ops can use wide moves.  */
18696 if (AGGREGATE_TYPE_P (type)
18697 && TYPE_SIZE (type)
18698 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18699 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
18700 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
18701 && align < max_align)
18704 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18705 to 16byte boundary. */
18708 if (AGGREGATE_TYPE_P (type)
18709 && TYPE_SIZE (type)
18710 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18711 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
18712 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Element/field-based boosts: DFmode data to 64, 128-bit modes to 128.  */
18716 if (TREE_CODE (type) == ARRAY_TYPE)
18718 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18720 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18723 else if (TREE_CODE (type) == COMPLEX_TYPE)
18726 if (TYPE_MODE (type) == DCmode && align < 64)
18728 if ((TYPE_MODE (type) == XCmode
18729 || TYPE_MODE (type) == TCmode) && align < 128)
18732 else if ((TREE_CODE (type) == RECORD_TYPE
18733 || TREE_CODE (type) == UNION_TYPE
18734 || TREE_CODE (type) == QUAL_UNION_TYPE)
18735 && TYPE_FIELDS (type))
18737 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18739 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18742 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
18743 || TREE_CODE (type) == INTEGER_TYPE)
18745 if (TYPE_MODE (type) == DFmode && align < 64)
18747 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18754 /* Compute the alignment for a local variable or a stack slot. TYPE is
18755 the data type, MODE is the widest mode available and ALIGN is the
18756 alignment that the object would ordinarily have. The value of this
18757 macro is used instead of that alignment to align the object. */
18760 ix86_local_alignment (tree type, enum machine_mode mode,
18761 unsigned int align)
18763 /* If TYPE is NULL, we are allocating a stack slot for caller-save
18764 register in MODE. We will return the largest alignment of XF
18768 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
18769 align = GET_MODE_ALIGNMENT (DFmode);
18773 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18774 to 16byte boundary. */
18777 if (AGGREGATE_TYPE_P (type)
18778 && TYPE_SIZE (type)
18779 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18780 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
18781 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Element/field-based boosts mirror ix86_data_alignment.  */
18784 if (TREE_CODE (type) == ARRAY_TYPE)
18786 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18788 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18791 else if (TREE_CODE (type) == COMPLEX_TYPE)
18793 if (TYPE_MODE (type) == DCmode && align < 64)
18795 if ((TYPE_MODE (type) == XCmode
18796 || TYPE_MODE (type) == TCmode) && align < 128)
18799 else if ((TREE_CODE (type) == RECORD_TYPE
18800 || TREE_CODE (type) == UNION_TYPE
18801 || TREE_CODE (type) == QUAL_UNION_TYPE)
18802 && TYPE_FIELDS (type))
18804 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18806 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18809 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
18810 || TREE_CODE (type) == INTEGER_TYPE)
18813 if (TYPE_MODE (type) == DFmode && align < 64)
18815 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18821 /* Emit RTL insns to initialize the variable parts of a trampoline.
18822 FNADDR is an RTX for the address of the function's pure code.
18823 CXT is an RTX for the static chain value for the function. */
18825 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
/* 32-bit layout: "movl $cxt, %ecx" (0xb9 imm32) then "jmp rel32"
   (0xe9 disp32) — 10 bytes total, hence the plus_constant(tramp, 10).  */
18829 /* Compute offset from the end of the jmp to the target function. */
18830 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
18831 plus_constant (tramp, 10),
18832 NULL_RTX, 1, OPTAB_DIRECT);
18833 emit_move_insn (gen_rtx_MEM (QImode, tramp),
18834 gen_int_mode (0xb9, QImode));
18835 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
18836 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
18837 gen_int_mode (0xe9, QImode));
18838 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
/* 64-bit layout (byte values below are little-endian opcode pairs):
   load fnaddr into r11 (short movl form when it fits in 32 bits,
   else movabs), movabs cxt into r10, then "rex jmp *%r11".  */
18843 /* Try to load address using shorter movl instead of movabs.
18844 We may want to support movq for kernel mode, but kernel does not use
18845 trampolines at the moment. */
18846 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
18848 fnaddr = copy_to_mode_reg (DImode, fnaddr);
18849 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18850 gen_int_mode (0xbb41, HImode));
18851 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
18852 gen_lowpart (SImode, fnaddr));
18857 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18858 gen_int_mode (0xbb49, HImode));
18859 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
18863 /* Load static chain using movabs to r10. */
18864 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18865 gen_int_mode (0xba49, HImode));
18866 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
18869 /* Jump to the r11 */
18870 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18871 gen_int_mode (0xff49, HImode));
18872 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
18873 gen_int_mode (0xe3, QImode));
18875 gcc_assert (offset <= TRAMPOLINE_SIZE);
18878 #ifdef ENABLE_EXECUTE_STACK
/* Platforms needing it: mark the trampoline's stack page executable.  */
18879 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
18880 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
18884 /* Codes for all the SSE/MMX builtins. */
18887 IX86_BUILTIN_ADDPS,
18888 IX86_BUILTIN_ADDSS,
18889 IX86_BUILTIN_DIVPS,
18890 IX86_BUILTIN_DIVSS,
18891 IX86_BUILTIN_MULPS,
18892 IX86_BUILTIN_MULSS,
18893 IX86_BUILTIN_SUBPS,
18894 IX86_BUILTIN_SUBSS,
18896 IX86_BUILTIN_CMPEQPS,
18897 IX86_BUILTIN_CMPLTPS,
18898 IX86_BUILTIN_CMPLEPS,
18899 IX86_BUILTIN_CMPGTPS,
18900 IX86_BUILTIN_CMPGEPS,
18901 IX86_BUILTIN_CMPNEQPS,
18902 IX86_BUILTIN_CMPNLTPS,
18903 IX86_BUILTIN_CMPNLEPS,
18904 IX86_BUILTIN_CMPNGTPS,
18905 IX86_BUILTIN_CMPNGEPS,
18906 IX86_BUILTIN_CMPORDPS,
18907 IX86_BUILTIN_CMPUNORDPS,
18908 IX86_BUILTIN_CMPEQSS,
18909 IX86_BUILTIN_CMPLTSS,
18910 IX86_BUILTIN_CMPLESS,
18911 IX86_BUILTIN_CMPNEQSS,
18912 IX86_BUILTIN_CMPNLTSS,
18913 IX86_BUILTIN_CMPNLESS,
18914 IX86_BUILTIN_CMPNGTSS,
18915 IX86_BUILTIN_CMPNGESS,
18916 IX86_BUILTIN_CMPORDSS,
18917 IX86_BUILTIN_CMPUNORDSS,
18919 IX86_BUILTIN_COMIEQSS,
18920 IX86_BUILTIN_COMILTSS,
18921 IX86_BUILTIN_COMILESS,
18922 IX86_BUILTIN_COMIGTSS,
18923 IX86_BUILTIN_COMIGESS,
18924 IX86_BUILTIN_COMINEQSS,
18925 IX86_BUILTIN_UCOMIEQSS,
18926 IX86_BUILTIN_UCOMILTSS,
18927 IX86_BUILTIN_UCOMILESS,
18928 IX86_BUILTIN_UCOMIGTSS,
18929 IX86_BUILTIN_UCOMIGESS,
18930 IX86_BUILTIN_UCOMINEQSS,
18932 IX86_BUILTIN_CVTPI2PS,
18933 IX86_BUILTIN_CVTPS2PI,
18934 IX86_BUILTIN_CVTSI2SS,
18935 IX86_BUILTIN_CVTSI642SS,
18936 IX86_BUILTIN_CVTSS2SI,
18937 IX86_BUILTIN_CVTSS2SI64,
18938 IX86_BUILTIN_CVTTPS2PI,
18939 IX86_BUILTIN_CVTTSS2SI,
18940 IX86_BUILTIN_CVTTSS2SI64,
18942 IX86_BUILTIN_MAXPS,
18943 IX86_BUILTIN_MAXSS,
18944 IX86_BUILTIN_MINPS,
18945 IX86_BUILTIN_MINSS,
18947 IX86_BUILTIN_LOADUPS,
18948 IX86_BUILTIN_STOREUPS,
18949 IX86_BUILTIN_MOVSS,
18951 IX86_BUILTIN_MOVHLPS,
18952 IX86_BUILTIN_MOVLHPS,
18953 IX86_BUILTIN_LOADHPS,
18954 IX86_BUILTIN_LOADLPS,
18955 IX86_BUILTIN_STOREHPS,
18956 IX86_BUILTIN_STORELPS,
18958 IX86_BUILTIN_MASKMOVQ,
18959 IX86_BUILTIN_MOVMSKPS,
18960 IX86_BUILTIN_PMOVMSKB,
18962 IX86_BUILTIN_MOVNTPS,
18963 IX86_BUILTIN_MOVNTQ,
18965 IX86_BUILTIN_LOADDQU,
18966 IX86_BUILTIN_STOREDQU,
18968 IX86_BUILTIN_PACKSSWB,
18969 IX86_BUILTIN_PACKSSDW,
18970 IX86_BUILTIN_PACKUSWB,
18972 IX86_BUILTIN_PADDB,
18973 IX86_BUILTIN_PADDW,
18974 IX86_BUILTIN_PADDD,
18975 IX86_BUILTIN_PADDQ,
18976 IX86_BUILTIN_PADDSB,
18977 IX86_BUILTIN_PADDSW,
18978 IX86_BUILTIN_PADDUSB,
18979 IX86_BUILTIN_PADDUSW,
18980 IX86_BUILTIN_PSUBB,
18981 IX86_BUILTIN_PSUBW,
18982 IX86_BUILTIN_PSUBD,
18983 IX86_BUILTIN_PSUBQ,
18984 IX86_BUILTIN_PSUBSB,
18985 IX86_BUILTIN_PSUBSW,
18986 IX86_BUILTIN_PSUBUSB,
18987 IX86_BUILTIN_PSUBUSW,
18990 IX86_BUILTIN_PANDN,
18994 IX86_BUILTIN_PAVGB,
18995 IX86_BUILTIN_PAVGW,
18997 IX86_BUILTIN_PCMPEQB,
18998 IX86_BUILTIN_PCMPEQW,
18999 IX86_BUILTIN_PCMPEQD,
19000 IX86_BUILTIN_PCMPGTB,
19001 IX86_BUILTIN_PCMPGTW,
19002 IX86_BUILTIN_PCMPGTD,
19004 IX86_BUILTIN_PMADDWD,
19006 IX86_BUILTIN_PMAXSW,
19007 IX86_BUILTIN_PMAXUB,
19008 IX86_BUILTIN_PMINSW,
19009 IX86_BUILTIN_PMINUB,
19011 IX86_BUILTIN_PMULHUW,
19012 IX86_BUILTIN_PMULHW,
19013 IX86_BUILTIN_PMULLW,
19015 IX86_BUILTIN_PSADBW,
19016 IX86_BUILTIN_PSHUFW,
19018 IX86_BUILTIN_PSLLW,
19019 IX86_BUILTIN_PSLLD,
19020 IX86_BUILTIN_PSLLQ,
19021 IX86_BUILTIN_PSRAW,
19022 IX86_BUILTIN_PSRAD,
19023 IX86_BUILTIN_PSRLW,
19024 IX86_BUILTIN_PSRLD,
19025 IX86_BUILTIN_PSRLQ,
19026 IX86_BUILTIN_PSLLWI,
19027 IX86_BUILTIN_PSLLDI,
19028 IX86_BUILTIN_PSLLQI,
19029 IX86_BUILTIN_PSRAWI,
19030 IX86_BUILTIN_PSRADI,
19031 IX86_BUILTIN_PSRLWI,
19032 IX86_BUILTIN_PSRLDI,
19033 IX86_BUILTIN_PSRLQI,
19035 IX86_BUILTIN_PUNPCKHBW,
19036 IX86_BUILTIN_PUNPCKHWD,
19037 IX86_BUILTIN_PUNPCKHDQ,
19038 IX86_BUILTIN_PUNPCKLBW,
19039 IX86_BUILTIN_PUNPCKLWD,
19040 IX86_BUILTIN_PUNPCKLDQ,
19042 IX86_BUILTIN_SHUFPS,
19044 IX86_BUILTIN_RCPPS,
19045 IX86_BUILTIN_RCPSS,
19046 IX86_BUILTIN_RSQRTPS,
19047 IX86_BUILTIN_RSQRTPS_NR,
19048 IX86_BUILTIN_RSQRTSS,
19049 IX86_BUILTIN_RSQRTF,
19050 IX86_BUILTIN_SQRTPS,
19051 IX86_BUILTIN_SQRTPS_NR,
19052 IX86_BUILTIN_SQRTSS,
19054 IX86_BUILTIN_UNPCKHPS,
19055 IX86_BUILTIN_UNPCKLPS,
19057 IX86_BUILTIN_ANDPS,
19058 IX86_BUILTIN_ANDNPS,
19060 IX86_BUILTIN_XORPS,
19063 IX86_BUILTIN_LDMXCSR,
19064 IX86_BUILTIN_STMXCSR,
19065 IX86_BUILTIN_SFENCE,
19067 /* 3DNow! Original */
19068 IX86_BUILTIN_FEMMS,
19069 IX86_BUILTIN_PAVGUSB,
19070 IX86_BUILTIN_PF2ID,
19071 IX86_BUILTIN_PFACC,
19072 IX86_BUILTIN_PFADD,
19073 IX86_BUILTIN_PFCMPEQ,
19074 IX86_BUILTIN_PFCMPGE,
19075 IX86_BUILTIN_PFCMPGT,
19076 IX86_BUILTIN_PFMAX,
19077 IX86_BUILTIN_PFMIN,
19078 IX86_BUILTIN_PFMUL,
19079 IX86_BUILTIN_PFRCP,
19080 IX86_BUILTIN_PFRCPIT1,
19081 IX86_BUILTIN_PFRCPIT2,
19082 IX86_BUILTIN_PFRSQIT1,
19083 IX86_BUILTIN_PFRSQRT,
19084 IX86_BUILTIN_PFSUB,
19085 IX86_BUILTIN_PFSUBR,
19086 IX86_BUILTIN_PI2FD,
19087 IX86_BUILTIN_PMULHRW,
19089 /* 3DNow! Athlon Extensions */
19090 IX86_BUILTIN_PF2IW,
19091 IX86_BUILTIN_PFNACC,
19092 IX86_BUILTIN_PFPNACC,
19093 IX86_BUILTIN_PI2FW,
19094 IX86_BUILTIN_PSWAPDSI,
19095 IX86_BUILTIN_PSWAPDSF,
19098 IX86_BUILTIN_ADDPD,
19099 IX86_BUILTIN_ADDSD,
19100 IX86_BUILTIN_DIVPD,
19101 IX86_BUILTIN_DIVSD,
19102 IX86_BUILTIN_MULPD,
19103 IX86_BUILTIN_MULSD,
19104 IX86_BUILTIN_SUBPD,
19105 IX86_BUILTIN_SUBSD,
19107 IX86_BUILTIN_CMPEQPD,
19108 IX86_BUILTIN_CMPLTPD,
19109 IX86_BUILTIN_CMPLEPD,
19110 IX86_BUILTIN_CMPGTPD,
19111 IX86_BUILTIN_CMPGEPD,
19112 IX86_BUILTIN_CMPNEQPD,
19113 IX86_BUILTIN_CMPNLTPD,
19114 IX86_BUILTIN_CMPNLEPD,
19115 IX86_BUILTIN_CMPNGTPD,
19116 IX86_BUILTIN_CMPNGEPD,
19117 IX86_BUILTIN_CMPORDPD,
19118 IX86_BUILTIN_CMPUNORDPD,
19119 IX86_BUILTIN_CMPEQSD,
19120 IX86_BUILTIN_CMPLTSD,
19121 IX86_BUILTIN_CMPLESD,
19122 IX86_BUILTIN_CMPNEQSD,
19123 IX86_BUILTIN_CMPNLTSD,
19124 IX86_BUILTIN_CMPNLESD,
19125 IX86_BUILTIN_CMPORDSD,
19126 IX86_BUILTIN_CMPUNORDSD,
19128 IX86_BUILTIN_COMIEQSD,
19129 IX86_BUILTIN_COMILTSD,
19130 IX86_BUILTIN_COMILESD,
19131 IX86_BUILTIN_COMIGTSD,
19132 IX86_BUILTIN_COMIGESD,
19133 IX86_BUILTIN_COMINEQSD,
19134 IX86_BUILTIN_UCOMIEQSD,
19135 IX86_BUILTIN_UCOMILTSD,
19136 IX86_BUILTIN_UCOMILESD,
19137 IX86_BUILTIN_UCOMIGTSD,
19138 IX86_BUILTIN_UCOMIGESD,
19139 IX86_BUILTIN_UCOMINEQSD,
19141 IX86_BUILTIN_MAXPD,
19142 IX86_BUILTIN_MAXSD,
19143 IX86_BUILTIN_MINPD,
19144 IX86_BUILTIN_MINSD,
19146 IX86_BUILTIN_ANDPD,
19147 IX86_BUILTIN_ANDNPD,
19149 IX86_BUILTIN_XORPD,
19151 IX86_BUILTIN_SQRTPD,
19152 IX86_BUILTIN_SQRTSD,
19154 IX86_BUILTIN_UNPCKHPD,
19155 IX86_BUILTIN_UNPCKLPD,
19157 IX86_BUILTIN_SHUFPD,
19159 IX86_BUILTIN_LOADUPD,
19160 IX86_BUILTIN_STOREUPD,
19161 IX86_BUILTIN_MOVSD,
19163 IX86_BUILTIN_LOADHPD,
19164 IX86_BUILTIN_LOADLPD,
19166 IX86_BUILTIN_CVTDQ2PD,
19167 IX86_BUILTIN_CVTDQ2PS,
19169 IX86_BUILTIN_CVTPD2DQ,
19170 IX86_BUILTIN_CVTPD2PI,
19171 IX86_BUILTIN_CVTPD2PS,
19172 IX86_BUILTIN_CVTTPD2DQ,
19173 IX86_BUILTIN_CVTTPD2PI,
19175 IX86_BUILTIN_CVTPI2PD,
19176 IX86_BUILTIN_CVTSI2SD,
19177 IX86_BUILTIN_CVTSI642SD,
19179 IX86_BUILTIN_CVTSD2SI,
19180 IX86_BUILTIN_CVTSD2SI64,
19181 IX86_BUILTIN_CVTSD2SS,
19182 IX86_BUILTIN_CVTSS2SD,
19183 IX86_BUILTIN_CVTTSD2SI,
19184 IX86_BUILTIN_CVTTSD2SI64,
19186 IX86_BUILTIN_CVTPS2DQ,
19187 IX86_BUILTIN_CVTPS2PD,
19188 IX86_BUILTIN_CVTTPS2DQ,
19190 IX86_BUILTIN_MOVNTI,
19191 IX86_BUILTIN_MOVNTPD,
19192 IX86_BUILTIN_MOVNTDQ,
19194 IX86_BUILTIN_MOVQ128,
19197 IX86_BUILTIN_MASKMOVDQU,
19198 IX86_BUILTIN_MOVMSKPD,
19199 IX86_BUILTIN_PMOVMSKB128,
19201 IX86_BUILTIN_PACKSSWB128,
19202 IX86_BUILTIN_PACKSSDW128,
19203 IX86_BUILTIN_PACKUSWB128,
19205 IX86_BUILTIN_PADDB128,
19206 IX86_BUILTIN_PADDW128,
19207 IX86_BUILTIN_PADDD128,
19208 IX86_BUILTIN_PADDQ128,
19209 IX86_BUILTIN_PADDSB128,
19210 IX86_BUILTIN_PADDSW128,
19211 IX86_BUILTIN_PADDUSB128,
19212 IX86_BUILTIN_PADDUSW128,
19213 IX86_BUILTIN_PSUBB128,
19214 IX86_BUILTIN_PSUBW128,
19215 IX86_BUILTIN_PSUBD128,
19216 IX86_BUILTIN_PSUBQ128,
19217 IX86_BUILTIN_PSUBSB128,
19218 IX86_BUILTIN_PSUBSW128,
19219 IX86_BUILTIN_PSUBUSB128,
19220 IX86_BUILTIN_PSUBUSW128,
19222 IX86_BUILTIN_PAND128,
19223 IX86_BUILTIN_PANDN128,
19224 IX86_BUILTIN_POR128,
19225 IX86_BUILTIN_PXOR128,
19227 IX86_BUILTIN_PAVGB128,
19228 IX86_BUILTIN_PAVGW128,
19230 IX86_BUILTIN_PCMPEQB128,
19231 IX86_BUILTIN_PCMPEQW128,
19232 IX86_BUILTIN_PCMPEQD128,
19233 IX86_BUILTIN_PCMPGTB128,
19234 IX86_BUILTIN_PCMPGTW128,
19235 IX86_BUILTIN_PCMPGTD128,
19237 IX86_BUILTIN_PMADDWD128,
19239 IX86_BUILTIN_PMAXSW128,
19240 IX86_BUILTIN_PMAXUB128,
19241 IX86_BUILTIN_PMINSW128,
19242 IX86_BUILTIN_PMINUB128,
19244 IX86_BUILTIN_PMULUDQ,
19245 IX86_BUILTIN_PMULUDQ128,
19246 IX86_BUILTIN_PMULHUW128,
19247 IX86_BUILTIN_PMULHW128,
19248 IX86_BUILTIN_PMULLW128,
19250 IX86_BUILTIN_PSADBW128,
19251 IX86_BUILTIN_PSHUFHW,
19252 IX86_BUILTIN_PSHUFLW,
19253 IX86_BUILTIN_PSHUFD,
19255 IX86_BUILTIN_PSLLDQI128,
19256 IX86_BUILTIN_PSLLWI128,
19257 IX86_BUILTIN_PSLLDI128,
19258 IX86_BUILTIN_PSLLQI128,
19259 IX86_BUILTIN_PSRAWI128,
19260 IX86_BUILTIN_PSRADI128,
19261 IX86_BUILTIN_PSRLDQI128,
19262 IX86_BUILTIN_PSRLWI128,
19263 IX86_BUILTIN_PSRLDI128,
19264 IX86_BUILTIN_PSRLQI128,
19266 IX86_BUILTIN_PSLLDQ128,
19267 IX86_BUILTIN_PSLLW128,
19268 IX86_BUILTIN_PSLLD128,
19269 IX86_BUILTIN_PSLLQ128,
19270 IX86_BUILTIN_PSRAW128,
19271 IX86_BUILTIN_PSRAD128,
19272 IX86_BUILTIN_PSRLW128,
19273 IX86_BUILTIN_PSRLD128,
19274 IX86_BUILTIN_PSRLQ128,
19276 IX86_BUILTIN_PUNPCKHBW128,
19277 IX86_BUILTIN_PUNPCKHWD128,
19278 IX86_BUILTIN_PUNPCKHDQ128,
19279 IX86_BUILTIN_PUNPCKHQDQ128,
19280 IX86_BUILTIN_PUNPCKLBW128,
19281 IX86_BUILTIN_PUNPCKLWD128,
19282 IX86_BUILTIN_PUNPCKLDQ128,
19283 IX86_BUILTIN_PUNPCKLQDQ128,
19285 IX86_BUILTIN_CLFLUSH,
19286 IX86_BUILTIN_MFENCE,
19287 IX86_BUILTIN_LFENCE,
19290 IX86_BUILTIN_ADDSUBPS,
19291 IX86_BUILTIN_HADDPS,
19292 IX86_BUILTIN_HSUBPS,
19293 IX86_BUILTIN_MOVSHDUP,
19294 IX86_BUILTIN_MOVSLDUP,
19295 IX86_BUILTIN_ADDSUBPD,
19296 IX86_BUILTIN_HADDPD,
19297 IX86_BUILTIN_HSUBPD,
19298 IX86_BUILTIN_LDDQU,
19300 IX86_BUILTIN_MONITOR,
19301 IX86_BUILTIN_MWAIT,
19304 IX86_BUILTIN_PHADDW,
19305 IX86_BUILTIN_PHADDD,
19306 IX86_BUILTIN_PHADDSW,
19307 IX86_BUILTIN_PHSUBW,
19308 IX86_BUILTIN_PHSUBD,
19309 IX86_BUILTIN_PHSUBSW,
19310 IX86_BUILTIN_PMADDUBSW,
19311 IX86_BUILTIN_PMULHRSW,
19312 IX86_BUILTIN_PSHUFB,
19313 IX86_BUILTIN_PSIGNB,
19314 IX86_BUILTIN_PSIGNW,
19315 IX86_BUILTIN_PSIGND,
19316 IX86_BUILTIN_PALIGNR,
19317 IX86_BUILTIN_PABSB,
19318 IX86_BUILTIN_PABSW,
19319 IX86_BUILTIN_PABSD,
19321 IX86_BUILTIN_PHADDW128,
19322 IX86_BUILTIN_PHADDD128,
19323 IX86_BUILTIN_PHADDSW128,
19324 IX86_BUILTIN_PHSUBW128,
19325 IX86_BUILTIN_PHSUBD128,
19326 IX86_BUILTIN_PHSUBSW128,
19327 IX86_BUILTIN_PMADDUBSW128,
19328 IX86_BUILTIN_PMULHRSW128,
19329 IX86_BUILTIN_PSHUFB128,
19330 IX86_BUILTIN_PSIGNB128,
19331 IX86_BUILTIN_PSIGNW128,
19332 IX86_BUILTIN_PSIGND128,
19333 IX86_BUILTIN_PALIGNR128,
19334 IX86_BUILTIN_PABSB128,
19335 IX86_BUILTIN_PABSW128,
19336 IX86_BUILTIN_PABSD128,
19338 /* AMDFAM10 - SSE4A New Instructions. */
19339 IX86_BUILTIN_MOVNTSD,
19340 IX86_BUILTIN_MOVNTSS,
19341 IX86_BUILTIN_EXTRQI,
19342 IX86_BUILTIN_EXTRQ,
19343 IX86_BUILTIN_INSERTQI,
19344 IX86_BUILTIN_INSERTQ,
19347 IX86_BUILTIN_BLENDPD,
19348 IX86_BUILTIN_BLENDPS,
19349 IX86_BUILTIN_BLENDVPD,
19350 IX86_BUILTIN_BLENDVPS,
19351 IX86_BUILTIN_PBLENDVB128,
19352 IX86_BUILTIN_PBLENDW128,
19357 IX86_BUILTIN_INSERTPS128,
19359 IX86_BUILTIN_MOVNTDQA,
19360 IX86_BUILTIN_MPSADBW128,
19361 IX86_BUILTIN_PACKUSDW128,
19362 IX86_BUILTIN_PCMPEQQ,
19363 IX86_BUILTIN_PHMINPOSUW128,
19365 IX86_BUILTIN_PMAXSB128,
19366 IX86_BUILTIN_PMAXSD128,
19367 IX86_BUILTIN_PMAXUD128,
19368 IX86_BUILTIN_PMAXUW128,
19370 IX86_BUILTIN_PMINSB128,
19371 IX86_BUILTIN_PMINSD128,
19372 IX86_BUILTIN_PMINUD128,
19373 IX86_BUILTIN_PMINUW128,
19375 IX86_BUILTIN_PMOVSXBW128,
19376 IX86_BUILTIN_PMOVSXBD128,
19377 IX86_BUILTIN_PMOVSXBQ128,
19378 IX86_BUILTIN_PMOVSXWD128,
19379 IX86_BUILTIN_PMOVSXWQ128,
19380 IX86_BUILTIN_PMOVSXDQ128,
19382 IX86_BUILTIN_PMOVZXBW128,
19383 IX86_BUILTIN_PMOVZXBD128,
19384 IX86_BUILTIN_PMOVZXBQ128,
19385 IX86_BUILTIN_PMOVZXWD128,
19386 IX86_BUILTIN_PMOVZXWQ128,
19387 IX86_BUILTIN_PMOVZXDQ128,
19389 IX86_BUILTIN_PMULDQ128,
19390 IX86_BUILTIN_PMULLD128,
19392 IX86_BUILTIN_ROUNDPD,
19393 IX86_BUILTIN_ROUNDPS,
19394 IX86_BUILTIN_ROUNDSD,
19395 IX86_BUILTIN_ROUNDSS,
19397 IX86_BUILTIN_PTESTZ,
19398 IX86_BUILTIN_PTESTC,
19399 IX86_BUILTIN_PTESTNZC,
19401 IX86_BUILTIN_VEC_INIT_V2SI,
19402 IX86_BUILTIN_VEC_INIT_V4HI,
19403 IX86_BUILTIN_VEC_INIT_V8QI,
19404 IX86_BUILTIN_VEC_EXT_V2DF,
19405 IX86_BUILTIN_VEC_EXT_V2DI,
19406 IX86_BUILTIN_VEC_EXT_V4SF,
19407 IX86_BUILTIN_VEC_EXT_V4SI,
19408 IX86_BUILTIN_VEC_EXT_V8HI,
19409 IX86_BUILTIN_VEC_EXT_V2SI,
19410 IX86_BUILTIN_VEC_EXT_V4HI,
19411 IX86_BUILTIN_VEC_EXT_V16QI,
19412 IX86_BUILTIN_VEC_SET_V2DI,
19413 IX86_BUILTIN_VEC_SET_V4SF,
19414 IX86_BUILTIN_VEC_SET_V4SI,
19415 IX86_BUILTIN_VEC_SET_V8HI,
19416 IX86_BUILTIN_VEC_SET_V4HI,
19417 IX86_BUILTIN_VEC_SET_V16QI,
19419 IX86_BUILTIN_VEC_PACK_SFIX,
19422 IX86_BUILTIN_CRC32QI,
19423 IX86_BUILTIN_CRC32HI,
19424 IX86_BUILTIN_CRC32SI,
19425 IX86_BUILTIN_CRC32DI,
19427 IX86_BUILTIN_PCMPESTRI128,
19428 IX86_BUILTIN_PCMPESTRM128,
19429 IX86_BUILTIN_PCMPESTRA128,
19430 IX86_BUILTIN_PCMPESTRC128,
19431 IX86_BUILTIN_PCMPESTRO128,
19432 IX86_BUILTIN_PCMPESTRS128,
19433 IX86_BUILTIN_PCMPESTRZ128,
19434 IX86_BUILTIN_PCMPISTRI128,
19435 IX86_BUILTIN_PCMPISTRM128,
19436 IX86_BUILTIN_PCMPISTRA128,
19437 IX86_BUILTIN_PCMPISTRC128,
19438 IX86_BUILTIN_PCMPISTRO128,
19439 IX86_BUILTIN_PCMPISTRS128,
19440 IX86_BUILTIN_PCMPISTRZ128,
19442 IX86_BUILTIN_PCMPGTQ,
19444 /* AES instructions */
19445 IX86_BUILTIN_AESENC128,
19446 IX86_BUILTIN_AESENCLAST128,
19447 IX86_BUILTIN_AESDEC128,
19448 IX86_BUILTIN_AESDECLAST128,
19449 IX86_BUILTIN_AESIMC128,
19450 IX86_BUILTIN_AESKEYGENASSIST128,
19452 /* PCLMUL instruction */
19453 IX86_BUILTIN_PCLMULQDQ128,
19456 IX86_BUILTIN_ADDPD256,
19457 IX86_BUILTIN_ADDPS256,
19458 IX86_BUILTIN_ADDSUBPD256,
19459 IX86_BUILTIN_ADDSUBPS256,
19460 IX86_BUILTIN_ANDPD256,
19461 IX86_BUILTIN_ANDPS256,
19462 IX86_BUILTIN_ANDNPD256,
19463 IX86_BUILTIN_ANDNPS256,
19464 IX86_BUILTIN_BLENDPD256,
19465 IX86_BUILTIN_BLENDPS256,
19466 IX86_BUILTIN_BLENDVPD256,
19467 IX86_BUILTIN_BLENDVPS256,
19468 IX86_BUILTIN_DIVPD256,
19469 IX86_BUILTIN_DIVPS256,
19470 IX86_BUILTIN_DPPS256,
19471 IX86_BUILTIN_HADDPD256,
19472 IX86_BUILTIN_HADDPS256,
19473 IX86_BUILTIN_HSUBPD256,
19474 IX86_BUILTIN_HSUBPS256,
19475 IX86_BUILTIN_MAXPD256,
19476 IX86_BUILTIN_MAXPS256,
19477 IX86_BUILTIN_MINPD256,
19478 IX86_BUILTIN_MINPS256,
19479 IX86_BUILTIN_MULPD256,
19480 IX86_BUILTIN_MULPS256,
19481 IX86_BUILTIN_ORPD256,
19482 IX86_BUILTIN_ORPS256,
19483 IX86_BUILTIN_SHUFPD256,
19484 IX86_BUILTIN_SHUFPS256,
19485 IX86_BUILTIN_SUBPD256,
19486 IX86_BUILTIN_SUBPS256,
19487 IX86_BUILTIN_XORPD256,
19488 IX86_BUILTIN_XORPS256,
19489 IX86_BUILTIN_CMPSD,
19490 IX86_BUILTIN_CMPSS,
19491 IX86_BUILTIN_CMPPD,
19492 IX86_BUILTIN_CMPPS,
19493 IX86_BUILTIN_CMPPD256,
19494 IX86_BUILTIN_CMPPS256,
19495 IX86_BUILTIN_CVTDQ2PD256,
19496 IX86_BUILTIN_CVTDQ2PS256,
19497 IX86_BUILTIN_CVTPD2PS256,
19498 IX86_BUILTIN_CVTPS2DQ256,
19499 IX86_BUILTIN_CVTPS2PD256,
19500 IX86_BUILTIN_CVTTPD2DQ256,
19501 IX86_BUILTIN_CVTPD2DQ256,
19502 IX86_BUILTIN_CVTTPS2DQ256,
19503 IX86_BUILTIN_EXTRACTF128PD256,
19504 IX86_BUILTIN_EXTRACTF128PS256,
19505 IX86_BUILTIN_EXTRACTF128SI256,
19506 IX86_BUILTIN_VZEROALL,
19507 IX86_BUILTIN_VZEROUPPER,
19508 IX86_BUILTIN_VZEROUPPER_REX64,
19509 IX86_BUILTIN_VPERMILVARPD,
19510 IX86_BUILTIN_VPERMILVARPS,
19511 IX86_BUILTIN_VPERMILVARPD256,
19512 IX86_BUILTIN_VPERMILVARPS256,
19513 IX86_BUILTIN_VPERMILPD,
19514 IX86_BUILTIN_VPERMILPS,
19515 IX86_BUILTIN_VPERMILPD256,
19516 IX86_BUILTIN_VPERMILPS256,
19517 IX86_BUILTIN_VPERMIL2PD,
19518 IX86_BUILTIN_VPERMIL2PS,
19519 IX86_BUILTIN_VPERMIL2PD256,
19520 IX86_BUILTIN_VPERMIL2PS256,
19521 IX86_BUILTIN_VPERM2F128PD256,
19522 IX86_BUILTIN_VPERM2F128PS256,
19523 IX86_BUILTIN_VPERM2F128SI256,
19524 IX86_BUILTIN_VBROADCASTSS,
19525 IX86_BUILTIN_VBROADCASTSD256,
19526 IX86_BUILTIN_VBROADCASTSS256,
19527 IX86_BUILTIN_VBROADCASTPD256,
19528 IX86_BUILTIN_VBROADCASTPS256,
19529 IX86_BUILTIN_VINSERTF128PD256,
19530 IX86_BUILTIN_VINSERTF128PS256,
19531 IX86_BUILTIN_VINSERTF128SI256,
19532 IX86_BUILTIN_LOADUPD256,
19533 IX86_BUILTIN_LOADUPS256,
19534 IX86_BUILTIN_STOREUPD256,
19535 IX86_BUILTIN_STOREUPS256,
19536 IX86_BUILTIN_LDDQU256,
19537 IX86_BUILTIN_LOADDQU256,
19538 IX86_BUILTIN_STOREDQU256,
19539 IX86_BUILTIN_MASKLOADPD,
19540 IX86_BUILTIN_MASKLOADPS,
19541 IX86_BUILTIN_MASKSTOREPD,
19542 IX86_BUILTIN_MASKSTOREPS,
19543 IX86_BUILTIN_MASKLOADPD256,
19544 IX86_BUILTIN_MASKLOADPS256,
19545 IX86_BUILTIN_MASKSTOREPD256,
19546 IX86_BUILTIN_MASKSTOREPS256,
19547 IX86_BUILTIN_MOVSHDUP256,
19548 IX86_BUILTIN_MOVSLDUP256,
19549 IX86_BUILTIN_MOVDDUP256,
19551 IX86_BUILTIN_SQRTPD256,
19552 IX86_BUILTIN_SQRTPS256,
19553 IX86_BUILTIN_SQRTPS_NR256,
19554 IX86_BUILTIN_RSQRTPS256,
19555 IX86_BUILTIN_RSQRTPS_NR256,
19557 IX86_BUILTIN_RCPPS256,
19559 IX86_BUILTIN_ROUNDPD256,
19560 IX86_BUILTIN_ROUNDPS256,
19562 IX86_BUILTIN_UNPCKHPD256,
19563 IX86_BUILTIN_UNPCKLPD256,
19564 IX86_BUILTIN_UNPCKHPS256,
19565 IX86_BUILTIN_UNPCKLPS256,
19567 IX86_BUILTIN_SI256_SI,
19568 IX86_BUILTIN_PS256_PS,
19569 IX86_BUILTIN_PD256_PD,
19570 IX86_BUILTIN_SI_SI256,
19571 IX86_BUILTIN_PS_PS256,
19572 IX86_BUILTIN_PD_PD256,
19574 IX86_BUILTIN_VTESTZPD,
19575 IX86_BUILTIN_VTESTCPD,
19576 IX86_BUILTIN_VTESTNZCPD,
19577 IX86_BUILTIN_VTESTZPS,
19578 IX86_BUILTIN_VTESTCPS,
19579 IX86_BUILTIN_VTESTNZCPS,
19580 IX86_BUILTIN_VTESTZPD256,
19581 IX86_BUILTIN_VTESTCPD256,
19582 IX86_BUILTIN_VTESTNZCPD256,
19583 IX86_BUILTIN_VTESTZPS256,
19584 IX86_BUILTIN_VTESTCPS256,
19585 IX86_BUILTIN_VTESTNZCPS256,
19586 IX86_BUILTIN_PTESTZ256,
19587 IX86_BUILTIN_PTESTC256,
19588 IX86_BUILTIN_PTESTNZC256,
19590 IX86_BUILTIN_MOVMSKPD256,
19591 IX86_BUILTIN_MOVMSKPS256,
19593 /* TFmode support builtins. */
19595 IX86_BUILTIN_FABSQ,
19596 IX86_BUILTIN_COPYSIGNQ,
19598 /* SSE5 instructions */
19599 IX86_BUILTIN_FMADDSS,
19600 IX86_BUILTIN_FMADDSD,
19601 IX86_BUILTIN_FMADDPS,
19602 IX86_BUILTIN_FMADDPD,
19603 IX86_BUILTIN_FMSUBSS,
19604 IX86_BUILTIN_FMSUBSD,
19605 IX86_BUILTIN_FMSUBPS,
19606 IX86_BUILTIN_FMSUBPD,
19607 IX86_BUILTIN_FNMADDSS,
19608 IX86_BUILTIN_FNMADDSD,
19609 IX86_BUILTIN_FNMADDPS,
19610 IX86_BUILTIN_FNMADDPD,
19611 IX86_BUILTIN_FNMSUBSS,
19612 IX86_BUILTIN_FNMSUBSD,
19613 IX86_BUILTIN_FNMSUBPS,
19614 IX86_BUILTIN_FNMSUBPD,
19615 IX86_BUILTIN_PCMOV,
19616 IX86_BUILTIN_PCMOV_V2DI,
19617 IX86_BUILTIN_PCMOV_V4SI,
19618 IX86_BUILTIN_PCMOV_V8HI,
19619 IX86_BUILTIN_PCMOV_V16QI,
19620 IX86_BUILTIN_PCMOV_V4SF,
19621 IX86_BUILTIN_PCMOV_V2DF,
19622 IX86_BUILTIN_PPERM,
19623 IX86_BUILTIN_PERMPS,
19624 IX86_BUILTIN_PERMPD,
19625 IX86_BUILTIN_PMACSSWW,
19626 IX86_BUILTIN_PMACSWW,
19627 IX86_BUILTIN_PMACSSWD,
19628 IX86_BUILTIN_PMACSWD,
19629 IX86_BUILTIN_PMACSSDD,
19630 IX86_BUILTIN_PMACSDD,
19631 IX86_BUILTIN_PMACSSDQL,
19632 IX86_BUILTIN_PMACSSDQH,
19633 IX86_BUILTIN_PMACSDQL,
19634 IX86_BUILTIN_PMACSDQH,
19635 IX86_BUILTIN_PMADCSSWD,
19636 IX86_BUILTIN_PMADCSWD,
19637 IX86_BUILTIN_PHADDBW,
19638 IX86_BUILTIN_PHADDBD,
19639 IX86_BUILTIN_PHADDBQ,
19640 IX86_BUILTIN_PHADDWD,
19641 IX86_BUILTIN_PHADDWQ,
19642 IX86_BUILTIN_PHADDDQ,
19643 IX86_BUILTIN_PHADDUBW,
19644 IX86_BUILTIN_PHADDUBD,
19645 IX86_BUILTIN_PHADDUBQ,
19646 IX86_BUILTIN_PHADDUWD,
19647 IX86_BUILTIN_PHADDUWQ,
19648 IX86_BUILTIN_PHADDUDQ,
19649 IX86_BUILTIN_PHSUBBW,
19650 IX86_BUILTIN_PHSUBWD,
19651 IX86_BUILTIN_PHSUBDQ,
19652 IX86_BUILTIN_PROTB,
19653 IX86_BUILTIN_PROTW,
19654 IX86_BUILTIN_PROTD,
19655 IX86_BUILTIN_PROTQ,
19656 IX86_BUILTIN_PROTB_IMM,
19657 IX86_BUILTIN_PROTW_IMM,
19658 IX86_BUILTIN_PROTD_IMM,
19659 IX86_BUILTIN_PROTQ_IMM,
19660 IX86_BUILTIN_PSHLB,
19661 IX86_BUILTIN_PSHLW,
19662 IX86_BUILTIN_PSHLD,
19663 IX86_BUILTIN_PSHLQ,
19664 IX86_BUILTIN_PSHAB,
19665 IX86_BUILTIN_PSHAW,
19666 IX86_BUILTIN_PSHAD,
19667 IX86_BUILTIN_PSHAQ,
19668 IX86_BUILTIN_FRCZSS,
19669 IX86_BUILTIN_FRCZSD,
19670 IX86_BUILTIN_FRCZPS,
19671 IX86_BUILTIN_FRCZPD,
19672 IX86_BUILTIN_CVTPH2PS,
19673 IX86_BUILTIN_CVTPS2PH,
19675 IX86_BUILTIN_COMEQSS,
19676 IX86_BUILTIN_COMNESS,
19677 IX86_BUILTIN_COMLTSS,
19678 IX86_BUILTIN_COMLESS,
19679 IX86_BUILTIN_COMGTSS,
19680 IX86_BUILTIN_COMGESS,
19681 IX86_BUILTIN_COMUEQSS,
19682 IX86_BUILTIN_COMUNESS,
19683 IX86_BUILTIN_COMULTSS,
19684 IX86_BUILTIN_COMULESS,
19685 IX86_BUILTIN_COMUGTSS,
19686 IX86_BUILTIN_COMUGESS,
19687 IX86_BUILTIN_COMORDSS,
19688 IX86_BUILTIN_COMUNORDSS,
19689 IX86_BUILTIN_COMFALSESS,
19690 IX86_BUILTIN_COMTRUESS,
19692 IX86_BUILTIN_COMEQSD,
19693 IX86_BUILTIN_COMNESD,
19694 IX86_BUILTIN_COMLTSD,
19695 IX86_BUILTIN_COMLESD,
19696 IX86_BUILTIN_COMGTSD,
19697 IX86_BUILTIN_COMGESD,
19698 IX86_BUILTIN_COMUEQSD,
19699 IX86_BUILTIN_COMUNESD,
19700 IX86_BUILTIN_COMULTSD,
19701 IX86_BUILTIN_COMULESD,
19702 IX86_BUILTIN_COMUGTSD,
19703 IX86_BUILTIN_COMUGESD,
19704 IX86_BUILTIN_COMORDSD,
19705 IX86_BUILTIN_COMUNORDSD,
19706 IX86_BUILTIN_COMFALSESD,
19707 IX86_BUILTIN_COMTRUESD,
19709 IX86_BUILTIN_COMEQPS,
19710 IX86_BUILTIN_COMNEPS,
19711 IX86_BUILTIN_COMLTPS,
19712 IX86_BUILTIN_COMLEPS,
19713 IX86_BUILTIN_COMGTPS,
19714 IX86_BUILTIN_COMGEPS,
19715 IX86_BUILTIN_COMUEQPS,
19716 IX86_BUILTIN_COMUNEPS,
19717 IX86_BUILTIN_COMULTPS,
19718 IX86_BUILTIN_COMULEPS,
19719 IX86_BUILTIN_COMUGTPS,
19720 IX86_BUILTIN_COMUGEPS,
19721 IX86_BUILTIN_COMORDPS,
19722 IX86_BUILTIN_COMUNORDPS,
19723 IX86_BUILTIN_COMFALSEPS,
19724 IX86_BUILTIN_COMTRUEPS,
19726 IX86_BUILTIN_COMEQPD,
19727 IX86_BUILTIN_COMNEPD,
19728 IX86_BUILTIN_COMLTPD,
19729 IX86_BUILTIN_COMLEPD,
19730 IX86_BUILTIN_COMGTPD,
19731 IX86_BUILTIN_COMGEPD,
19732 IX86_BUILTIN_COMUEQPD,
19733 IX86_BUILTIN_COMUNEPD,
19734 IX86_BUILTIN_COMULTPD,
19735 IX86_BUILTIN_COMULEPD,
19736 IX86_BUILTIN_COMUGTPD,
19737 IX86_BUILTIN_COMUGEPD,
19738 IX86_BUILTIN_COMORDPD,
19739 IX86_BUILTIN_COMUNORDPD,
19740 IX86_BUILTIN_COMFALSEPD,
19741 IX86_BUILTIN_COMTRUEPD,
19743 IX86_BUILTIN_PCOMEQUB,
19744 IX86_BUILTIN_PCOMNEUB,
19745 IX86_BUILTIN_PCOMLTUB,
19746 IX86_BUILTIN_PCOMLEUB,
19747 IX86_BUILTIN_PCOMGTUB,
19748 IX86_BUILTIN_PCOMGEUB,
19749 IX86_BUILTIN_PCOMFALSEUB,
19750 IX86_BUILTIN_PCOMTRUEUB,
19751 IX86_BUILTIN_PCOMEQUW,
19752 IX86_BUILTIN_PCOMNEUW,
19753 IX86_BUILTIN_PCOMLTUW,
19754 IX86_BUILTIN_PCOMLEUW,
19755 IX86_BUILTIN_PCOMGTUW,
19756 IX86_BUILTIN_PCOMGEUW,
19757 IX86_BUILTIN_PCOMFALSEUW,
19758 IX86_BUILTIN_PCOMTRUEUW,
19759 IX86_BUILTIN_PCOMEQUD,
19760 IX86_BUILTIN_PCOMNEUD,
19761 IX86_BUILTIN_PCOMLTUD,
19762 IX86_BUILTIN_PCOMLEUD,
19763 IX86_BUILTIN_PCOMGTUD,
19764 IX86_BUILTIN_PCOMGEUD,
19765 IX86_BUILTIN_PCOMFALSEUD,
19766 IX86_BUILTIN_PCOMTRUEUD,
19767 IX86_BUILTIN_PCOMEQUQ,
19768 IX86_BUILTIN_PCOMNEUQ,
19769 IX86_BUILTIN_PCOMLTUQ,
19770 IX86_BUILTIN_PCOMLEUQ,
19771 IX86_BUILTIN_PCOMGTUQ,
19772 IX86_BUILTIN_PCOMGEUQ,
19773 IX86_BUILTIN_PCOMFALSEUQ,
19774 IX86_BUILTIN_PCOMTRUEUQ,
19776 IX86_BUILTIN_PCOMEQB,
19777 IX86_BUILTIN_PCOMNEB,
19778 IX86_BUILTIN_PCOMLTB,
19779 IX86_BUILTIN_PCOMLEB,
19780 IX86_BUILTIN_PCOMGTB,
19781 IX86_BUILTIN_PCOMGEB,
19782 IX86_BUILTIN_PCOMFALSEB,
19783 IX86_BUILTIN_PCOMTRUEB,
19784 IX86_BUILTIN_PCOMEQW,
19785 IX86_BUILTIN_PCOMNEW,
19786 IX86_BUILTIN_PCOMLTW,
19787 IX86_BUILTIN_PCOMLEW,
19788 IX86_BUILTIN_PCOMGTW,
19789 IX86_BUILTIN_PCOMGEW,
19790 IX86_BUILTIN_PCOMFALSEW,
19791 IX86_BUILTIN_PCOMTRUEW,
19792 IX86_BUILTIN_PCOMEQD,
19793 IX86_BUILTIN_PCOMNED,
19794 IX86_BUILTIN_PCOMLTD,
19795 IX86_BUILTIN_PCOMLED,
19796 IX86_BUILTIN_PCOMGTD,
19797 IX86_BUILTIN_PCOMGED,
19798 IX86_BUILTIN_PCOMFALSED,
19799 IX86_BUILTIN_PCOMTRUED,
19800 IX86_BUILTIN_PCOMEQQ,
19801 IX86_BUILTIN_PCOMNEQ,
19802 IX86_BUILTIN_PCOMLTQ,
19803 IX86_BUILTIN_PCOMLEQ,
19804 IX86_BUILTIN_PCOMGTQ,
19805 IX86_BUILTIN_PCOMGEQ,
19806 IX86_BUILTIN_PCOMFALSEQ,
19807 IX86_BUILTIN_PCOMTRUEQ,
19812 /* Table for the ix86 builtin decls. */
19813 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
19815 /* Table of all of the builtin functions that are possible with different ISA's
19816 but are waiting to be built until a function is declared to use that
/* GTY(()) marks this struct so the garbage collector scans its tree
   pointers; the parallel array below is indexed by IX86_BUILTIN_* code.  */
19818 struct builtin_isa GTY(())
19820 tree type; /* builtin type to use in the declaration */
19821 const char *name; /* function name */
19822 int isa; /* isa_flags this builtin is defined for */
19823 bool const_p; /* true if the declaration is constant */
19826 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
19829 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
19830 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
19831 * function decl in the ix86_builtins array. Returns the function decl or
19832 * NULL_TREE, if the builtin was not added.
19834 * If the front end has a special hook for builtin functions, delay adding
19835 * builtin functions that aren't in the current ISA until the ISA is changed
19836 * with function specific optimization. Doing so, can save about 300K for the
19837 * default compiler. When the builtin is expanded, check at that time whether
19840 * If the front end doesn't have a special hook, record all builtins, even if
19841 * it isn't an instruction set in the current ISA in case the user uses
19842 * function specific options for a different ISA, so that we don't get scope
19843 * errors if a builtin is added in the middle of a function scope. */
19846 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
19848 tree decl = NULL_TREE;
/* Skip builtins that require 64-bit mode when not targeting 64-bit.  */
19850 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
19852 ix86_builtins_isa[(int) code].isa = mask;
/* Declare the builtin immediately when it belongs to the current ISA,
   or when the front end's builtin hook is the extended-scope variant
   (in which case deferral buys nothing -- see comment above).  */
19854 if ((mask & ix86_isa_flags) != 0
19855 || (lang_hooks.builtin_function
19856 == lang_hooks.builtin_function_ext_scope))
19859 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
19861 ix86_builtins[(int) code] = decl;
/* type == NULL_TREE marks this slot as "already declared".  */
19862 ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Defer: record everything ix86_add_new_builtins needs to declare
   the builtin later, when the ISA is switched.  */
19866 ix86_builtins[(int) code] = NULL_TREE;
19867 ix86_builtins_isa[(int) code].const_p = false;
19868 ix86_builtins_isa[(int) code].type = type;
19869 ix86_builtins_isa[(int) code].name = name;
19876 /* Like def_builtin, but also marks the function decl "const". */
19879 def_builtin_const (int mask, const char *name, tree type,
19880 enum ix86_builtins code)
19882 tree decl = def_builtin (mask, name, type, code);
/* Mark an immediately-declared builtin read-only ("const").  */
19884 TREE_READONLY (decl) = 1;
/* Record const-ness for a deferred builtin so ix86_add_new_builtins
   can apply it when the decl is finally created.  */
19886 ix86_builtins_isa[(int) code].const_p = true;
19891 /* Add any new builtin functions for a given ISA that may not have been
19892 declared. This saves a bit of space compared to adding all of the
19893 declarations to the tree, even if we didn't use them. */
19896 ix86_add_new_builtins (int isa)
19901 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
/* type != NULL_TREE means the builtin was recorded by def_builtin but
   never declared; declare it now if ISA enables it.  */
19903 if ((ix86_builtins_isa[i].isa & isa) != 0
19904 && ix86_builtins_isa[i].type != NULL_TREE)
19906 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
19907 ix86_builtins_isa[i].type,
19908 i, BUILT_IN_MD, NULL,
19911 ix86_builtins[i] = decl;
/* Clear the pending type so this builtin is not declared twice.  */
19912 ix86_builtins_isa[i].type = NULL_TREE;
19913 if (ix86_builtins_isa[i].const_p)
19914 TREE_READONLY (decl) = 1;
19919 /* Bits for builtin_description.flag. */
19921 /* Set when we don't support the comparison natively, and should
19922 swap_comparison in order to support it. */
19923 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One entry per target builtin: the ISA mask that enables it, the insn
   pattern that implements it, its user-visible name, its enum code, and
   the rtx comparison code (for compare-style builtins).  */
19925 struct builtin_description
19927 const unsigned int mask; /* OPTION_MASK_ISA_* flags required */
19928 const enum insn_code icode; /* CODE_FOR_* insn pattern to emit */
19929 const char *const name; /* "__builtin_ia32_*" function name */
19930 const enum ix86_builtins code; /* IX86_BUILTIN_* enumerator */
19931 const enum rtx_code comparison; /* comparison code, or UNKNOWN */
/* COMISS/UCOMISS (SSE) and COMISD/UCOMISD (SSE2) scalar-compare builtins.
   The rtx comparison codes (UNEQ, UNLT, ..., LTGT) describe how the flag
   result of the comi/ucomi insn is interpreted for each builtin.
   NOTE(review): the unordered codes here mirror the insn's EFLAGS
   semantics -- confirm against the sse_comi/sse_ucomi expanders.  */
19935 static const struct builtin_description bdesc_comi[] =
19937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
19938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
19939 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
19940 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
19941 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
19942 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
19943 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
19944 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
19945 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
19946 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
19947 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
19948 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
19949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
19950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
19951 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
19952 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
19953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
19954 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
19955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
19956 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
19957 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
19958 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
19959 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
19960 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 explicit-length packed string compare builtins (pcmpestr*).
   The last field carries the CC mode ((int) CCAmode, CCCmode, ...) of
   the EFLAGS bit the builtin tests, or 0 for the index/mask variants.  */
19963 static const struct builtin_description bdesc_pcmpestr[] =
19966 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
19967 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
19968 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
19969 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
19970 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
19971 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
19972 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 implicit-length (NUL-terminated) packed string compare builtins
   (pcmpistr*).  Same layout as bdesc_pcmpestr: the last field is the CC
   mode of the EFLAGS bit tested, or 0 for the index/mask variants.  */
19975 static const struct builtin_description bdesc_pcmpistr[] =
19978 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
19979 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
19980 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
19981 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
19982 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
19983 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
19984 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
19987 /* Special builtin types */
19988 enum ix86_special_builtin_type
/* Function-signature codes for the "special" (memory-referencing)
   builtins in bdesc_special_args below.  Naming convention:
   RESULT_FTYPE_ARG1_ARG2..., where a leading P on an argument means
   "pointer to" and PC means "pointer to const" of the element type
   that follows (e.g. V16QI_FTYPE_PCCHAR: returns V16QI, takes a
   const char *).  */
19990 SPECIAL_FTYPE_UNKNOWN,
/* Load-style signatures: vector result from a (const) pointer,
   optionally combined with an existing vector operand (used by the
   masked-load and loadh/loadl entries below).  */
19992 V32QI_FTYPE_PCCHAR,
19993 V16QI_FTYPE_PCCHAR,
19995 V8SF_FTYPE_PCFLOAT,
19997 V4DF_FTYPE_PCDOUBLE,
19998 V4SF_FTYPE_PCFLOAT,
19999 V2DF_FTYPE_PCDOUBLE,
20000 V8SF_FTYPE_PCV8SF_V8SF,
20001 V4DF_FTYPE_PCV4DF_V4DF,
20002 V4SF_FTYPE_V4SF_PCV2SF,
20003 V4SF_FTYPE_PCV4SF_V4SF,
20004 V2DF_FTYPE_V2DF_PCDOUBLE,
20005 V2DF_FTYPE_PCV2DF_V2DF,
/* Store-style signatures: void result, destination pointer first,
   then the value(s) to store (the three-operand forms serve the
   AVX masked stores).  */
20007 VOID_FTYPE_PV2SF_V4SF,
20008 VOID_FTYPE_PV2DI_V2DI,
20009 VOID_FTYPE_PCHAR_V32QI,
20010 VOID_FTYPE_PCHAR_V16QI,
20011 VOID_FTYPE_PFLOAT_V8SF,
20012 VOID_FTYPE_PFLOAT_V4SF,
20013 VOID_FTYPE_PDOUBLE_V4DF,
20014 VOID_FTYPE_PDOUBLE_V2DF,
20016 VOID_FTYPE_PINT_INT,
20017 VOID_FTYPE_PV8SF_V8SF_V8SF,
20018 VOID_FTYPE_PV4DF_V4DF_V4DF,
20019 VOID_FTYPE_PV4SF_V4SF_V4SF,
20020 VOID_FTYPE_PV2DF_V2DF_V2DF
20023 /* Builtin types */
20024 enum ix86_builtin_type
/* Function-signature codes for the ordinary (register-only) builtins
   in bdesc_args.  Same RESULT_FTYPE_ARGS naming as above.  Trailing
   tags modify how the builtin is expanded rather than its C type:
     _COUNT     - last operand is a shift count (vector or SI form);
     _SWAP      - operands are swapped before expansion (used by the
                  cmpgt/cmpge entries, which reuse the lt/le patterns);
     _VEC_MERGE - single operand duplicated for the vec_merge scalar
                  insns (sqrtss/rsqrtss/rcpss);
     _PTEST     - ptest-style comparison producing an int;
     V2DI2TI / V1DI2DI - NOTE(review): presumably a mode-punning
                  expansion (TImode/DImode view of the vector) — confirm
                  against ix86_expand_args_builtin.  */
20027 FLOAT128_FTYPE_FLOAT128,
20029 FLOAT128_FTYPE_FLOAT128_FLOAT128,
20030 INT_FTYPE_V8SF_V8SF_PTEST,
20031 INT_FTYPE_V4DI_V4DI_PTEST,
20032 INT_FTYPE_V4DF_V4DF_PTEST,
20033 INT_FTYPE_V4SF_V4SF_PTEST,
20034 INT_FTYPE_V2DI_V2DI_PTEST,
20035 INT_FTYPE_V2DF_V2DF_PTEST,
20067 V4SF_FTYPE_V4SF_VEC_MERGE,
20076 V2DF_FTYPE_V2DF_VEC_MERGE,
/* Two-operand forms.  */
20087 V16QI_FTYPE_V16QI_V16QI,
20088 V16QI_FTYPE_V8HI_V8HI,
20089 V8QI_FTYPE_V8QI_V8QI,
20090 V8QI_FTYPE_V4HI_V4HI,
20091 V8HI_FTYPE_V8HI_V8HI,
20092 V8HI_FTYPE_V8HI_V8HI_COUNT,
20093 V8HI_FTYPE_V16QI_V16QI,
20094 V8HI_FTYPE_V4SI_V4SI,
20095 V8HI_FTYPE_V8HI_SI_COUNT,
20096 V8SF_FTYPE_V8SF_V8SF,
20097 V8SF_FTYPE_V8SF_V8SI,
20098 V4SI_FTYPE_V4SI_V4SI,
20099 V4SI_FTYPE_V4SI_V4SI_COUNT,
20100 V4SI_FTYPE_V8HI_V8HI,
20101 V4SI_FTYPE_V4SF_V4SF,
20102 V4SI_FTYPE_V2DF_V2DF,
20103 V4SI_FTYPE_V4SI_SI_COUNT,
20104 V4HI_FTYPE_V4HI_V4HI,
20105 V4HI_FTYPE_V4HI_V4HI_COUNT,
20106 V4HI_FTYPE_V8QI_V8QI,
20107 V4HI_FTYPE_V2SI_V2SI,
20108 V4HI_FTYPE_V4HI_SI_COUNT,
20109 V4DF_FTYPE_V4DF_V4DF,
20110 V4DF_FTYPE_V4DF_V4DI,
20111 V4SF_FTYPE_V4SF_V4SF,
20112 V4SF_FTYPE_V4SF_V4SF_SWAP,
20113 V4SF_FTYPE_V4SF_V4SI,
20114 V4SF_FTYPE_V4SF_V2SI,
20115 V4SF_FTYPE_V4SF_V2DF,
20116 V4SF_FTYPE_V4SF_DI,
20117 V4SF_FTYPE_V4SF_SI,
20118 V2DI_FTYPE_V2DI_V2DI,
20119 V2DI_FTYPE_V2DI_V2DI_COUNT,
20120 V2DI_FTYPE_V16QI_V16QI,
20121 V2DI_FTYPE_V4SI_V4SI,
20122 V2DI_FTYPE_V2DI_V16QI,
20123 V2DI_FTYPE_V2DF_V2DF,
20124 V2DI_FTYPE_V2DI_SI_COUNT,
20125 V2SI_FTYPE_V2SI_V2SI,
20126 V2SI_FTYPE_V2SI_V2SI_COUNT,
20127 V2SI_FTYPE_V4HI_V4HI,
20128 V2SI_FTYPE_V2SF_V2SF,
20129 V2SI_FTYPE_V2SI_SI_COUNT,
20130 V2DF_FTYPE_V2DF_V2DF,
20131 V2DF_FTYPE_V2DF_V2DF_SWAP,
20132 V2DF_FTYPE_V2DF_V4SF,
20133 V2DF_FTYPE_V2DF_V2DI,
20134 V2DF_FTYPE_V2DF_DI,
20135 V2DF_FTYPE_V2DF_SI,
20136 V2SF_FTYPE_V2SF_V2SF,
20137 V1DI_FTYPE_V1DI_V1DI,
20138 V1DI_FTYPE_V1DI_V1DI_COUNT,
20139 V1DI_FTYPE_V8QI_V8QI,
20140 V1DI_FTYPE_V2SI_V2SI,
20141 V1DI_FTYPE_V1DI_SI_COUNT,
20142 UINT64_FTYPE_UINT64_UINT64,
20143 UINT_FTYPE_UINT_UINT,
20144 UINT_FTYPE_UINT_USHORT,
20145 UINT_FTYPE_UINT_UCHAR,
/* Vector-plus-immediate forms (shuffles, shifts-by-immediate,
   extracts/inserts).  */
20146 V8HI_FTYPE_V8HI_INT,
20147 V4SI_FTYPE_V4SI_INT,
20148 V4HI_FTYPE_V4HI_INT,
20149 V8SF_FTYPE_V8SF_INT,
20150 V4SI_FTYPE_V8SI_INT,
20151 V4SF_FTYPE_V8SF_INT,
20152 V2DF_FTYPE_V4DF_INT,
20153 V4DF_FTYPE_V4DF_INT,
20154 V4SF_FTYPE_V4SF_INT,
20155 V2DI_FTYPE_V2DI_INT,
20156 V2DI2TI_FTYPE_V2DI_INT,
20157 V2DF_FTYPE_V2DF_INT,
/* Three-operand forms.  */
20158 V16QI_FTYPE_V16QI_V16QI_V16QI,
20159 V8SF_FTYPE_V8SF_V8SF_V8SF,
20160 V4DF_FTYPE_V4DF_V4DF_V4DF,
20161 V4SF_FTYPE_V4SF_V4SF_V4SF,
20162 V2DF_FTYPE_V2DF_V2DF_V2DF,
20163 V16QI_FTYPE_V16QI_V16QI_INT,
20164 V8SI_FTYPE_V8SI_V8SI_INT,
20165 V8SI_FTYPE_V8SI_V4SI_INT,
20166 V8HI_FTYPE_V8HI_V8HI_INT,
20167 V8SF_FTYPE_V8SF_V8SF_INT,
20168 V8SF_FTYPE_V8SF_V4SF_INT,
20169 V4SI_FTYPE_V4SI_V4SI_INT,
20170 V4DF_FTYPE_V4DF_V4DF_INT,
20171 V4DF_FTYPE_V4DF_V2DF_INT,
20172 V4SF_FTYPE_V4SF_V4SF_INT,
20173 V2DI_FTYPE_V2DI_V2DI_INT,
20174 V2DI2TI_FTYPE_V2DI_V2DI_INT,
20175 V1DI2DI_FTYPE_V1DI_V1DI_INT,
20176 V2DF_FTYPE_V2DF_V2DF_INT,
20177 V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
20178 V4DF_FTYPE_V4DF_V4DF_V4DI_INT,
20179 V4SF_FTYPE_V4SF_V4SF_V4SI_INT,
20180 V2DF_FTYPE_V2DF_V2DF_V2DI_INT,
20181 V2DI_FTYPE_V2DI_UINT_UINT,
20182 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
20185 /* Special builtins with variable number of arguments. */
/* Table of the memory-referencing ("special") builtins, grouped by the
   ISA that provides them.  Fields: ISA option mask, insn code, builtin
   name (0 when the builtin is registered elsewhere by name, e.g.
   mfence/vzeroupper), builtin enum value, comparison code (UNKNOWN —
   unused for these), and the ix86_special_builtin_type signature cast
   to int.  */
20186 static const struct builtin_description bdesc_special_args[] =
20189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
20192 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
20195 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20196 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20197 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20199 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20200 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20201 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20202 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20204 /* SSE or 3DNow!A */
20205 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20206 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
20209 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20210 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20211 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20212 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
20213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
20215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
20216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
20217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
20219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
20220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
20223 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
20226 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
20229 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20230 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20233 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
20234 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
20235 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
20237 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20238 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20239 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20240 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
20241 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
20243 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20244 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20245 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
20246 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
20247 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20248 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
20249 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20251 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
20252 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
20253 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
20254 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
20255 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
20256 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
20257 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
20258 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
20261 /* Builtins with variable number of arguments. */
20262 static const struct builtin_description bdesc_args[] =
20265 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20266 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20267 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20268 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20269 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20270 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20272 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20273 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20274 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20275 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20276 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20277 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20278 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20279 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20281 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20282 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20284 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20285 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20286 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20287 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20289 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20290 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20291 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20292 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20293 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20294 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20296 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20297 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20298 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20299 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20300 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
20301 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
20303 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20304 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
20305 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20307 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
20309 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20310 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20311 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20312 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20313 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20314 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20317 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20318 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20319 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20320 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20321 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20323 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20325 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20326 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20329 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20330 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20331 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20332 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20334 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20335 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20336 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20337 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20338 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20339 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20340 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20341 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20342 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20343 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20344 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20345 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20346 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20347 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20348 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20351 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20352 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20353 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
20354 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20355 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20356 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20359 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
20360 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20361 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20362 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20363 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20364 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20365 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
20366 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
20367 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
20368 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
20369 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
20370 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
20372 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20374 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20375 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20376 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20377 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20378 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20379 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20380 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20381 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20383 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
20384 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
20385 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
20386 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20387 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20388 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20389 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
20390 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
20391 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
20392 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20393 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
20394 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20395 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
20396 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
20397 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
20398 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20399 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
20400 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
20401 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
20402 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20403 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20404 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20406 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20407 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20408 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20409 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20411 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20412 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20413 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20414 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20416 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20417 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20418 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20419 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20420 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20422 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
20423 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
20424 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
20426 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
20428 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20429 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20430 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20432 /* SSE MMX or 3Dnow!A */
20433 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20434 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20435 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20437 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20438 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20439 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20440 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20442 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
20443 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
20445 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
20448 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
20451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
20452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
20453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
20454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
20456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
20457 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
20458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
20459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
20460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
20462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
20464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
20465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
20466 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
20467 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
20469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
20470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
20471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
20473 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20474 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20475 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20476 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
20483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
20484 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
20485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
20487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
20489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
20490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
20491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
20495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
20496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
20497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
20499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
20500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
20501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20504 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20508 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20510 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20511 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20517 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
20519 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20520 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20521 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20522 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20523 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20524 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20525 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20526 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20537 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20538 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
20540 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20542 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20543 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20555 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20556 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20557 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
20570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
20571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
20573 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
20576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
20577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
20579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
20581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
20582 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
20583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
20584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
20586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
20587 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
20588 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
20589 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
20590 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
20591 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
20592 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
20594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
20595 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
20596 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
20597 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
20598 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
20599 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
20600 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
20602 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
20603 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
20604 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
20605 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
20607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
20608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
20609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
20611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
20613 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
20614 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
20616 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
20619 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
20620 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
20623 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
20624 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20626 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20627 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20628 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20629 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20630 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20631 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20634 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
20635 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
20636 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
20637 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
20638 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
20639 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
20641 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20642 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20643 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20644 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20645 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20646 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20647 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20648 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20649 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20650 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20651 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20652 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20653 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
20654 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
20655 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20656 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20657 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20658 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20659 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20660 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20661 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20662 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20663 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20664 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20667 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
20668 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
20671 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20672 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20673 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
20674 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
20675 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20676 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20677 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20678 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
20679 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
20680 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
20682 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
20683 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
20684 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
20685 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
20686 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
20687 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
20688 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
20689 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
20690 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
20691 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
20692 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
20693 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
20694 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
20696 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
20697 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20698 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20699 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20700 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20701 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20702 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20703 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20704 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20705 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20706 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
20707 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20709 /* SSE4.1 and SSE5 */
20710 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
20711 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
20712 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20713 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20715 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
20716 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
20717 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
20720 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20721 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
20722 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
20723 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
20724 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
20727 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
20728 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
20729 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
20730 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20733 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
20734 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
20736 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20737 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20738 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20739 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20742 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
20745 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20746 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20749 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20750 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_nandv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_nandv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20753 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20759 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20760 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20761 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20762 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20763 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20764 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20765 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20766 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20767 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20768 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20769 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20770 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20772 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
20773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
20774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
20775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
20777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
20780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
20781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
20791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
20792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
20793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
20794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
20795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
20796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
20797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
20798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
20799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
20800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
20801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
20804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
20805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
20806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
20807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
20808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT },
20809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT },
20810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT },
20811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT },
20812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
20813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
20814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
20816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
20820 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
20821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20822 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20824 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20826 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
20829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
20831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20832 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20836 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
20837 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
20838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
20839 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
20840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
20841 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
20843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
20844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
20845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
20846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
20847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
20848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
20849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
20850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
20851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
20852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
20853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
20854 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
20855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
20856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
20857 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
20859 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
20860 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
20864 enum multi_arg_type {
20874 MULTI_ARG_3_PERMPS,
20875 MULTI_ARG_3_PERMPD,
20882 MULTI_ARG_2_DI_IMM,
20883 MULTI_ARG_2_SI_IMM,
20884 MULTI_ARG_2_HI_IMM,
20885 MULTI_ARG_2_QI_IMM,
20886 MULTI_ARG_2_SF_CMP,
20887 MULTI_ARG_2_DF_CMP,
20888 MULTI_ARG_2_DI_CMP,
20889 MULTI_ARG_2_SI_CMP,
20890 MULTI_ARG_2_HI_CMP,
20891 MULTI_ARG_2_QI_CMP,
20914 static const struct builtin_description bdesc_multi_arg[] =
20916 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
20917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
20918 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
20919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
20920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
20921 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
20922 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
20923 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
20924 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
20925 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
20926 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
20927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
20928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
20929 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
20930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
20931 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
20932 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
20933 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
20934 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
20935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
20936 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
20937 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
20938 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
20939 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
20940 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
20941 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
20942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
20943 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
20944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
20945 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
20946 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
20947 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
20948 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
20949 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
20950 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
20951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
20952 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
20953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
20954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
20955 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
20956 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
20957 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
20958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
20959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
20960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
20961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
20962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
20963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
20964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
20965 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
20966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
20967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
20968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
20969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
20970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
20971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
20972 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
20973 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
20974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
20975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
20976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
20977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
20978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
20979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
20980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
20981 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
20982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
20983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
20984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
20985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
20986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
20987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
20988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
20989 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
20990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
20992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
20993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
20994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
20995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
20996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
20997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
20998 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
20999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
21010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
21013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
21014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
21015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
21016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21022 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
21027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
21030 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
21031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
21032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
21033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21038 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
21044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
21047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
21048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
21049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
21050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
21061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
21064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
21065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
21066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
21068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
21069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
21072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
21073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
21074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
21076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
21077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21079 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
21080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
21081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
21082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
21084 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21086 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
21088 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
21089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
21090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
21092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
21093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21095 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
21096 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
21097 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
21098 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
21100 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
21101 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21102 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21103 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
21104 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
21105 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
21106 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
21108 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
21109 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21110 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21111 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
21112 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
21113 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
21114 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
21116 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21117 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21118 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21119 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
21120 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
21121 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
21122 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
21124 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
21125 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
21126 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
21127 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
21128 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
21129 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
21130 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
21131 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
21133 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21134 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21135 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21136 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21137 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21138 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21139 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21140 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21142 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21143 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21144 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21145 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21146 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21147 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21148 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21149 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21152 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
21153 in the current target ISA to allow the user to compile particular modules
21154 with different target specific options that differ from the command line
21157 ix86_init_mmx_sse_builtins (void)
21159 const struct builtin_description * d;
21162 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
21163 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
21164 tree V1DI_type_node
21165 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
21166 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
21167 tree V2DI_type_node
21168 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
21169 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
21170 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
21171 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
21172 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
21173 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
21174 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
21176 tree pchar_type_node = build_pointer_type (char_type_node);
21177 tree pcchar_type_node
21178 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
21179 tree pfloat_type_node = build_pointer_type (float_type_node);
21180 tree pcfloat_type_node
21181 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
21182 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
21183 tree pcv2sf_type_node
21184 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
21185 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
21186 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
21189 tree int_ftype_v4sf_v4sf
21190 = build_function_type_list (integer_type_node,
21191 V4SF_type_node, V4SF_type_node, NULL_TREE);
21192 tree v4si_ftype_v4sf_v4sf
21193 = build_function_type_list (V4SI_type_node,
21194 V4SF_type_node, V4SF_type_node, NULL_TREE);
21195 /* MMX/SSE/integer conversions. */
21196 tree int_ftype_v4sf
21197 = build_function_type_list (integer_type_node,
21198 V4SF_type_node, NULL_TREE);
21199 tree int64_ftype_v4sf
21200 = build_function_type_list (long_long_integer_type_node,
21201 V4SF_type_node, NULL_TREE);
21202 tree int_ftype_v8qi
21203 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
21204 tree v4sf_ftype_v4sf_int
21205 = build_function_type_list (V4SF_type_node,
21206 V4SF_type_node, integer_type_node, NULL_TREE);
21207 tree v4sf_ftype_v4sf_int64
21208 = build_function_type_list (V4SF_type_node,
21209 V4SF_type_node, long_long_integer_type_node,
21211 tree v4sf_ftype_v4sf_v2si
21212 = build_function_type_list (V4SF_type_node,
21213 V4SF_type_node, V2SI_type_node, NULL_TREE);
21215 /* Miscellaneous. */
21216 tree v8qi_ftype_v4hi_v4hi
21217 = build_function_type_list (V8QI_type_node,
21218 V4HI_type_node, V4HI_type_node, NULL_TREE);
21219 tree v4hi_ftype_v2si_v2si
21220 = build_function_type_list (V4HI_type_node,
21221 V2SI_type_node, V2SI_type_node, NULL_TREE);
21222 tree v4sf_ftype_v4sf_v4sf_int
21223 = build_function_type_list (V4SF_type_node,
21224 V4SF_type_node, V4SF_type_node,
21225 integer_type_node, NULL_TREE);
21226 tree v2si_ftype_v4hi_v4hi
21227 = build_function_type_list (V2SI_type_node,
21228 V4HI_type_node, V4HI_type_node, NULL_TREE);
21229 tree v4hi_ftype_v4hi_int
21230 = build_function_type_list (V4HI_type_node,
21231 V4HI_type_node, integer_type_node, NULL_TREE);
21232 tree v2si_ftype_v2si_int
21233 = build_function_type_list (V2SI_type_node,
21234 V2SI_type_node, integer_type_node, NULL_TREE);
21235 tree v1di_ftype_v1di_int
21236 = build_function_type_list (V1DI_type_node,
21237 V1DI_type_node, integer_type_node, NULL_TREE);
21239 tree void_ftype_void
21240 = build_function_type (void_type_node, void_list_node);
21241 tree void_ftype_unsigned
21242 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
21243 tree void_ftype_unsigned_unsigned
21244 = build_function_type_list (void_type_node, unsigned_type_node,
21245 unsigned_type_node, NULL_TREE);
21246 tree void_ftype_pcvoid_unsigned_unsigned
21247 = build_function_type_list (void_type_node, const_ptr_type_node,
21248 unsigned_type_node, unsigned_type_node,
21250 tree unsigned_ftype_void
21251 = build_function_type (unsigned_type_node, void_list_node);
21252 tree v2si_ftype_v4sf
21253 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
21254 /* Loads/stores. */
21255 tree void_ftype_v8qi_v8qi_pchar
21256 = build_function_type_list (void_type_node,
21257 V8QI_type_node, V8QI_type_node,
21258 pchar_type_node, NULL_TREE);
21259 tree v4sf_ftype_pcfloat
21260 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
21261 tree v4sf_ftype_v4sf_pcv2sf
21262 = build_function_type_list (V4SF_type_node,
21263 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
21264 tree void_ftype_pv2sf_v4sf
21265 = build_function_type_list (void_type_node,
21266 pv2sf_type_node, V4SF_type_node, NULL_TREE);
21267 tree void_ftype_pfloat_v4sf
21268 = build_function_type_list (void_type_node,
21269 pfloat_type_node, V4SF_type_node, NULL_TREE);
21270 tree void_ftype_pdi_di
21271 = build_function_type_list (void_type_node,
21272 pdi_type_node, long_long_unsigned_type_node,
21274 tree void_ftype_pv2di_v2di
21275 = build_function_type_list (void_type_node,
21276 pv2di_type_node, V2DI_type_node, NULL_TREE);
21277 /* Normal vector unops. */
21278 tree v4sf_ftype_v4sf
21279 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
21280 tree v16qi_ftype_v16qi
21281 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
21282 tree v8hi_ftype_v8hi
21283 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
21284 tree v4si_ftype_v4si
21285 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
21286 tree v8qi_ftype_v8qi
21287 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
21288 tree v4hi_ftype_v4hi
21289 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
21291 /* Normal vector binops. */
21292 tree v4sf_ftype_v4sf_v4sf
21293 = build_function_type_list (V4SF_type_node,
21294 V4SF_type_node, V4SF_type_node, NULL_TREE);
21295 tree v8qi_ftype_v8qi_v8qi
21296 = build_function_type_list (V8QI_type_node,
21297 V8QI_type_node, V8QI_type_node, NULL_TREE);
21298 tree v4hi_ftype_v4hi_v4hi
21299 = build_function_type_list (V4HI_type_node,
21300 V4HI_type_node, V4HI_type_node, NULL_TREE);
21301 tree v2si_ftype_v2si_v2si
21302 = build_function_type_list (V2SI_type_node,
21303 V2SI_type_node, V2SI_type_node, NULL_TREE);
21304 tree v1di_ftype_v1di_v1di
21305 = build_function_type_list (V1DI_type_node,
21306 V1DI_type_node, V1DI_type_node, NULL_TREE);
21307 tree v1di_ftype_v1di_v1di_int
21308 = build_function_type_list (V1DI_type_node,
21309 V1DI_type_node, V1DI_type_node,
21310 integer_type_node, NULL_TREE);
21311 tree v2si_ftype_v2sf
21312 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
21313 tree v2sf_ftype_v2si
21314 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
21315 tree v2si_ftype_v2si
21316 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
21317 tree v2sf_ftype_v2sf
21318 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
21319 tree v2sf_ftype_v2sf_v2sf
21320 = build_function_type_list (V2SF_type_node,
21321 V2SF_type_node, V2SF_type_node, NULL_TREE);
21322 tree v2si_ftype_v2sf_v2sf
21323 = build_function_type_list (V2SI_type_node,
21324 V2SF_type_node, V2SF_type_node, NULL_TREE);
21325 tree pint_type_node = build_pointer_type (integer_type_node);
21326 tree pdouble_type_node = build_pointer_type (double_type_node);
21327 tree pcdouble_type_node = build_pointer_type (
21328 build_type_variant (double_type_node, 1, 0));
21329 tree int_ftype_v2df_v2df
21330 = build_function_type_list (integer_type_node,
21331 V2DF_type_node, V2DF_type_node, NULL_TREE);
21333 tree void_ftype_pcvoid
21334 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
21335 tree v4sf_ftype_v4si
21336 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
21337 tree v4si_ftype_v4sf
21338 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
21339 tree v2df_ftype_v4si
21340 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
21341 tree v4si_ftype_v2df
21342 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
21343 tree v4si_ftype_v2df_v2df
21344 = build_function_type_list (V4SI_type_node,
21345 V2DF_type_node, V2DF_type_node, NULL_TREE);
21346 tree v2si_ftype_v2df
21347 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
21348 tree v4sf_ftype_v2df
21349 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
21350 tree v2df_ftype_v2si
21351 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
21352 tree v2df_ftype_v4sf
21353 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
21354 tree int_ftype_v2df
21355 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
21356 tree int64_ftype_v2df
21357 = build_function_type_list (long_long_integer_type_node,
21358 V2DF_type_node, NULL_TREE);
21359 tree v2df_ftype_v2df_int
21360 = build_function_type_list (V2DF_type_node,
21361 V2DF_type_node, integer_type_node, NULL_TREE);
21362 tree v2df_ftype_v2df_int64
21363 = build_function_type_list (V2DF_type_node,
21364 V2DF_type_node, long_long_integer_type_node,
21366 tree v4sf_ftype_v4sf_v2df
21367 = build_function_type_list (V4SF_type_node,
21368 V4SF_type_node, V2DF_type_node, NULL_TREE);
21369 tree v2df_ftype_v2df_v4sf
21370 = build_function_type_list (V2DF_type_node,
21371 V2DF_type_node, V4SF_type_node, NULL_TREE);
21372 tree v2df_ftype_v2df_v2df_int
21373 = build_function_type_list (V2DF_type_node,
21374 V2DF_type_node, V2DF_type_node,
21377 tree v2df_ftype_v2df_pcdouble
21378 = build_function_type_list (V2DF_type_node,
21379 V2DF_type_node, pcdouble_type_node, NULL_TREE);
21380 tree void_ftype_pdouble_v2df
21381 = build_function_type_list (void_type_node,
21382 pdouble_type_node, V2DF_type_node, NULL_TREE);
21383 tree void_ftype_pint_int
21384 = build_function_type_list (void_type_node,
21385 pint_type_node, integer_type_node, NULL_TREE);
21386 tree void_ftype_v16qi_v16qi_pchar
21387 = build_function_type_list (void_type_node,
21388 V16QI_type_node, V16QI_type_node,
21389 pchar_type_node, NULL_TREE);
21390 tree v2df_ftype_pcdouble
21391 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
21392 tree v2df_ftype_v2df_v2df
21393 = build_function_type_list (V2DF_type_node,
21394 V2DF_type_node, V2DF_type_node, NULL_TREE);
21395 tree v16qi_ftype_v16qi_v16qi
21396 = build_function_type_list (V16QI_type_node,
21397 V16QI_type_node, V16QI_type_node, NULL_TREE);
21398 tree v8hi_ftype_v8hi_v8hi
21399 = build_function_type_list (V8HI_type_node,
21400 V8HI_type_node, V8HI_type_node, NULL_TREE);
21401 tree v4si_ftype_v4si_v4si
21402 = build_function_type_list (V4SI_type_node,
21403 V4SI_type_node, V4SI_type_node, NULL_TREE);
21404 tree v2di_ftype_v2di_v2di
21405 = build_function_type_list (V2DI_type_node,
21406 V2DI_type_node, V2DI_type_node, NULL_TREE);
21407 tree v2di_ftype_v2df_v2df
21408 = build_function_type_list (V2DI_type_node,
21409 V2DF_type_node, V2DF_type_node, NULL_TREE);
21410 tree v2df_ftype_v2df
21411 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
21412 tree v2di_ftype_v2di_int
21413 = build_function_type_list (V2DI_type_node,
21414 V2DI_type_node, integer_type_node, NULL_TREE);
21415 tree v2di_ftype_v2di_v2di_int
21416 = build_function_type_list (V2DI_type_node, V2DI_type_node,
21417 V2DI_type_node, integer_type_node, NULL_TREE);
21418 tree v4si_ftype_v4si_int
21419 = build_function_type_list (V4SI_type_node,
21420 V4SI_type_node, integer_type_node, NULL_TREE);
21421 tree v8hi_ftype_v8hi_int
21422 = build_function_type_list (V8HI_type_node,
21423 V8HI_type_node, integer_type_node, NULL_TREE);
21424 tree v4si_ftype_v8hi_v8hi
21425 = build_function_type_list (V4SI_type_node,
21426 V8HI_type_node, V8HI_type_node, NULL_TREE);
21427 tree v1di_ftype_v8qi_v8qi
21428 = build_function_type_list (V1DI_type_node,
21429 V8QI_type_node, V8QI_type_node, NULL_TREE);
21430 tree v1di_ftype_v2si_v2si
21431 = build_function_type_list (V1DI_type_node,
21432 V2SI_type_node, V2SI_type_node, NULL_TREE);
21433 tree v2di_ftype_v16qi_v16qi
21434 = build_function_type_list (V2DI_type_node,
21435 V16QI_type_node, V16QI_type_node, NULL_TREE);
21436 tree v2di_ftype_v4si_v4si
21437 = build_function_type_list (V2DI_type_node,
21438 V4SI_type_node, V4SI_type_node, NULL_TREE);
21439 tree int_ftype_v16qi
21440 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
21441 tree v16qi_ftype_pcchar
21442 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
21443 tree void_ftype_pchar_v16qi
21444 = build_function_type_list (void_type_node,
21445 pchar_type_node, V16QI_type_node, NULL_TREE);
21447 tree v2di_ftype_v2di_unsigned_unsigned
21448 = build_function_type_list (V2DI_type_node, V2DI_type_node,
21449 unsigned_type_node, unsigned_type_node,
21451 tree v2di_ftype_v2di_v2di_unsigned_unsigned
21452 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
21453 unsigned_type_node, unsigned_type_node,
21455 tree v2di_ftype_v2di_v16qi
21456 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
21458 tree v2df_ftype_v2df_v2df_v2df
21459 = build_function_type_list (V2DF_type_node,
21460 V2DF_type_node, V2DF_type_node,
21461 V2DF_type_node, NULL_TREE);
21462 tree v4sf_ftype_v4sf_v4sf_v4sf
21463 = build_function_type_list (V4SF_type_node,
21464 V4SF_type_node, V4SF_type_node,
21465 V4SF_type_node, NULL_TREE);
21466 tree v8hi_ftype_v16qi
21467 = build_function_type_list (V8HI_type_node, V16QI_type_node,
21469 tree v4si_ftype_v16qi
21470 = build_function_type_list (V4SI_type_node, V16QI_type_node,
21472 tree v2di_ftype_v16qi
21473 = build_function_type_list (V2DI_type_node, V16QI_type_node,
21475 tree v4si_ftype_v8hi
21476 = build_function_type_list (V4SI_type_node, V8HI_type_node,
21478 tree v2di_ftype_v8hi
21479 = build_function_type_list (V2DI_type_node, V8HI_type_node,
21481 tree v2di_ftype_v4si
21482 = build_function_type_list (V2DI_type_node, V4SI_type_node,
21484 tree v2di_ftype_pv2di
21485 = build_function_type_list (V2DI_type_node, pv2di_type_node,
21487 tree v16qi_ftype_v16qi_v16qi_int
21488 = build_function_type_list (V16QI_type_node, V16QI_type_node,
21489 V16QI_type_node, integer_type_node,
21491 tree v16qi_ftype_v16qi_v16qi_v16qi
21492 = build_function_type_list (V16QI_type_node, V16QI_type_node,
21493 V16QI_type_node, V16QI_type_node,
21495 tree v8hi_ftype_v8hi_v8hi_int
21496 = build_function_type_list (V8HI_type_node, V8HI_type_node,
21497 V8HI_type_node, integer_type_node,
21499 tree v4si_ftype_v4si_v4si_int
21500 = build_function_type_list (V4SI_type_node, V4SI_type_node,
21501 V4SI_type_node, integer_type_node,
21503 tree int_ftype_v2di_v2di
21504 = build_function_type_list (integer_type_node,
21505 V2DI_type_node, V2DI_type_node,
21507 tree int_ftype_v16qi_int_v16qi_int_int
21508 = build_function_type_list (integer_type_node,
21515 tree v16qi_ftype_v16qi_int_v16qi_int_int
21516 = build_function_type_list (V16QI_type_node,
21523 tree int_ftype_v16qi_v16qi_int
21524 = build_function_type_list (integer_type_node,
21530 /* SSE5 instructions */
21531 tree v2di_ftype_v2di_v2di_v2di
21532 = build_function_type_list (V2DI_type_node,
21538 tree v4si_ftype_v4si_v4si_v4si
21539 = build_function_type_list (V4SI_type_node,
21545 tree v4si_ftype_v4si_v4si_v2di
21546 = build_function_type_list (V4SI_type_node,
21552 tree v8hi_ftype_v8hi_v8hi_v8hi
21553 = build_function_type_list (V8HI_type_node,
21559 tree v8hi_ftype_v8hi_v8hi_v4si
21560 = build_function_type_list (V8HI_type_node,
21566 tree v2df_ftype_v2df_v2df_v16qi
21567 = build_function_type_list (V2DF_type_node,
21573 tree v4sf_ftype_v4sf_v4sf_v16qi
21574 = build_function_type_list (V4SF_type_node,
21580 tree v2di_ftype_v2di_si
21581 = build_function_type_list (V2DI_type_node,
21586 tree v4si_ftype_v4si_si
21587 = build_function_type_list (V4SI_type_node,
21592 tree v8hi_ftype_v8hi_si
21593 = build_function_type_list (V8HI_type_node,
21598 tree v16qi_ftype_v16qi_si
21599 = build_function_type_list (V16QI_type_node,
21603 tree v4sf_ftype_v4hi
21604 = build_function_type_list (V4SF_type_node,
21608 tree v4hi_ftype_v4sf
21609 = build_function_type_list (V4HI_type_node,
21613 tree v2di_ftype_v2di
21614 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
21616 tree v16qi_ftype_v8hi_v8hi
21617 = build_function_type_list (V16QI_type_node,
21618 V8HI_type_node, V8HI_type_node,
21620 tree v8hi_ftype_v4si_v4si
21621 = build_function_type_list (V8HI_type_node,
21622 V4SI_type_node, V4SI_type_node,
21624 tree v8hi_ftype_v16qi_v16qi
21625 = build_function_type_list (V8HI_type_node,
21626 V16QI_type_node, V16QI_type_node,
21628 tree v4hi_ftype_v8qi_v8qi
21629 = build_function_type_list (V4HI_type_node,
21630 V8QI_type_node, V8QI_type_node,
21632 tree unsigned_ftype_unsigned_uchar
21633 = build_function_type_list (unsigned_type_node,
21634 unsigned_type_node,
21635 unsigned_char_type_node,
21637 tree unsigned_ftype_unsigned_ushort
21638 = build_function_type_list (unsigned_type_node,
21639 unsigned_type_node,
21640 short_unsigned_type_node,
21642 tree unsigned_ftype_unsigned_unsigned
21643 = build_function_type_list (unsigned_type_node,
21644 unsigned_type_node,
21645 unsigned_type_node,
21647 tree uint64_ftype_uint64_uint64
21648 = build_function_type_list (long_long_unsigned_type_node,
21649 long_long_unsigned_type_node,
21650 long_long_unsigned_type_node,
21652 tree float_ftype_float
21653 = build_function_type_list (float_type_node,
21658 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
21660 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
21662 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
21664 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
21666 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
21668 tree v8sf_ftype_v8sf
21669 = build_function_type_list (V8SF_type_node,
21672 tree v8si_ftype_v8sf
21673 = build_function_type_list (V8SI_type_node,
21676 tree v8sf_ftype_v8si
21677 = build_function_type_list (V8SF_type_node,
21680 tree v4si_ftype_v4df
21681 = build_function_type_list (V4SI_type_node,
21684 tree v4df_ftype_v4df
21685 = build_function_type_list (V4DF_type_node,
21688 tree v4df_ftype_v4si
21689 = build_function_type_list (V4DF_type_node,
21692 tree v4df_ftype_v4sf
21693 = build_function_type_list (V4DF_type_node,
21696 tree v4sf_ftype_v4df
21697 = build_function_type_list (V4SF_type_node,
21700 tree v8sf_ftype_v8sf_v8sf
21701 = build_function_type_list (V8SF_type_node,
21702 V8SF_type_node, V8SF_type_node,
21704 tree v4df_ftype_v4df_v4df
21705 = build_function_type_list (V4DF_type_node,
21706 V4DF_type_node, V4DF_type_node,
21708 tree v8sf_ftype_v8sf_int
21709 = build_function_type_list (V8SF_type_node,
21710 V8SF_type_node, integer_type_node,
21712 tree v4si_ftype_v8si_int
21713 = build_function_type_list (V4SI_type_node,
21714 V8SI_type_node, integer_type_node,
21716 tree v4df_ftype_v4df_int
21717 = build_function_type_list (V4DF_type_node,
21718 V4DF_type_node, integer_type_node,
21720 tree v4sf_ftype_v8sf_int
21721 = build_function_type_list (V4SF_type_node,
21722 V8SF_type_node, integer_type_node,
21724 tree v2df_ftype_v4df_int
21725 = build_function_type_list (V2DF_type_node,
21726 V4DF_type_node, integer_type_node,
21728 tree v8sf_ftype_v8sf_v8sf_int
21729 = build_function_type_list (V8SF_type_node,
21730 V8SF_type_node, V8SF_type_node,
21733 tree v8sf_ftype_v8sf_v8sf_v8sf
21734 = build_function_type_list (V8SF_type_node,
21735 V8SF_type_node, V8SF_type_node,
21738 tree v4df_ftype_v4df_v4df_v4df
21739 = build_function_type_list (V4DF_type_node,
21740 V4DF_type_node, V4DF_type_node,
21743 tree v8si_ftype_v8si_v8si_int
21744 = build_function_type_list (V8SI_type_node,
21745 V8SI_type_node, V8SI_type_node,
21748 tree v4df_ftype_v4df_v4df_int
21749 = build_function_type_list (V4DF_type_node,
21750 V4DF_type_node, V4DF_type_node,
21753 tree v8sf_ftype_v8sf_v8sf_v8si_int
21754 = build_function_type_list (V8SF_type_node,
21755 V8SF_type_node, V8SF_type_node,
21756 V8SI_type_node, integer_type_node,
21758 tree v4df_ftype_v4df_v4df_v4di_int
21759 = build_function_type_list (V4DF_type_node,
21760 V4DF_type_node, V4DF_type_node,
21761 V4DI_type_node, integer_type_node,
21763 tree v4sf_ftype_v4sf_v4sf_v4si_int
21764 = build_function_type_list (V4SF_type_node,
21765 V4SF_type_node, V4SF_type_node,
21766 V4SI_type_node, integer_type_node,
21768 tree v2df_ftype_v2df_v2df_v2di_int
21769 = build_function_type_list (V2DF_type_node,
21770 V2DF_type_node, V2DF_type_node,
21771 V2DI_type_node, integer_type_node,
21773 tree v8sf_ftype_pcfloat
21774 = build_function_type_list (V8SF_type_node,
21777 tree v4df_ftype_pcdouble
21778 = build_function_type_list (V4DF_type_node,
21779 pcdouble_type_node,
21781 tree pcv4sf_type_node
21782 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
21783 tree pcv2df_type_node
21784 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
21785 tree v8sf_ftype_pcv4sf
21786 = build_function_type_list (V8SF_type_node,
21789 tree v4df_ftype_pcv2df
21790 = build_function_type_list (V4DF_type_node,
21793 tree v32qi_ftype_pcchar
21794 = build_function_type_list (V32QI_type_node,
21797 tree void_ftype_pchar_v32qi
21798 = build_function_type_list (void_type_node,
21799 pchar_type_node, V32QI_type_node,
21801 tree v8si_ftype_v8si_v4si_int
21802 = build_function_type_list (V8SI_type_node,
21803 V8SI_type_node, V4SI_type_node,
21806 tree v8sf_ftype_v8sf_v4sf_int
21807 = build_function_type_list (V8SF_type_node,
21808 V8SF_type_node, V4SF_type_node,
21811 tree v4df_ftype_v4df_v2df_int
21812 = build_function_type_list (V4DF_type_node,
21813 V4DF_type_node, V2DF_type_node,
21816 tree void_ftype_pfloat_v8sf
21817 = build_function_type_list (void_type_node,
21818 pfloat_type_node, V8SF_type_node,
21820 tree void_ftype_pdouble_v4df
21821 = build_function_type_list (void_type_node,
21822 pdouble_type_node, V4DF_type_node,
21824 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
21825 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
21826 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
21827 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
21828 tree pcv8sf_type_node
21829 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
21830 tree pcv4df_type_node
21831 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
21832 tree v8sf_ftype_pcv8sf_v8sf
21833 = build_function_type_list (V8SF_type_node,
21834 pcv8sf_type_node, V8SF_type_node,
21836 tree v4df_ftype_pcv4df_v4df
21837 = build_function_type_list (V4DF_type_node,
21838 pcv4df_type_node, V4DF_type_node,
21840 tree v4sf_ftype_pcv4sf_v4sf
21841 = build_function_type_list (V4SF_type_node,
21842 pcv4sf_type_node, V4SF_type_node,
21844 tree v2df_ftype_pcv2df_v2df
21845 = build_function_type_list (V2DF_type_node,
21846 pcv2df_type_node, V2DF_type_node,
21848 tree void_ftype_pv8sf_v8sf_v8sf
21849 = build_function_type_list (void_type_node,
21850 pv8sf_type_node, V8SF_type_node,
21853 tree void_ftype_pv4df_v4df_v4df
21854 = build_function_type_list (void_type_node,
21855 pv4df_type_node, V4DF_type_node,
21858 tree void_ftype_pv4sf_v4sf_v4sf
21859 = build_function_type_list (void_type_node,
21860 pv4sf_type_node, V4SF_type_node,
21863 tree void_ftype_pv2df_v2df_v2df
21864 = build_function_type_list (void_type_node,
21865 pv2df_type_node, V2DF_type_node,
21868 tree v4df_ftype_v2df
21869 = build_function_type_list (V4DF_type_node,
21872 tree v8sf_ftype_v4sf
21873 = build_function_type_list (V8SF_type_node,
21876 tree v8si_ftype_v4si
21877 = build_function_type_list (V8SI_type_node,
21880 tree v2df_ftype_v4df
21881 = build_function_type_list (V2DF_type_node,
21884 tree v4sf_ftype_v8sf
21885 = build_function_type_list (V4SF_type_node,
21888 tree v4si_ftype_v8si
21889 = build_function_type_list (V4SI_type_node,
21892 tree int_ftype_v4df
21893 = build_function_type_list (integer_type_node,
21896 tree int_ftype_v8sf
21897 = build_function_type_list (integer_type_node,
21900 tree int_ftype_v8sf_v8sf
21901 = build_function_type_list (integer_type_node,
21902 V8SF_type_node, V8SF_type_node,
21904 tree int_ftype_v4di_v4di
21905 = build_function_type_list (integer_type_node,
21906 V4DI_type_node, V4DI_type_node,
21908 tree int_ftype_v4df_v4df
21909 = build_function_type_list (integer_type_node,
21910 V4DF_type_node, V4DF_type_node,
21912 tree v8sf_ftype_v8sf_v8si
21913 = build_function_type_list (V8SF_type_node,
21914 V8SF_type_node, V8SI_type_node,
21916 tree v4df_ftype_v4df_v4di
21917 = build_function_type_list (V4DF_type_node,
21918 V4DF_type_node, V4DI_type_node,
21920 tree v4sf_ftype_v4sf_v4si
21921 = build_function_type_list (V4SF_type_node,
21922 V4SF_type_node, V4SI_type_node, NULL_TREE);
21923 tree v2df_ftype_v2df_v2di
21924 = build_function_type_list (V2DF_type_node,
21925 V2DF_type_node, V2DI_type_node, NULL_TREE);
21929 /* Add all special builtins with variable number of operands. */
21930 for (i = 0, d = bdesc_special_args;
21931 i < ARRAY_SIZE (bdesc_special_args);
21939 switch ((enum ix86_special_builtin_type) d->flag)
21941 case VOID_FTYPE_VOID:
21942 type = void_ftype_void;
21944 case V32QI_FTYPE_PCCHAR:
21945 type = v32qi_ftype_pcchar;
21947 case V16QI_FTYPE_PCCHAR:
21948 type = v16qi_ftype_pcchar;
21950 case V8SF_FTYPE_PCV4SF:
21951 type = v8sf_ftype_pcv4sf;
21953 case V8SF_FTYPE_PCFLOAT:
21954 type = v8sf_ftype_pcfloat;
21956 case V4DF_FTYPE_PCV2DF:
21957 type = v4df_ftype_pcv2df;
21959 case V4DF_FTYPE_PCDOUBLE:
21960 type = v4df_ftype_pcdouble;
21962 case V4SF_FTYPE_PCFLOAT:
21963 type = v4sf_ftype_pcfloat;
21965 case V2DI_FTYPE_PV2DI:
21966 type = v2di_ftype_pv2di;
21968 case V2DF_FTYPE_PCDOUBLE:
21969 type = v2df_ftype_pcdouble;
21971 case V8SF_FTYPE_PCV8SF_V8SF:
21972 type = v8sf_ftype_pcv8sf_v8sf;
21974 case V4DF_FTYPE_PCV4DF_V4DF:
21975 type = v4df_ftype_pcv4df_v4df;
21977 case V4SF_FTYPE_V4SF_PCV2SF:
21978 type = v4sf_ftype_v4sf_pcv2sf;
21980 case V4SF_FTYPE_PCV4SF_V4SF:
21981 type = v4sf_ftype_pcv4sf_v4sf;
21983 case V2DF_FTYPE_V2DF_PCDOUBLE:
21984 type = v2df_ftype_v2df_pcdouble;
21986 case V2DF_FTYPE_PCV2DF_V2DF:
21987 type = v2df_ftype_pcv2df_v2df;
21989 case VOID_FTYPE_PV2SF_V4SF:
21990 type = void_ftype_pv2sf_v4sf;
21992 case VOID_FTYPE_PV2DI_V2DI:
21993 type = void_ftype_pv2di_v2di;
21995 case VOID_FTYPE_PCHAR_V32QI:
21996 type = void_ftype_pchar_v32qi;
21998 case VOID_FTYPE_PCHAR_V16QI:
21999 type = void_ftype_pchar_v16qi;
22001 case VOID_FTYPE_PFLOAT_V8SF:
22002 type = void_ftype_pfloat_v8sf;
22004 case VOID_FTYPE_PFLOAT_V4SF:
22005 type = void_ftype_pfloat_v4sf;
22007 case VOID_FTYPE_PDOUBLE_V4DF:
22008 type = void_ftype_pdouble_v4df;
22010 case VOID_FTYPE_PDOUBLE_V2DF:
22011 type = void_ftype_pdouble_v2df;
22013 case VOID_FTYPE_PDI_DI:
22014 type = void_ftype_pdi_di;
22016 case VOID_FTYPE_PINT_INT:
22017 type = void_ftype_pint_int;
22019 case VOID_FTYPE_PV8SF_V8SF_V8SF:
22020 type = void_ftype_pv8sf_v8sf_v8sf;
22022 case VOID_FTYPE_PV4DF_V4DF_V4DF:
22023 type = void_ftype_pv4df_v4df_v4df;
22025 case VOID_FTYPE_PV4SF_V4SF_V4SF:
22026 type = void_ftype_pv4sf_v4sf_v4sf;
22028 case VOID_FTYPE_PV2DF_V2DF_V2DF:
22029 type = void_ftype_pv2df_v2df_v2df;
22032 gcc_unreachable ();
22035 def_builtin (d->mask, d->name, type, d->code);
22038 /* Add all builtins with variable number of operands. */
22039 for (i = 0, d = bdesc_args;
22040 i < ARRAY_SIZE (bdesc_args);
22048 switch ((enum ix86_builtin_type) d->flag)
22050 case FLOAT_FTYPE_FLOAT:
22051 type = float_ftype_float;
22053 case INT_FTYPE_V8SF_V8SF_PTEST:
22054 type = int_ftype_v8sf_v8sf;
22056 case INT_FTYPE_V4DI_V4DI_PTEST:
22057 type = int_ftype_v4di_v4di;
22059 case INT_FTYPE_V4DF_V4DF_PTEST:
22060 type = int_ftype_v4df_v4df;
22062 case INT_FTYPE_V4SF_V4SF_PTEST:
22063 type = int_ftype_v4sf_v4sf;
22065 case INT_FTYPE_V2DI_V2DI_PTEST:
22066 type = int_ftype_v2di_v2di;
22068 case INT_FTYPE_V2DF_V2DF_PTEST:
22069 type = int_ftype_v2df_v2df;
22071 case INT64_FTYPE_V4SF:
22072 type = int64_ftype_v4sf;
22074 case INT64_FTYPE_V2DF:
22075 type = int64_ftype_v2df;
22077 case INT_FTYPE_V16QI:
22078 type = int_ftype_v16qi;
22080 case INT_FTYPE_V8QI:
22081 type = int_ftype_v8qi;
22083 case INT_FTYPE_V8SF:
22084 type = int_ftype_v8sf;
22086 case INT_FTYPE_V4DF:
22087 type = int_ftype_v4df;
22089 case INT_FTYPE_V4SF:
22090 type = int_ftype_v4sf;
22092 case INT_FTYPE_V2DF:
22093 type = int_ftype_v2df;
22095 case V16QI_FTYPE_V16QI:
22096 type = v16qi_ftype_v16qi;
22098 case V8SI_FTYPE_V8SF:
22099 type = v8si_ftype_v8sf;
22101 case V8SI_FTYPE_V4SI:
22102 type = v8si_ftype_v4si;
22104 case V8HI_FTYPE_V8HI:
22105 type = v8hi_ftype_v8hi;
22107 case V8HI_FTYPE_V16QI:
22108 type = v8hi_ftype_v16qi;
22110 case V8QI_FTYPE_V8QI:
22111 type = v8qi_ftype_v8qi;
22113 case V8SF_FTYPE_V8SF:
22114 type = v8sf_ftype_v8sf;
22116 case V8SF_FTYPE_V8SI:
22117 type = v8sf_ftype_v8si;
22119 case V8SF_FTYPE_V4SF:
22120 type = v8sf_ftype_v4sf;
22122 case V4SI_FTYPE_V4DF:
22123 type = v4si_ftype_v4df;
22125 case V4SI_FTYPE_V4SI:
22126 type = v4si_ftype_v4si;
22128 case V4SI_FTYPE_V16QI:
22129 type = v4si_ftype_v16qi;
22131 case V4SI_FTYPE_V8SI:
22132 type = v4si_ftype_v8si;
22134 case V4SI_FTYPE_V8HI:
22135 type = v4si_ftype_v8hi;
22137 case V4SI_FTYPE_V4SF:
22138 type = v4si_ftype_v4sf;
22140 case V4SI_FTYPE_V2DF:
22141 type = v4si_ftype_v2df;
22143 case V4HI_FTYPE_V4HI:
22144 type = v4hi_ftype_v4hi;
22146 case V4DF_FTYPE_V4DF:
22147 type = v4df_ftype_v4df;
22149 case V4DF_FTYPE_V4SI:
22150 type = v4df_ftype_v4si;
22152 case V4DF_FTYPE_V4SF:
22153 type = v4df_ftype_v4sf;
22155 case V4DF_FTYPE_V2DF:
22156 type = v4df_ftype_v2df;
22158 case V4SF_FTYPE_V4SF:
22159 case V4SF_FTYPE_V4SF_VEC_MERGE:
22160 type = v4sf_ftype_v4sf;
22162 case V4SF_FTYPE_V8SF:
22163 type = v4sf_ftype_v8sf;
22165 case V4SF_FTYPE_V4SI:
22166 type = v4sf_ftype_v4si;
22168 case V4SF_FTYPE_V4DF:
22169 type = v4sf_ftype_v4df;
22171 case V4SF_FTYPE_V2DF:
22172 type = v4sf_ftype_v2df;
22174 case V2DI_FTYPE_V2DI:
22175 type = v2di_ftype_v2di;
22177 case V2DI_FTYPE_V16QI:
22178 type = v2di_ftype_v16qi;
22180 case V2DI_FTYPE_V8HI:
22181 type = v2di_ftype_v8hi;
22183 case V2DI_FTYPE_V4SI:
22184 type = v2di_ftype_v4si;
22186 case V2SI_FTYPE_V2SI:
22187 type = v2si_ftype_v2si;
22189 case V2SI_FTYPE_V4SF:
22190 type = v2si_ftype_v4sf;
22192 case V2SI_FTYPE_V2DF:
22193 type = v2si_ftype_v2df;
22195 case V2SI_FTYPE_V2SF:
22196 type = v2si_ftype_v2sf;
22198 case V2DF_FTYPE_V4DF:
22199 type = v2df_ftype_v4df;
22201 case V2DF_FTYPE_V4SF:
22202 type = v2df_ftype_v4sf;
22204 case V2DF_FTYPE_V2DF:
22205 case V2DF_FTYPE_V2DF_VEC_MERGE:
22206 type = v2df_ftype_v2df;
22208 case V2DF_FTYPE_V2SI:
22209 type = v2df_ftype_v2si;
22211 case V2DF_FTYPE_V4SI:
22212 type = v2df_ftype_v4si;
22214 case V2SF_FTYPE_V2SF:
22215 type = v2sf_ftype_v2sf;
22217 case V2SF_FTYPE_V2SI:
22218 type = v2sf_ftype_v2si;
22220 case V16QI_FTYPE_V16QI_V16QI:
22221 type = v16qi_ftype_v16qi_v16qi;
22223 case V16QI_FTYPE_V8HI_V8HI:
22224 type = v16qi_ftype_v8hi_v8hi;
22226 case V8QI_FTYPE_V8QI_V8QI:
22227 type = v8qi_ftype_v8qi_v8qi;
22229 case V8QI_FTYPE_V4HI_V4HI:
22230 type = v8qi_ftype_v4hi_v4hi;
22232 case V8HI_FTYPE_V8HI_V8HI:
22233 case V8HI_FTYPE_V8HI_V8HI_COUNT:
22234 type = v8hi_ftype_v8hi_v8hi;
22236 case V8HI_FTYPE_V16QI_V16QI:
22237 type = v8hi_ftype_v16qi_v16qi;
22239 case V8HI_FTYPE_V4SI_V4SI:
22240 type = v8hi_ftype_v4si_v4si;
22242 case V8HI_FTYPE_V8HI_SI_COUNT:
22243 type = v8hi_ftype_v8hi_int;
22245 case V8SF_FTYPE_V8SF_V8SF:
22246 type = v8sf_ftype_v8sf_v8sf;
22248 case V8SF_FTYPE_V8SF_V8SI:
22249 type = v8sf_ftype_v8sf_v8si;
22251 case V4SI_FTYPE_V4SI_V4SI:
22252 case V4SI_FTYPE_V4SI_V4SI_COUNT:
22253 type = v4si_ftype_v4si_v4si;
22255 case V4SI_FTYPE_V8HI_V8HI:
22256 type = v4si_ftype_v8hi_v8hi;
22258 case V4SI_FTYPE_V4SF_V4SF:
22259 type = v4si_ftype_v4sf_v4sf;
22261 case V4SI_FTYPE_V2DF_V2DF:
22262 type = v4si_ftype_v2df_v2df;
22264 case V4SI_FTYPE_V4SI_SI_COUNT:
22265 type = v4si_ftype_v4si_int;
22267 case V4HI_FTYPE_V4HI_V4HI:
22268 case V4HI_FTYPE_V4HI_V4HI_COUNT:
22269 type = v4hi_ftype_v4hi_v4hi;
22271 case V4HI_FTYPE_V8QI_V8QI:
22272 type = v4hi_ftype_v8qi_v8qi;
22274 case V4HI_FTYPE_V2SI_V2SI:
22275 type = v4hi_ftype_v2si_v2si;
22277 case V4HI_FTYPE_V4HI_SI_COUNT:
22278 type = v4hi_ftype_v4hi_int;
22280 case V4DF_FTYPE_V4DF_V4DF:
22281 type = v4df_ftype_v4df_v4df;
22283 case V4DF_FTYPE_V4DF_V4DI:
22284 type = v4df_ftype_v4df_v4di;
22286 case V4SF_FTYPE_V4SF_V4SF:
22287 case V4SF_FTYPE_V4SF_V4SF_SWAP:
22288 type = v4sf_ftype_v4sf_v4sf;
22290 case V4SF_FTYPE_V4SF_V4SI:
22291 type = v4sf_ftype_v4sf_v4si;
22293 case V4SF_FTYPE_V4SF_V2SI:
22294 type = v4sf_ftype_v4sf_v2si;
22296 case V4SF_FTYPE_V4SF_V2DF:
22297 type = v4sf_ftype_v4sf_v2df;
22299 case V4SF_FTYPE_V4SF_DI:
22300 type = v4sf_ftype_v4sf_int64;
22302 case V4SF_FTYPE_V4SF_SI:
22303 type = v4sf_ftype_v4sf_int;
22305 case V2DI_FTYPE_V2DI_V2DI:
22306 case V2DI_FTYPE_V2DI_V2DI_COUNT:
22307 type = v2di_ftype_v2di_v2di;
22309 case V2DI_FTYPE_V16QI_V16QI:
22310 type = v2di_ftype_v16qi_v16qi;
22312 case V2DI_FTYPE_V4SI_V4SI:
22313 type = v2di_ftype_v4si_v4si;
22315 case V2DI_FTYPE_V2DI_V16QI:
22316 type = v2di_ftype_v2di_v16qi;
22318 case V2DI_FTYPE_V2DF_V2DF:
22319 type = v2di_ftype_v2df_v2df;
22321 case V2DI_FTYPE_V2DI_SI_COUNT:
22322 type = v2di_ftype_v2di_int;
22324 case V2SI_FTYPE_V2SI_V2SI:
22325 case V2SI_FTYPE_V2SI_V2SI_COUNT:
22326 type = v2si_ftype_v2si_v2si;
22328 case V2SI_FTYPE_V4HI_V4HI:
22329 type = v2si_ftype_v4hi_v4hi;
22331 case V2SI_FTYPE_V2SF_V2SF:
22332 type = v2si_ftype_v2sf_v2sf;
22334 case V2SI_FTYPE_V2SI_SI_COUNT:
22335 type = v2si_ftype_v2si_int;
22337 case V2DF_FTYPE_V2DF_V2DF:
22338 case V2DF_FTYPE_V2DF_V2DF_SWAP:
22339 type = v2df_ftype_v2df_v2df;
22341 case V2DF_FTYPE_V2DF_V4SF:
22342 type = v2df_ftype_v2df_v4sf;
22344 case V2DF_FTYPE_V2DF_V2DI:
22345 type = v2df_ftype_v2df_v2di;
22347 case V2DF_FTYPE_V2DF_DI:
22348 type = v2df_ftype_v2df_int64;
22350 case V2DF_FTYPE_V2DF_SI:
22351 type = v2df_ftype_v2df_int;
22353 case V2SF_FTYPE_V2SF_V2SF:
22354 type = v2sf_ftype_v2sf_v2sf;
22356 case V1DI_FTYPE_V1DI_V1DI:
22357 case V1DI_FTYPE_V1DI_V1DI_COUNT:
22358 type = v1di_ftype_v1di_v1di;
22360 case V1DI_FTYPE_V8QI_V8QI:
22361 type = v1di_ftype_v8qi_v8qi;
22363 case V1DI_FTYPE_V2SI_V2SI:
22364 type = v1di_ftype_v2si_v2si;
22366 case V1DI_FTYPE_V1DI_SI_COUNT:
22367 type = v1di_ftype_v1di_int;
22369 case UINT64_FTYPE_UINT64_UINT64:
22370 type = uint64_ftype_uint64_uint64;
22372 case UINT_FTYPE_UINT_UINT:
22373 type = unsigned_ftype_unsigned_unsigned;
22375 case UINT_FTYPE_UINT_USHORT:
22376 type = unsigned_ftype_unsigned_ushort;
22378 case UINT_FTYPE_UINT_UCHAR:
22379 type = unsigned_ftype_unsigned_uchar;
22381 case V8HI_FTYPE_V8HI_INT:
22382 type = v8hi_ftype_v8hi_int;
22384 case V8SF_FTYPE_V8SF_INT:
22385 type = v8sf_ftype_v8sf_int;
22387 case V4SI_FTYPE_V4SI_INT:
22388 type = v4si_ftype_v4si_int;
22390 case V4SI_FTYPE_V8SI_INT:
22391 type = v4si_ftype_v8si_int;
22393 case V4HI_FTYPE_V4HI_INT:
22394 type = v4hi_ftype_v4hi_int;
22396 case V4DF_FTYPE_V4DF_INT:
22397 type = v4df_ftype_v4df_int;
22399 case V4SF_FTYPE_V4SF_INT:
22400 type = v4sf_ftype_v4sf_int;
22402 case V4SF_FTYPE_V8SF_INT:
22403 type = v4sf_ftype_v8sf_int;
22405 case V2DI_FTYPE_V2DI_INT:
22406 case V2DI2TI_FTYPE_V2DI_INT:
22407 type = v2di_ftype_v2di_int;
22409 case V2DF_FTYPE_V2DF_INT:
22410 type = v2df_ftype_v2df_int;
22412 case V2DF_FTYPE_V4DF_INT:
22413 type = v2df_ftype_v4df_int;
22415 case V16QI_FTYPE_V16QI_V16QI_V16QI:
22416 type = v16qi_ftype_v16qi_v16qi_v16qi;
22418 case V8SF_FTYPE_V8SF_V8SF_V8SF:
22419 type = v8sf_ftype_v8sf_v8sf_v8sf;
22421 case V4DF_FTYPE_V4DF_V4DF_V4DF:
22422 type = v4df_ftype_v4df_v4df_v4df;
22424 case V4SF_FTYPE_V4SF_V4SF_V4SF:
22425 type = v4sf_ftype_v4sf_v4sf_v4sf;
22427 case V2DF_FTYPE_V2DF_V2DF_V2DF:
22428 type = v2df_ftype_v2df_v2df_v2df;
22430 case V16QI_FTYPE_V16QI_V16QI_INT:
22431 type = v16qi_ftype_v16qi_v16qi_int;
22433 case V8SI_FTYPE_V8SI_V8SI_INT:
22434 type = v8si_ftype_v8si_v8si_int;
22436 case V8SI_FTYPE_V8SI_V4SI_INT:
22437 type = v8si_ftype_v8si_v4si_int;
22439 case V8HI_FTYPE_V8HI_V8HI_INT:
22440 type = v8hi_ftype_v8hi_v8hi_int;
22442 case V8SF_FTYPE_V8SF_V8SF_INT:
22443 type = v8sf_ftype_v8sf_v8sf_int;
22445 case V8SF_FTYPE_V8SF_V4SF_INT:
22446 type = v8sf_ftype_v8sf_v4sf_int;
22448 case V4SI_FTYPE_V4SI_V4SI_INT:
22449 type = v4si_ftype_v4si_v4si_int;
22451 case V4DF_FTYPE_V4DF_V4DF_INT:
22452 type = v4df_ftype_v4df_v4df_int;
22454 case V4DF_FTYPE_V4DF_V2DF_INT:
22455 type = v4df_ftype_v4df_v2df_int;
22457 case V4SF_FTYPE_V4SF_V4SF_INT:
22458 type = v4sf_ftype_v4sf_v4sf_int;
22460 case V2DI_FTYPE_V2DI_V2DI_INT:
22461 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
22462 type = v2di_ftype_v2di_v2di_int;
22464 case V2DF_FTYPE_V2DF_V2DF_INT:
22465 type = v2df_ftype_v2df_v2df_int;
22467 case V2DI_FTYPE_V2DI_UINT_UINT:
22468 type = v2di_ftype_v2di_unsigned_unsigned;
22470 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
22471 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
22473 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
22474 type = v1di_ftype_v1di_v1di_int;
22476 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
22477 type = v8sf_ftype_v8sf_v8sf_v8si_int;
22479 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
22480 type = v4df_ftype_v4df_v4df_v4di_int;
22482 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
22483 type = v4sf_ftype_v4sf_v4sf_v4si_int;
22485 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
22486 type = v2df_ftype_v2df_v2df_v2di_int;
22489 gcc_unreachable ();
22492 def_builtin_const (d->mask, d->name, type, d->code);
22495 /* pcmpestr[im] insns. */
22496 for (i = 0, d = bdesc_pcmpestr;
22497 i < ARRAY_SIZE (bdesc_pcmpestr);
22500 if (d->code == IX86_BUILTIN_PCMPESTRM128)
22501 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
22503 ftype = int_ftype_v16qi_int_v16qi_int_int;
22504 def_builtin_const (d->mask, d->name, ftype, d->code);
22507 /* pcmpistr[im] insns. */
22508 for (i = 0, d = bdesc_pcmpistr;
22509 i < ARRAY_SIZE (bdesc_pcmpistr);
22512 if (d->code == IX86_BUILTIN_PCMPISTRM128)
22513 ftype = v16qi_ftype_v16qi_v16qi_int;
22515 ftype = int_ftype_v16qi_v16qi_int;
22516 def_builtin_const (d->mask, d->name, ftype, d->code);
22519 /* comi/ucomi insns. */
22520 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
22521 if (d->mask == OPTION_MASK_ISA_SSE2)
22522 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
22524 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
22527 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
22528 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
22530 /* SSE or 3DNow!A */
22531 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
22534 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
22536 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
22537 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
22540 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
22541 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
22544 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
22545 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
22546 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
22547 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
22548 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
22549 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
22552 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
22555 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
22556 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
22558 /* Access to the vec_init patterns. */
22559 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
22560 integer_type_node, NULL_TREE);
22561 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
22563 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
22564 short_integer_type_node,
22565 short_integer_type_node,
22566 short_integer_type_node, NULL_TREE);
22567 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
22569 ftype = build_function_type_list (V8QI_type_node, char_type_node,
22570 char_type_node, char_type_node,
22571 char_type_node, char_type_node,
22572 char_type_node, char_type_node,
22573 char_type_node, NULL_TREE);
22574 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
22576 /* Access to the vec_extract patterns. */
22577 ftype = build_function_type_list (double_type_node, V2DF_type_node,
22578 integer_type_node, NULL_TREE);
22579 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
22581 ftype = build_function_type_list (long_long_integer_type_node,
22582 V2DI_type_node, integer_type_node,
22584 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
22586 ftype = build_function_type_list (float_type_node, V4SF_type_node,
22587 integer_type_node, NULL_TREE);
22588 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
22590 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
22591 integer_type_node, NULL_TREE);
22592 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
22594 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
22595 integer_type_node, NULL_TREE);
22596 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
22598 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
22599 integer_type_node, NULL_TREE);
22600 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
22602 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
22603 integer_type_node, NULL_TREE);
22604 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
22606 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
22607 integer_type_node, NULL_TREE);
22608 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
22610 /* Access to the vec_set patterns. */
22611 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
22613 integer_type_node, NULL_TREE);
22614 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
22616 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
22618 integer_type_node, NULL_TREE);
22619 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
22621 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
22623 integer_type_node, NULL_TREE);
22624 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
22626 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
22628 integer_type_node, NULL_TREE);
22629 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
22631 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
22633 integer_type_node, NULL_TREE);
22634 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
22636 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
22638 integer_type_node, NULL_TREE);
22639 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
22641 /* Add SSE5 multi-arg argument instructions */
22642 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
22644 tree mtype = NULL_TREE;
22649 switch ((enum multi_arg_type)d->flag)
22651 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
22652 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
22653 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
22654 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
22655 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
22656 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
22657 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
22658 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
22659 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
22660 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
22661 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
22662 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
22663 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
22664 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
22665 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
22666 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
22667 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
22668 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
22669 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
22670 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
22671 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
22672 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
22673 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
22674 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
22675 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
22676 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
22677 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
22678 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
22679 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
22680 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
22681 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
22682 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
22683 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
22684 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
22685 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
22686 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
22687 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
22688 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
22689 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
22690 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
22691 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
22692 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
22693 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
22694 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
22695 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
22696 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
22697 case MULTI_ARG_UNKNOWN:
22699 gcc_unreachable ();
22703 def_builtin_const (d->mask, d->name, mtype, d->code);
22707 /* Internal method for ix86_init_builtins. */
/* Registers the __builtin_ms_va_{start,end,copy} and
   __builtin_sysv_va_{start,end,copy} builtins.  Each one reuses the
   generic BUILT_IN_VA_* codes but carries an "ms_abi" or "sysv_abi"
   attribute list so it is expanded per the matching calling convention.
   NOTE(review): this chunk is missing intermediate source lines
   (braces, some assignments); comments describe only what is visible.  */
22710 ix86_init_builtins_va_builtins_abi (void)
22712 tree ms_va_ref, sysv_va_ref;
22713 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
22714 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
22715 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
22716 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists used to tag each builtin with its ABI.  */
22720 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
22721 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* Reference types to the two va_list flavors, used as the builtin
   argument types below.  */
22722 ms_va_ref = build_reference_type (ms_va_list_type_node);
22724 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Build the function types: the va_start variants are varargs
   function types, va_end/va_copy take fixed argument lists.  */
22727 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
22728 fnvoid_va_start_ms =
22729 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
22730 fnvoid_va_end_sysv =
22731 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
22732 fnvoid_va_start_sysv =
22733 build_varargs_function_type_list (void_type_node, sysv_va_ref,
22735 fnvoid_va_copy_ms =
22736 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
22738 fnvoid_va_copy_sysv =
22739 build_function_type_list (void_type_node, sysv_va_ref,
22740 sysv_va_ref, NULL_TREE);
/* Register the six builtins, attaching the ABI attribute lists built
   above so the middle end knows which convention each one follows.  */
22742 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
22743 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
22744 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
22745 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
22746 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
22747 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
22748 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
22749 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
22750 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
22751 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
22752 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
22753 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level builtin registration entry point for the i386 backend:
   registers the __float80 / __float128 types, the TFmode builtins
   (__builtin_infq, __builtin_fabsq, __builtin_copysignq), then the
   MMX/SSE builtins and the per-ABI va_* builtins.
   NOTE(review): intermediate source lines are missing from this chunk;
   comments cover only the visible statements.  */
22757 ix86_init_builtins (void)
22759 tree float128_type_node = make_node (REAL_TYPE);
22762 /* The __float80 type. */
/* If long double is already XFmode, simply reuse it as __float80.  */
22763 if (TYPE_MODE (long_double_type_node) == XFmode)
22764 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
22768 /* The __float80 type. */
/* Otherwise build a distinct 80-bit REAL_TYPE and register it.  */
22769 tree float80_type_node = make_node (REAL_TYPE);
22771 TYPE_PRECISION (float80_type_node) = 80;
22772 layout_type (float80_type_node);
22773 (*lang_hooks.types.register_builtin_type) (float80_type_node,
22777 /* The __float128 type. */
22778 TYPE_PRECISION (float128_type_node) = 128;
22779 layout_type (float128_type_node);
22780 (*lang_hooks.types.register_builtin_type) (float128_type_node,
22783 /* TFmode support builtins. */
22784 ftype = build_function_type (float128_type_node, void_list_node);
22785 decl = add_builtin_function ("__builtin_infq", ftype,
22786 IX86_BUILTIN_INFQ, BUILT_IN_MD,
22788 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
22790 /* We will expand them to normal call if SSE2 isn't available since
22791 they are used by libgcc. */
22792 ftype = build_function_type_list (float128_type_node,
22793 float128_type_node,
/* __builtin_fabsq falls back to the library routine __fabstf2.  */
22795 decl = add_builtin_function ("__builtin_fabsq", ftype,
22796 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
22797 "__fabstf2", NULL_TREE);
22798 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
/* Pure function: no side effects beyond its return value.  */
22799 TREE_READONLY (decl) = 1;
22801 ftype = build_function_type_list (float128_type_node,
22802 float128_type_node,
22803 float128_type_node,
/* __builtin_copysignq falls back to the library routine __copysigntf3.  */
22805 decl = add_builtin_function ("__builtin_copysignq", ftype,
22806 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
22807 "__copysigntf3", NULL_TREE);
22808 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
22809 TREE_READONLY (decl) = 1;
/* Register the vector builtins and the per-ABI varargs builtins.  */
22811 ix86_init_mmx_sse_builtins ();
22813 ix86_init_builtins_va_builtins_abi ();
22816 /* Errors in the source file can cause expand_expr to return const0_rtx
22817 where we expect a vector. To avoid crashing, use one of the vector
22818 clear instructions. */
/* Replace a scalar const0_rtx with the all-zero vector constant of
   MODE; any other operand is returned unchanged.  */
22820 safe_vector_operand (rtx x, enum machine_mode mode)
22822 if (x == const0_rtx)
22823 x = CONST0_RTX (mode);
22827 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expands a two-operand builtin EXP through insn pattern ICODE into
   TARGET (allocating a fresh register when TARGET is unsuitable).
   NOTE(review): the final emit/return lines are missing from this
   chunk; comments cover only the visible statements.  */
22830 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
22833 tree arg0 = CALL_EXPR_ARG (exp, 0);
22834 tree arg1 = CALL_EXPR_ARG (exp, 1);
22835 rtx op0 = expand_normal (arg0);
22836 rtx op1 = expand_normal (arg1);
/* Operand modes demanded by the chosen insn pattern.  */
22837 enum machine_mode tmode = insn_data[icode].operand[0].mode;
22838 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
22839 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx produced by erroneous source (see
   safe_vector_operand).  */
22841 if (VECTOR_MODE_P (mode0))
22842 op0 = safe_vector_operand (op0, mode0);
22843 if (VECTOR_MODE_P (mode1))
22844 op1 = safe_vector_operand (op1, mode1);
22846 if (optimize || !target
22847 || GET_MODE (target) != tmode
22848 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22849 target = gen_reg_rtx (tmode);
/* An SImode count operand for a TImode pattern is widened by loading
   it into a V4SImode register and taking the TImode lowpart.  */
22851 if (GET_MODE (op1) == SImode && mode1 == TImode)
22853 rtx x = gen_reg_rtx (V4SImode);
22854 emit_insn (gen_sse2_loadd (x, op1));
22855 op1 = gen_lowpart (TImode, x);
/* Force operands into registers when the pattern's predicates reject
   them as-is.  */
22858 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
22859 op0 = copy_to_mode_reg (mode0, op0);
22860 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
22861 op1 = copy_to_mode_reg (mode1, op1);
22863 pat = GEN_FCN (icode) (target, op0, op1);
22872 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expands an SSE5 multi-argument builtin EXP via pattern ICODE.
   M_TYPE selects the argument count and flavor (plain, immediate last
   argument, comparison, or test form); SUB_CODE is the rtx comparison
   code embedded into comparison patterns.
   NOTE(review): this chunk is missing intermediate lines (nargs
   assignments, breaks, braces); comments cover only what is visible.  */
22875 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
22876 enum multi_arg_type m_type,
22877 enum insn_code sub_code)
22882 bool comparison_p = false;
22884 bool last_arg_constant = false;
22885 int num_memory = 0;
22888 enum machine_mode mode;
22891 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: three-operand forms.  */
22895 case MULTI_ARG_3_SF:
22896 case MULTI_ARG_3_DF:
22897 case MULTI_ARG_3_DI:
22898 case MULTI_ARG_3_SI:
22899 case MULTI_ARG_3_SI_DI:
22900 case MULTI_ARG_3_HI:
22901 case MULTI_ARG_3_HI_SI:
22902 case MULTI_ARG_3_QI:
22903 case MULTI_ARG_3_PERMPS:
22904 case MULTI_ARG_3_PERMPD:
/* Two-operand forms.  */
22908 case MULTI_ARG_2_SF:
22909 case MULTI_ARG_2_DF:
22910 case MULTI_ARG_2_DI:
22911 case MULTI_ARG_2_SI:
22912 case MULTI_ARG_2_HI:
22913 case MULTI_ARG_2_QI:
/* Two operands where the last must be an immediate.  */
22917 case MULTI_ARG_2_DI_IMM:
22918 case MULTI_ARG_2_SI_IMM:
22919 case MULTI_ARG_2_HI_IMM:
22920 case MULTI_ARG_2_QI_IMM:
22922 last_arg_constant = true;
/* One-operand (unary / conversion) forms.  */
22925 case MULTI_ARG_1_SF:
22926 case MULTI_ARG_1_DF:
22927 case MULTI_ARG_1_DI:
22928 case MULTI_ARG_1_SI:
22929 case MULTI_ARG_1_HI:
22930 case MULTI_ARG_1_QI:
22931 case MULTI_ARG_1_SI_DI:
22932 case MULTI_ARG_1_HI_DI:
22933 case MULTI_ARG_1_HI_SI:
22934 case MULTI_ARG_1_QI_DI:
22935 case MULTI_ARG_1_QI_SI:
22936 case MULTI_ARG_1_QI_HI:
22937 case MULTI_ARG_1_PH2PS:
22938 case MULTI_ARG_1_PS2PH:
/* Comparison forms: the pattern additionally embeds SUB_CODE.  */
22942 case MULTI_ARG_2_SF_CMP:
22943 case MULTI_ARG_2_DF_CMP:
22944 case MULTI_ARG_2_DI_CMP:
22945 case MULTI_ARG_2_SI_CMP:
22946 case MULTI_ARG_2_HI_CMP:
22947 case MULTI_ARG_2_QI_CMP:
22949 comparison_p = true;
/* Test forms (tf): pass SUB_CODE as an integer operand.  */
22952 case MULTI_ARG_2_SF_TF:
22953 case MULTI_ARG_2_DF_TF:
22954 case MULTI_ARG_2_DI_TF:
22955 case MULTI_ARG_2_SI_TF:
22956 case MULTI_ARG_2_HI_TF:
22957 case MULTI_ARG_2_QI_TF:
22962 case MULTI_ARG_UNKNOWN:
22964 gcc_unreachable ();
/* Allocate a fresh result register when TARGET is unsuitable.  */
22967 if (optimize || !target
22968 || GET_MODE (target) != tmode
22969 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22970 target = gen_reg_rtx (tmode);
22972 gcc_assert (nargs <= 4);
/* Expand and legitimize each argument.  */
22974 for (i = 0; i < nargs; i++)
22976 tree arg = CALL_EXPR_ARG (exp, i);
22977 rtx op = expand_normal (arg);
/* Comparison patterns have an extra (comparison) operand before the
   arguments, hence the +1 adjustment into insn_data.  */
22978 int adjust = (comparison_p) ? 1 : 0;
22979 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
/* The trailing immediate operand must be a CONST_INT.  */
22981 if (last_arg_constant && i == nargs-1)
22983 if (GET_CODE (op) != CONST_INT)
22985 error ("last argument must be an immediate");
22986 return gen_reg_rtx (tmode);
22991 if (VECTOR_MODE_P (mode))
22992 op = safe_vector_operand (op, mode);
22994 /* If we aren't optimizing, only allow one memory operand to be
22996 if (memory_operand (op, mode))
22999 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
23002 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
23004 op = force_reg (mode, op);
23008 args[i].mode = mode;
/* Emit the pattern with the per-arity operand layout.  */
23014 pat = GEN_FCN (icode) (target, args[0].op);
23019 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
23020 GEN_INT ((int)sub_code));
23021 else if (! comparison_p)
23022 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
/* Comparison form: build the comparison rtx from SUB_CODE and pass it
   as the pattern's first input operand.  */
23025 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
23029 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
23034 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23038 gcc_unreachable ();
23048 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
23049 insns with vec_merge. */
/* Expands a one-argument builtin EXP whose pattern ICODE also takes
   the input as a second (merge) operand.  NOTE(review): some lines
   (op1 assignment, emit, return) are missing from this chunk.  */
23052 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23056 tree arg0 = CALL_EXPR_ARG (exp, 0);
23057 rtx op1, op0 = expand_normal (arg0);
23058 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23059 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Allocate a fresh result register when TARGET is unsuitable.  */
23061 if (optimize || !target
23062 || GET_MODE (target) != tmode
23063 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23064 target = gen_reg_rtx (tmode);
23066 if (VECTOR_MODE_P (mode0))
23067 op0 = safe_vector_operand (op0, mode0);
/* Force into a register when the pattern's predicate rejects OP0.  */
23069 if ((optimize && !register_operand (op0, mode0))
23070 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23071 op0 = copy_to_mode_reg (mode0, op0);
23074 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
23075 op1 = copy_to_mode_reg (mode0, op1);
23077 pat = GEN_FCN (icode) (target, op0, op1);
23084 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expands an SSE compare builtin described by D.  When SWAP, the two
   operands are exchanged so that an unsupported comparison can be
   expressed via its mirror form.  NOTE(review): some lines of the
   swap sequence and the final emit/return are missing here.  */
23087 ix86_expand_sse_compare (const struct builtin_description *d,
23088 tree exp, rtx target, bool swap)
23091 tree arg0 = CALL_EXPR_ARG (exp, 0);
23092 tree arg1 = CALL_EXPR_ARG (exp, 1);
23093 rtx op0 = expand_normal (arg0);
23094 rtx op1 = expand_normal (arg1);
23096 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23097 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23098 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
/* rtx comparison code recorded in the builtin table.  */
23099 enum rtx_code comparison = d->comparison;
23101 if (VECTOR_MODE_P (mode0))
23102 op0 = safe_vector_operand (op0, mode0);
23103 if (VECTOR_MODE_P (mode1))
23104 op1 = safe_vector_operand (op1, mode1);
23106 /* Swap operands if we have a comparison that isn't available in
23110 rtx tmp = gen_reg_rtx (mode1);
23111 emit_move_insn (tmp, op1);
23116 if (optimize || !target
23117 || GET_MODE (target) != tmode
23118 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
23119 target = gen_reg_rtx (tmode);
/* Legitimize both operands against the pattern's predicates.  */
23121 if ((optimize && !register_operand (op0, mode0))
23122 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
23123 op0 = copy_to_mode_reg (mode0, op0);
23124 if ((optimize && !register_operand (op1, mode1))
23125 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
23126 op1 = copy_to_mode_reg (mode1, op1);
/* Pattern takes the comparison rtx as an explicit operand.  */
23128 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23129 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23136 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expands a comi/ucomi-style builtin D: the pattern compares OP0/OP1
   and the flags result is materialized into a QImode register via a
   condition derived from d->comparison; returns the SImode holder.
   NOTE(review): intermediate lines are missing from this chunk.  */
23139 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23143 tree arg0 = CALL_EXPR_ARG (exp, 0);
23144 tree arg1 = CALL_EXPR_ARG (exp, 1);
23145 rtx op0 = expand_normal (arg0);
23146 rtx op1 = expand_normal (arg1);
23147 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23148 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23149 enum rtx_code comparison = d->comparison;
23151 if (VECTOR_MODE_P (mode0))
23152 op0 = safe_vector_operand (op0, mode0);
23153 if (VECTOR_MODE_P (mode1))
23154 op1 = safe_vector_operand (op1, mode1);
23156 /* Swap operands if we have a comparison that isn't available in
23158 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Result register: zero SImode, then set its QImode low part below.  */
23165 target = gen_reg_rtx (SImode);
23166 emit_move_insn (target, const0_rtx);
23167 target = gen_rtx_SUBREG (QImode, target, 0);
23169 if ((optimize && !register_operand (op0, mode0))
23170 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23171 op0 = copy_to_mode_reg (mode0, op0);
23172 if ((optimize && !register_operand (op1, mode1))
23173 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23174 op1 = copy_to_mode_reg (mode1, op1);
23176 pat = GEN_FCN (d->icode) (op0, op1);
/* Store the flag comparison into the low byte of the result.  */
23180 emit_insn (gen_rtx_SET (VOIDmode,
23181 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23182 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode register, not the QImode subreg.  */
23186 return SUBREG_REG (target);
23189 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expands a ptest-style builtin D: emits the test pattern on OP0/OP1
   and converts the resulting flags into an SImode 0/1 value using
   d->comparison.  Structure mirrors ix86_expand_sse_comi.
   NOTE(review): intermediate lines are missing from this chunk.  */
23192 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
23196 tree arg0 = CALL_EXPR_ARG (exp, 0);
23197 tree arg1 = CALL_EXPR_ARG (exp, 1);
23198 rtx op0 = expand_normal (arg0);
23199 rtx op1 = expand_normal (arg1);
23200 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23201 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23202 enum rtx_code comparison = d->comparison;
23204 if (VECTOR_MODE_P (mode0))
23205 op0 = safe_vector_operand (op0, mode0);
23206 if (VECTOR_MODE_P (mode1))
23207 op1 = safe_vector_operand (op1, mode1);
/* Result register: zero SImode, then set its QImode low part below.  */
23209 target = gen_reg_rtx (SImode);
23210 emit_move_insn (target, const0_rtx);
23211 target = gen_rtx_SUBREG (QImode, target, 0);
23213 if ((optimize && !register_operand (op0, mode0))
23214 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23215 op0 = copy_to_mode_reg (mode0, op0);
23216 if ((optimize && !register_operand (op1, mode1))
23217 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23218 op1 = copy_to_mode_reg (mode1, op1);
23220 pat = GEN_FCN (d->icode) (op0, op1);
/* Store the flag comparison into the low byte of the result.  */
23224 emit_insn (gen_rtx_SET (VOIDmode,
23225 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23226 gen_rtx_fmt_ee (comparison, QImode,
23230 return SUBREG_REG (target);
23233 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expands the five-argument SSE4.2 pcmpestri/pcmpestrm builtins.
   The insn pattern produces two results (index and mask); depending
   on d->code one of them becomes TARGET and the other goes into a
   scratch, and for the flag-reading variants the requested EFLAGS bit
   (encoded in d->flag) is extracted into an SImode 0/1 value.
   NOTE(review): intermediate lines are missing from this chunk.  */
23236 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
23237 tree exp, rtx target)
23240 tree arg0 = CALL_EXPR_ARG (exp, 0);
23241 tree arg1 = CALL_EXPR_ARG (exp, 1);
23242 tree arg2 = CALL_EXPR_ARG (exp, 2);
23243 tree arg3 = CALL_EXPR_ARG (exp, 3);
23244 tree arg4 = CALL_EXPR_ARG (exp, 4);
23245 rtx scratch0, scratch1;
23246 rtx op0 = expand_normal (arg0);
23247 rtx op1 = expand_normal (arg1);
23248 rtx op2 = expand_normal (arg2);
23249 rtx op3 = expand_normal (arg3);
23250 rtx op4 = expand_normal (arg4);
23251 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* Modes for the two outputs, the two vector inputs, the two length
   inputs and the immediate control operand.  */
23253 tmode0 = insn_data[d->icode].operand[0].mode;
23254 tmode1 = insn_data[d->icode].operand[1].mode;
23255 modev2 = insn_data[d->icode].operand[2].mode;
23256 modei3 = insn_data[d->icode].operand[3].mode;
23257 modev4 = insn_data[d->icode].operand[4].mode;
23258 modei5 = insn_data[d->icode].operand[5].mode;
23259 modeimm = insn_data[d->icode].operand[6].mode;
23261 if (VECTOR_MODE_P (modev2))
23262 op0 = safe_vector_operand (op0, modev2);
23263 if (VECTOR_MODE_P (modev4))
23264 op2 = safe_vector_operand (op2, modev4);
/* Legitimize every input against its predicate.  */
23266 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23267 op0 = copy_to_mode_reg (modev2, op0);
23268 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
23269 op1 = copy_to_mode_reg (modei3, op1);
23270 if ((optimize && !register_operand (op2, modev4))
23271 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
23272 op2 = copy_to_mode_reg (modev4, op2);
23273 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
23274 op3 = copy_to_mode_reg (modei5, op3);
23276 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
23278 error ("the fifth argument must be a 8-bit immediate")
23282 if (d->code == IX86_BUILTIN_PCMPESTRI128)
/* pcmpestri: the index (operand 0) is the user-visible result.  */
23284 if (optimize || !target
23285 || GET_MODE (target) != tmode0
23286 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23287 target = gen_reg_rtx (tmode0);
23289 scratch1 = gen_reg_rtx (tmode1);
23291 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
23293 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
/* pcmpestrm: the mask (operand 1) is the user-visible result.  */
23295 if (optimize || !target
23296 || GET_MODE (target) != tmode1
23297 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23298 target = gen_reg_rtx (tmode1);
23300 scratch0 = gen_reg_rtx (tmode0);
23302 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-reading variants: both machine results are scratch.  */
23306 gcc_assert (d->flag);
23308 scratch0 = gen_reg_rtx (tmode0);
23309 scratch1 = gen_reg_rtx (tmode1);
23311 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Extract the flag selected by d->flag into a 0/1 SImode value.  */
23321 target = gen_reg_rtx (SImode);
23322 emit_move_insn (target, const0_rtx);
23323 target = gen_rtx_SUBREG (QImode, target, 0);
23326 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23327 gen_rtx_fmt_ee (EQ, QImode,
23328 gen_rtx_REG ((enum machine_mode) d->flag,
23331 return SUBREG_REG (target);
23338 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expands the three-argument SSE4.2 pcmpistri/pcmpistrm builtins.
   Same structure as ix86_expand_sse_pcmpestr but without the explicit
   length operands (implicit-length string compare).
   NOTE(review): intermediate lines are missing from this chunk.  */
23341 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
23342 tree exp, rtx target)
23345 tree arg0 = CALL_EXPR_ARG (exp, 0);
23346 tree arg1 = CALL_EXPR_ARG (exp, 1);
23347 tree arg2 = CALL_EXPR_ARG (exp, 2);
23348 rtx scratch0, scratch1;
23349 rtx op0 = expand_normal (arg0);
23350 rtx op1 = expand_normal (arg1);
23351 rtx op2 = expand_normal (arg2);
23352 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Modes for the two outputs, the two vector inputs, and the
   immediate control operand.  */
23354 tmode0 = insn_data[d->icode].operand[0].mode;
23355 tmode1 = insn_data[d->icode].operand[1].mode;
23356 modev2 = insn_data[d->icode].operand[2].mode;
23357 modev3 = insn_data[d->icode].operand[3].mode;
23358 modeimm = insn_data[d->icode].operand[4].mode;
23360 if (VECTOR_MODE_P (modev2))
23361 op0 = safe_vector_operand (op0, modev2);
23362 if (VECTOR_MODE_P (modev3))
23363 op1 = safe_vector_operand (op1, modev3);
/* Legitimize both vector inputs against their predicates.  */
23365 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23366 op0 = copy_to_mode_reg (modev2, op0);
23367 if ((optimize && !register_operand (op1, modev3))
23368 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
23369 op1 = copy_to_mode_reg (modev3, op1);
23371 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
23373 error ("the third argument must be a 8-bit immediate")
23377 if (d->code == IX86_BUILTIN_PCMPISTRI128)
/* pcmpistri: the index (operand 0) is the user-visible result.  */
23379 if (optimize || !target
23380 || GET_MODE (target) != tmode0
23381 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23382 target = gen_reg_rtx (tmode0);
23384 scratch1 = gen_reg_rtx (tmode1);
23386 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
23388 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
/* pcmpistrm: the mask (operand 1) is the user-visible result.  */
23390 if (optimize || !target
23391 || GET_MODE (target) != tmode1
23392 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23393 target = gen_reg_rtx (tmode1);
23395 scratch0 = gen_reg_rtx (tmode0);
23397 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-reading variants: both machine results are scratch.  */
23401 gcc_assert (d->flag);
23403 scratch0 = gen_reg_rtx (tmode0);
23404 scratch1 = gen_reg_rtx (tmode1);
23406 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Extract the flag selected by d->flag into a 0/1 SImode value.  */
23416 target = gen_reg_rtx (SImode);
23417 emit_move_insn (target, const0_rtx);
23418 target = gen_rtx_SUBREG (QImode, target, 0);
23421 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23422 gen_rtx_fmt_ee (EQ, QImode,
23423 gen_rtx_REG ((enum machine_mode) d->flag,
23426 return SUBREG_REG (target);
23432 /* Subroutine of ix86_expand_builtin to take care of insns with
23433 variable number of operands. */
/* Generic expander for table-driven builtins D.  The big switch on
   d->flag (an ix86_builtin_type) determines the argument count and
   special handling (ptest, vec_merge, swap, shift-count, trailing
   immediates), then each argument is expanded and legitimized and the
   pattern is emitted with the matching arity.
   NOTE(review): intermediate lines (nargs assignments, breaks,
   braces) are missing from this chunk; comments cover what is
   visible.  */
23436 ix86_expand_args_builtin (const struct builtin_description *d,
23437 tree exp, rtx target)
23439 rtx pat, real_target;
23440 unsigned int i, nargs;
23441 unsigned int nargs_constant = 0;
23442 int num_memory = 0;
23446 enum machine_mode mode;
23448 bool last_arg_count = false;
23449 enum insn_code icode = d->icode;
23450 const struct insn_data *insn_p = &insn_data[icode];
23451 enum machine_mode tmode = insn_p->operand[0].mode;
/* rmode, when set, is a wrapper mode around tmode (see below).  */
23452 enum machine_mode rmode = VOIDmode;
23454 enum rtx_code comparison = d->comparison;
23456 switch ((enum ix86_builtin_type) d->flag)
/* ptest forms are delegated entirely.  */
23458 case INT_FTYPE_V8SF_V8SF_PTEST:
23459 case INT_FTYPE_V4DI_V4DI_PTEST:
23460 case INT_FTYPE_V4DF_V4DF_PTEST:
23461 case INT_FTYPE_V4SF_V4SF_PTEST:
23462 case INT_FTYPE_V2DI_V2DI_PTEST:
23463 case INT_FTYPE_V2DF_V2DF_PTEST:
23464 return ix86_expand_sse_ptest (d, exp, target);
/* One-argument forms.  */
23465 case FLOAT128_FTYPE_FLOAT128:
23466 case FLOAT_FTYPE_FLOAT:
23467 case INT64_FTYPE_V4SF:
23468 case INT64_FTYPE_V2DF:
23469 case INT_FTYPE_V16QI:
23470 case INT_FTYPE_V8QI:
23471 case INT_FTYPE_V8SF:
23472 case INT_FTYPE_V4DF:
23473 case INT_FTYPE_V4SF:
23474 case INT_FTYPE_V2DF:
23475 case V16QI_FTYPE_V16QI:
23476 case V8SI_FTYPE_V8SF:
23477 case V8SI_FTYPE_V4SI:
23478 case V8HI_FTYPE_V8HI:
23479 case V8HI_FTYPE_V16QI:
23480 case V8QI_FTYPE_V8QI:
23481 case V8SF_FTYPE_V8SF:
23482 case V8SF_FTYPE_V8SI:
23483 case V8SF_FTYPE_V4SF:
23484 case V4SI_FTYPE_V4SI:
23485 case V4SI_FTYPE_V16QI:
23486 case V4SI_FTYPE_V4SF:
23487 case V4SI_FTYPE_V8SI:
23488 case V4SI_FTYPE_V8HI:
23489 case V4SI_FTYPE_V4DF:
23490 case V4SI_FTYPE_V2DF:
23491 case V4HI_FTYPE_V4HI:
23492 case V4DF_FTYPE_V4DF:
23493 case V4DF_FTYPE_V4SI:
23494 case V4DF_FTYPE_V4SF:
23495 case V4DF_FTYPE_V2DF:
23496 case V4SF_FTYPE_V4SF:
23497 case V4SF_FTYPE_V4SI:
23498 case V4SF_FTYPE_V8SF:
23499 case V4SF_FTYPE_V4DF:
23500 case V4SF_FTYPE_V2DF:
23501 case V2DI_FTYPE_V2DI:
23502 case V2DI_FTYPE_V16QI:
23503 case V2DI_FTYPE_V8HI:
23504 case V2DI_FTYPE_V4SI:
23505 case V2DF_FTYPE_V2DF:
23506 case V2DF_FTYPE_V4SI:
23507 case V2DF_FTYPE_V4DF:
23508 case V2DF_FTYPE_V4SF:
23509 case V2DF_FTYPE_V2SI:
23510 case V2SI_FTYPE_V2SI:
23511 case V2SI_FTYPE_V4SF:
23512 case V2SI_FTYPE_V2SF:
23513 case V2SI_FTYPE_V2DF:
23514 case V2SF_FTYPE_V2SF:
23515 case V2SF_FTYPE_V2SI:
/* Scalar unop with vec_merge is delegated.  */
23518 case V4SF_FTYPE_V4SF_VEC_MERGE:
23519 case V2DF_FTYPE_V2DF_VEC_MERGE:
23520 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Two-argument forms; plain binops are delegated unless a comparison
   code is recorded in the table.  */
23521 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
23522 case V16QI_FTYPE_V16QI_V16QI:
23523 case V16QI_FTYPE_V8HI_V8HI:
23524 case V8QI_FTYPE_V8QI_V8QI:
23525 case V8QI_FTYPE_V4HI_V4HI:
23526 case V8HI_FTYPE_V8HI_V8HI:
23527 case V8HI_FTYPE_V16QI_V16QI:
23528 case V8HI_FTYPE_V4SI_V4SI:
23529 case V8SF_FTYPE_V8SF_V8SF:
23530 case V8SF_FTYPE_V8SF_V8SI:
23531 case V4SI_FTYPE_V4SI_V4SI:
23532 case V4SI_FTYPE_V8HI_V8HI:
23533 case V4SI_FTYPE_V4SF_V4SF:
23534 case V4SI_FTYPE_V2DF_V2DF:
23535 case V4HI_FTYPE_V4HI_V4HI:
23536 case V4HI_FTYPE_V8QI_V8QI:
23537 case V4HI_FTYPE_V2SI_V2SI:
23538 case V4DF_FTYPE_V4DF_V4DF:
23539 case V4DF_FTYPE_V4DF_V4DI:
23540 case V4SF_FTYPE_V4SF_V4SF:
23541 case V4SF_FTYPE_V4SF_V4SI:
23542 case V4SF_FTYPE_V4SF_V2SI:
23543 case V4SF_FTYPE_V4SF_V2DF:
23544 case V4SF_FTYPE_V4SF_DI:
23545 case V4SF_FTYPE_V4SF_SI:
23546 case V2DI_FTYPE_V2DI_V2DI:
23547 case V2DI_FTYPE_V16QI_V16QI:
23548 case V2DI_FTYPE_V4SI_V4SI:
23549 case V2DI_FTYPE_V2DI_V16QI:
23550 case V2DI_FTYPE_V2DF_V2DF:
23551 case V2SI_FTYPE_V2SI_V2SI:
23552 case V2SI_FTYPE_V4HI_V4HI:
23553 case V2SI_FTYPE_V2SF_V2SF:
23554 case V2DF_FTYPE_V2DF_V2DF:
23555 case V2DF_FTYPE_V2DF_V4SF:
23556 case V2DF_FTYPE_V2DF_V2DI:
23557 case V2DF_FTYPE_V2DF_DI:
23558 case V2DF_FTYPE_V2DF_SI:
23559 case V2SF_FTYPE_V2SF_V2SF:
23560 case V1DI_FTYPE_V1DI_V1DI:
23561 case V1DI_FTYPE_V8QI_V8QI:
23562 case V1DI_FTYPE_V2SI_V2SI:
23563 if (comparison == UNKNOWN)
23564 return ix86_expand_binop_builtin (icode, exp, target);
/* Comparisons whose operands must be swapped for the insn.  */
23567 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23568 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23569 gcc_assert (comparison != UNKNOWN);
/* SIMD shifts: the last argument is a count (register or 8-bit
   immediate); see the last_arg_count handling below.  */
23573 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23574 case V8HI_FTYPE_V8HI_SI_COUNT:
23575 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23576 case V4SI_FTYPE_V4SI_SI_COUNT:
23577 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23578 case V4HI_FTYPE_V4HI_SI_COUNT:
23579 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23580 case V2DI_FTYPE_V2DI_SI_COUNT:
23581 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23582 case V2SI_FTYPE_V2SI_SI_COUNT:
23583 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23584 case V1DI_FTYPE_V1DI_SI_COUNT:
23586 last_arg_count = true;
23588 case UINT64_FTYPE_UINT64_UINT64:
23589 case UINT_FTYPE_UINT_UINT:
23590 case UINT_FTYPE_UINT_USHORT:
23591 case UINT_FTYPE_UINT_UCHAR:
/* 2TI forms: pattern result mode differs from the builtin's result
   mode (rmode wraps tmode), handled via real_target below.  */
23594 case V2DI2TI_FTYPE_V2DI_INT:
23597 nargs_constant = 1;
/* One trailing immediate operand.  */
23599 case V8HI_FTYPE_V8HI_INT:
23600 case V8SF_FTYPE_V8SF_INT:
23601 case V4SI_FTYPE_V4SI_INT:
23602 case V4SI_FTYPE_V8SI_INT:
23603 case V4HI_FTYPE_V4HI_INT:
23604 case V4DF_FTYPE_V4DF_INT:
23605 case V4SF_FTYPE_V4SF_INT:
23606 case V4SF_FTYPE_V8SF_INT:
23607 case V2DI_FTYPE_V2DI_INT:
23608 case V2DF_FTYPE_V2DF_INT:
23609 case V2DF_FTYPE_V4DF_INT:
23611 nargs_constant = 1;
/* Three plain vector operands.  */
23613 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23614 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23615 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23616 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23617 case V2DF_FTYPE_V2DF_V2DF_V2DF:
/* Two vector operands plus a trailing immediate.  */
23620 case V16QI_FTYPE_V16QI_V16QI_INT:
23621 case V8HI_FTYPE_V8HI_V8HI_INT:
23622 case V8SI_FTYPE_V8SI_V8SI_INT:
23623 case V8SI_FTYPE_V8SI_V4SI_INT:
23624 case V8SF_FTYPE_V8SF_V8SF_INT:
23625 case V8SF_FTYPE_V8SF_V4SF_INT:
23626 case V4SI_FTYPE_V4SI_V4SI_INT:
23627 case V4DF_FTYPE_V4DF_V4DF_INT:
23628 case V4DF_FTYPE_V4DF_V2DF_INT:
23629 case V4SF_FTYPE_V4SF_V4SF_INT:
23630 case V2DI_FTYPE_V2DI_V2DI_INT:
23631 case V2DF_FTYPE_V2DF_V2DF_INT:
23633 nargs_constant = 1;
23635 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23638 nargs_constant = 1;
23640 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23643 nargs_constant = 1;
/* Two trailing immediates.  */
23645 case V2DI_FTYPE_V2DI_UINT_UINT:
23647 nargs_constant = 2;
/* Four operands: three vectors plus a trailing immediate.  */
23649 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
23650 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
23651 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
23652 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
23654 nargs_constant = 1;
23656 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23658 nargs_constant = 2;
23661 gcc_unreachable ();
23664 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Comparisons take the dedicated compare path.  */
23666 if (comparison != UNKNOWN)
23668 gcc_assert (nargs == 2);
23669 return ix86_expand_sse_compare (d, exp, target, swap);
/* Choose the real pattern target: when rmode differs from tmode the
   pattern writes through a subreg of an rmode register.  */
23672 if (rmode == VOIDmode || rmode == tmode)
23676 || GET_MODE (target) != tmode
23677 || ! (*insn_p->operand[0].predicate) (target, tmode))
23678 target = gen_reg_rtx (tmode);
23679 real_target = target;
23683 target = gen_reg_rtx (rmode);
23684 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
/* Expand and legitimize each argument in turn.  */
23687 for (i = 0; i < nargs; i++)
23689 tree arg = CALL_EXPR_ARG (exp, i);
23690 rtx op = expand_normal (arg);
23691 enum machine_mode mode = insn_p->operand[i + 1].mode;
23692 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
23694 if (last_arg_count && (i + 1) == nargs)
23696 /* SIMD shift insns take either an 8-bit immediate or
23697 register as count. But builtin functions take int as
23698 count. If count doesn't match, we put it in register. */
23701 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
23702 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
23703 op = copy_to_reg (op);
/* Trailing immediate arguments: diagnose a non-matching constant,
   with an immediate-width message specific to the insn.  */
23706 else if ((nargs - i) <= nargs_constant)
23711 case CODE_FOR_sse4_1_roundpd:
23712 case CODE_FOR_sse4_1_roundps:
23713 case CODE_FOR_sse4_1_roundsd:
23714 case CODE_FOR_sse4_1_roundss:
23715 case CODE_FOR_sse4_1_blendps:
23716 case CODE_FOR_avx_blendpd256:
23717 case CODE_FOR_avx_vpermilv4df:
23718 case CODE_FOR_avx_roundpd256:
23719 case CODE_FOR_avx_roundps256:
23720 error ("the last argument must be a 4-bit immediate");
23723 case CODE_FOR_sse4_1_blendpd:
23724 case CODE_FOR_avx_vpermilv2df:
23725 case CODE_FOR_avx_vpermil2v2df3:
23726 case CODE_FOR_avx_vpermil2v4sf3:
23727 case CODE_FOR_avx_vpermil2v4df3:
23728 case CODE_FOR_avx_vpermil2v8sf3:
23729 error ("the last argument must be a 2-bit immediate");
23732 case CODE_FOR_avx_vextractf128v4df:
23733 case CODE_FOR_avx_vextractf128v8sf:
23734 case CODE_FOR_avx_vextractf128v8si:
23735 case CODE_FOR_avx_vinsertf128v4df:
23736 case CODE_FOR_avx_vinsertf128v8sf:
23737 case CODE_FOR_avx_vinsertf128v8si:
23738 error ("the last argument must be a 1-bit immediate");
23741 case CODE_FOR_avx_cmpsdv2df3:
23742 case CODE_FOR_avx_cmpssv4sf3:
23743 case CODE_FOR_avx_cmppdv2df3:
23744 case CODE_FOR_avx_cmppsv4sf3:
23745 case CODE_FOR_avx_cmppdv4df3:
23746 case CODE_FOR_avx_cmppsv8sf3:
23747 error ("the last argument must be a 5-bit immediate");
/* Generic 8-bit immediate diagnostics, distinguishing the last and
   next-to-last positions.  */
23751 switch (nargs_constant)
23754 if ((nargs - i) == nargs_constant)
23756 error ("the next to last argument must be an 8-bit immediate");
23760 error ("the last argument must be an 8-bit immediate");
23763 gcc_unreachable ();
23770 if (VECTOR_MODE_P (mode))
23771 op = safe_vector_operand (op, mode);
23773 /* If we aren't optimizing, only allow one memory operand to
23775 if (memory_operand (op, mode))
23778 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
23780 if (optimize || !match || num_memory > 1)
23781 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy to a register and view it in MODE.  */
23785 op = copy_to_reg (op);
23786 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
23791 args[i].mode = mode;
/* Emit the pattern with the matching arity.  */
23797 pat = GEN_FCN (icode) (real_target, args[0].op);
23800 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
23803 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
23807 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
23808 args[2].op, args[3].op);
23811 gcc_unreachable ();
23821 /* Subroutine of ix86_expand_builtin to take care of special insns
23822 with variable number of operands. */
/* Expand a "special args" builtin: one whose operand list includes a
   memory operand (load or store forms), dispatched on d->flag.
   NOTE(review): this listing elides interior lines (numbering gaps);
   comments below describe only the visible fragments.  */
23825 ix86_expand_special_args_builtin (const struct builtin_description *d,
23826 tree exp, rtx target)
23830 unsigned int i, nargs, arg_adjust, memory;
23834 enum machine_mode mode;
23836 enum insn_code icode = d->icode;
23837 bool last_arg_constant = false;
23838 const struct insn_data *insn_p = &insn_data[icode];
23839 enum machine_mode tmode = insn_p->operand[0].mode;
/* klass selects between load-style (result in a register/target) and
   store-style (result written through a pointer argument) expansion.  */
23840 enum { load, store } klass;
23842 switch ((enum ix86_special_builtin_type) d->flag)
23844 case VOID_FTYPE_VOID:
23845 emit_insn (GEN_FCN (icode) (target));
/* Load forms: single pointer argument dereferenced into a vector/scalar.  */
23847 case V2DI_FTYPE_PV2DI:
23848 case V32QI_FTYPE_PCCHAR:
23849 case V16QI_FTYPE_PCCHAR:
23850 case V8SF_FTYPE_PCV4SF:
23851 case V8SF_FTYPE_PCFLOAT:
23852 case V4SF_FTYPE_PCFLOAT:
23853 case V4DF_FTYPE_PCV2DF:
23854 case V4DF_FTYPE_PCDOUBLE:
23855 case V2DF_FTYPE_PCDOUBLE:
/* Store forms: first argument is the destination pointer.  */
23860 case VOID_FTYPE_PV2SF_V4SF:
23861 case VOID_FTYPE_PV2DI_V2DI:
23862 case VOID_FTYPE_PCHAR_V32QI:
23863 case VOID_FTYPE_PCHAR_V16QI:
23864 case VOID_FTYPE_PFLOAT_V8SF:
23865 case VOID_FTYPE_PFLOAT_V4SF:
23866 case VOID_FTYPE_PDOUBLE_V4DF:
23867 case VOID_FTYPE_PDOUBLE_V2DF:
23868 case VOID_FTYPE_PDI_DI:
23869 case VOID_FTYPE_PINT_INT:
23872 /* Reserve memory operand for target. */
23873 memory = ARRAY_SIZE (args);
23875 case V4SF_FTYPE_V4SF_PCV2SF:
23876 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Masked-load forms (AVX maskload and friends).  */
23881 case V8SF_FTYPE_PCV8SF_V8SF:
23882 case V4DF_FTYPE_PCV4DF_V4DF:
23883 case V4SF_FTYPE_PCV4SF_V4SF:
23884 case V2DF_FTYPE_PCV2DF_V2DF:
/* Masked-store forms.  */
23889 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23890 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23891 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23892 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23895 /* Reserve memory operand for target. */
23896 memory = ARRAY_SIZE (args);
23899 gcc_unreachable ();
23902 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Store form: first call argument becomes the destination MEM.  */
23904 if (klass == store)
23906 arg = CALL_EXPR_ARG (exp, 0);
23907 op = expand_normal (arg);
23908 gcc_assert (target == 0);
23909 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
/* Load form: make sure TARGET is a fresh register of the right mode.  */
23917 || GET_MODE (target) != tmode
23918 || ! (*insn_p->operand[0].predicate) (target, tmode))
23919 target = gen_reg_rtx (tmode);
/* Expand and legitimize each remaining argument against the insn's
   operand predicates.  */
23922 for (i = 0; i < nargs; i++)
23924 enum machine_mode mode = insn_p->operand[i + 1].mode;
23927 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
23928 op = expand_normal (arg);
23929 match = (*insn_p->operand[i + 1].predicate) (op, mode);
23931 if (last_arg_constant && (i + 1) == nargs)
23937 error ("the last argument must be an 8-bit immediate");
23945 /* This must be the memory operand. */
23946 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
23947 gcc_assert (GET_MODE (op) == mode
23948 || GET_MODE (op) == VOIDmode);
23952 /* This must be register. */
23953 if (VECTOR_MODE_P (mode))
23954 op = safe_vector_operand (op, mode);
23956 gcc_assert (GET_MODE (op) == mode
23957 || GET_MODE (op) == VOIDmode);
23958 op = copy_to_mode_reg (mode, op);
23963 args[i].mode = mode;
/* Emit the pattern for the arity actually collected.  */
23969 pat = GEN_FCN (icode) (target, args[0].op);
23972 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23975 gcc_unreachable ();
/* Stores have no value; loads return TARGET.  */
23981 return klass == store ? 0 : target;
23984 /* Return the integer constant in ARG. Constrain it to be in the range
23985 of the subparts of VEC_TYPE; issue an error if not. */
/* Return the integer constant in ARG as a vector-lane selector for
   VEC_TYPE.  Issues an error if ARG is not a constant in
   [0, TYPE_VECTOR_SUBPARTS - 1].  (Listing elides the return paths.)  */
23988 get_element_number (tree vec_type, tree arg)
23990 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* host_integerp/tree_low_cst: ARG must be a host-representable
   unsigned integer constant.  */
23992 if (!host_integerp (arg, 1)
23993 || (elt = tree_low_cst (arg, 1), elt > max))
23995 error ("selector must be an integer constant in the range 0..%wi", max);
24002 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24003 ix86_expand_vector_init. We DO have language-level syntax for this, in
24004 the form of (type){ init-list }. Except that since we can't place emms
24005 instructions from inside the compiler, we can't allow the use of MMX
24006 registers unless the user explicitly asks for it. So we do *not* define
24007 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
24008 we have builtins invoked by mmintrin.h that gives us license to emit
24009 these sorts of instructions. */
/* Expand a __builtin_ia32_vec_init_* call: gather one scalar argument per
   vector lane of TYPE and build the vector via ix86_expand_vector_init.
   (Listing elides the final return; presumably TARGET is returned.)  */
24012 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24014 enum machine_mode tmode = TYPE_MODE (type);
24015 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24016 int i, n_elt = GET_MODE_NUNITS (tmode);
24017 rtvec v = rtvec_alloc (n_elt);
24019 gcc_assert (VECTOR_MODE_P (tmode));
/* The builtin must supply exactly one argument per element.  */
24020 gcc_assert (call_expr_nargs (exp) == n_elt);
24022 for (i = 0; i < n_elt; ++i)
24024 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
/* Truncate each scalar to the element mode.  */
24025 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24028 if (!target || !register_operand (target, tmode))
24029 target = gen_reg_rtx (tmode);
24031 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
24035 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24036 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
24037 had a language-level syntax for referencing vector elements. */
/* Expand a __builtin_ia32_vec_ext_* call: extract lane arg1 (a checked
   constant) from vector arg0 into TARGET.  (Return elided in listing.)  */
24040 ix86_expand_vec_ext_builtin (tree exp, rtx target)
24042 enum machine_mode tmode, mode0;
24047 arg0 = CALL_EXPR_ARG (exp, 0);
24048 arg1 = CALL_EXPR_ARG (exp, 1);
24050 op0 = expand_normal (arg0);
/* Validates arg1 against the vector's lane count, with error on misuse.  */
24051 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode = element mode, mode0 = whole-vector mode.  */
24053 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24054 mode0 = TYPE_MODE (TREE_TYPE (arg0));
24055 gcc_assert (VECTOR_MODE_P (mode0));
24057 op0 = force_reg (mode0, op0);
24059 if (optimize || !target || !register_operand (target, tmode))
24060 target = gen_reg_rtx (tmode);
24062 ix86_expand_vector_extract (true, target, op0, elt);
24067 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24068 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
24069 a language-level syntax for referencing vector elements. */
/* Expand a __builtin_ia32_vec_set_* call: insert scalar arg1 into lane
   arg2 of vector arg0, without modifying arg0 itself.
   (Return of TARGET elided in this listing.)  */
24072 ix86_expand_vec_set_builtin (tree exp)
24074 enum machine_mode tmode, mode1;
24075 tree arg0, arg1, arg2;
24077 rtx op0, op1, target;
24079 arg0 = CALL_EXPR_ARG (exp, 0);
24080 arg1 = CALL_EXPR_ARG (exp, 1);
24081 arg2 = CALL_EXPR_ARG (exp, 2);
/* tmode = vector mode, mode1 = element mode.  */
24083 tmode = TYPE_MODE (TREE_TYPE (arg0));
24084 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24085 gcc_assert (VECTOR_MODE_P (tmode));
24087 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24088 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* Lane index must be a constant in range; errors otherwise.  */
24089 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Widen/narrow the scalar to the element mode if needed.  */
24091 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24092 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24094 op0 = force_reg (tmode, op0);
24095 op1 = force_reg (mode1, op1);
24097 /* OP0 is the source of these builtin functions and shouldn't be
24098 modified. Create a copy, use it and return it as target. */
24099 target = gen_reg_rtx (tmode);
24100 emit_move_insn (target, op0);
24101 ix86_expand_vector_set (true, target, op1, elt);
24106 /* Expand an expression EXP that calls a built-in function,
24107 with result going to TARGET if that's convenient
24108 (and in mode MODE if that's convenient).
24109 SUBTARGET may be used as the target for computing one of EXP's operands.
24110 IGNORE is nonzero if the value is to be ignored. */
/* Top-level expander for all ix86 builtins (TARGET_EXPAND_BUILTIN).
   Handles a set of irregular builtins in a switch, then falls through to
   table-driven dispatch over the bdesc_* arrays.
   NOTE(review): this listing elides interior lines (numbering gaps);
   comments describe only the visible fragments.  */
24113 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
24114 enum machine_mode mode ATTRIBUTE_UNUSED,
24115 int ignore ATTRIBUTE_UNUSED)
24117 const struct builtin_description *d;
24119 enum insn_code icode;
24120 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24121 tree arg0, arg1, arg2;
24122 rtx op0, op1, op2, pat;
24123 enum machine_mode mode0, mode1, mode2;
24124 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24126 /* Determine whether the builtin function is available under the current ISA.
24127 Originally the builtin was not created if it wasn't applicable to the
24128 current ISA based on the command line switches. With function specific
24129 options, we need to check in the context of the function making the call
24130 whether it is supported. */
24131 if (ix86_builtins_isa[fcode].isa
24132 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
24134 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
24135 NULL, NULL, false);
24138 error ("%qE needs unknown isa option", fndecl);
24141 gcc_assert (opts != NULL);
24142 error ("%qE needs isa option %s", fndecl, opts);
/* MASKMOVQ/MASKMOVDQU: masked byte store through an implicit pointer.  */
24150 case IX86_BUILTIN_MASKMOVQ:
24151 case IX86_BUILTIN_MASKMOVDQU:
24152 icode = (fcode == IX86_BUILTIN_MASKMOVQ
24153 ? CODE_FOR_mmx_maskmovq
24154 : CODE_FOR_sse2_maskmovdqu);
24155 /* Note the arg order is different from the operand order. */
24156 arg1 = CALL_EXPR_ARG (exp, 0);
24157 arg2 = CALL_EXPR_ARG (exp, 1);
24158 arg0 = CALL_EXPR_ARG (exp, 2);
24159 op0 = expand_normal (arg0);
24160 op1 = expand_normal (arg1);
24161 op2 = expand_normal (arg2);
24162 mode0 = insn_data[icode].operand[0].mode;
24163 mode1 = insn_data[icode].operand[1].mode;
24164 mode2 = insn_data[icode].operand[2].mode;
/* The destination pointer becomes a MEM operand.  */
24166 op0 = force_reg (Pmode, op0);
24167 op0 = gen_rtx_MEM (mode1, op0);
24169 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
24170 op0 = copy_to_mode_reg (mode0, op0);
24171 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
24172 op1 = copy_to_mode_reg (mode1, op1);
24173 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
24174 op2 = copy_to_mode_reg (mode2, op2);
24175 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR/STMXCSR go through a stack slot (SLOT_VIRTUAL).  */
24181 case IX86_BUILTIN_LDMXCSR:
24182 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
24183 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24184 emit_move_insn (target, op0);
24185 emit_insn (gen_sse_ldmxcsr (target));
24188 case IX86_BUILTIN_STMXCSR:
24189 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24190 emit_insn (gen_sse_stmxcsr (target));
24191 return copy_to_mode_reg (SImode, target);
24193 case IX86_BUILTIN_CLFLUSH:
24194 arg0 = CALL_EXPR_ARG (exp, 0);
24195 op0 = expand_normal (arg0);
24196 icode = CODE_FOR_sse2_clflush;
24197 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
24198 op0 = copy_to_mode_reg (Pmode, op0);
24200 emit_insn (gen_sse2_clflush (op0));
/* MONITOR: (ptr, ext, hint) — pointer in Pmode, hints in SImode.  */
24203 case IX86_BUILTIN_MONITOR:
24204 arg0 = CALL_EXPR_ARG (exp, 0);
24205 arg1 = CALL_EXPR_ARG (exp, 1);
24206 arg2 = CALL_EXPR_ARG (exp, 2);
24207 op0 = expand_normal (arg0);
24208 op1 = expand_normal (arg1);
24209 op2 = expand_normal (arg2);
24211 op0 = copy_to_mode_reg (Pmode, op0);
24213 op1 = copy_to_mode_reg (SImode, op1);
24215 op2 = copy_to_mode_reg (SImode, op2);
/* ix86_gen_monitor is an indirect generator (32 vs 64 bit variant).  */
24216 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
24219 case IX86_BUILTIN_MWAIT:
24220 arg0 = CALL_EXPR_ARG (exp, 0);
24221 arg1 = CALL_EXPR_ARG (exp, 1);
24222 op0 = expand_normal (arg0);
24223 op1 = expand_normal (arg1);
24225 op0 = copy_to_mode_reg (SImode, op0);
24227 op1 = copy_to_mode_reg (SImode, op1);
24228 emit_insn (gen_sse3_mwait (op0, op1));
/* Vector init/extract/set builtins delegate to dedicated helpers.  */
24231 case IX86_BUILTIN_VEC_INIT_V2SI:
24232 case IX86_BUILTIN_VEC_INIT_V4HI:
24233 case IX86_BUILTIN_VEC_INIT_V8QI:
24234 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
24236 case IX86_BUILTIN_VEC_EXT_V2DF:
24237 case IX86_BUILTIN_VEC_EXT_V2DI:
24238 case IX86_BUILTIN_VEC_EXT_V4SF:
24239 case IX86_BUILTIN_VEC_EXT_V4SI:
24240 case IX86_BUILTIN_VEC_EXT_V8HI:
24241 case IX86_BUILTIN_VEC_EXT_V2SI:
24242 case IX86_BUILTIN_VEC_EXT_V4HI:
24243 case IX86_BUILTIN_VEC_EXT_V16QI:
24244 return ix86_expand_vec_ext_builtin (exp, target);
24246 case IX86_BUILTIN_VEC_SET_V2DI:
24247 case IX86_BUILTIN_VEC_SET_V4SF:
24248 case IX86_BUILTIN_VEC_SET_V4SI:
24249 case IX86_BUILTIN_VEC_SET_V8HI:
24250 case IX86_BUILTIN_VEC_SET_V4HI:
24251 case IX86_BUILTIN_VEC_SET_V16QI:
24252 return ix86_expand_vec_set_builtin (exp);
/* __builtin_infq: materialize TFmode infinity from the constant pool.  */
24254 case IX86_BUILTIN_INFQ:
24256 REAL_VALUE_TYPE inf;
24260 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
24262 tmp = validize_mem (force_const_mem (mode, tmp));
24265 target = gen_reg_rtx (mode);
24267 emit_move_insn (target, tmp);
/* Table-driven dispatch: scan each bdesc_* array for FCODE.  */
24275 for (i = 0, d = bdesc_special_args;
24276 i < ARRAY_SIZE (bdesc_special_args);
24278 if (d->code == fcode)
24279 return ix86_expand_special_args_builtin (d, exp, target);
24281 for (i = 0, d = bdesc_args;
24282 i < ARRAY_SIZE (bdesc_args);
24284 if (d->code == fcode)
24287 case IX86_BUILTIN_FABSQ:
24288 case IX86_BUILTIN_COPYSIGNQ:
24290 /* Emit a normal call if SSE2 isn't available. */
24291 return expand_call (exp, target, ignore);
24293 return ix86_expand_args_builtin (d, exp, target);
24296 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24297 if (d->code == fcode)
24298 return ix86_expand_sse_comi (d, exp, target);
24300 for (i = 0, d = bdesc_pcmpestr;
24301 i < ARRAY_SIZE (bdesc_pcmpestr);
24303 if (d->code == fcode)
24304 return ix86_expand_sse_pcmpestr (d, exp, target);
24306 for (i = 0, d = bdesc_pcmpistr;
24307 i < ARRAY_SIZE (bdesc_pcmpistr);
24309 if (d->code == fcode)
24310 return ix86_expand_sse_pcmpistr (d, exp, target);
24312 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24313 if (d->code == fcode)
24314 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
24315 (enum multi_arg_type)d->flag,
/* Every builtin code must be handled by one of the paths above.  */
24318 gcc_unreachable ();
24321 /* Returns a function decl for a vectorized version of the builtin function
24322 with builtin function code FN and the result vector type TYPE, or NULL_TREE
24323 if it is not available. */
/* Return the ix86 builtin decl implementing a vectorized version of
   libm builtin FN for the given vector types, or fall through to the
   installed veclib handler.  (Listing elides NULL_TREE returns.)  */
24326 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
24329 enum machine_mode in_mode, out_mode;
/* Both sides of the mapping must be vector types.  */
24332 if (TREE_CODE (type_out) != VECTOR_TYPE
24333 || TREE_CODE (type_in) != VECTOR_TYPE)
24336 out_mode = TYPE_MODE (TREE_TYPE (type_out));
24337 out_n = TYPE_VECTOR_SUBPARTS (type_out);
24338 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24339 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* sqrt -> SQRTPD (2 x double).  */
24343 case BUILT_IN_SQRT:
24344 if (out_mode == DFmode && out_n == 2
24345 && in_mode == DFmode && in_n == 2)
24346 return ix86_builtins[IX86_BUILTIN_SQRTPD];
/* sqrtf -> SQRTPS_NR (4 x float).  */
24349 case BUILT_IN_SQRTF:
24350 if (out_mode == SFmode && out_n == 4
24351 && in_mode == SFmode && in_n == 4)
24352 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
/* lrint on 2 x double produces 4 x int via VEC_PACK_SFIX.  */
24355 case BUILT_IN_LRINT:
24356 if (out_mode == SImode && out_n == 4
24357 && in_mode == DFmode && in_n == 2)
24358 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
24361 case BUILT_IN_LRINTF:
24362 if (out_mode == SImode && out_n == 4
24363 && in_mode == SFmode && in_n == 4)
24364 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
24371 /* Dispatch to a handler for a vectorization library. */
24372 if (ix86_veclib_handler)
24373 return (*ix86_veclib_handler)(fn, type_out, type_in);
24378 /* Handler for an SVML-style interface to
24379 a library with vectorized intrinsics. */
/* Build a decl for an Intel SVML vector math routine corresponding to
   libm builtin FN (e.g. "vmlsSin4" for sinf on 4 x float).
   NOTE(review): interior lines are elided from this listing; comments
   cover only the visible fragments.  */
24382 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
24385 tree fntype, new_fndecl, args;
24388 enum machine_mode el_mode, in_mode;
24391 /* The SVML is suitable for unsafe math only. */
24392 if (!flag_unsafe_math_optimizations)
24395 el_mode = TYPE_MODE (TREE_TYPE (type_out));
24396 n = TYPE_VECTOR_SUBPARTS (type_out);
24397 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24398 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode/count must agree.  */
24399 if (el_mode != in_mode
/* Double-precision routines: only valid for 2 x double.  */
24407 case BUILT_IN_LOG10:
24409 case BUILT_IN_TANH:
24411 case BUILT_IN_ATAN:
24412 case BUILT_IN_ATAN2:
24413 case BUILT_IN_ATANH:
24414 case BUILT_IN_CBRT:
24415 case BUILT_IN_SINH:
24417 case BUILT_IN_ASINH:
24418 case BUILT_IN_ASIN:
24419 case BUILT_IN_COSH:
24421 case BUILT_IN_ACOSH:
24422 case BUILT_IN_ACOS:
24423 if (el_mode != DFmode || n != 2)
/* Single-precision routines: only valid for 4 x float.  */
24427 case BUILT_IN_EXPF:
24428 case BUILT_IN_LOGF:
24429 case BUILT_IN_LOG10F:
24430 case BUILT_IN_POWF:
24431 case BUILT_IN_TANHF:
24432 case BUILT_IN_TANF:
24433 case BUILT_IN_ATANF:
24434 case BUILT_IN_ATAN2F:
24435 case BUILT_IN_ATANHF:
24436 case BUILT_IN_CBRTF:
24437 case BUILT_IN_SINHF:
24438 case BUILT_IN_SINF:
24439 case BUILT_IN_ASINHF:
24440 case BUILT_IN_ASINF:
24441 case BUILT_IN_COSHF:
24442 case BUILT_IN_COSF:
24443 case BUILT_IN_ACOSHF:
24444 case BUILT_IN_ACOSF:
24445 if (el_mode != SFmode || n != 4)
/* Derive the SVML entry point name from the builtin's own name:
   "__builtin_foo" + 10 skips the "__builtin_" prefix.  log/logf are
   special-cased because SVML calls them "Ln".  */
24453 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24455 if (fn == BUILT_IN_LOGF)
24456 strcpy (name, "vmlsLn4");
24457 else if (fn == BUILT_IN_LOG)
24458 strcpy (name, "vmldLn2");
24461 sprintf (name, "vmls%s", bname+10);
/* Single-precision names drop the trailing 'f' and append the lane
   count '4'.  */
24462 name[strlen (name)-1] = '4';
24465 sprintf (name, "vmld%s2", bname+10);
24467 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a 1- or 2-operand
   vector signature.  */
24471 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24472 args = TREE_CHAIN (args))
24476 fntype = build_function_type_list (type_out, type_in, NULL);
24478 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24480 /* Build a function declaration for the vectorized function. */
24481 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
24482 TREE_PUBLIC (new_fndecl) = 1;
24483 DECL_EXTERNAL (new_fndecl) = 1;
24484 DECL_IS_NOVOPS (new_fndecl) = 1;
24485 TREE_READONLY (new_fndecl) = 1;
24490 /* Handler for an ACML-style interface to
24491 a library with vectorized intrinsics. */
/* Build a decl for an AMD ACML vector math routine ("__vrd2_..." /
   "__vrs4_..." style names) for libm builtin FN.
   NOTE(review): interior lines are elided from this listing.  */
24494 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Template: the two dots are overwritten with d2/s4, the suffix is
   appended at name+7.  */
24496 char name[20] = "__vr.._";
24497 tree fntype, new_fndecl, args;
24500 enum machine_mode el_mode, in_mode;
24503 /* The ACML is 64bits only and suitable for unsafe math only as
24504 it does not correctly support parts of IEEE with the required
24505 precision such as denormals. */
24507 || !flag_unsafe_math_optimizations)
24510 el_mode = TYPE_MODE (TREE_TYPE (type_out));
24511 n = TYPE_VECTOR_SUBPARTS (type_out);
24512 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24513 in_n = TYPE_VECTOR_SUBPARTS (type_in);
24514 if (el_mode != in_mode
/* Double-precision group: must be DFmode elements.  */
24524 case BUILT_IN_LOG2:
24525 case BUILT_IN_LOG10:
24528 if (el_mode != DFmode
/* Single-precision group: must be SFmode elements.  */
24533 case BUILT_IN_SINF:
24534 case BUILT_IN_COSF:
24535 case BUILT_IN_EXPF:
24536 case BUILT_IN_POWF:
24537 case BUILT_IN_LOGF:
24538 case BUILT_IN_LOG2F:
24539 case BUILT_IN_LOG10F:
24542 if (el_mode != SFmode
/* Append the builtin's name (minus the "__builtin_" prefix) to the
   "__vr.._" template.  */
24551 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24552 sprintf (name + 7, "%s", bname+10);
/* Count scalar arguments to select a unary or binary vector type.  */
24555 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24556 args = TREE_CHAIN (args))
24560 fntype = build_function_type_list (type_out, type_in, NULL);
24562 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24564 /* Build a function declaration for the vectorized function. */
24565 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
24566 TREE_PUBLIC (new_fndecl) = 1;
24567 DECL_EXTERNAL (new_fndecl) = 1;
24568 DECL_IS_NOVOPS (new_fndecl) = 1;
24569 TREE_READONLY (new_fndecl) = 1;
24575 /* Returns a decl of a function that implements conversion of an integer vector
24576 into a floating-point vector, or vice-versa. TYPE is the type of the integer
24577 side of the conversion.
24578 Return NULL_TREE if it is not available. */
/* Return the builtin decl converting an integer vector to/from a
   float vector (int->float uses CVTDQ2PS, truncation uses CVTTPS2DQ).
   NOTE(review): most case labels and returns are elided in this
   listing; comments cover only visible fragments.  */
24581 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
24583 if (TREE_CODE (type) != VECTOR_TYPE)
/* Conversion direction is selected on CODE (FLOAT_EXPR vs
   FIX_TRUNC_EXPR), then on the integer vector's machine mode.  */
24589 switch (TYPE_MODE (type))
24592 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
24597 case FIX_TRUNC_EXPR:
24598 switch (TYPE_MODE (type))
24601 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
24611 /* Returns a code for a target-specific builtin that implements
24612 reciprocal of the function, or NULL_TREE if not available. */
/* Return the builtin decl computing a reciprocal approximation of FN
   (sqrt -> rsqrt), or nothing when -mrecip-style math is not enabled.
   MD_FN distinguishes machine-dependent from normal builtins.  */
24615 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
24616 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations are only safe under TARGET_RECIP with
   finite, non-trapping, unsafe math, and not when optimizing for size.  */
24618 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
24619 && flag_finite_math_only && !flag_trapping_math
24620 && flag_unsafe_math_optimizations))
24624 /* Machine dependent builtins. */
24627 /* Vectorized version of sqrt to rsqrt conversion. */
24628 case IX86_BUILTIN_SQRTPS_NR:
24629 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
24635 /* Normal builtins. */
24638 /* Sqrt to rsqrt conversion. */
24639 case BUILT_IN_SQRTF:
24640 return ix86_builtins[IX86_BUILTIN_RSQRTF];
24647 /* Store OPERAND to the memory after reload is completed. This means
24648 that we can't easily use assign_stack_local. */
/* Store OPERAND to memory after reload, returning the MEM.  Uses the
   red zone below the stack pointer when available, otherwise pushes.
   NOTE(review): interior lines (push emissions, case labels) are
   elided in this listing.  */
24650 ix86_force_to_memory (enum machine_mode mode, rtx operand)
24654 gcc_assert (reload_completed);
/* Red zone available (SysV 64-bit): store below the stack pointer
   without adjusting it.  */
24655 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
24657 result = gen_rtx_MEM (mode,
24658 gen_rtx_PLUS (Pmode,
24660 GEN_INT (-RED_ZONE_SIZE)));
24661 emit_move_insn (result, operand);
/* 64-bit without red zone: push a DImode word.  */
24663 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
24669 operand = gen_lowpart (DImode, operand);
24673 gen_rtx_SET (VOIDmode,
24674 gen_rtx_MEM (DImode,
24675 gen_rtx_PRE_DEC (DImode,
24676 stack_pointer_rtx)),
24680 gcc_unreachable ();
24682 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit: split a double-word value and push it as two SImode words.  */
24691 split_di (&operand, 1, operands, operands + 1);
24693 gen_rtx_SET (VOIDmode,
24694 gen_rtx_MEM (SImode,
24695 gen_rtx_PRE_DEC (Pmode,
24696 stack_pointer_rtx)),
24699 gen_rtx_SET (VOIDmode,
24700 gen_rtx_MEM (SImode,
24701 gen_rtx_PRE_DEC (Pmode,
24702 stack_pointer_rtx)),
24707 /* Store HImodes as SImodes. */
24708 operand = gen_lowpart (SImode, operand);
24712 gen_rtx_SET (VOIDmode,
24713 gen_rtx_MEM (GET_MODE (operand),
24714 gen_rtx_PRE_DEC (SImode,
24715 stack_pointer_rtx)),
24719 gcc_unreachable ();
24721 result = gen_rtx_MEM (mode, stack_pointer_rtx);
24726 /* Free operand from the memory. */
/* Release the stack space allocated by ix86_force_to_memory.  A no-op
   when the red zone was used (stack pointer never moved).  */
24728 ix86_free_from_memory (enum machine_mode mode)
24730 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
/* Size pushed was a full word (or two SImode words on 32-bit).  */
24734 if (mode == DImode || TARGET_64BIT)
24738 /* Use LEA to deallocate stack space. In peephole2 it will be converted
24739 to pop or add instruction if registers are available. */
24740 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24741 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
24746 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
24747 QImode must go into class Q_REGS.
24748 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
24749 movdf to do mem-to-mem moves through integer regs. */
/* PREFERRED_RELOAD_CLASS: narrow REGCLASS for loading X, steering
   constants away from MMX/SSE, QImode data into Q_REGS, and FP math
   into the unit selected by -mfpmath.
   (Several return statements are elided from this listing.)  */
24751 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
24753 enum machine_mode mode = GET_MODE (x);
24755 /* We're only allowed to return a subclass of CLASS. Many of the
24756 following checks fail for NO_REGS, so eliminate that early. */
24757 if (regclass == NO_REGS)
24760 /* All classes can load zeros. */
24761 if (x == CONST0_RTX (mode))
24764 /* Force constants into memory if we are loading a (nonzero) constant into
24765 an MMX or SSE register. This is because there are no MMX/SSE instructions
24766 to load from a constant. */
24768 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
24771 /* Prefer SSE regs only, if we can use them for math. */
24772 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
24773 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
24775 /* Floating-point constants need more complex checks. */
24776 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
24778 /* General regs can load everything. */
24779 if (reg_class_subset_p (regclass, GENERAL_REGS))
24782 /* Floats can load 0 and 1 plus some others. Note that we eliminated
24783 zero above. We only want to wind up preferring 80387 registers if
24784 we plan on doing computation with them. */
24786 && standard_80387_constant_p (x))
24788 /* Limit class to non-sse. */
24789 if (regclass == FLOAT_SSE_REGS)
24791 if (regclass == FP_TOP_SSE_REGS)
24793 if (regclass == FP_SECOND_SSE_REGS)
24794 return FP_SECOND_REG;
24795 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
24802 /* Generally when we see PLUS here, it's the function invariant
24803 (plus soft-fp const_int). Which can only be computed into general
24805 if (GET_CODE (x) == PLUS)
24806 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
24808 /* QImode constants are easy to load, but non-constant QImode data
24809 must go into Q_REGS. */
24810 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
24812 if (reg_class_subset_p (regclass, Q_REGS))
24814 if (reg_class_subset_p (Q_REGS, regclass))
24822 /* Discourage putting floating-point values in SSE registers unless
24823 SSE math is being used, and likewise for the 387 registers. */
/* PREFERRED_OUTPUT_RELOAD_CLASS: restrict output reloads of FP values
   to the register bank selected for math (SSE vs x87).  */
24825 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
24827 enum machine_mode mode = GET_MODE (x);
24829 /* Restrict the output reload class to the register bank that we are doing
24830 math on. If we would like not to return a subset of CLASS, reject this
24831 alternative: if reload cannot do this, it will still use its choice. */
24832 mode = GET_MODE (x);
24833 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
24834 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* x87 values: narrow mixed FP/SSE classes to their pure-x87 subsets.  */
24836 if (X87_FLOAT_MODE_P (mode))
24838 if (regclass == FP_TOP_SSE_REGS)
24840 else if (regclass == FP_SECOND_SSE_REGS)
24841 return FP_SECOND_REG;
24843 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* TARGET_SECONDARY_RELOAD hook.  (Listing elides the regno setup and
   final return; comments cover only the visible fragments.)  */
24849 static enum reg_class
24850 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
24851 enum machine_mode mode,
24852 secondary_reload_info *sri ATTRIBUTE_UNUSED)
24854 /* QImode spills from non-QI registers require
24855 intermediate register on 32bit targets. */
24856 if (!in_p && mode == QImode && !TARGET_64BIT
24857 && (rclass == GENERAL_REGS
24858 || rclass == LEGACY_REGS
24859 || rclass == INDEX_REGS))
/* Resolve pseudos and subregs to the underlying hard register.  */
24868 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
24869 regno = true_regnum (x);
24871 /* Return Q_REGS if the operand is in memory. */
24879 /* If we are copying between general and FP registers, we need a memory
24880 location. The same is true for SSE and MMX registers.
24882 To optimize register_move_cost performance, allow inline variant.
24884 The macro can't work reliably when one of the CLASSES is class containing
24885 registers from multiple units (SSE, MMX, integer). We avoid this by never
24886 combining those units in single alternative in the machine description.
24887 Ensure that this constraint holds to avoid unexpected surprises.
24889 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
24890 enforce these sanity checks. */
/* Core of SECONDARY_MEMORY_NEEDED: true when moving MODE between
   CLASS1 and CLASS2 must go through memory.  Inline so that
   register_move_cost table building stays cheap.
   (True/false returns are elided in this listing.)  */
24893 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
24894 enum machine_mode mode, int strict)
/* Mixed-unit classes (e.g. FLOAT_INT_REGS) are not expected here; the
   machine description avoids them.  Under !strict just tolerate them.  */
24896 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
24897 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
24898 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
24899 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
24900 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
24901 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
24903 gcc_assert (!strict);
/* x87 <-> anything else always goes through memory.  */
24907 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
24910 /* ??? This is a lie. We do have moves between mmx/general, and for
24911 mmx/sse2. But by saying we need secondary memory we discourage the
24912 register allocator from using the mmx registers unless needed. */
24913 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
24916 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
24918 /* SSE1 doesn't have any direct moves from other classes. */
24922 /* If the target says that inter-unit moves are more expensive
24923 than moving through memory, then don't generate them. */
24924 if (!TARGET_INTER_UNIT_MOVES)
24927 /* Between SSE and general, we have moves no larger than word size. */
24928 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed, used as
   the SECONDARY_MEMORY_NEEDED target macro's implementation.  */
24936 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
24937 enum machine_mode mode, int strict)
24939 return inline_secondary_memory_needed (class1, class2, mode, strict);
24942 /* Return true if the registers in CLASS cannot represent the change from
24943 modes FROM to TO. */
/* CANNOT_CHANGE_MODE_CLASS: true if registers in REGCLASS cannot hold
   a value whose mode is reinterpreted FROM -> TO via subreg.
   (True/false return lines are elided in this listing.)  */
24946 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
24947 enum reg_class regclass)
24952 /* x87 registers can't do subreg at all, as all values are reformatted
24953 to extended precision. */
24954 if (MAYBE_FLOAT_CLASS_P (regclass))
24957 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
24959 /* Vector registers do not support QI or HImode loads. If we don't
24960 disallow a change to these modes, reload will assume it's ok to
24961 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
24962 the vec_dupv4hi pattern. */
24963 if (GET_MODE_SIZE (from) < 4)
24966 /* Vector registers do not support subreg with nonzero offsets, which
24967 are otherwise valid for integer registers. Since we can't see
24968 whether we have a nonzero offset from here, prohibit all
24969 nonparadoxical subregs changing size. */
24970 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
24977 /* Return the cost of moving data of mode M between a
24978 register and memory. A value of 2 is the default; this cost is
24979 relative to those in `REGISTER_MOVE_COST'.
24981 This function is used extensively by register_move_cost that is used to
24982 build tables at startup. Make it inline in this case.
24983 When IN is 2, return maximum of in and out move cost.
24985 If moving between registers and memory is more expensive than
24986 between two registers, you should define this macro to express the
24989 Model also increased moving costs of QImode registers in non
/* Cost of moving MODE between REGCLASS and memory, from the active
   ix86_cost tables.  IN selects load vs store cost; IN == 2 asks for
   the max of both (per the comment preceding this function).
   NOTE(review): index computation and several case labels are elided
   in this listing.  */
24993 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* x87 classes: indexed fp load/store cost tables.  */
24997 if (FLOAT_CLASS_P (regclass))
25015 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
25016 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: table index chosen by operand size.  */
25018 if (SSE_CLASS_P (regclass))
25021 switch (GET_MODE_SIZE (mode))
25036 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
25037 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
25039 if (MMX_CLASS_P (regclass))
25042 switch (GET_MODE_SIZE (mode))
25054 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
25055 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers, keyed on operand size.  Byte moves from non-Q
   registers are penalized (see below).  */
25057 switch (GET_MODE_SIZE (mode))
25060 if (Q_CLASS_P (regclass) || TARGET_64BIT)
25063 return ix86_cost->int_store[0];
/* Prefer movzbl loads to avoid partial register stalls when tuning
   for speed.  */
25064 if (TARGET_PARTIAL_REG_DEPENDENCY
25065 && optimize_function_for_speed_p (cfun))
25066 cost = ix86_cost->movzbl_load;
25068 cost = ix86_cost->int_load[0];
25070 return MAX (cost, ix86_cost->int_store[0]);
/* Non-Q classes on 32-bit: byte stores need extra shuffling (+4).  */
25076 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
25078 return ix86_cost->movzbl_load;
25080 return ix86_cost->int_store[0] + 4;
25085 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
25086 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
25088 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
25089 if (mode == TFmode)
25092 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
25094 cost = ix86_cost->int_load[2];
25096 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words moved.  */
25097 return (cost * (((int) GET_MODE_SIZE (mode)
25098 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line MEMORY_MOVE_COST implementation; simply delegates to the
   inline worker above.  */
25103 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
25105 return inline_memory_move_cost (mode, regclass, in);
25109 /* Return the cost of moving data from a register in class CLASS1 to
25110 one in class CLASS2.
25112 It is not required that the cost always equal 2 when FROM is the same as TO;
25113 on some machines it is expensive to move between registers if they are not
25114 general registers. */
/* REGISTER_MOVE_COST: cost of moving MODE from CLASS1 to CLASS2.
   (Some lines, e.g. the base cost initialization and the final
   default return, are elided in this listing.)  */
25117 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
25118 enum reg_class class2)
25120 /* In case we require secondary memory, compute cost of the store followed
25121 by load. In order to avoid bad register allocation choices, we need
25122 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
25124 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN == 2 requests the max of load and store cost for each side.  */
25128 cost += inline_memory_move_cost (mode, class1, 2);
25129 cost += inline_memory_move_cost (mode, class2, 2);
25131 /* In case of copying from general_purpose_register we may emit multiple
25132 stores followed by single load causing memory size mismatch stall.
25133 Count this as arbitrarily high cost of 20. */
25134 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
25137 /* In the case of FP/MMX moves, the registers actually overlap, and we
25138 have to switch modes in order to treat them differently. */
25139 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
25140 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25146 /* Moves between SSE/MMX and integer unit are expensive. */
25147 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25148 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25150 /* ??? By keeping returned value relatively high, we limit the number
25151 of moves between integer and MMX/SSE registers for all targets.
25152 Additionally, high value prevents problem with x86_modes_tieable_p(),
25153 where integer modes in MMX/SSE registers are not tieable
25154 because of missing QImode and HImode moves to, from or between
25155 MMX/SSE registers. */
25156 return MAX (8, ix86_cost->mmxsse_to_integer);
/* Intra-unit moves fall back to the tuned per-unit move costs.  */
25158 if (MAYBE_FLOAT_CLASS_P (class1))
25159 return ix86_cost->fp_move;
25160 if (MAYBE_SSE_CLASS_P (class1))
25161 return ix86_cost->sse_move;
25162 if (MAYBE_MMX_CLASS_P (class1))
25163 return ix86_cost->mmx_move;
25167 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): interior lines (return statements for several branches,
   braces) are missing from this extract.  */
25170 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
25172 /* Flags and only flags can only hold CCmode values. */
25173 if (CC_REGNO_P (regno))
25174 return GET_MODE_CLASS (mode) == MODE_CC;
/* Non-flag registers never hold CC/RANDOM/PARTIAL_INT modes.  */
25175 if (GET_MODE_CLASS (mode) == MODE_CC
25176 || GET_MODE_CLASS (mode) == MODE_RANDOM
25177 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
25179 if (FP_REGNO_P (regno))
25180 return VALID_FP_MODE_P (mode);
25181 if (SSE_REGNO_P (regno))
25183 /* We implement the move patterns for all vector modes into and
25184 out of SSE registers, even when no operation instructions
25185 are available. OImode move is available only when AVX is
25187 return ((TARGET_AVX && mode == OImode)
25188 || VALID_AVX256_REG_MODE (mode)
25189 || VALID_SSE_REG_MODE (mode)
25190 || VALID_SSE2_REG_MODE (mode)
25191 || VALID_MMX_REG_MODE (mode)
25192 || VALID_MMX_REG_MODE_3DNOW (mode));
25194 if (MMX_REGNO_P (regno))
25196 /* We implement the move patterns for 3DNOW modes even in MMX mode,
25197 so if the register is available at all, then we can move data of
25198 the given mode into or out of it. */
25199 return (VALID_MMX_REG_MODE (mode)
25200 || VALID_MMX_REG_MODE_3DNOW (mode));
/* Remaining registers are the general-purpose ones.  */
25203 if (mode == QImode)
25205 /* Take care for QImode values - they can be in non-QI regs,
25206 but then they do cause partial register stalls. */
25207 if (regno < 4 || TARGET_64BIT)
25209 if (!TARGET_PARTIAL_REG_STALL)
/* During/after reload we must accept QImode in any GPR.  */
25211 return reload_in_progress || reload_completed;
25213 /* We handle both integer and floats in the general purpose registers. */
25214 else if (VALID_INT_MODE_P (mode))
25216 else if (VALID_FP_MODE_P (mode))
25218 else if (VALID_DFP_MODE_P (mode))
25220 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
25221 on to use that value in smaller contexts, this can easily force a
25222 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
25223 supporting DImode, allow it. */
25224 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
25230 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
25231 tieable integer mode. */
/* NOTE(review): the switch/case labels selecting between these returns are
   missing from this extract; only the return expressions survive.  */
25234 ix86_tieable_integer_mode_p (enum machine_mode mode)
25243 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
25246 return TARGET_64BIT;
25253 /* Return true if MODE1 is accessible in a register that can hold MODE2
25254 without copying. That is, all register classes that can hold MODE2
25255 can also hold MODE1. */
/* NOTE(review): trailing default return and some braces are missing from
   this extract.  */
25258 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
25260 if (mode1 == mode2)
25263 if (ix86_tieable_integer_mode_p (mode1)
25264 && ix86_tieable_integer_mode_p (mode2))
25267 /* MODE2 being XFmode implies fp stack or general regs, which means we
25268 can tie any smaller floating point modes to it. Note that we do not
25269 tie this with TFmode. */
25270 if (mode2 == XFmode)
25271 return mode1 == SFmode || mode1 == DFmode;
25273 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
25274 that we can tie it with SFmode. */
25275 if (mode2 == DFmode)
25276 return mode1 == SFmode;
25278 /* If MODE2 is only appropriate for an SSE register, then tie with
25279 any other mode acceptable to SSE registers. */
25280 if (GET_MODE_SIZE (mode2) == 16
25281 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
25282 return (GET_MODE_SIZE (mode1) == 16
25283 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
25285 /* If MODE2 is appropriate for an MMX register, then tie
25286 with any other mode acceptable to MMX registers. */
25287 if (GET_MODE_SIZE (mode2) == 8
25288 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
25289 return (GET_MODE_SIZE (mode1) == 8
25290 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
25295 /* Compute a (partial) cost for rtx X. Return true if the complete
25296 cost has been computed, and false if subexpressions should be
25297 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): this extract drops the switch statement's case labels,
   break statements and many braces (see the numbering gaps), so the
   grouping of the branches below is partially inferred.  */
25300 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
25302 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
25303 enum machine_mode mode = GET_MODE (x);
/* Select the speed-tuned or size-tuned cost table.  */
25304 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* Constant operands: cost depends on whether the constant fits the
   64-bit immediate forms and on PIC-ness of symbolic constants.  */
25312 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
25314 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
25316 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): BUG -- "!GET_CODE (x) != LABEL_REF" compares the
   logical negation (0 or 1) of GET_CODE against the enum value
   LABEL_REF, which is almost certainly not intended; this should
   read "GET_CODE (x) != LABEL_REF".  Known historical GCC bug,
   fixed upstream.  Cannot be patched here because the surrounding
   case labels are missing from this extract.  */
25318 || (!GET_CODE (x) != LABEL_REF
25319 && (GET_CODE (x) != SYMBOL_REF
25320 || !SYMBOL_REF_LOCAL_P (x)))))
25327 if (mode == VOIDmode)
/* Floating-point constants: standard 80387 constants are cheap.  */
25330 switch (standard_80387_constant_p (x))
25335 default: /* Other constants */
25340 /* Start with (MEM (SYMBOL_REF)), since that's where
25341 it'll probably end up. Add a penalty for size. */
25342 *total = (COSTS_N_INSNS (1)
25343 + (flag_pic != 0 && !TARGET_64BIT)
25344 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
25350 /* The zero extensions is often completely free on x86_64, so make
25351 it as cheap as possible. */
25352 if (TARGET_64BIT && mode == DImode
25353 && GET_MODE (XEXP (x, 0)) == SImode)
25355 else if (TARGET_ZERO_EXTEND_WITH_AND)
25356 *total = cost->add;
25358 *total = cost->movzx;
/* Presumably the SIGN_EXTEND case.  */
25362 *total = cost->movsx;
/* Presumably the ASHIFT case: shift by constant may be an add or lea.  */
25366 if (CONST_INT_P (XEXP (x, 1))
25367 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
25369 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25372 *total = cost->add;
25375 if ((value == 2 || value == 3)
25376 && cost->lea <= cost->shift_const)
25378 *total = cost->lea;
/* Other shift/rotate codes; DImode on 32-bit needs a two-insn sequence.  */
25388 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
25390 if (CONST_INT_P (XEXP (x, 1)))
25392 if (INTVAL (XEXP (x, 1)) > 32)
25393 *total = cost->shift_const + COSTS_N_INSNS (2);
25395 *total = cost->shift_const * 2;
25399 if (GET_CODE (XEXP (x, 1)) == AND)
25400 *total = cost->shift_var * 2;
25402 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
25407 if (CONST_INT_P (XEXP (x, 1)))
25408 *total = cost->shift_const;
25410 *total = cost->shift_var;
/* Presumably the MULT case: FP multiplies first, then integer.  */
25415 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25417 /* ??? SSE scalar cost should be used here. */
25418 *total = cost->fmul;
25421 else if (X87_FLOAT_MODE_P (mode))
25423 *total = cost->fmul;
25426 else if (FLOAT_MODE_P (mode))
25428 /* ??? SSE vector cost should be used here. */
25429 *total = cost->fmul;
25434 rtx op0 = XEXP (x, 0);
25435 rtx op1 = XEXP (x, 1);
/* For multiply by constant, nbits counts the set bits (shift/add cost).  */
25437 if (CONST_INT_P (XEXP (x, 1)))
25439 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25440 for (nbits = 0; value != 0; value &= value - 1)
25444 /* This is arbitrary. */
25447 /* Compute costs correctly for widening multiplication. */
25448 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
25449 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
25450 == GET_MODE_SIZE (mode))
25452 int is_mulwiden = 0;
25453 enum machine_mode inner_mode = GET_MODE (op0);
25455 if (GET_CODE (op0) == GET_CODE (op1))
25456 is_mulwiden = 1, op1 = XEXP (op1, 0);
25457 else if (CONST_INT_P (op1))
/* A constant operand widens iff it round-trips through the inner mode.  */
25459 if (GET_CODE (op0) == SIGN_EXTEND)
25460 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
25463 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
25467 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
25470 *total = (cost->mult_init[MODE_INDEX (mode)]
25471 + nbits * cost->mult_bit
25472 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
/* Presumably the DIV/MOD case.  */
25481 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25482 /* ??? SSE cost should be used here. */
25483 *total = cost->fdiv;
25484 else if (X87_FLOAT_MODE_P (mode))
25485 *total = cost->fdiv;
25486 else if (FLOAT_MODE_P (mode))
25487 /* ??? SSE vector cost should be used here. */
25488 *total = cost->fdiv;
25490 *total = cost->divide[MODE_INDEX (mode)];
/* Presumably the PLUS case: recognize lea-expressible address forms.  */
25494 if (GET_MODE_CLASS (mode) == MODE_INT
25495 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
/* (plus (plus (mult X const) Y) const2) -> one lea when scale is 2/4/8.  */
25497 if (GET_CODE (XEXP (x, 0)) == PLUS
25498 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
25499 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
25500 && CONSTANT_P (XEXP (x, 1)))
25502 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
25503 if (val == 2 || val == 4 || val == 8)
25505 *total = cost->lea;
25506 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
25507 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
25508 outer_code, speed);
25509 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25513 else if (GET_CODE (XEXP (x, 0)) == MULT
25514 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
25516 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
25517 if (val == 2 || val == 4 || val == 8)
25519 *total = cost->lea;
25520 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
25521 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25525 else if (GET_CODE (XEXP (x, 0)) == PLUS)
25527 *total = cost->lea;
25528 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
25529 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
25530 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
/* Presumably PLUS/MINUS FP handling (falls through from above).  */
25537 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25539 /* ??? SSE cost should be used here. */
25540 *total = cost->fadd;
25543 else if (X87_FLOAT_MODE_P (mode))
25545 *total = cost->fadd;
25548 else if (FLOAT_MODE_P (mode))
25550 /* ??? SSE vector cost should be used here. */
25551 *total = cost->fadd;
/* Presumably AND/IOR/XOR: DImode on 32-bit costs two word operations.  */
25559 if (!TARGET_64BIT && mode == DImode)
25561 *total = (cost->add * 2
25562 + (rtx_cost (XEXP (x, 0), outer_code, speed)
25563 << (GET_MODE (XEXP (x, 0)) != DImode))
25564 + (rtx_cost (XEXP (x, 1), outer_code, speed)
25565 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* Presumably the NEG case for FP modes.  */
25571 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25573 /* ??? SSE cost should be used here. */
25574 *total = cost->fchs;
25577 else if (X87_FLOAT_MODE_P (mode))
25579 *total = cost->fchs;
25582 else if (FLOAT_MODE_P (mode))
25584 /* ??? SSE vector cost should be used here. */
25585 *total = cost->fchs;
/* Presumably NOT: two insns for DImode on 32-bit.  */
25591 if (!TARGET_64BIT && mode == DImode)
25592 *total = cost->add * 2;
25594 *total = cost->add;
/* Presumably COMPARE of a single extracted bit against zero -> test.  */
25598 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
25599 && XEXP (XEXP (x, 0), 1) == const1_rtx
25600 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
25601 && XEXP (x, 1) == const0_rtx)
25603 /* This kind of construct is implemented using test[bwl].
25604 Treat it as if we had an AND. */
25605 *total = (cost->add
25606 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
25607 + rtx_cost (const1_rtx, outer_code, speed));
25613 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* Presumably the ABS case.  */
25618 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25619 /* ??? SSE cost should be used here. */
25620 *total = cost->fabs;
25621 else if (X87_FLOAT_MODE_P (mode))
25622 *total = cost->fabs;
25623 else if (FLOAT_MODE_P (mode))
25624 /* ??? SSE vector cost should be used here. */
25625 *total = cost->fabs;
/* Presumably the SQRT case.  */
25629 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25630 /* ??? SSE cost should be used here. */
25631 *total = cost->fsqrt;
25632 else if (X87_FLOAT_MODE_P (mode))
25633 *total = cost->fsqrt;
25634 else if (FLOAT_MODE_P (mode))
25635 /* ??? SSE vector cost should be used here. */
25636 *total = cost->fsqrt;
/* Presumably the UNSPEC case: thread-pointer reads are special.  */
25640 if (XINT (x, 1) == UNSPEC_TP)
/* Monotonic counter used by machopic_output_stub below to generate
   unique local label numbers (Darwin/Mach-O only).  */
25651 static int current_machopic_label_num;
25653 /* Given a symbol name and its associated stub, write out the
25654 definition of the stub. */
/* NOTE(review): interior lines (braces, the PIC/non-PIC conditionals
   around the section switches and asm emission) are missing from this
   extract.  32-bit Darwin only -- asserts !TARGET_64BIT.  */
25657 machopic_output_stub (FILE *file, const char *symb, const char *stub)
25659 unsigned int length;
25660 char *binder_name, *symbol_name, lazy_ptr_name[32];
25661 int label = ++current_machopic_label_num;
25663 /* For 64-bit we shouldn't get here. */
25664 gcc_assert (!TARGET_64BIT);
25666 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
25667 symb = (*targetm.strip_name_encoding) (symb);
25669 length = strlen (stub);
25670 binder_name = XALLOCAVEC (char, length + 32);
25671 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
25673 length = strlen (symb);
25674 symbol_name = XALLOCAVEC (char, length + 32);
25675 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
25677 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the PIC or non-PIC stub section (selection condition missing
   from this extract).  */
25680 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
25682 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
25684 fprintf (file, "%s:\n", stub);
25685 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC form: materialize PC in %eax, load the lazy pointer, jump.  */
25689 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
25690 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
25691 fprintf (file, "\tjmp\t*%%edx\n");
/* Non-PIC form: jump indirect through the lazy pointer.  */
25694 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
25696 fprintf (file, "%s:\n", binder_name);
25700 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
25701 fprintf (file, "\tpushl\t%%eax\n");
25704 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
25706 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Emit the lazy pointer itself, initially pointing at the binder.  */
25708 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
25709 fprintf (file, "%s:\n", lazy_ptr_name);
25710 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
25711 fprintf (file, "\t.long %s\n", binder_name);
/* TARGET_ASM_FILE_END hook for Darwin/x86; delegates to the generic
   Darwin end-of-file handling.  (Return type line missing from extract.)  */
25715 darwin_x86_file_end (void)
25717 darwin_file_end ();
25720 #endif /* TARGET_MACHO */
25722 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: call-clobbered GPRs first, then call-saved
   GPRs, then x87/SSE/MMX depending on whether FP math uses SSE.
   (Declaration lines for i/pos are missing from this extract.)  */
25725 x86_order_regs_for_local_alloc (void)
25730 /* First allocate the local general purpose registers. */
25731 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
25732 if (GENERAL_REGNO_P (i) && call_used_regs[i])
25733 reg_alloc_order [pos++] = i;
25735 /* Global general purpose registers. */
25736 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
25737 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
25738 reg_alloc_order [pos++] = i;
25740 /* x87 registers come first in case we are doing FP math
25742 if (!TARGET_SSE_MATH)
25743 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
25744 reg_alloc_order [pos++] = i;
25746 /* SSE registers. */
25747 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
25748 reg_alloc_order [pos++] = i;
25749 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
25750 reg_alloc_order [pos++] = i;
25752 /* x87 registers. */
25753 if (TARGET_SSE_MATH)
25754 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
25755 reg_alloc_order [pos++] = i;
25757 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
25758 reg_alloc_order [pos++] = i;
25760 /* Initialize the rest of array as we do not allocate some registers
25762 while (pos < FIRST_PSEUDO_REGISTER)
25763 reg_alloc_order [pos++] = 0;
25766 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
25767 struct attribute_spec.handler. */
/* Rejects the attribute on non-function entities and on 32-bit targets,
   and diagnoses ms_abi/sysv_abi being combined on the same type.
   (Several braces/returns and the !TARGET_64BIT condition line are
   missing from this extract.)  */
25769 ix86_handle_abi_attribute (tree *node, tree name,
25770 tree args ATTRIBUTE_UNUSED,
25771 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
25773 if (TREE_CODE (*node) != FUNCTION_TYPE
25774 && TREE_CODE (*node) != METHOD_TYPE
25775 && TREE_CODE (*node) != FIELD_DECL
25776 && TREE_CODE (*node) != TYPE_DECL)
25778 warning (OPT_Wattributes, "%qs attribute only applies to functions",
25779 IDENTIFIER_POINTER (name));
25780 *no_add_attrs = true;
/* Presumably guarded by !TARGET_64BIT (condition line missing).  */
25785 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
25786 IDENTIFIER_POINTER (name));
25787 *no_add_attrs = true;
25791 /* Can combine regparm with all attributes but fastcall. */
25792 if (is_attribute_p ("ms_abi", name))
25794 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
25796 error ("ms_abi and sysv_abi attributes are not compatible");
25801 else if (is_attribute_p ("sysv_abi", name))
25803 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
25805 error ("ms_abi and sysv_abi attributes are not compatible");
25814 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
25815 struct attribute_spec.handler. */
/* Accepts the attribute only on RECORD/UNION types (or decls thereof)
   and rejects combining ms_struct with gcc_struct.  (Declaration of
   the local `type` and some braces are missing from this extract.)  */
25817 ix86_handle_struct_attribute (tree *node, tree name,
25818 tree args ATTRIBUTE_UNUSED,
25819 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
25822 if (DECL_P (*node))
25824 if (TREE_CODE (*node) == TYPE_DECL)
25825 type = &TREE_TYPE (*node);
25830 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
25831 || TREE_CODE (*type) == UNION_TYPE)))
25833 warning (OPT_Wattributes, "%qs attribute ignored",
25834 IDENTIFIER_POINTER (name));
25835 *no_add_attrs = true;
25838 else if ((is_attribute_p ("ms_struct", name)
25839 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
25840 || ((is_attribute_p ("gcc_struct", name)
25841 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
25843 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
25844 IDENTIFIER_POINTER (name));
25845 *no_add_attrs = true;
/* TARGET_MS_BITFIELD_LAYOUT_P hook: true when RECORD_TYPE should use the
   MS bitfield layout -- either the target default (unless overridden by
   gcc_struct) or an explicit ms_struct attribute.  */
25852 ix86_ms_bitfield_layout_p (const_tree record_type)
25854 return (TARGET_MS_BITFIELD_LAYOUT &&
25855 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
25856 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
25859 /* Returns an expression indicating where the this parameter is
25860 located on entry to the FUNCTION. */
/* NOTE(review): interior lines (the TARGET_64BIT guard, regparm fallback
   regno assignment) are missing from this extract.  */
25863 x86_this_parameter (tree function)
25865 tree type = TREE_TYPE (function);
/* aggr: true when the return value is passed via hidden pointer, which
   shifts `this` to the second parameter slot.  */
25866 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
/* 64-bit path: `this` is in the first (or second, if aggr) integer
   parameter register of the function's ABI.  */
25871 const int *parm_regs;
25873 if (ix86_function_type_abi (type) == MS_ABI)
25874 parm_regs = x86_64_ms_abi_int_parameter_registers;
25876 parm_regs = x86_64_int_parameter_registers;
25877 return gen_rtx_REG (DImode, parm_regs[aggr]);
/* 32-bit path: depends on regparm/fastcall.  */
25880 nregs = ix86_function_regparm (type, function);
25882 if (nregs > 0 && !stdarg_p (type))
25886 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
25887 regno = aggr ? DX_REG : CX_REG;
/* `this` spilled to the stack when no register is available.  */
25895 return gen_rtx_MEM (SImode,
25896 plus_constant (stack_pointer_rtx, 4));
25899 return gen_rtx_REG (SImode, regno);
25902 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
25905 /* Determine whether x86_output_mi_thunk can succeed. */
/* Returns true unless the 32-bit register pressure rules out a scratch
   register.  (The `return true` lines are missing from this extract.)  */
25908 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
25909 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
25910 HOST_WIDE_INT vcall_offset, const_tree function)
25912 /* 64-bit can handle anything. */
25916 /* For 32-bit, everything's fine if we have one free register. */
25917 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
25920 /* Need a free register for vcall_offset. */
25924 /* Need a free register for GOT references. */
25925 if (flag_pic && !(*targetm.binds_local_p) (function))
25928 /* Otherwise ok. */
25932 /* Output the assembler code for a thunk function. THUNK_DECL is the
25933 declaration for the thunk function itself, FUNCTION is the decl for
25934 the target function. DELTA is an immediate constant offset to be
25935 added to THIS. If VCALL_OFFSET is nonzero, the word at
25936 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): many interior lines (xops declaration, TARGET_64BIT
   guards, braces) are missing from this extract.  */
25939 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
25940 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
25941 HOST_WIDE_INT vcall_offset, tree function)
25944 rtx this_param = x86_this_parameter (function);
25947 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
25948 pull it in now and let DELTA benefit. */
25949 if (REG_P (this_param))
25950 this_reg = this_param;
25951 else if (vcall_offset)
25953 /* Put the this parameter into %eax. */
25954 xops[0] = this_param;
25955 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
25956 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
25959 this_reg = NULL_RTX;
25961 /* Adjust the this parameter by a fixed constant. */
25964 xops[0] = GEN_INT (delta);
25965 xops[1] = this_reg ? this_reg : this_param;
/* 64-bit: DELTA may not fit an immediate; stage it through %r10.  */
25968 if (!x86_64_general_operand (xops[0], DImode))
25970 tmp = gen_rtx_REG (DImode, R10_REG);
25972 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
25974 xops[1] = this_param;
25976 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
25979 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
25982 /* Adjust the this parameter by a value stored in the vtable. */
25986 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit scratch: %ecx normally, %eax for fastcall (ecx holds `this`).  */
25989 int tmp_regno = CX_REG;
25990 if (lookup_attribute ("fastcall",
25991 TYPE_ATTRIBUTES (TREE_TYPE (function))))
25992 tmp_regno = AX_REG;
25993 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer (*this) into the scratch register.  */
25996 xops[0] = gen_rtx_MEM (Pmode, this_reg);
25998 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26000 /* Adjust the this parameter. */
26001 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
26002 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
26004 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
26005 xops[0] = GEN_INT (vcall_offset);
26007 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
26008 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
26010 xops[1] = this_reg;
26011 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
26014 /* If necessary, drop THIS back to its stack slot. */
26015 if (this_reg && this_reg != this_param)
26017 xops[0] = this_reg;
26018 xops[1] = this_param;
26019 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
/* Tail-jump to the real function.  */
26022 xops[0] = XEXP (DECL_RTL (function), 0);
26025 if (!flag_pic || (*targetm.binds_local_p) (function))
26026 output_asm_insn ("jmp\t%P0", xops);
26027 /* All thunks should be in the same object as their target,
26028 and thus binds_local_p should be true. */
26029 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
26030 gcc_unreachable ();
/* 64-bit PIC: jump through the GOT entry.  */
26033 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
26034 tmp = gen_rtx_CONST (Pmode, tmp);
26035 tmp = gen_rtx_MEM (QImode, tmp);
26037 output_asm_insn ("jmp\t%A0", xops);
26042 if (!flag_pic || (*targetm.binds_local_p) (function))
26043 output_asm_insn ("jmp\t%P0", xops);
/* Darwin PIC: jump through the machopic indirection stub.  */
26048 rtx sym_ref = XEXP (DECL_RTL (function), 0);
26049 tmp = (gen_rtx_SYMBOL_REF
26051 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
26052 tmp = gen_rtx_MEM (QImode, tmp);
26054 output_asm_insn ("jmp\t%0", xops);
26057 #endif /* TARGET_MACHO */
/* 32-bit ELF PIC: set up the GOT pointer in %ecx, then jump via GOT.  */
26059 tmp = gen_rtx_REG (SImode, CX_REG);
26060 output_set_got (tmp, NULL_RTX);
26063 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
26064 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: emit file-level directives -- optional
   .version, __fltused global, and Intel-syntax prefix.  (Return type
   and the TARGET_MACHO guard around darwin_file_start are missing from
   this extract.)  */
26070 x86_file_start (void)
26072 default_file_start ();
26074 darwin_file_start ();
26076 if (X86_FILE_START_VERSION_DIRECTIVE)
26077 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
26078 if (X86_FILE_START_FLTUSED)
26079 fputs ("\t.global\t__fltused\n", asm_out_file);
26080 if (ix86_asm_dialect == ASM_INTEL)
26081 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: on 32-bit without -malign-double, cap the
   alignment of double/integer-class fields at 32 bits (classic i386 ABI).
   (Return type line and the early return for 64-bit/align-double are
   missing from this extract.)  */
26085 x86_field_alignment (tree field, int computed)
26087 enum machine_mode mode;
26088 tree type = TREE_TYPE (field);
26090 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
26092 mode = TYPE_MODE (strip_array_types (type));
26093 if (mode == DFmode || mode == DCmode
26094 || GET_MODE_CLASS (mode) == MODE_INT
26095 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
26096 return MIN (32, computed);
26100 /* Output assembler code to FILE to increment profiler label # LABELNO
26101 for profiling a function entry. */
/* Emits the mcount call sequence; three variants are visible below:
   64-bit, 32-bit PIC, and 32-bit non-PIC.  (The TARGET_64BIT / flag_pic
   selection lines are missing from this extract.)  */
26103 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* 64-bit variant.  */
26107 #ifndef NO_PROFILE_COUNTERS
26108 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
26111 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
26112 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
26114 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC variant: address mcount through the GOT.  */
26118 #ifndef NO_PROFILE_COUNTERS
26119 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
26120 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
26122 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC variant.  */
26126 #ifndef NO_PROFILE_COUNTERS
26127 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
26128 PROFILE_COUNT_REGISTER);
26130 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26134 /* We don't have exact information about the insn sizes, but we may assume
26135 quite safely that we are informed about all 1 byte insns and memory
26136 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound on the byte size of INSN.
   (Several return statements and the JUMP_P table check condition are
   missing from this extract.)  */
26140 min_insn_size (rtx insn)
26144 if (!INSN_P (insn) || !active_insn_p (insn))
26147 /* Discard alignments we've emit and jump instructions. */
26148 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26149 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
26152 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
26153 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
26156 /* Important case - calls are always 5 bytes.
26157 It is common to have many calls in the row. */
26159 && symbolic_reference_mentioned_p (PATTERN (insn))
26160 && !SIBLING_CALL_P (insn))
26162 if (get_attr_length (insn) <= 1)
26165 /* For normal instructions we may rely on the sizes of addresses
26166 and the presence of symbol to require 4 bytes of encoding.
26167 This is not the case for jumps where references are PC relative. */
26168 if (!JUMP_P (insn))
26170 l = get_attr_length_address (insn);
26171 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
26180 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Sliding-window pass: pads the insn stream so no 16-byte window holds
   4 jumps.  (The njumps++ increment and JUMP_P conditions are partly
   missing from this extract.)  */
26184 ix86_avoid_jump_misspredicts (void)
26186 rtx insn, start = get_insns ();
26187 int nbytes = 0, njumps = 0;
26190 /* Look for all minimal intervals of instructions containing 4 jumps.
26191 The intervals are bounded by START and INSN. NBYTES is the total
26192 size of instructions in the interval including INSN and not including
26193 START. When the NBYTES is smaller than 16 bytes, it is possible
26194 that the end of START and INSN ends up in the same 16byte page.
26196 The smallest offset in the page INSN can start is the case where START
26197 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
26198 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
26200 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26203 nbytes += min_insn_size (insn);
26205 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
26206 INSN_UID (insn), min_insn_size (insn));
/* Count real (non-table) jumps.  */
26208 && GET_CODE (PATTERN (insn)) != ADDR_VEC
26209 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Shrink the window from the front while it holds more than 3 jumps.  */
26217 start = NEXT_INSN (start);
26218 if ((JUMP_P (start)
26219 && GET_CODE (PATTERN (start)) != ADDR_VEC
26220 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
26222 njumps--, isjump = 1;
26225 nbytes -= min_insn_size (start);
26227 gcc_assert (njumps >= 0);
26229 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
26230 INSN_UID (start), INSN_UID (insn), nbytes);
26232 if (njumps == 3 && isjump && nbytes < 16)
26234 int padsize = 15 - nbytes + min_insn_size (insn);
26237 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
26238 INSN_UID (insn), padsize);
26239 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
26244 /* AMD Athlon works faster
26245 when RET is not destination of conditional jump or directly preceded
26246 by other jump instruction. We avoid the penalty by inserting NOP just
26247 before the RET instructions in such cases. */
/* Scans every predecessor of the exit block; replaces plain RET with the
   long-return sequence when a jump precedes it.  (Edge/edge_iterator
   declarations and several branches are missing from this extract.)  */
26249 ix86_pad_returns (void)
26254 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
26256 basic_block bb = e->src;
26257 rtx ret = BB_END (bb);
26259 bool replace = false;
26261 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
26262 || optimize_bb_for_size_p (bb))
/* Find the nearest active insn or label before the RET.  */
26264 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
26265 if (active_insn_p (prev) || LABEL_P (prev))
26267 if (prev && LABEL_P (prev))
/* RET is a jump target: replace if any non-fallthru edge reaches it.  */
26272 FOR_EACH_EDGE (e, ei, bb->preds)
26273 if (EDGE_FREQUENCY (e) && e->src->index >= 0
26274 && !(e->flags & EDGE_FALLTHRU))
26279 prev = prev_active_insn (ret);
26281 && ((JUMP_P (prev) && any_condjump_p (prev))
26284 /* Empty functions get branch mispredict even when the jump destination
26285 is not visible to us. */
26286 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
26291 emit_insn_before (gen_return_internal_long (), ret);
26297 /* Implement machine specific optimizations. We implement padding of returns
26298 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function's name/signature line (ix86_reorg) is
   missing from this extract; only the body survives.  */
26302 if (TARGET_PAD_RETURNS && optimize
26303 && optimize_function_for_speed_p (cfun))
26304 ix86_pad_returns ();
26305 if (TARGET_FOUR_JUMP_LIMIT && optimize
26306 && optimize_function_for_speed_p (cfun))
26307 ix86_avoid_jump_misspredicts ();
26310 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the cached operand list for a hard register with REGNO >= 4
   (QImode access to such regs needs a REX prefix in 64-bit mode).
   (Declaration of i and the return statements are missing from this
   extract.)  */
26313 x86_extended_QIreg_mentioned_p (rtx insn)
26316 extract_insn_cached (insn);
26317 for (i = 0; i < recog_data.n_operands; i++)
26318 if (REG_P (recog_data.operand[i])
26319 && REGNO (recog_data.operand[i]) >= 4)
26324 /* Return nonzero when P points to register encoded via REX prefix.
26325 Called via for_each_rtx. */
/* (The REG_P guard on *p is missing from this extract.)  */
26327 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
26329 unsigned int regno;
26332 regno = REGNO (*p);
26333 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
26336 /* Return true when INSN mentions register that must be encoded using REX
/* Walks the pattern (or the rtx itself, for non-insns) with
   extended_reg_mentioned_1 above.  */
26339 x86_extended_reg_mentioned_p (rtx insn)
26341 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
26342 extended_reg_mentioned_1, NULL);
26345 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
26346 optabs would emit if we didn't have TFmode patterns. */
/* Strategy: if IN >= 0, a plain signed float convert suffices; otherwise
   halve IN (preserving the low bit via OR) and double the result.
   (The `out = ...` initialization line is missing from this extract.)  */
26349 x86_emit_floatuns (rtx operands[2])
26351 rtx neglab, donelab, i0, i1, f0, in, out;
26352 enum machine_mode mode, inmode;
26354 inmode = GET_MODE (operands[1]);
26355 gcc_assert (inmode == SImode || inmode == DImode);
26358 in = force_reg (inmode, operands[1]);
26359 mode = GET_MODE (out);
26360 neglab = gen_label_rtx ();
26361 donelab = gen_label_rtx ();
26362 f0 = gen_reg_rtx (mode);
26364 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
/* Non-negative path: signed conversion is already correct.  */
26366 expand_float (out, in, 0);
26368 emit_jump_insn (gen_jump (donelab));
26371 emit_label (neglab);
/* Negative (high-bit-set) path: (in >> 1) | (in & 1), convert, double.  */
26373 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
26375 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
26377 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
26379 expand_float (f0, i0, 0);
26381 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
26383 emit_label (donelab);
26386 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26387 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): the switch over MODE (case labels, returns, and the
   smode/wsmode/wvmode assignments) is largely missing from this extract;
   only the per-case bodies survive.  */
26390 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
26391 rtx target, rtx val)
26393 enum machine_mode hmode, smode, wsmode, wvmode;
/* Simple case: emit a VEC_DUPLICATE directly.  */
26408 val = force_reg (GET_MODE_INNER (mode), val);
26409 x = gen_rtx_VEC_DUPLICATE (mode, val);
26410 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Presumably V4HImode via pshufw (needs SSE or 3DNow!A).  */
26416 if (TARGET_SSE || TARGET_3DNOW_A)
26418 val = gen_lowpart (SImode, val);
26419 x = gen_rtx_TRUNCATE (HImode, val);
26420 x = gen_rtx_VEC_DUPLICATE (mode, x);
26421 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Presumably the V8HImode SSE2 path.  */
26443 /* Extend HImode to SImode using a paradoxical SUBREG. */
26444 tmp1 = gen_reg_rtx (SImode);
26445 emit_move_insn (tmp1, gen_lowpart (SImode, val));
26446 /* Insert the SImode value as low element of V4SImode vector. */
26447 tmp2 = gen_reg_rtx (V4SImode);
26448 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
26449 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
26450 CONST0_RTX (V4SImode),
26452 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
26453 /* Cast the V4SImode vector back to a V8HImode vector. */
26454 tmp1 = gen_reg_rtx (V8HImode);
26455 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
26456 /* Duplicate the low short through the whole low SImode word. */
26457 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
26458 /* Cast the V8HImode vector back to a V4SImode vector. */
26459 tmp2 = gen_reg_rtx (V4SImode);
26460 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
26461 /* Replicate the low element of the V4SImode vector. */
26462 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
26463 /* Cast the V2SImode back to V8HImode, and store in target. */
26464 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
/* Presumably the V16QImode SSE2 path (same dance, one extra unpack).  */
26475 /* Extend QImode to SImode using a paradoxical SUBREG. */
26476 tmp1 = gen_reg_rtx (SImode);
26477 emit_move_insn (tmp1, gen_lowpart (SImode, val));
26478 /* Insert the SImode value as low element of V4SImode vector. */
26479 tmp2 = gen_reg_rtx (V4SImode);
26480 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
26481 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
26482 CONST0_RTX (V4SImode),
26484 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
26485 /* Cast the V4SImode vector back to a V16QImode vector. */
26486 tmp1 = gen_reg_rtx (V16QImode);
26487 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
26488 /* Duplicate the low byte through the whole low SImode word. */
26489 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
26490 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
26491 /* Cast the V16QImode vector back to a V4SImode vector. */
26492 tmp2 = gen_reg_rtx (V4SImode);
26493 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
26494 /* Replicate the low element of the V4SImode vector. */
26495 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
26496 /* Cast the V2SImode back to V16QImode, and store in target. */
26497 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
/* Widening fallback: replicate VAL into the next wider scalar mode and
   recurse on the corresponding wider vector mode.  */
26505 /* Replicate the value once into the next wider mode and recurse. */
26506 val = convert_modes (wsmode, smode, val, true);
26507 x = expand_simple_binop (wsmode, ASHIFT, val,
26508 GEN_INT (GET_MODE_BITSIZE (smode)),
26509 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26510 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
26512 x = gen_reg_rtx (wvmode);
26513 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
26514 gcc_unreachable ();
26515 emit_move_insn (target, gen_lowpart (mode, x));
/* Presumably the AVX 256-bit path: duplicate into a half-width vector,
   then VEC_CONCAT the two halves.  */
26538 rtx tmp = gen_reg_rtx (hmode);
26539 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
26540 emit_insn (gen_rtx_SET (VOIDmode, target,
26541 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
26550 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26551 whose ONE_VAR element is VAR, and other elements are zero. Return true */
/* NOTE(review): the closing of the comment above and many interior lines
   (case labels, braces, returns) are elided in this excerpt.  */
26555 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
26556 rtx target, rtx var, int one_var)
26558 enum machine_mode vsimode;
26561 bool use_vector_set = false;
26566 /* For SSE4.1, we normally use vector set. But if the second
26567 element is zero and inter-unit moves are OK, we use movq */
26569 use_vector_set = (TARGET_64BIT
26571 && !(TARGET_INTER_UNIT_MOVES
26577 use_vector_set = TARGET_SSE4_1;
26580 use_vector_set = TARGET_SSE2;
26583 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
26591 use_vector_set = TARGET_AVX;
26597 if (use_vector_set)
/* Zero the whole vector, then insert VAR at index ONE_VAR.  */
26599 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
26600 var = force_reg (GET_MODE_INNER (mode), var);
26601 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element case: concatenate VAR with a zero element.  */
26617 var = force_reg (GET_MODE_INNER (mode), var);
26618 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
26619 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Work in a fresh pseudo if TARGET is a hard register.  */
26624 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
26625 new_target = gen_reg_rtx (mode);
26627 new_target = target;
/* Place VAR in element 0 with the rest zeroed (VEC_MERGE with mask 1).  */
26628 var = force_reg (GET_MODE_INNER (mode), var);
26629 x = gen_rtx_VEC_DUPLICATE (mode, var);
26630 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
26631 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
26634 /* We need to shuffle the value to the correct position, so
26635 create a new pseudo to store the intermediate result. */
26637 /* With SSE2, we can use the integer shuffle insns. */
26638 if (mode != V4SFmode && TARGET_SSE2)
/* Swap element 0 into position ONE_VAR via pshufd.  */
26640 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
26642 GEN_INT (one_var == 1 ? 0 : 1),
26643 GEN_INT (one_var == 2 ? 0 : 1),
26644 GEN_INT (one_var == 3 ? 0 : 1)));
26645 if (target != new_target)
26646 emit_move_insn (target, new_target);
26650 /* Otherwise convert the intermediate result to V4SFmode and
26651 use the SSE1 shuffle instructions. */
26652 if (mode != V4SFmode)
26654 tmp = gen_reg_rtx (V4SFmode);
26655 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
26660 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
26662 GEN_INT (one_var == 1 ? 0 : 1),
26663 GEN_INT (one_var == 2 ? 0+4 : 1+4),
26664 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
26666 if (mode != V4SFmode)
26667 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
26668 else if (tmp != target)
26669 emit_move_insn (target, tmp);
26671 else if (target != new_target)
26672 emit_move_insn (target, new_target);
/* Narrow element types: widen to an SImode-element vector and recurse.  */
26677 vsimode = V4SImode;
26683 vsimode = V2SImode;
26689 /* Zero extend the variable element to SImode and recurse. */
26690 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
26692 x = gen_reg_rtx (vsimode);
26693 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
26695 gcc_unreachable ();
26697 emit_move_insn (target, gen_lowpart (mode, x));
26705 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26706 consisting of the values in VALS. It is known that all elements
26707 except ONE_VAR are constants. Return true if successful. */
/* NOTE(review): interior lines (braces, case labels, returns) are elided
   in this excerpt; only visible statements are annotated.  */
26710 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
26711 rtx target, rtx vals, int one_var)
26713 rtx var = XVECEXP (vals, 0, one_var);
26714 enum machine_mode wmode;
/* Build CONST_VEC: a copy of VALS with the variable slot zeroed, so it
   can be loaded from the constant pool and patched afterwards.  */
26717 const_vec = copy_rtx (vals);
26718 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
26719 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
26727 /* For the two element vectors, it's just as easy to use
26728 the general case. */
26752 /* There's no way to set one QImode entry easily. Combine
26753 the variable value with its adjacent constant value, and
26754 promote to an HImode set. */
26755 x = XVECEXP (vals, 0, one_var ^ 1)
/* Pack VAR and its neighbor X into one HImode value; which byte goes
   high depends on whether ONE_VAR is odd or even.  */;
26758 var = convert_modes (HImode, QImode, var, true);
26759 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
26760 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26761 x = GEN_INT (INTVAL (x) & 0xff);
26765 var = convert_modes (HImode, QImode, var, true);
26766 x = gen_int_mode (INTVAL (x) << 8, HImode);
26768 if (x != const0_rtx)
26769 var = expand_simple_binop (HImode, IOR, var, x, var,
26770 1, OPTAB_LIB_WIDEN);
/* Load the zero-patched constant vector in the wide mode and insert the
   combined HImode value at the halved index.  */
26772 x = gen_reg_rtx (wmode);
26773 emit_move_insn (x, gen_lowpart (wmode, const_vec));
26774 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
26776 emit_move_insn (target, gen_lowpart (mode, x));
/* General path: load the constant vector, then overwrite ONE_VAR.  */
26783 emit_move_insn (target, const_vec);
26784 ix86_expand_vector_set (mmx_ok, target, var, one_var);
26788 /* A subroutine of ix86_expand_vector_init_general. Use vector
26789 concatenate to handle the most general case: all values variable,
26790 and none identical. */
/* NOTE(review): switch heads, case labels and braces are elided in this
   excerpt; the n == 2 path and the recursive pairing loops below are the
   visible remnants.  */
26793 ix86_expand_vector_init_concat (enum machine_mode mode,
26794 rtx target, rtx *ops, int n)
26796 enum machine_mode cmode, hmode = VOIDmode;
26797 rtx first[8], second[4];
26837 gcc_unreachable ();
/* n == 2: force both halves into registers and VEC_CONCAT them.  */
26840 if (!register_operand (ops[1], cmode))
26841 ops[1] = force_reg (cmode, ops[1]);
26842 if (!register_operand (ops[0], cmode))
26843 ops[0] = force_reg (cmode, ops[0]);
26844 emit_insn (gen_rtx_SET (VOIDmode, target,
26845 gen_rtx_VEC_CONCAT (mode, ops[0],
26865 gcc_unreachable ();
26881 gcc_unreachable ();
26886 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* Pair up adjacent operands into CMODE two-element vectors.  */
26889 for (; i > 0; i -= 2, j--)
26891 first[j] = gen_reg_rtx (cmode);
26892 v = gen_rtvec (2, ops[i - 1], ops[i]);
26893 ix86_expand_vector_init (false, first[j],
26894 gen_rtx_PARALLEL (cmode, v));
/* If a second level of pairing is needed, combine into HMODE vectors
   and recurse; otherwise recurse directly on the first-level pairs.  */
26900 gcc_assert (hmode != VOIDmode);
26901 for (i = j = 0; i < n; i += 2, j++)
26903 second[j] = gen_reg_rtx (hmode);
26904 ix86_expand_vector_init_concat (hmode, second [j],
26908 ix86_expand_vector_init_concat (mode, target, second, n);
26911 ix86_expand_vector_init_concat (mode, target, first, n);
26915 gcc_unreachable ();
26919 /* A subroutine of ix86_expand_vector_init_general. Use vector
26920 interleave to handle the most general case: all values variable,
26921 and none identical. */
/* NOTE(review): case labels, braces and some argument lines are elided
   in this excerpt.  The two visible setup groups correspond to the
   V8HImode and V16QImode element cases respectively.  */
26924 ix86_expand_vector_init_interleave (enum machine_mode mode,
26925 rtx target, rtx *ops, int n)
26927 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
26930 rtx (*gen_load_even) (rtx, rtx, rtx);
26931 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
26932 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* HImode elements: interleave V4SI then V2DI.  */
26937 gen_load_even = gen_vec_setv8hi;
26938 gen_interleave_first_low = gen_vec_interleave_lowv4si;
26939 gen_interleave_second_low = gen_vec_interleave_lowv2di;
26940 inner_mode = HImode;
26941 first_imode = V4SImode;
26942 second_imode = V2DImode;
26943 third_imode = VOIDmode;
/* QImode elements: one more interleave level (V8HI, V4SI, V2DI).  */
26946 gen_load_even = gen_vec_setv16qi;
26947 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
26948 gen_interleave_second_low = gen_vec_interleave_lowv4si;
26949 inner_mode = QImode;
26950 first_imode = V8HImode;
26951 second_imode = V4SImode;
26952 third_imode = V2DImode;
26955 gcc_unreachable ();
26958 for (i = 0; i < n; i++)
26960 /* Extend the odd element to SImode using a paradoxical SUBREG. */
26961 op0 = gen_reg_rtx (SImode);
26962 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
26964 /* Insert the SImode value as low element of V4SImode vector. */
26965 op1 = gen_reg_rtx (V4SImode);
26966 op0 = gen_rtx_VEC_MERGE (V4SImode,
26967 gen_rtx_VEC_DUPLICATE (V4SImode,
26969 CONST0_RTX (V4SImode),
26971 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
26973 /* Cast the V4SImode vector back to a vector in original mode. */
26974 op0 = gen_reg_rtx (mode);
26975 emit_move_insn (op0, gen_lowpart (mode, op1));
26977 /* Load even elements into the second position. */
26978 emit_insn ((*gen_load_even) (op0,
26979 force_reg (inner_mode,
26983 /* Cast vector to FIRST_IMODE vector. */
26984 ops[i] = gen_reg_rtx (first_imode);
26985 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
26988 /* Interleave low FIRST_IMODE vectors. */
26989 for (i = j = 0; i < n; i += 2, j++)
26991 op0 = gen_reg_rtx (first_imode);
26992 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
26994 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
26995 ops[j] = gen_reg_rtx (second_imode);
26996 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
26999 /* Interleave low SECOND_IMODE vectors. */
27000 switch (second_imode)
27003 for (i = j = 0; i < n / 2; i += 2, j++)
27005 op0 = gen_reg_rtx (second_imode);
27006 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
27009 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector. */
27011 ops[j] = gen_reg_rtx (third_imode);
27012 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* V4SImode falls through to a final V2DImode interleave round.  */
27014 second_imode = V2DImode;
27015 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27019 op0 = gen_reg_rtx (second_imode);
27020 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
27023 /* Cast the SECOND_IMODE vector back to a vector on original mode. */
27025 emit_insn (gen_rtx_SET (VOIDmode, target,
27026 gen_lowpart (mode, op0)));
27030 gcc_unreachable ();
27034 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
27035 all values variable, and none identical. */
/* NOTE(review): switch heads, case labels and braces are elided in this
   excerpt.  Visible paths: concat-based init, AVX half-mode interleave,
   SSE interleave, and a word-building fallback at the bottom.  */
27038 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
27039 rtx target, rtx vals)
27041 rtx ops[32], op0, op1;
27042 enum machine_mode half_mode = VOIDmode;
27049 if (!mmx_ok && !TARGET_SSE)
/* Concat path: gather elements and let the concat expander pair them.  */
27061 n = GET_MODE_NUNITS (mode);
27062 for (i = 0; i < n; i++)
27063 ops[i] = XVECEXP (vals, 0, i);
27064 ix86_expand_vector_init_concat (mode, target, ops, n);
/* AVX 256-bit QI/HI vectors: build each 128-bit half by interleave,
   then concatenate the halves.  */
27068 half_mode = V16QImode;
27072 half_mode = V8HImode;
27076 n = GET_MODE_NUNITS (mode);
27077 for (i = 0; i < n; i++)
27078 ops[i] = XVECEXP (vals, 0, i);
27079 op0 = gen_reg_rtx (half_mode);
27080 op1 = gen_reg_rtx (half_mode);
27081 ix86_expand_vector_init_interleave (half_mode, op0, ops,
27083 ix86_expand_vector_init_interleave (half_mode, op1,
27084 &ops [n >> 1], n >> 2);
27085 emit_insn (gen_rtx_SET (VOIDmode, target,
27086 gen_rtx_VEC_CONCAT (mode, op0, op1)));
27090 if (!TARGET_SSE4_1)
27098 /* Don't use ix86_expand_vector_init_interleave if we can't
27099 move from GPR to SSE register directly. */
27100 if (!TARGET_INTER_UNIT_MOVES)
27103 n = GET_MODE_NUNITS (mode);
27104 for (i = 0; i < n; i++)
27105 ops[i] = XVECEXP (vals, 0, i);
27106 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
27114 gcc_unreachable ();
/* Fallback: assemble elements into word-sized integers, then move the
   words into the vector register.  */
27118 int i, j, n_elts, n_words, n_elt_per_word;
27119 enum machine_mode inner_mode;
27120 rtx words[4], shift;
27122 inner_mode = GET_MODE_INNER (mode);
27123 n_elts = GET_MODE_NUNITS (mode);
27124 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27125 n_elt_per_word = n_elts / n_words;
27126 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
27128 for (i = 0; i < n_words; ++i)
27130 rtx word = NULL_RTX;
/* Elements are folded in from the high end, shifting previous content
   left by one element each iteration.  */
27132 for (j = 0; j < n_elt_per_word; ++j)
27134 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
27135 elt = convert_modes (word_mode, inner_mode, elt, true);
27141 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
27142 word, 1, OPTAB_LIB_WIDEN);
27143 word = expand_simple_binop (word_mode, IOR, word, elt,
27144 word, 1, OPTAB_LIB_WIDEN);
27152 emit_move_insn (target, gen_lowpart (mode, words[0]));
27153 else if (n_words == 2)
/* Clobber TMP first so the part-writes below don't imply a read.  */
27155 rtx tmp = gen_reg_rtx (mode);
27156 emit_clobber (tmp);
27157 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27158 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27159 emit_move_insn (target, tmp);
27161 else if (n_words == 4)
27163 rtx tmp = gen_reg_rtx (V4SImode);
27164 gcc_assert (word_mode == SImode);
27165 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
27166 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
27167 emit_move_insn (target, gen_lowpart (mode, tmp));
27170 gcc_unreachable ();
27174 /* Initialize vector TARGET via VALS. Suppress the use of MMX
27175 instructions unless MMX_OK is true. */
/* Top-level dispatcher: classifies VALS and tries, in order, constant
   pool load, broadcast, one-nonzero, one-variable, then general init.
   NOTE(review): some condition and return lines are elided here.  */
27178 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
27180 enum machine_mode mode = GET_MODE (target);
27181 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27182 int n_elts = GET_MODE_NUNITS (mode);
27183 int n_var = 0, one_var = -1;
27184 bool all_same = true, all_const_zero = true;
/* Scan VALS: count non-constant elements (remember the last index in
   ONE_VAR) and track the all-zero / all-identical properties.  */
27188 for (i = 0; i < n_elts; ++i)
27190 x = XVECEXP (vals, 0, i);
27191 if (!(CONST_INT_P (x)
27192 || GET_CODE (x) == CONST_DOUBLE
27193 || GET_CODE (x) == CONST_FIXED))
27194 n_var++, one_var = i;
27195 else if (x != CONST0_RTX (inner_mode))
27196 all_const_zero = false;
27197 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
27201 /* Constants are best loaded from the constant pool. */
27204 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
27208 /* If all values are identical, broadcast the value. */
27210 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
27211 XVECEXP (vals, 0, 0)))
27214 /* Values where only one field is non-constant are best loaded from
27215 the pool and overwritten via move later. */
27219 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
27220 XVECEXP (vals, 0, one_var),
27224 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Last resort: fully general expansion.  */
27228 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector register TARGET.
   Strategy is chosen per vector mode: VEC_CONCAT for 2-element modes,
   shufps/pshufd dances for V4SF/V4SI, vec_merge when the ISA has a
   direct insert, AVX half extraction/insertion for 256-bit modes, and
   a stack-memory round trip as the last resort.
   NOTE(review): switch heads, case labels and braces are elided in this
   excerpt; only visible statements are annotated.  */
27232 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
27234 enum machine_mode mode = GET_MODE (target);
27235 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27236 enum machine_mode half_mode;
27237 bool use_vec_merge = false;
/* Per-width tables of AVX 128-bit half extract/insert generators,
   indexed by [mode row][high half?].  */
27239 static rtx (*gen_extract[6][2]) (rtx, rtx)
27241 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
27242 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
27243 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
27244 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
27245 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
27246 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
27248 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
27250 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
27251 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
27252 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
27253 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
27254 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
27255 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element mode: extract the untouched element, then reconcatenate
   with VAL in the requested slot.  */
27265 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
27266 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
27268 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
27270 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
27271 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27277 use_vec_merge = TARGET_SSE4_1;
27285 /* For the two element vectors, we implement a VEC_CONCAT with
27286 the extraction of the other element. */
27288 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
27289 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
27292 op0 = val, op1 = tmp;
27294 op0 = tmp, op1 = val;
27296 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
27297 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27302 use_vec_merge = TARGET_SSE4_1;
27309 use_vec_merge = true;
27313 /* tmp = target = A B C D */
27314 tmp = copy_to_reg (target);
27315 /* target = A A B B */
27316 emit_insn (gen_sse_unpcklps (target, target, target));
27317 /* target = X A B B */
27318 ix86_expand_vector_set (false, target, val, 0);
27319 /* target = A X C D */
27320 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27321 GEN_INT (1), GEN_INT (0),
27322 GEN_INT (2+4), GEN_INT (3+4)));
27326 /* tmp = target = A B C D */
27327 tmp = copy_to_reg (target);
27328 /* tmp = X B C D */
27329 ix86_expand_vector_set (false, tmp, val, 0);
27330 /* target = A B X D */
27331 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27332 GEN_INT (0), GEN_INT (1),
27333 GEN_INT (0+4), GEN_INT (3+4)));
27337 /* tmp = target = A B C D */
27338 tmp = copy_to_reg (target);
27339 /* tmp = X B C D */
27340 ix86_expand_vector_set (false, tmp, val, 0);
27341 /* target = A B X D */
27342 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27343 GEN_INT (0), GEN_INT (1),
27344 GEN_INT (2+4), GEN_INT (0+4)));
27348 gcc_unreachable ();
27353 use_vec_merge = TARGET_SSE4_1;
27357 /* Element 0 handled by vec_merge below. */
27360 use_vec_merge = true;
27366 /* With SSE2, use integer shuffles to swap element 0 and ELT,
27367 store into element 0, then shuffle them back. */
27371 order[0] = GEN_INT (elt);
27372 order[1] = const1_rtx;
27373 order[2] = const2_rtx;
27374 order[3] = GEN_INT (3);
27375 order[elt] = const0_rtx;
27377 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27378 order[1], order[2], order[3]));
27380 ix86_expand_vector_set (false, target, val, 0);
/* The same permutation is its own inverse: shuffle back.  */
27382 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27383 order[1], order[2], order[3]));
27387 /* For SSE1, we have to reuse the V4SF code. */
27388 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
27389 gen_lowpart (SFmode, val), elt);
27394 use_vec_merge = TARGET_SSE2;
27397 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27401 use_vec_merge = TARGET_SSE4_1;
/* AVX 256-bit modes: operate on the 128-bit half that holds ELT.  */
27408 half_mode = V16QImode;
27414 half_mode = V8HImode;
27420 half_mode = V4SImode;
27426 half_mode = V2DImode;
27432 half_mode = V4SFmode;
27438 half_mode = V2DFmode;
27444 /* Compute offset. */
27448 gcc_assert (i <= 1);
27450 /* Extract the half. */
27451 tmp = gen_reg_rtx (half_mode);
27452 emit_insn ((*gen_extract[j][i]) (tmp, target));
27454 /* Put val in tmp at elt. */
27455 ix86_expand_vector_set (false, tmp, val, elt);
/* Reinsert the modified half into TARGET.  */
27458 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
/* Direct insert: duplicate VAL, then merge only lane ELT into TARGET.  */
27467 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
27468 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
27469 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill to a stack temporary, store the element, reload.  */
27473 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27475 emit_move_insn (mem, target);
27477 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27478 emit_move_insn (tmp, val);
27480 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  Uses a direct
   vec_select/vec_extract when the ISA allows, shuffles the wanted lane
   to position 0 otherwise, and falls back to a stack round trip.
   NOTE(review): switch heads, case labels and braces are elided in this
   excerpt; only visible statements are annotated.  */
27485 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
27487 enum machine_mode mode = GET_MODE (vec);
27488 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27489 bool use_vec_extr = false;
27502 use_vec_extr = true;
27506 use_vec_extr = TARGET_SSE4_1;
/* V4SF: shufps broadcasts lane ELT so the vec_select below reads it
   from position 0.  */
27518 tmp = gen_reg_rtx (mode);
27519 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
27520 GEN_INT (elt), GEN_INT (elt),
27521 GEN_INT (elt+4), GEN_INT (elt+4)));
27525 tmp = gen_reg_rtx (mode);
27526 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
27530 gcc_unreachable ();
27533 use_vec_extr = true;
27538 use_vec_extr = TARGET_SSE4_1;
/* V4SI: pshufd broadcasts lane ELT into position 0.  */
27552 tmp = gen_reg_rtx (mode);
27553 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
27554 GEN_INT (elt), GEN_INT (elt),
27555 GEN_INT (elt), GEN_INT (elt)));
27559 tmp = gen_reg_rtx (mode);
27560 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
27564 gcc_unreachable ();
27567 use_vec_extr = true;
27572 /* For SSE1, we have to reuse the V4SF code. */
27573 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
27574 gen_lowpart (V4SFmode, vec), elt);
27580 use_vec_extr = TARGET_SSE2;
27583 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27587 use_vec_extr = TARGET_SSE4_1;
27591 /* ??? Could extract the appropriate HImode element and shift. */
/* Direct extraction: vec_select lane ELT out of VEC.  */
27598 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
27599 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
27601 /* Let the rtl optimizers know about the zero extension performed. */
27602 if (inner_mode == QImode || inner_mode == HImode)
27604 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
27605 target = gen_lowpart (SImode, target);
27608 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill VEC to a stack temporary and load one element.  */
27612 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27614 emit_move_insn (mem, vec);
27616 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27617 emit_move_insn (target, tmp);
27621 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
27622 pattern to reduce; DEST is the destination; IN is the input vector. */
27625 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
27627 rtx tmp1, tmp2, tmp3;
27629 tmp1 = gen_reg_rtx (V4SFmode);
27630 tmp2 = gen_reg_rtx (V4SFmode);
27631 tmp3 = gen_reg_rtx (V4SFmode);
/* Fold the high pair onto the low pair ...  */
27633 emit_insn (gen_sse_movhlps (tmp1, in, in));
27634 emit_insn (fn (tmp2, tmp1, in));
/* ... then fold lane 1 onto lane 0 to finish the reduction.  */
27636 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
27637 GEN_INT (1), GEN_INT (1),
27638 GEN_INT (1+4), GEN_INT (1+4)));
27639 emit_insn (fn (dest, tmp2, tmp3));
27642 /* Target hook for scalar_mode_supported_p. */
/* NOTE(review): the return statements of the first two branches are
   elided in this excerpt.  */
27644 ix86_scalar_mode_supported_p (enum machine_mode mode)
27646 if (DECIMAL_FLOAT_MODE_P (mode))
27648 else if (mode == TFmode)
27651 return default_scalar_mode_supported_p (mode);
27654 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when some enabled ISA extension can hold it.
   NOTE(review): the per-branch return statements are elided here.  */
27656 ix86_vector_mode_supported_p (enum machine_mode mode)
27658 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
27660 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
27662 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
27664 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
27666 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
27671 /* Target hook for c_mode_for_suffix. */
/* Maps a C literal suffix character to a machine mode.
   NOTE(review): the entire function body is elided in this excerpt.  */
27672 static enum machine_mode
27673 ix86_c_mode_for_suffix (char suffix)
27683 /* Worker function for TARGET_MD_ASM_CLOBBERS.
27685 We do this in the new i386 backend to maintain source compatibility
27686 with the old cc0-based compiler. */
/* Prepends implicit "flags" and "fpsr" clobbers to every asm statement.
   NOTE(review): the clobbers parameter and return are elided here.  */
27689 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
27690 tree inputs ATTRIBUTE_UNUSED,
27693 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
27695 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
27700 /* Implements target vector targetm.asm.encode_section_info. This
27701 is not used by netware. */
27703 static void ATTRIBUTE_UNUSED
27704 ix86_encode_section_info (tree decl, rtx rtl, int first)
/* Run the default encoding first, then mark large-model data symbols
   so addressing code knows they may need far addressing.  */
27706 default_encode_section_info (decl, rtl, first);
27708 if (TREE_CODE (decl) == VAR_DECL
27709 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
27710 && ix86_in_large_data_p (decl))
27711 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
27714 /* Worker function for REVERSE_CONDITION. */
/* FP compare modes must use the maybe-unordered reversal so NaN operands
   keep correct semantics.  */
27717 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
27719 return (mode != CCFPmode && mode != CCFPUmode
27720 ? reverse_condition (code)
27721 : reverse_condition_maybe_unordered (code));
27724 /* Output code to perform an x87 FP register move, from OPERANDS[1] */
/* Returns the assembler template for an x87 move.  Uses the popping
   forms (fstp/ffreep) when a REG_DEAD note shows the source dies here.
   NOTE(review): some branch/brace lines are elided in this excerpt.  */
27728 output_387_reg_move (rtx insn, rtx *operands)
27730 if (REG_P (operands[0]))
27732 if (REG_P (operands[1])
27733 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
27735 if (REGNO (operands[0]) == FIRST_STACK_REG)
27736 return output_387_ffreep (operands, 0);
27737 return "fstp\t%y0";
27739 if (STACK_TOP_P (operands[0]))
27740 return "fld%z1\t%y1";
27743 else if (MEM_P (operands[0]))
27745 gcc_assert (REG_P (operands[1]));
27746 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
27747 return "fstp%z0\t%y0";
27750 /* There is no non-popping store to memory for XFmode.
27751 So if we need one, follow the store with a load. */
27752 if (GET_MODE (operands[0]) == XFmode)
27753 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
27755 return "fst%z0\t%y0";
27762 /* Output code to perform a conditional jump to LABEL, if C2 flag in
27763 FP status register is set. */
27766 ix86_emit_fp_unordered_jump (rtx label)
27768 rtx reg = gen_reg_rtx (HImode)
/* Read the x87 status word into REG.  */;
27771 emit_insn (gen_x86_fnstsw_1 (reg));
27773 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
/* sahf path: move the status flags into EFLAGS and test UNORDERED.  */
27775 emit_insn (gen_x86_sahf_1 (reg));
27777 temp = gen_rtx_REG (CCmode, FLAGS_REG);
27778 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* Fallback: test the C2 bit (0x04) of the status word directly.  */
27782 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
27784 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27785 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
27788 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
27789 gen_rtx_LABEL_REF (VOIDmode, label),
27791 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
27793 emit_jump_insn (temp);
/* The unordered branch is predicted mostly not taken.  */
27794 predict_jump (REG_BR_PROB_BASE * 10 / 100);
27797 /* Output code to perform a log1p XFmode calculation. */
/* Computes op0 = log1p(op1) on the x87: fyl2xp1 is only accurate for
   |op1| below 1 - sqrt(2)/2 ~= 0.2929; larger magnitudes fall back to
   fyl2x on 1 + op1.  */
27799 void ix86_emit_i387_log1p (rtx op0, rtx op1)
27801 rtx label1 = gen_label_rtx ();
27802 rtx label2 = gen_label_rtx ();
27804 rtx tmp = gen_reg_rtx (XFmode);
27805 rtx tmp2 = gen_reg_rtx (XFmode);
27807 emit_insn (gen_absxf2 (tmp, op1));
27808 emit_insn (gen_cmpxf (tmp,
27809 CONST_DOUBLE_FROM_REAL_VALUE (
27810 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
27812 emit_jump_insn (gen_bge (label1));
/* Small-|op1| path: op0 = ln2 * log2(1 + op1) via fyl2xp1.  */
27814 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
27815 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
27816 emit_jump (label2);
/* Large-|op1| path: op0 = ln2 * log2(op1 + 1) via fyl2x.  */
27818 emit_label (label1);
27819 emit_move_insn (tmp, CONST1_RTX (XFmode));
27820 emit_insn (gen_addxf3 (tmp, op1, tmp));
27821 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
27822 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
27824 emit_label (label2);
27827 /* Output code to perform a Newton-Raphson approximation of a single precision
27828 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* Emits res = a / b as a * rcp(b) refined by one Newton-Raphson step.
   MODE may be scalar SFmode or an SF vector mode.  */
27830 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
27832 rtx x0, x1, e0, e1, two;
27834 x0 = gen_reg_rtx (mode);
27835 e0 = gen_reg_rtx (mode);
27836 e1 = gen_reg_rtx (mode);
27837 x1 = gen_reg_rtx (mode);
27839 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
27841 if (VECTOR_MODE_P (mode))
27842 two = ix86_build_const_vector (SFmode, true, two);
27844 two = force_reg (mode, two);
27846 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
27848 /* x0 = rcp(b) estimate */
27849 emit_insn (gen_rtx_SET (VOIDmode, x0,
27850 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = x0 * b */
27853 emit_insn (gen_rtx_SET (VOIDmode, e0,
27854 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 */
27856 emit_insn (gen_rtx_SET (VOIDmode, e1,
27857 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1  (refined reciprocal) */
27859 emit_insn (gen_rtx_SET (VOIDmode, x1,
27860 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 */
27862 emit_insn (gen_rtx_SET (VOIDmode, res,
27863 gen_rtx_MULT (mode, a, x1)));
27866 /* Output code to perform a Newton-Raphson approximation of a
27867 single precision floating point [reciprocal] square root. */
/* Emits res = sqrt(a) or rsqrt(a) from the rsqrtss estimate refined by
   one Newton-Raphson step.  NOTE(review): the recip flag parameter and
   some branch lines are elided in this excerpt.  */
27869 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
27872 rtx x0, e0, e1, e2, e3, mthree, mhalf;
27875 x0 = gen_reg_rtx (mode);
27876 e0 = gen_reg_rtx (mode);
27877 e1 = gen_reg_rtx (mode);
27878 e2 = gen_reg_rtx (mode);
27879 e3 = gen_reg_rtx (mode);
/* mthree = -3.0, mhalf = -0.5, built as SFmode reals.  */
27881 real_from_integer (&r, VOIDmode, -3, -1, 0);
27882 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
27884 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
27885 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
27887 if (VECTOR_MODE_P (mode))
27889 mthree = ix86_build_const_vector (SFmode, true, mthree);
27890 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
27893 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
27894 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
27896 /* x0 = rsqrt(a) estimate */
27897 emit_insn (gen_rtx_SET (VOIDmode, x0,
27898 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
27901 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
27906 zero = gen_reg_rtx (mode);
27907 mask = gen_reg_rtx (mode);
27909 zero = force_reg (mode, CONST0_RTX(mode));
/* mask = (a != 0), used to zero x0 when a == 0.  */
27910 emit_insn (gen_rtx_SET (VOIDmode, mask,
27911 gen_rtx_NE (mode, zero, a)));
27913 emit_insn (gen_rtx_SET (VOIDmode, x0,
27914 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a */
27918 emit_insn (gen_rtx_SET (VOIDmode, e0,
27919 gen_rtx_MULT (mode, x0, a)));
/* e1 = e0 * x0 */
27921 emit_insn (gen_rtx_SET (VOIDmode, e1,
27922 gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3.0 */
27925 mthree = force_reg (mode, mthree);
27926 emit_insn (gen_rtx_SET (VOIDmode, e2,
27927 gen_rtx_PLUS (mode, e1, mthree)));
27929 mhalf = force_reg (mode, mhalf);
27931 /* e3 = -.5 * x0 */
27932 emit_insn (gen_rtx_SET (VOIDmode, e3,
27933 gen_rtx_MULT (mode, x0, mhalf)));
27935 /* e3 = -.5 * e0 */
27936 emit_insn (gen_rtx_SET (VOIDmode, e3,
27937 gen_rtx_MULT (mode, e0, mhalf)));
27938 /* ret = e2 * e3 */
27939 emit_insn (gen_rtx_SET (VOIDmode, res,
27940 gen_rtx_MULT (mode, e2, e3)));
27943 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
27945 static void ATTRIBUTE_UNUSED
27946 i386_solaris_elf_named_section (const char *name, unsigned int flags,
27949 /* With Binutils 2.15, the "@unwind" marker must be specified on
27950 every occurrence of the ".eh_frame" section, not just the first */
27953 && strcmp (name, ".eh_frame") == 0)
27955 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
27956 flags & SECTION_WRITE ? "aw" : "a");
/* Every other section uses the generic ELF directive.  */
27959 default_elf_asm_named_section (name, flags, decl);
27962 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* NOTE(review): case labels and return statements are elided in this
   excerpt; only scalar void/bool/integer/real types are considered.  */
27964 static const char *
27965 ix86_mangle_type (const_tree type)
27967 type = TYPE_MAIN_VARIANT (type);
27969 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
27970 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
27973 switch (TYPE_MODE (type))
27976 /* __float128 is "g". */
27979 /* "long double" or __float80 is "e". */
27986 /* For 32-bit code we can save PIC register setup by using
27987 __stack_chk_fail_local hidden function instead of calling
27988 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
27989 register, so it is better to call __stack_chk_fail directly. */
27992 ix86_stack_protect_fail (void)
27994 return TARGET_64BIT
27995 ? default_external_stack_protect_fail ()
27996 : default_hidden_stack_protect_fail ();
27999 /* Select a format to encode pointers in exception handling data. CODE
28000 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
28001 true if the symbol may be affected by dynamic relocations.
28003 ??? All x86 object file formats are capable of representing this.
28004 After all, the relocation needed is the same as for the call insn.
28005 Whether or not a particular assembler allows us to enter such, I
28006 guess we'll have to see. */
/* NOTE(review): the surrounding PIC/64-bit branch lines are elided in
   this excerpt.  */
28008 asm_preferred_eh_data_format (int code, int global)
/* PIC path: pc-relative data, 8-byte by default, 4-byte for the small
   and (partially) medium code models; indirect for global symbols.  */
28012 int type = DW_EH_PE_sdata8;
28014 || ix86_cmodel == CM_SMALL_PIC
28015 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
28016 type = DW_EH_PE_sdata4;
28017 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: 4-byte absolute when the code model guarantees it fits.  */
28019 if (ix86_cmodel == CM_SMALL
28020 || (ix86_cmodel == CM_MEDIUM && code))
28021 return DW_EH_PE_udata4;
28022 return DW_EH_PE_absptr;
28025 /* Expand copysign from SIGN to the positive value ABS_VALUE
28026 storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
28029 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
28031 enum machine_mode mode = GET_MODE (sign);
28032 rtx sgn = gen_reg_rtx (mode);
/* No caller-provided sign mask: build a fresh sign-bit mask for MODE.  */
28033 if (mask == NULL_RTX)
28035 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
28036 if (!VECTOR_MODE_P (mode))
28038 /* We need to generate a scalar mode mask in this case.  */
28039 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28040 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28041 mask = gen_reg_rtx (mode);
28042 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* NOTE(review): in the full source this NOT is the other arm — a
   caller-provided mask masks out the non-sign bits and is inverted here;
   the branch structure is truncated in this extract.  */
28046 mask = gen_rtx_NOT (mode, mask);
/* sgn = sign & sign-bit-mask; result = abs_value | sgn.  */
28047 emit_insn (gen_rtx_SET (VOIDmode, sgn,
28048 gen_rtx_AND (mode, mask, sign)));
28049 emit_insn (gen_rtx_SET (VOIDmode, result,
28050 gen_rtx_IOR (mode, abs_value, sgn)));
28053 /* Expand fabs (OP0) and return a new rtx that holds the result.  The
28054 mask for masking out the sign-bit is stored in *SMASK, if that is
28057 ix86_expand_sse_fabs (rtx op0, rtx *smask)
28059 enum machine_mode mode = GET_MODE (op0);
28062 xa = gen_reg_rtx (mode);
/* Build an all-bits-but-sign mask (last arg TRUE = inverted sign mask).  */
28063 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
28064 if (!VECTOR_MODE_P (mode))
28066 /* We need to generate a scalar mode mask in this case.  */
28067 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28068 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28069 mask = gen_reg_rtx (mode);
28070 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* xa = op0 & mask clears the sign bit, i.e. fabs.  The mask is handed
   back through *SMASK (store truncated from this extract).  */
28072 emit_insn (gen_rtx_SET (VOIDmode, xa,
28073 gen_rtx_AND (mode, op0, mask)));
28081 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
28082 swapping the operands if SWAP_OPERANDS is true.  The expanded
28083 code is a forward jump to a newly created label in case the
28084 comparison is true.  The generated label rtx is returned.  */
28086 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
28087 bool swap_operands)
/* Compare into the FP-unordered flags mode, then emit a conditional
   forward jump to a fresh label; fall-through is the "false" path.  */
28098 label = gen_label_rtx ();
28099 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
28100 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28101 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
28102 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
28103 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28104 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
28105 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Record the jump target so later passes can maintain label use counts.  */
28106 JUMP_LABEL (tmp) = label;
28111 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
28112 using comparison code CODE.  Operands are swapped for the comparison if
28113 SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
28115 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
28116 bool swap_operands)
28118 enum machine_mode mode = GET_MODE (op0);
28119 rtx mask = gen_reg_rtx (mode);
/* Pick the DF (cmpsd) or SF (cmpss) mask-compare pattern by mode; the
   result is an all-ones/all-zeros mask in MASK.  */
28128 if (mode == DFmode)
28129 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
28130 gen_rtx_fmt_ee (code, mode, op0, op1)));
28132 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
28133 gen_rtx_fmt_ee (code, mode, op0, op1)));
28138 /* Generate and return a rtx of mode MODE for 2**n where n is the number
28139 of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
28141 ix86_gen_TWO52 (enum machine_mode mode)
28143 REAL_VALUE_TYPE TWO52r;
/* 2^52 for double, 2^23 for float: the threshold above which every
   representable value is already an integer.  */
28146 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
28147 TWO52 = const_double_from_real_value (TWO52r, mode);
28148 TWO52 = force_reg (mode, TWO52);
28153 /* Expand SSE sequence for computing lround from OP1 storing
28156 ix86_expand_lround (rtx op0, rtx op1)
28158 /* C code for the stuff we're doing below:
28159 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
28162 enum machine_mode mode = GET_MODE (op1);
28163 const struct real_format *fmt;
28164 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28167 /* load nextafter (0.5, 0.0) */
/* pred_half = 0.5 - 2^(-p-1), the largest value below 0.5; adding it
   rounds half-way cases away from zero after truncation.  */
28168 fmt = REAL_MODE_FORMAT (mode);
28169 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28170 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28172 /* adj = copysign (0.5, op1) */
28173 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
28174 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
28176 /* adj = op1 + adj */
28177 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
28179 /* op0 = (imode)adj */
/* Truncating fix conversion produces the rounded integer result.  */
28180 expand_fix (op0, adj, 0);
28183 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
28186 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
28188 /* C code for the stuff we're doing below (for do_floor):
28190 xi -= (double)xi > op1 ? 1 : 0;
28193 enum machine_mode fmode = GET_MODE (op1);
28194 enum machine_mode imode = GET_MODE (op0);
28195 rtx ireg, freg, label, tmp;
28197 /* reg = (long)op1 */
28198 ireg = gen_reg_rtx (imode);
28199 expand_fix (ireg, op1, 0);
28201 /* freg = (double)reg */
28202 freg = gen_reg_rtx (fmode);
28203 expand_float (freg, ireg, 0);
28205 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* Jump past the adjustment when no correction is needed; operands are
   swapped (and the sense reused) for the ceil variant via !do_floor.  */
28206 label = ix86_expand_sse_compare_and_jump (UNLE,
28207 freg, op1, !do_floor);
/* floor subtracts 1 when truncation rounded up; ceil adds 1 when it
   rounded down.  */
28208 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
28209 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
28210 emit_move_insn (ireg, tmp);
28212 emit_label (label);
28213 LABEL_NUSES (label) = 1;
28215 emit_move_insn (op0, ireg);
28218 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
28219 result in OPERAND0.  */
28221 ix86_expand_rint (rtx operand0, rtx operand1)
28223 /* C code for the stuff we're doing below:
28224 xa = fabs (operand1);
28225 if (!isless (xa, 2**52))
28227 xa = xa + 2**52 - 2**52;
28228 return copysign (xa, operand1);
28230 enum machine_mode mode = GET_MODE (operand0);
28231 rtx res, xa, label, TWO52, mask;
/* RES starts as a copy of the input so the early-exit path (|x| >= 2^52,
   already an integer) just falls through to the final move.  */
28233 res = gen_reg_rtx (mode);
28234 emit_move_insn (res, operand1);
28236 /* xa = abs (operand1) */
28237 xa = ix86_expand_sse_fabs (res, &mask);
28239 /* if (!isless (xa, TWO52)) goto label; */
28240 TWO52 = ix86_gen_TWO52 (mode);
28241 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding and subtracting 2^52 forces rounding to integer in the current
   rounding mode (round-to-nearest for rint).  */
28243 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28244 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign so -0.0 and negative halves come out right.  */
28246 ix86_sse_copysign_to_positive (res, xa, res, mask);
28248 emit_label (label);
28249 LABEL_NUSES (label) = 1;
28251 emit_move_insn (operand0, res);
28254 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
28257 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
28259 /* C code for the stuff we expand below.
28260 double xa = fabs (x), x2;
28261 if (!isless (xa, TWO52))
28263 xa = xa + TWO52 - TWO52;
28264 x2 = copysign (xa, x);
/* 32-bit-safe variant: rounds via the 2^52 trick instead of a DImode
   fix/float round-trip (cvttsd2siq needs a 64-bit target).  */
28273 enum machine_mode mode = GET_MODE (operand0);
28274 rtx xa, TWO52, tmp, label, one, res, mask;
28276 TWO52 = ix86_gen_TWO52 (mode);
28278 /* Temporary for holding the result, initialized to the input
28279 operand to ease control flow.  */
28280 res = gen_reg_rtx (mode);
28281 emit_move_insn (res, operand1);
28283 /* xa = abs (operand1) */
28284 xa = ix86_expand_sse_fabs (res, &mask);
28286 /* if (!isless (xa, TWO52)) goto label; */
28287 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28289 /* xa = xa + TWO52 - TWO52; */
28290 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28291 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28293 /* xa = copysign (xa, operand1) */
28294 ix86_sse_copysign_to_positive (xa, xa, res, mask);
28296 /* generate 1.0 or -1.0 */
/* Using -1.0 for ceil lets the always-subtract below implement +1.  */
28297 one = force_reg (mode,
28298 const_double_from_real_value (do_floor
28299 ? dconst1 : dconstm1, mode));
28301 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28302 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
/* AND the all-ones compare mask with +/-1.0 to get the adjustment.  */
28303 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28304 gen_rtx_AND (mode, one, tmp)));
28305 /* We always need to subtract here to preserve signed zero.  */
28306 tmp = expand_simple_binop (mode, MINUS,
28307 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28308 emit_move_insn (res, tmp);
28310 emit_label (label);
28311 LABEL_NUSES (label) = 1;
28313 emit_move_insn (operand0, res);
28316 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
28319 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
28321 /* C code for the stuff we expand below.
28322 double xa = fabs (x), x2;
28323 if (!isless (xa, TWO52))
28325 x2 = (double)(long)x;
28332 if (HONOR_SIGNED_ZEROS (mode))
28333 return copysign (x2, x);
/* Fast variant: truncate via an integer fix/float round-trip, then
   correct by one where truncation rounded the wrong way.  */
28336 enum machine_mode mode = GET_MODE (operand0);
28337 rtx xa, xi, TWO52, tmp, label, one, res, mask;
28339 TWO52 = ix86_gen_TWO52 (mode);
28341 /* Temporary for holding the result, initialized to the input
28342 operand to ease control flow.  */
28343 res = gen_reg_rtx (mode);
28344 emit_move_insn (res, operand1);
28346 /* xa = abs (operand1) */
28347 xa = ix86_expand_sse_fabs (res, &mask);
28349 /* if (!isless (xa, TWO52)) goto label; */
28350 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28352 /* xa = (double)(long)x */
/* DImode fix requires a 64-bit target for DFmode (cvttsd2siq).  */
28353 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28354 expand_fix (xi, res, 0);
28355 expand_float (xa, xi, 0);
28358 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28360 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28361 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28362 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28363 gen_rtx_AND (mode, one, tmp)));
/* floor subtracts the correction, ceil adds it.  */
28364 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
28365 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28366 emit_move_insn (res, tmp);
/* Re-apply the input's sign so floor(-0.0) stays -0.0 when required.  */
28368 if (HONOR_SIGNED_ZEROS (mode))
28369 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28371 emit_label (label);
28372 LABEL_NUSES (label) = 1;
28374 emit_move_insn (operand0, res);
28377 /* Expand SSE sequence for computing round from OPERAND1 storing
28378 into OPERAND0.  Sequence that works without relying on DImode truncation
28379 via cvttsd2siq that is only available on 64bit targets.  */
28381 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
28383 /* C code for the stuff we expand below.
28384 double xa = fabs (x), xa2, x2;
28385 if (!isless (xa, TWO52))
28387 Using the absolute value and copying back sign makes
28388 -0.0 -> -0.0 correct.
28389 xa2 = xa + TWO52 - TWO52;
28394 else if (dxa > 0.5)
28396 x2 = copysign (xa2, x);
28399 enum machine_mode mode = GET_MODE (operand0);
28400 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
28402 TWO52 = ix86_gen_TWO52 (mode);
28404 /* Temporary for holding the result, initialized to the input
28405 operand to ease control flow.  */
28406 res = gen_reg_rtx (mode);
28407 emit_move_insn (res, operand1);
28409 /* xa = abs (operand1) */
28410 xa = ix86_expand_sse_fabs (res, &mask);
28412 /* if (!isless (xa, TWO52)) goto label; */
28413 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28415 /* xa2 = xa + TWO52 - TWO52; */
/* Round-to-nearest-even via the 2^52 trick; dxa below measures how the
   rounding moved the value so ties can be fixed up to round-half-away.  */
28416 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28417 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
28419 /* dxa = xa2 - xa; */
28420 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
28422 /* generate 0.5, 1.0 and -0.5 */
28423 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
28424 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
28425 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
/* NOTE(review): this gen_reg_rtx result is dead — TMP is immediately
   overwritten by the compare-mask call below.  Harmless, but removable.  */
28429 tmp = gen_reg_rtx (mode);
28430 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
28431 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
28432 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28433 gen_rtx_AND (mode, one, tmp)));
28434 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28435 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
28436 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
28437 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28438 gen_rtx_AND (mode, one, tmp)));
28439 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28441 /* res = copysign (xa2, operand1) */
28442 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
28444 emit_label (label);
28445 LABEL_NUSES (label) = 1;
28447 emit_move_insn (operand0, res);
28450 /* Expand SSE sequence for computing trunc from OPERAND1 storing
28453 ix86_expand_trunc (rtx operand0, rtx operand1)
28455 /* C code for SSE variant we expand below.
28456 double xa = fabs (x), x2;
28457 if (!isless (xa, TWO52))
28459 x2 = (double)(long)x;
28460 if (HONOR_SIGNED_ZEROS (mode))
28461 return copysign (x2, x);
28464 enum machine_mode mode = GET_MODE (operand0);
28465 rtx xa, xi, TWO52, label, res, mask;
28467 TWO52 = ix86_gen_TWO52 (mode);
28469 /* Temporary for holding the result, initialized to the input
28470 operand to ease control flow.  */
28471 res = gen_reg_rtx (mode);
28472 emit_move_insn (res, operand1);
28474 /* xa = abs (operand1) */
28475 xa = ix86_expand_sse_fabs (res, &mask);
28477 /* if (!isless (xa, TWO52)) goto label; */
/* |x| >= 2^52 is already integral; skip straight to the output move.  */
28478 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28480 /* x = (double)(long)x */
/* Truncation toward zero is exactly what the integer round-trip does.  */
28481 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28482 expand_fix (xi, res, 0);
28483 expand_float (res, xi, 0);
28485 if (HONOR_SIGNED_ZEROS (mode))
28486 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28488 emit_label (label);
28489 LABEL_NUSES (label) = 1;
28491 emit_move_insn (operand0, res);
28494 /* Expand SSE sequence for computing trunc from OPERAND1 storing
28497 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
28499 enum machine_mode mode = GET_MODE (operand0);
28500 rtx xa, mask, TWO52, label, one, res, smask, tmp;
28502 /* C code for SSE variant we expand below.
28503 double xa = fabs (x), x2;
28504 if (!isless (xa, TWO52))
28506 xa2 = xa + TWO52 - TWO52;
28510 x2 = copysign (xa2, x);
/* 32-bit-safe trunc: round with the 2^52 trick, subtract one where the
   rounding overshot, then restore the sign.  */
28514 TWO52 = ix86_gen_TWO52 (mode);
28516 /* Temporary for holding the result, initialized to the input
28517 operand to ease control flow.  */
28518 res = gen_reg_rtx (mode);
28519 emit_move_insn (res, operand1);
28521 /* xa = abs (operand1) */
28522 xa = ix86_expand_sse_fabs (res, &smask);
28524 /* if (!isless (xa, TWO52)) goto label; */
28525 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28527 /* res = xa + TWO52 - TWO52; */
28528 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28529 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
28530 emit_move_insn (res, tmp);
28533 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28535 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
28536 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
28537 emit_insn (gen_rtx_SET (VOIDmode, mask,
28538 gen_rtx_AND (mode, mask, one)));
28539 tmp = expand_simple_binop (mode, MINUS,
28540 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
28541 emit_move_insn (res, tmp);
28543 /* res = copysign (res, operand1) */
28544 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
28546 emit_label (label);
28547 LABEL_NUSES (label) = 1;
28549 emit_move_insn (operand0, res);
28552 /* Expand SSE sequence for computing round from OPERAND1 storing
28555 ix86_expand_round (rtx operand0, rtx operand1)
28557 /* C code for the stuff we're doing below:
28558 double xa = fabs (x);
28559 if (!isless (xa, TWO52))
28561 xa = (double)(long)(xa + nextafter (0.5, 0.0));
28562 return copysign (xa, x);
28564 enum machine_mode mode = GET_MODE (operand0);
28565 rtx res, TWO52, xa, label, xi, half, mask;
28566 const struct real_format *fmt;
28567 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28569 /* Temporary for holding the result, initialized to the input
28570 operand to ease control flow.  */
28571 res = gen_reg_rtx (mode);
28572 emit_move_insn (res, operand1);
28574 TWO52 = ix86_gen_TWO52 (mode);
28575 xa = ix86_expand_sse_fabs (res, &mask);
28576 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28578 /* load nextafter (0.5, 0.0) */
/* Using the predecessor of 0.5 avoids rounding x.49999... up to x+1
   when the addition itself rounds; ties still go away from zero.  */
28579 fmt = REAL_MODE_FORMAT (mode);
28580 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28581 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28583 /* xa = xa + 0.5 */
28584 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
28585 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
28587 /* xa = (double)(int64_t)xa */
28588 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28589 expand_fix (xi, xa, 0);
28590 expand_float (xa, xi, 0);
28592 /* res = copysign (xa, operand1) */
28593 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
28595 emit_label (label);
28596 LABEL_NUSES (label) = 1;
28598 emit_move_insn (operand0, res);
28602 /* Validate whether a SSE5 instruction is valid or not.
28603 OPERANDS is the array of operands.
28604 NUM is the number of operands.
28605 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
28606 NUM_MEMORY is the maximum number of memory operands to accept.
28607 when COMMUTATIVE is set, operand 1 and 2 can be swapped.  */
28610 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
28611 bool uses_oc0, int num_memory, bool commutative)
28617 /* Count the number of memory arguments */
/* Classify each operand; MEM_MASK gets bit i set for memory operands.  */
28620 for (i = 0; i < num; i++)
28622 enum machine_mode mode = GET_MODE (operands[i]);
28623 if (register_operand (operands[i], mode))
28626 else if (memory_operand (operands[i], mode))
28628 mem_mask |= (1 << i);
28634 rtx pattern = PATTERN (insn);
28636 /* allow 0 for pcmov */
28637 if (GET_CODE (pattern) != SET
28638 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
28640 || operands[i] != CONST0_RTX (mode))
28645 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
28646 a memory operation.  */
28647 if (num_memory < 0)
28649 num_memory = -num_memory;
28650 if ((mem_mask & (1 << (num-1))) != 0)
28652 mem_mask &= ~(1 << (num-1));
28657 /* If there were no memory operations, allow the insn */
28661 /* Do not allow the destination register to be a memory operand.  */
28662 else if (mem_mask & (1 << 0))
28665 /* If there are too many memory operations, disallow the instruction.  While
28666 the hardware only allows 1 memory reference, before register allocation
28667 for some insns, we allow two memory operations sometimes in order to allow
28668 code like the following to be optimized:
28670 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
28672 or similar cases that are vectorized into using the fmaddss
28674 else if (mem_count > num_memory)
28677 /* Don't allow more than one memory operation if not optimizing.  */
28678 else if (mem_count > 1 && !optimize)
28681 else if (num == 4 && mem_count == 1)
28683 /* formats (destination is the first argument), example fmaddss:
28684 xmm1, xmm1, xmm2, xmm3/mem
28685 xmm1, xmm1, xmm2/mem, xmm3
28686 xmm1, xmm2, xmm3/mem, xmm1
28687 xmm1, xmm2/mem, xmm3, xmm1 */
28689 return ((mem_mask == (1 << 1))
28690 || (mem_mask == (1 << 2))
28691 || (mem_mask == (1 << 3)));
28693 /* format, example pmacsdd:
28694 xmm1, xmm2, xmm3/mem, xmm1 */
28696 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
28698 return (mem_mask == (1 << 2));
28701 else if (num == 4 && num_memory == 2)
28703 /* If there are two memory operations, we can load one of the memory ops
28704 into the destination register.  This is for optimizing the
28705 multiply/add ops, which the combiner has optimized both the multiply
28706 and the add insns to have a memory operation.  We have to be careful
28707 that the destination doesn't overlap with the inputs.  */
28708 rtx op0 = operands[0];
28710 if (reg_mentioned_p (op0, operands[1])
28711 || reg_mentioned_p (op0, operands[2])
28712 || reg_mentioned_p (op0, operands[3]))
28715 /* formats (destination is the first argument), example fmaddss:
28716 xmm1, xmm1, xmm2, xmm3/mem
28717 xmm1, xmm1, xmm2/mem, xmm3
28718 xmm1, xmm2, xmm3/mem, xmm1
28719 xmm1, xmm2/mem, xmm3, xmm1
28721 For the oc0 case, we will load either operands[1] or operands[3] into
28722 operands[0], so any combination of 2 memory operands is ok.  */
/* NOTE(review): BUG — the second alternative is missing "mem_mask ==".
   "((1 << 2) | (1 << 3))" is a nonzero constant, so this expression is
   always true.  Intended:
     mem_mask == ((1 << 1) | (1 << 3))
     || mem_mask == ((1 << 2) | (1 << 3))  */
28732 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
28734 return (mem_mask == ((1 << 2) | (1 << 3)));
28737 else if (num == 3 && num_memory == 1)
28739 /* formats, example protb:
28740 xmm1, xmm2, xmm3/mem
28741 xmm1, xmm2/mem, xmm3 */
28743 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
28745 /* format, example comeq:
28746 xmm1, xmm2, xmm3/mem */
28748 return (mem_mask == (1 << 2));
/* Any operand-count / num_memory combination not handled above is a
   caller error.  */
28752 gcc_unreachable ();
28758 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
28759 hardware will allow by using the destination register to load one of the
28760 memory operations.  Presently this is used by the multiply/add routines to
28761 allow 2 memory references.  */
28764 ix86_expand_sse5_multiple_memory (rtx operands[],
28766 enum machine_mode mode)
28768 rtx op0 = operands[0];
/* Preconditions: OP0 must be a non-memory register that does not overlap
   any of the inputs, otherwise loading into it would clobber them.  */
28770 || memory_operand (op0, mode)
28771 || reg_mentioned_p (op0, operands[1])
28772 || reg_mentioned_p (op0, operands[2])
28773 || reg_mentioned_p (op0, operands[3]))
28774 gcc_unreachable ();
28776 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
28777 the destination register.  */
28778 if (memory_operand (operands[1], mode))
28780 emit_move_insn (op0, operands[1]);
28783 else if (memory_operand (operands[3], mode))
28785 emit_move_insn (op0, operands[3]);
/* Caller guaranteed one of operands[1]/operands[3] is a MEM.  */
28789 gcc_unreachable ();
28795 /* Table of valid machine attributes.  */
28796 static const struct attribute_spec ix86_attribute_table[] =
28798 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
28799 /* Stdcall attribute says callee is responsible for popping arguments
28800 if they are not variable.  */
28801 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28802 /* Fastcall attribute says callee is responsible for popping arguments
28803 if they are not variable.  */
28804 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28805 /* Cdecl attribute says the callee is a normal C declaration */
28806 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28807 /* Regparm attribute specifies how many integer arguments are to be
28808 passed in registers.  */
28809 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
28810 /* Sseregparm attribute says we are using x86_64 calling conventions
28811 for FP arguments.  */
28812 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28813 /* force_align_arg_pointer says this function realigns the stack at entry.  */
/* Attribute name comes from a runtime string, hence the cast.  */
28814 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
28815 false, true, true, ix86_handle_cconv_attribute },
28816 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28817 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
28818 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
28819 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
28821 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
28822 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
28823 #ifdef SUBTARGET_ATTRIBUTE_TABLE
28824 SUBTARGET_ATTRIBUTE_TABLE,
28826 /* ms_abi and sysv_abi calling convention function attributes.  */
28827 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
28828 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
/* End-of-table sentinel — must stay last.  */
28830 { NULL, 0, 0, false, false, false, NULL }
28833 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
28835 x86_builtin_vectorization_cost (bool runtime_test)
28837 /* If the branch of the runtime test is taken - i.e. - the vectorized
28838 version is skipped - this incurs a misprediction cost (because the
28839 vectorized version is expected to be the fall-through).  So we subtract
28840 the latency of a mispredicted branch from the costs that are incured
28841 when the vectorized version is executed.
28843 TODO: The values in individual target tables have to be tuned or new
28844 fields may be needed.  For eg. on K8, the default branch path is the
28845 not-taken path.  If the taken path is predicted correctly, the minimum
28846 penalty of going down the taken-path is 1 cycle.  If the taken-path is
28847 not predicted correctly, then the minimum penalty is 10 cycles.  */
/* Negative value: a cost credit drawn from the active tuning table.  */
28851 return (-(ix86_cost->cond_taken_branch_cost));
28857 /* This function returns the calling abi specific va_list type node.
28858 It returns the FNDECL specific va_list type.  */
28861 ix86_fn_abi_va_list (tree fndecl)
/* Early-out path (condition truncated from this extract) returns the
   generic node; otherwise pick by the function's own ABI.  */
28866 return va_list_type_node;
28867 gcc_assert (fndecl != NULL_TREE);
28868 abi = ix86_function_abi ((const_tree) fndecl);
28871 return ms_va_list_type_node;
28873 return sysv_va_list_type_node;
28876 /* Returns the canonical va_list type specified by TYPE.  If there
28877 is no valid TYPE provided, it return NULL_TREE.  */
28880 ix86_canonical_va_list_type (tree type)
28884 /* Resolve references and pointers to va_list type.  */
28885 if (INDIRECT_REF_P (type))
28886 type = TREE_TYPE (type);
28887 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
28888 type = TREE_TYPE (type);
/* NOTE(review): the three stanzas below are copy-paste triplicates of the
   same unwrap-and-compare logic for the generic, sysv, and ms va_list
   nodes; a shared static helper would remove the duplication.  */
28892 wtype = va_list_type_node;
28893 gcc_assert (wtype != NULL_TREE);
28895 if (TREE_CODE (wtype) == ARRAY_TYPE)
28897 /* If va_list is an array type, the argument may have decayed
28898 to a pointer type, e.g. by being passed to another function.
28899 In that case, unwrap both types so that we can compare the
28900 underlying records.  */
28901 if (TREE_CODE (htype) == ARRAY_TYPE
28902 || POINTER_TYPE_P (htype))
28904 wtype = TREE_TYPE (wtype);
28905 htype = TREE_TYPE (htype);
28908 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
28909 return va_list_type_node;
28910 wtype = sysv_va_list_type_node;
28911 gcc_assert (wtype != NULL_TREE);
28913 if (TREE_CODE (wtype) == ARRAY_TYPE)
28915 /* If va_list is an array type, the argument may have decayed
28916 to a pointer type, e.g. by being passed to another function.
28917 In that case, unwrap both types so that we can compare the
28918 underlying records.  */
28919 if (TREE_CODE (htype) == ARRAY_TYPE
28920 || POINTER_TYPE_P (htype))
28922 wtype = TREE_TYPE (wtype);
28923 htype = TREE_TYPE (htype);
28926 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
28927 return sysv_va_list_type_node;
28928 wtype = ms_va_list_type_node;
28929 gcc_assert (wtype != NULL_TREE);
28931 if (TREE_CODE (wtype) == ARRAY_TYPE)
28933 /* If va_list is an array type, the argument may have decayed
28934 to a pointer type, e.g. by being passed to another function.
28935 In that case, unwrap both types so that we can compare the
28936 underlying records.  */
28937 if (TREE_CODE (htype) == ARRAY_TYPE
28938 || POINTER_TYPE_P (htype))
28940 wtype = TREE_TYPE (wtype);
28941 htype = TREE_TYPE (htype);
28944 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
28945 return ms_va_list_type_node;
/* No target-specific match: defer to the language-independent handling.  */
28948 return std_canonical_va_list_type (type);
28951 /* Iterate through the target-specific builtin types for va_list.
28952 IDX denotes the iterator, *PTREE is set to the result type of
28953 the va_list builtin, and *PNAME to its internal type.
28954 Returns zero if there is no element for this index, otherwise
28955 IDX should be increased upon the next call.
28956 Note, do not iterate a base builtin's name like __builtin_va_list.
28957 Used from c_common_nodes_and_builtins.  */
28960 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* Two entries: the MS and SysV va_list builtins (the IDX dispatch and
   return values are truncated from this extract).  */
28966 *ptree = ms_va_list_type_node;
28967 *pname = "__builtin_ms_va_list";
28970 *ptree = sysv_va_list_type_node;
28971 *pname = "__builtin_sysv_va_list";
28979 /* Initialize the GCC target structure. */
28980 #undef TARGET_RETURN_IN_MEMORY
28981 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
28983 #undef TARGET_ATTRIBUTE_TABLE
28984 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
28985 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28986 # undef TARGET_MERGE_DECL_ATTRIBUTES
28987 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
28990 #undef TARGET_COMP_TYPE_ATTRIBUTES
28991 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
28993 #undef TARGET_INIT_BUILTINS
28994 #define TARGET_INIT_BUILTINS ix86_init_builtins
28995 #undef TARGET_EXPAND_BUILTIN
28996 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
28998 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
28999 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
29000 ix86_builtin_vectorized_function
29002 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
29003 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
29005 #undef TARGET_BUILTIN_RECIPROCAL
29006 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
29008 #undef TARGET_ASM_FUNCTION_EPILOGUE
29009 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
29011 #undef TARGET_ENCODE_SECTION_INFO
29012 #ifndef SUBTARGET_ENCODE_SECTION_INFO
29013 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
29015 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
29018 #undef TARGET_ASM_OPEN_PAREN
29019 #define TARGET_ASM_OPEN_PAREN ""
29020 #undef TARGET_ASM_CLOSE_PAREN
29021 #define TARGET_ASM_CLOSE_PAREN ""
29023 #undef TARGET_ASM_ALIGNED_HI_OP
29024 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
29025 #undef TARGET_ASM_ALIGNED_SI_OP
29026 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
29028 #undef TARGET_ASM_ALIGNED_DI_OP
29029 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
29032 #undef TARGET_ASM_UNALIGNED_HI_OP
29033 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
29034 #undef TARGET_ASM_UNALIGNED_SI_OP
29035 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
29036 #undef TARGET_ASM_UNALIGNED_DI_OP
29037 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
29039 #undef TARGET_SCHED_ADJUST_COST
29040 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
29041 #undef TARGET_SCHED_ISSUE_RATE
29042 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
29043 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
29044 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
29045 ia32_multipass_dfa_lookahead
29047 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
29048 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
29051 #undef TARGET_HAVE_TLS
29052 #define TARGET_HAVE_TLS true
29054 #undef TARGET_CANNOT_FORCE_CONST_MEM
29055 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
29056 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
29057 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
29059 #undef TARGET_DELEGITIMIZE_ADDRESS
29060 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
29062 #undef TARGET_MS_BITFIELD_LAYOUT_P
29063 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
29066 #undef TARGET_BINDS_LOCAL_P
29067 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
29069 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29070 #undef TARGET_BINDS_LOCAL_P
29071 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
29074 #undef TARGET_ASM_OUTPUT_MI_THUNK
29075 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
29076 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
29077 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
29079 #undef TARGET_ASM_FILE_START
29080 #define TARGET_ASM_FILE_START x86_file_start
/* Default -m flag bits: the architecture default, the subtarget's
   additions, and the TLS direct-seg-refs default.  The first operand
   "(TARGET_DEFAULT" was missing, leaving the expansion starting with
   '|' and closing an unopened parenthesis.  */
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
/* Command-line option processing.  */
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

/* RTL expression and addressing-mode cost estimates.  */
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

/* Condition-code register handling for the cmpelim/postreload passes.  */
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

/* Machine-dependent reorganization pass, run late.  */
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

/* va_list construction and variadic-argument expansion (the 64-bit ABI
   uses a record va_list; the per-function-ABI variants select between
   SysV and MS forms).  */
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

/* Registers implicitly clobbered by asm statements.  */
#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

/* Argument-passing conventions.  */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer

/* Dynamic stack realignment support (DRAP register etc.).  */
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx

/* Dwarf CFI for prologue UNSPECs, and strict named-argument semantics.  */
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

/* Gimplification of va_arg expressions.  */
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

/* Which scalar/vector machine modes this target supports, and literal
   suffix handling (e.g. TFmode for 'q'/'w' suffixes).  */
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
/* Emit a DTP-relative (thread-local) dwarf relocation.
   NOTE(review): upstream wraps this pair in #ifdef HAVE_AS_TLS, since the
   directive requires assembler TLS support — confirm the guard wasn't
   lost here.  */
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
/* Let the subtarget (e.g. PE, Darwin) inject extra decl attributes.
   The #endif was missing, leaving this #ifdef unterminated so all
   following unconditional definitions were swallowed when
   SUBTARGET_INSERT_ATTRIBUTES is undefined.  */
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
/* C++ name mangling for target-specific types (e.g. __float128).  */
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

/* Code emitted when a stack-protector check fails.  */
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

/* Where function return values live.  */
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

/* Secondary-reload class selection for awkward register moves.  */
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

/* Per-operation cost model queried by the auto-vectorizer.  */
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost

/* Support for per-function target options (attribute((target(...))) /
   #pragma GCC target): switching, saving, restoring, dumping, and the
   cross-ISA inlining check.  */
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_CAN_INLINE_P
#define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
/* The one target-hook vector for this backend: TARGET_INITIALIZER expands
   to an initializer built from all the TARGET_* macros defined above.  */
struct gcc_target targetm = TARGET_INITIALIZER;

/* Generated garbage-collector roots for this file.  */
#include "gt-i386.h"