/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Provide a default when the target configuration headers have not
   defined CHECK_STACK_LIMIT; -1 is the "no limit" sentinel used by
   the stack-checking code in this file.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QImode..DImode map to slots 0..3; any other mode (e.g. a float
   mode) falls through to the final "other" slot 4.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   So a size cost of N "bytes" scales as N * 2 in the same units.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop strategy entry for tables that have no 64-bit
   (or no specialized) variant: always fall back to a library call.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
79 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
/* Instruction costs used when tuning for the Intel Pentium 4.  All values
   are relative to the cost of an add (see the COSTS_N_INSNS comment near
   the top of this file).  */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
/* Instruction costs used when tuning for Nocona (64-bit Pentium 4 class).
   Values are relative to the cost of an add.  */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
/* Instruction costs used when tuning for Intel Core 2.  Values are
   relative to the cost of an add.  */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
995 2, /* cost of moving MMX register */
996 {6, 6}, /* cost of loading MMX registers
997 in SImode and DImode */
998 {4, 4}, /* cost of storing MMX registers
999 in SImode and DImode */
1000 2, /* cost of moving SSE register */
1001 {6, 6, 6}, /* cost of loading SSE registers
1002 in SImode, DImode and TImode */
1003 {4, 4, 4}, /* cost of storing SSE registers
1004 in SImode, DImode and TImode */
1005 2, /* MMX or SSE register to integer */
1006 32, /* size of l1 cache. */
1007 2048, /* size of l2 cache. */
1008 128, /* size of prefetch block */
1009 8, /* number of parallel prefetches */
1010 3, /* Branch cost */
1011 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1012 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1013 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1014 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1015 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1016 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1017 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1018 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1020 {{libcall, {{8, loop}, {15, unrolled_loop},
1021 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022 {libcall, {{24, loop}, {32, unrolled_loop},
1023 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024 1, /* scalar_stmt_cost. */
1025 1, /* scalar load_cost. */
1026 1, /* scalar_store_cost. */
1027 1, /* vec_stmt_cost. */
1028 1, /* vec_to_scalar_cost. */
1029 1, /* scalar_to_vec_cost. */
1030 1, /* vec_align_load_cost. */
1031 2, /* vec_unalign_load_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1037 /* Generic64 should produce code tuned for Nocona and K8. */
1039 struct processor_costs generic64_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 /* On all chips taken into consideration lea is 2 cycles and more. With
1042 this cost however our current implementation of synth_mult results in
1043 use of unnecessary temporary registers causing regression on several
1044 SPECfp benchmarks. */
1045 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1046 COSTS_N_INSNS (1), /* variable shift costs */
1047 COSTS_N_INSNS (1), /* constant shift costs */
1048 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1049 COSTS_N_INSNS (4), /* HI */
1050 COSTS_N_INSNS (3), /* SI */
1051 COSTS_N_INSNS (4), /* DI */
1052 COSTS_N_INSNS (2)}, /* other */
1053 0, /* cost of multiply per each bit set */
1054 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1055 COSTS_N_INSNS (26), /* HI */
1056 COSTS_N_INSNS (42), /* SI */
1057 COSTS_N_INSNS (74), /* DI */
1058 COSTS_N_INSNS (74)}, /* other */
1059 COSTS_N_INSNS (1), /* cost of movsx */
1060 COSTS_N_INSNS (1), /* cost of movzx */
1061 8, /* "large" insn */
1062 17, /* MOVE_RATIO */
1063 4, /* cost for loading QImode using movzbl */
1064 {4, 4, 4}, /* cost of loading integer registers
1065 in QImode, HImode and SImode.
1066 Relative to reg-reg move (2). */
1067 {4, 4, 4}, /* cost of storing integer registers */
1068 4, /* cost of reg,reg fld/fst */
1069 {12, 12, 12}, /* cost of loading fp registers
1070 in SFmode, DFmode and XFmode */
1071 {6, 6, 8}, /* cost of storing fp registers
1072 in SFmode, DFmode and XFmode */
1073 2, /* cost of moving MMX register */
1074 {8, 8}, /* cost of loading MMX registers
1075 in SImode and DImode */
1076 {8, 8}, /* cost of storing MMX registers
1077 in SImode and DImode */
1078 2, /* cost of moving SSE register */
1079 {8, 8, 8}, /* cost of loading SSE registers
1080 in SImode, DImode and TImode */
1081 {8, 8, 8}, /* cost of storing SSE registers
1082 in SImode, DImode and TImode */
1083 5, /* MMX or SSE register to integer */
1084 32, /* size of l1 cache. */
1085 512, /* size of l2 cache. */
1086 64, /* size of prefetch block */
1087 6, /* number of parallel prefetches */
1088 /* Benchmarks show large regressions on K8 sixtrack benchmark when this value
1089 is increased to perhaps more appropriate value of 5. */
1090 3, /* Branch cost */
1091 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 1, /* scalar_stmt_cost. */
1102 1, /* scalar load_cost. */
1103 1, /* scalar_store_cost. */
1104 1, /* vec_stmt_cost. */
1105 1, /* vec_to_scalar_cost. */
1106 1, /* scalar_to_vec_cost. */
1107 1, /* vec_align_load_cost. */
1108 2, /* vec_unalign_load_cost. */
1109 1, /* vec_store_cost. */
1110 3, /* cond_taken_branch_cost. */
1111 1, /* cond_not_taken_branch_cost. */
1114 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1116 struct processor_costs generic32_cost = {
1117 COSTS_N_INSNS (1), /* cost of an add instruction */
1118 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1119 COSTS_N_INSNS (1), /* variable shift costs */
1120 COSTS_N_INSNS (1), /* constant shift costs */
1121 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1122 COSTS_N_INSNS (4), /* HI */
1123 COSTS_N_INSNS (3), /* SI */
1124 COSTS_N_INSNS (4), /* DI */
1125 COSTS_N_INSNS (2)}, /* other */
1126 0, /* cost of multiply per each bit set */
1127 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1128 COSTS_N_INSNS (26), /* HI */
1129 COSTS_N_INSNS (42), /* SI */
1130 COSTS_N_INSNS (74), /* DI */
1131 COSTS_N_INSNS (74)}, /* other */
1132 COSTS_N_INSNS (1), /* cost of movsx */
1133 COSTS_N_INSNS (1), /* cost of movzx */
1134 8, /* "large" insn */
1135 17, /* MOVE_RATIO */
1136 4, /* cost for loading QImode using movzbl */
1137 {4, 4, 4}, /* cost of loading integer registers
1138 in QImode, HImode and SImode.
1139 Relative to reg-reg move (2). */
1140 {4, 4, 4}, /* cost of storing integer registers */
1141 4, /* cost of reg,reg fld/fst */
1142 {12, 12, 12}, /* cost of loading fp registers
1143 in SFmode, DFmode and XFmode */
1144 {6, 6, 8}, /* cost of storing fp registers
1145 in SFmode, DFmode and XFmode */
1146 2, /* cost of moving MMX register */
1147 {8, 8}, /* cost of loading MMX registers
1148 in SImode and DImode */
1149 {8, 8}, /* cost of storing MMX registers
1150 in SImode and DImode */
1151 2, /* cost of moving SSE register */
1152 {8, 8, 8}, /* cost of loading SSE registers
1153 in SImode, DImode and TImode */
1154 {8, 8, 8}, /* cost of storing SSE registers
1155 in SImode, DImode and TImode */
1156 5, /* MMX or SSE register to integer */
1157 32, /* size of l1 cache. */
1158 256, /* size of l2 cache. */
1159 64, /* size of prefetch block */
1160 6, /* number of parallel prefetches */
1161 3, /* Branch cost */
1162 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy / memset algorithm tables -- presumably only the first (32-bit)
   column matters for this 32-bit-only tuning; the other entries are
   DUMMY_STRINGOP_ALGS.  TODO(review): confirm column meaning against
   struct stringop_algs.  */
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 1, /* scalar_stmt_cost. */
1173 1, /* scalar load_cost. */
1174 1, /* scalar_store_cost. */
1175 1, /* vec_stmt_cost. */
1176 1, /* vec_to_scalar_cost. */
1177 1, /* scalar_to_vec_cost. */
1178 1, /* vec_align_load_cost. */
1179 2, /* vec_unalign_load_cost. */
1180 1, /* vec_store_cost. */
1181 3, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
/* Cost table currently in effect.  Defaults to pentium_cost; presumably
   re-pointed at the selected processor's table during option processing
   (not visible in this fragment).  */
1185 const struct processor_costs *ix86_cost = &pentium_cost;
1187 /* Processor feature/optimization bitmasks. */
1188 #define m_386 (1<<PROCESSOR_I386)
1189 #define m_486 (1<<PROCESSOR_I486)
1190 #define m_PENT (1<<PROCESSOR_PENTIUM)
1191 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1192 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1193 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1194 #define m_CORE2 (1<<PROCESSOR_CORE2)
1196 #define m_GEODE (1<<PROCESSOR_GEODE)
1197 #define m_K6 (1<<PROCESSOR_K6)
/* Convenience unions of the single-processor bits above.  */
1198 #define m_K6_GEODE (m_K6 | m_GEODE)
1199 #define m_K8 (1<<PROCESSOR_K8)
1200 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1201 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1202 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
/* All AMD targets that are tuned as a group (Athlon, K8, Family 10h).  */
1203 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1205 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1206 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1208 /* Generic instruction choice should be common subset of supported CPUs
1209 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1210 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1212 /* Feature tests against the various tunings. */
1213 unsigned char ix86_tune_features[X86_TUNE_LAST];
1215 /* Feature tests against the various tunings used to create ix86_tune_features
1216 based on the processor mask. */
/* Each initializer below is the processor-mask for the X86_TUNE_* feature
   named in the comment preceding it; entry order follows the X86_TUNE_*
   enumeration (the array is sized X86_TUNE_LAST).  */
1217 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1218 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1219 negatively, so enabling for Generic64 seems like good code size
1220 tradeoff. We can't enable it for 32bit generic because it does not
1221 work well with PPro base chips. */
1222 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1224 /* X86_TUNE_PUSH_MEMORY */
1225 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1226 | m_NOCONA | m_CORE2 | m_GENERIC,
1228 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1231 /* X86_TUNE_USE_BIT_TEST */
1234 /* X86_TUNE_UNROLL_STRLEN */
1235 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1237 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1238 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1240 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1241 on simulation result. But after P4 was made, no performance benefit
1242 was observed with branch hints. It also increases the code size.
1243 As a result, icc never generates branch hints. */
1246 /* X86_TUNE_DOUBLE_WITH_ADD */
1249 /* X86_TUNE_USE_SAHF */
1250 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1251 | m_NOCONA | m_CORE2 | m_GENERIC,
1253 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1254 partial dependencies. */
1255 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1256 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1258 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1259 register stalls on Generic32 compilation setting as well. However
1260 in current implementation the partial register stalls are not eliminated
1261 very well - they can be introduced via subregs synthesized by combine
1262 and can happen in caller/callee saving sequences. Because this option
1263 pays back little on PPro based chips and is in conflict with partial reg
1264 dependencies used by Athlon/P4 based chips, it is better to leave it off
1265 for generic32 for now. */
1268 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1269 m_CORE2 | m_GENERIC,
1271 /* X86_TUNE_USE_HIMODE_FIOP */
1272 m_386 | m_486 | m_K6_GEODE,
1274 /* X86_TUNE_USE_SIMODE_FIOP */
1275 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1277 /* X86_TUNE_USE_MOV0 */
1280 /* X86_TUNE_USE_CLTD */
1281 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1283 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1286 /* X86_TUNE_SPLIT_LONG_MOVES */
1289 /* X86_TUNE_READ_MODIFY_WRITE */
1292 /* X86_TUNE_READ_MODIFY */
1295 /* X86_TUNE_PROMOTE_QIMODE */
1296 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1297 | m_GENERIC /* | m_PENT4 ? */,
1299 /* X86_TUNE_FAST_PREFIX */
1300 ~(m_PENT | m_486 | m_386),
1302 /* X86_TUNE_SINGLE_STRINGOP */
1303 m_386 | m_PENT4 | m_NOCONA,
1305 /* X86_TUNE_QIMODE_MATH */
1308 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1309 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1310 might be considered for Generic32 if our scheme for avoiding partial
1311 stalls was more effective. */
1314 /* X86_TUNE_PROMOTE_QI_REGS */
1317 /* X86_TUNE_PROMOTE_HI_REGS */
1320 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1321 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1323 /* X86_TUNE_ADD_ESP_8 */
1324 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1325 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1327 /* X86_TUNE_SUB_ESP_4 */
1328 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1330 /* X86_TUNE_SUB_ESP_8 */
1331 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1332 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1334 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1335 for DFmode copies */
1336 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1337 | m_GENERIC | m_GEODE),
1339 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1340 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1342 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1343 conflict here in between PPro/Pentium4 based chips that treat 128bit
1344 SSE registers as single units versus K8 based chips that divide SSE
1345 registers to two 64bit halves. This knob promotes all store destinations
1346 to be 128bit to allow register renaming on 128bit SSE units, but usually
1347 results in one extra microop on 64bit SSE units. Experimental results
1348 show that disabling this option on P4 brings over 20% SPECfp regression,
1349 while enabling it on K8 brings roughly 2.4% regression that can be partly
1350 masked by careful scheduling of moves. */
1351 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1353 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1356 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1357 are resolved on SSE register parts instead of whole registers, so we may
1358 maintain just lower part of scalar values in proper format leaving the
1359 upper part undefined. */
1362 /* X86_TUNE_SSE_TYPELESS_STORES */
1365 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1366 m_PPRO | m_PENT4 | m_NOCONA,
1368 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1369 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1371 /* X86_TUNE_PROLOGUE_USING_MOVE */
1372 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1374 /* X86_TUNE_EPILOGUE_USING_MOVE */
1375 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1377 /* X86_TUNE_SHIFT1 */
1380 /* X86_TUNE_USE_FFREEP */
1383 /* X86_TUNE_INTER_UNIT_MOVES */
1384 ~(m_AMD_MULTIPLE | m_GENERIC),
1386 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1389 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1390 than 4 branch instructions in the 16 byte window. */
1391 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1393 /* X86_TUNE_SCHEDULE */
1394 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1396 /* X86_TUNE_USE_BT */
1397 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1399 /* X86_TUNE_USE_INCDEC */
1400 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1402 /* X86_TUNE_PAD_RETURNS */
1403 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1405 /* X86_TUNE_EXT_80387_CONSTANTS */
1406 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1408 /* X86_TUNE_SHORTEN_X87_SSE */
1411 /* X86_TUNE_AVOID_VECTOR_DECODE */
1414 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1415 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1418 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1419 vector path on AMD machines. */
1420 m_K8 | m_GENERIC64 | m_AMDFAM10,
1422 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1424 m_K8 | m_GENERIC64 | m_AMDFAM10,
1426 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1430 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1431 but one byte longer. */
1434 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1435 operand that cannot be represented using a modRM byte. The XOR
1436 replacement is long decoded, so this split helps here as well. */
1439 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1440 from integer to FP. */
1443 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1444 with a subsequent conditional jump instruction into a single
1445 compare-and-branch uop. */
1449 /* Feature tests against the various architecture variations. */
1450 unsigned char ix86_arch_features[X86_ARCH_LAST];
1452 /* Feature tests against the various architecture variations, used to create
1453 ix86_arch_features based on the processor mask. */
1454 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1455 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1456 ~(m_386 | m_486 | m_PENT | m_K6),
1458 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1461 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1464 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1467 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Processors on which accumulating outgoing arguments is preferred over
   push/pop sequences -- presumably consulted during option processing;
   confirm at the use site (not visible in this fragment).  */
1471 static const unsigned int x86_accumulate_outgoing_args
1472 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1474 static const unsigned int x86_arch_always_fancy_math_387
1475 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1476 | m_NOCONA | m_CORE2 | m_GENERIC;
/* String-operation algorithm explicitly requested by the user, if any
   (no_stringop means no override).  TODO(review): confirm at use sites.  */
1478 static enum stringop_alg stringop_alg = no_stringop;
1480 /* In case the average insn count for single function invocation is
1481 lower than this constant, emit fast (but longer) prologue and
1483 #define FAST_PROLOGUE_INSN_COUNT 20
1485 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1486 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1487 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1488 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1490 /* Array of the smallest class containing reg number REGNO, indexed by
1491 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1493 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1495 /* ax, dx, cx, bx */
1496 AREG, DREG, CREG, BREG,
1497 /* si, di, bp, sp */
1498 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* FP stack registers st(0) .. st(7) */
1500 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1501 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1504 /* flags, fpsr, fpcr, frame */
1505 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* SSE registers */
1507 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* MMX registers */
1510 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
/* REX (extended integer) registers */
1513 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1514 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1515 /* SSE REX registers */
1516 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1520 /* The "default" register map used in 32bit mode. */
1522 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1524 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1525 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1526 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1527 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1528 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1529 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1530 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* GCC register numbers of the integer argument registers, in the order
   the x86-64 (SysV) calling convention assigns them: rdi, rsi, rdx,
   rcx, r8, r9.  */
1533 static int const x86_64_int_parameter_registers[6] =
1535 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1536 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* Integer argument registers for the Microsoft x64 calling convention:
   rcx, rdx, r8, r9.  */
1539 static int const x86_64_ms_abi_int_parameter_registers[4] =
1541 2 /*RCX*/, 1 /*RDX*/,
1542 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* GCC register numbers of the integer value-return registers
   (rax, rdx, rdi, rsi).  */
1545 static int const x86_64_int_return_registers[4] =
1547 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1550 /* The "default" register map used in 64bit mode. */
1551 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1553 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1554 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1555 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1556 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1557 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1558 8,9,10,11,12,13,14,15, /* extended integer registers */
1559 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1562 /* Define the register numbers to be used in Dwarf debugging information.
1563 The SVR4 reference port C compiler uses the following register numbers
1564 in its Dwarf output code:
1565 0 for %eax (gcc regno = 0)
1566 1 for %ecx (gcc regno = 2)
1567 2 for %edx (gcc regno = 1)
1568 3 for %ebx (gcc regno = 3)
1569 4 for %esp (gcc regno = 7)
1570 5 for %ebp (gcc regno = 6)
1571 6 for %esi (gcc regno = 4)
1572 7 for %edi (gcc regno = 5)
1573 The following three DWARF register numbers are never generated by
1574 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1575 believes these numbers have these meanings.
1576 8 for %eip (no gcc equivalent)
1577 9 for %eflags (gcc regno = 17)
1578 10 for %trapno (no gcc equivalent)
1579 It is not at all clear how we should number the FP stack registers
1580 for the x86 architecture. If the version of SDB on x86/svr4 were
1581 a bit less brain dead with respect to floating-point then we would
1582 have a precedent to follow with respect to DWARF register numbers
1583 for x86 FP registers, but the SDB on x86/svr4 is so completely
1584 broken with respect to FP registers that it is hardly worth thinking
1585 of it as something to strive for compatibility with.
1586 The version of x86/svr4 SDB I have at the moment does (partially)
1587 seem to believe that DWARF register number 11 is associated with
1588 the x86 register %st(0), but that's about all. Higher DWARF
1589 register numbers don't seem to be associated with anything in
1590 particular, and even for DWARF regno 11, SDB only seems to under-
1591 stand that it should say that a variable lives in %st(0) (when
1592 asked via an `=' command) if we said it was in DWARF regno 11,
1593 but SDB still prints garbage when asked for the value of the
1594 variable in question (via a `/' command).
1595 (Also note that the labels SDB prints for various FP stack regs
1596 when doing an `x' command are all wrong.)
1597 Note that these problems generally don't affect the native SVR4
1598 C compiler because it doesn't allow the use of -O with -g and
1599 because when it is *not* optimizing, it allocates a memory
1600 location for each floating-point variable, and the memory
1601 location is what gets described in the DWARF AT_location
1602 attribute for the variable in question.
1603 Regardless of the severe mental illness of the x86/svr4 SDB, we
1604 do something sensible here and we use the following DWARF
1605 register numbers. Note that these are all stack-top-relative
1607 11 for %st(0) (gcc regno = 8)
1608 12 for %st(1) (gcc regno = 9)
1609 13 for %st(2) (gcc regno = 10)
1610 14 for %st(3) (gcc regno = 11)
1611 15 for %st(4) (gcc regno = 12)
1612 16 for %st(5) (gcc regno = 13)
1613 17 for %st(6) (gcc regno = 14)
1614 18 for %st(7) (gcc regno = 15)
/* DWARF register numbers for SVR4 SDB compatibility; the rationale for
   these values is spelled out in the long comment above.  */
1616 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1618 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1619 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1620 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1627 /* Test and compare insns in i386.md store the information needed to
1628 generate branch and scc insns here. */
/* Operands of the pending comparison.  */
1630 rtx ix86_compare_op0 = NULL_RTX;
1631 rtx ix86_compare_op1 = NULL_RTX;
/* NOTE(review): name suggests this holds the flags result when the
   comparison has already been emitted -- confirm at the use sites.  */
1632 rtx ix86_compare_emitted = NULL_RTX;
1634 /* Size of the register save area. */
1635 #define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
1637 /* Define the structure for the machine field in struct function. */
1639 struct stack_local_entry GTY(())
1641 unsigned short mode;
1644 struct stack_local_entry *next;
1647 /* Structure describing stack frame layout.
1648 Stack grows downward:
1654 saved frame pointer if frame_pointer_needed
1655 <- HARD_FRAME_POINTER
1660 [va_arg registers] (
1661 > to_allocate <- FRAME_POINTER
/* Fields of struct ix86_frame; filled in by ix86_compute_frame_layout
   (declared at the end of this fragment).  */
1671 HOST_WIDE_INT frame;
1673 int outgoing_arguments_size;
1676 HOST_WIDE_INT to_allocate;
1677 /* The offsets relative to ARG_POINTER. */
1678 HOST_WIDE_INT frame_pointer_offset;
1679 HOST_WIDE_INT hard_frame_pointer_offset;
1680 HOST_WIDE_INT stack_pointer_offset;
1682 /* When save_regs_using_mov is set, emit prologue using
1683 move instead of push instructions. */
1684 bool save_regs_using_mov;
1687 /* Code model option. */
1688 enum cmodel ix86_cmodel;
/* Assembler syntax dialect; defaults to ASM_ATT.  */
1690 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS code-generation dialect; defaults to TLS_DIALECT_GNU.  */
1692 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1694 /* Which unit we are generating floating point math for. */
1695 enum fpmath_unit ix86_fpmath;
1697 /* Which cpu are we scheduling for. */
1698 enum processor_type ix86_tune;
1700 /* Which instruction set architecture to use. */
1701 enum processor_type ix86_arch;
1703 /* true if sse prefetch instruction is not NOOP. */
1704 int x86_prefetch_sse;
1706 /* ix86_regparm_string as a number */
1707 static int ix86_regparm;
1709 /* -mstackrealign option */
1710 extern int ix86_force_align_arg_pointer;
1711 static const char ix86_force_align_arg_pointer_string[]
1712 = "force_align_arg_pointer";
/* Hooks returning the insn pattern for common operations; presumably
   pointed at the 32- or 64-bit generator once the target word size is
   known (initialization not visible in this fragment).  */
1714 static rtx (*ix86_gen_leave) (void);
1715 static rtx (*ix86_gen_pop1) (rtx);
1716 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1717 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1718 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1719 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1720 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1721 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1723 /* Preferred alignment for stack boundary in bits. */
1724 unsigned int ix86_preferred_stack_boundary;
1726 /* Alignment for incoming stack boundary in bits specified at
1728 static unsigned int ix86_user_incoming_stack_boundary;
1730 /* Default alignment for incoming stack boundary in bits. */
1731 static unsigned int ix86_default_incoming_stack_boundary;
1733 /* Alignment for incoming stack boundary in bits. */
1734 unsigned int ix86_incoming_stack_boundary;
1736 /* Values 1-5: see jump.c */
1737 int ix86_branch_cost;
1739 /* Calling abi specific va_list type nodes. */
1740 static GTY(()) tree sysv_va_list_type_node;
1741 static GTY(()) tree ms_va_list_type_node;
1743 /* Variables which are this size or smaller are put in the data/bss
1744 or ldata/lbss sections. */
1746 int ix86_section_threshold = 65536;
1748 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1749 char internal_label_prefix[16];
1750 int internal_label_prefix_len;
1752 /* Fence to use after loop using movnt. */
1755 /* Register class used for passing given 64bit part of the argument.
1756 These represent classes as documented by the PS ABI, with the exception
1757 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1758 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1760 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1761 whenever possible (upper half does contain padding). */
1762 enum x86_64_reg_class
1765 X86_64_INTEGER_CLASS,
1766 X86_64_INTEGERSI_CLASS,
1774 X86_64_COMPLEX_X87_CLASS,
1777 static const char * const x86_64_reg_class_name[] =
1779 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1780 "sseup", "x87", "x87up", "cplx87", "no"
1783 #define MAX_CLASSES 4
1785 /* Table of constants used by fldpi, fldln2, etc.... */
1786 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1787 static bool ext_80387_constants_init = 0;
/* Forward declarations for static helpers defined later in the file.  */
1790 static struct machine_function * ix86_init_machine_status (void);
1791 static rtx ix86_function_value (const_tree, const_tree, bool);
1792 static int ix86_function_regparm (const_tree, const_tree);
1793 static void ix86_compute_frame_layout (struct ix86_frame *);
1794 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
/* Indices into the per-function option-string array used by the
   attribute((option)) / target-specific function machinery.  */
1797 enum ix86_function_specific_strings
1799 IX86_FUNCTION_SPECIFIC_ARCH,
1800 IX86_FUNCTION_SPECIFIC_TUNE,
1801 IX86_FUNCTION_SPECIFIC_FPMATH,
1802 IX86_FUNCTION_SPECIFIC_MAX
1805 static char *ix86_target_string (int, int, const char *, const char *,
1806 const char *, bool);
1807 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1808 static void ix86_function_specific_save (struct cl_target_option *);
1809 static void ix86_function_specific_restore (struct cl_target_option *);
1810 static void ix86_function_specific_print (FILE *, int,
1811 struct cl_target_option *);
1812 static bool ix86_valid_option_attribute_p (tree, tree, tree, int);
1813 static bool ix86_valid_option_attribute_inner_p (tree, char *[]);
1814 static bool ix86_can_inline_p (tree, tree);
1815 static void ix86_set_current_function (tree);
1818 /* The svr4 ABI for the i386 says that records and unions are returned
1820 #ifndef DEFAULT_PCC_STRUCT_RETURN
1821 #define DEFAULT_PCC_STRUCT_RETURN 1
1824 /* Whether -mtune= or -march= were specified */
1825 static int ix86_tune_defaulted;
1826 static int ix86_arch_specified;
1828 /* Bit flags that specify the ISA we are compiling for. */
1829 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1831 /* A mask of ix86_isa_flags that includes bit X if X
1832 was set or cleared on the command line. */
1833 static int ix86_isa_flags_explicit;
1835 /* Define a set of ISAs which are available when a given ISA is
1836 enabled. MMX and SSE ISAs are handled separately. */
/* Each _SET macro expands to the named ISA bit plus all ISAs it implies,
   so enabling e.g. SSE4.2 transitively enables SSE4.1 ... SSE.  */
1838 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1839 #define OPTION_MASK_ISA_3DNOW_SET \
1840 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1842 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1843 #define OPTION_MASK_ISA_SSE2_SET \
1844 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1845 #define OPTION_MASK_ISA_SSE3_SET \
1846 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1847 #define OPTION_MASK_ISA_SSSE3_SET \
1848 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1849 #define OPTION_MASK_ISA_SSE4_1_SET \
1850 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1851 #define OPTION_MASK_ISA_SSE4_2_SET \
1852 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1853 #define OPTION_MASK_ISA_AVX_SET \
1854 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1855 #define OPTION_MASK_ISA_FMA_SET \
1856 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1858 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1860 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
/* AMD-side extensions hang off SSE3 rather than the SSE4.x chain.  */
1862 #define OPTION_MASK_ISA_SSE4A_SET \
1863 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1864 #define OPTION_MASK_ISA_SSE5_SET \
1865 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1867 /* AES and PCLMUL need SSE2 because they use xmm registers */
1868 #define OPTION_MASK_ISA_AES_SET \
1869 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1870 #define OPTION_MASK_ISA_PCLMUL_SET \
1871 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
/* ABM implies POPCNT; the remaining ISAs imply nothing further.  */
1873 #define OPTION_MASK_ISA_ABM_SET \
1874 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1875 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1876 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1877 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1879 /* Define a set of ISAs which aren't available when a given ISA is
1880 disabled. MMX and SSE ISAs are handled separately. */
/* The _UNSET macros are the mirror image of the _SET macros: disabling an
   ISA also disables everything that depends on it.  */
1882 #define OPTION_MASK_ISA_MMX_UNSET \
1883 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1884 #define OPTION_MASK_ISA_3DNOW_UNSET \
1885 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1886 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1888 #define OPTION_MASK_ISA_SSE_UNSET \
1889 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1890 #define OPTION_MASK_ISA_SSE2_UNSET \
1891 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
/* SSE3 has two dependents: the SSSE3 chain and the AMD SSE4A chain.  */
1892 #define OPTION_MASK_ISA_SSE3_UNSET \
1893 (OPTION_MASK_ISA_SSE3 \
1894 | OPTION_MASK_ISA_SSSE3_UNSET \
1895 | OPTION_MASK_ISA_SSE4A_UNSET )
1896 #define OPTION_MASK_ISA_SSSE3_UNSET \
1897 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1898 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1899 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1900 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1901 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1902 #define OPTION_MASK_ISA_AVX_UNSET \
1903 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1904 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
1906 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
1908 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1910 #define OPTION_MASK_ISA_SSE4A_UNSET \
1911 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1912 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1913 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
1914 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
1915 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
1916 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
1917 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
1918 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
1920 /* Vectorization library interface and handlers. */
1921 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1922 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1923 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1925 /* Processor target table, indexed by processor number */
/* struct ptt: per-processor cost table pointer and default code alignments
   (loops/jumps/functions plus max bytes of padding to skip).  */
1928 const struct processor_costs *cost; /* Processor costs */
1929 const int align_loop; /* Default alignments. */
1930 const int align_loop_max_skip;
1931 const int align_jump;
1932 const int align_jump_max_skip;
1933 const int align_func;
/* NOTE(review): row order must match enum processor_type; a zero alignment
   row means "use the compiler-wide default".  */
1936 static const struct ptt processor_target_table[PROCESSOR_max] =
1938 {&i386_cost, 4, 3, 4, 3, 4},
1939 {&i486_cost, 16, 15, 16, 15, 16},
1940 {&pentium_cost, 16, 7, 16, 7, 16},
1941 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1942 {&geode_cost, 0, 0, 0, 0, 0},
1943 {&k6_cost, 32, 7, 32, 7, 32},
1944 {&athlon_cost, 16, 7, 16, 7, 16},
1945 {&pentium4_cost, 0, 0, 0, 0, 0},
1946 {&k8_cost, 16, 7, 16, 7, 16},
1947 {&nocona_cost, 0, 0, 0, 0, 0},
1948 {&core2_cost, 16, 10, 16, 10, 16},
1949 {&generic32_cost, 16, 7, 16, 7, 16},
1950 {&generic64_cost, 16, 10, 16, 10, 16},
1951 {&amdfam10_cost, 32, 24, 32, 7, 32}
1954 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1979 /* Implement TARGET_HANDLE_OPTION. */
/* NOTE(review): the switch statement and its OPT_m* case labels are elided
   from this sampled listing; each pair of statements below is the body of
   one case.  The pattern is uniform: on -mFOO apply the _SET mask (the ISA
   plus everything it implies), on -mno-FOO clear the _UNSET mask (the ISA
   plus everything depending on it), and in both directions record the bits
   in ix86_isa_flags_explicit so override_options won't override a user
   choice with -march defaults.  */
1982 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1989 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1990 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1994 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1995 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2002 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2003 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2007 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2008 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2018 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2019 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2023 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2024 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2031 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2032 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2036 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2037 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2044 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2045 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2049 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2050 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2057 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2058 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2062 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2063 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2070 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2071 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2075 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2076 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2083 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2084 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2088 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2089 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2096 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2097 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2101 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2102 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2109 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2110 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2114 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2115 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2120 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2121 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2125 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2126 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2132 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2133 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2137 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2138 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2145 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2146 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2150 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2151 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2158 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2159 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2163 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2164 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2171 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2172 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2176 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2177 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2184 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2185 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2189 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2190 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2197 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2198 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2202 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2203 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2210 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2211 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2215 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2216 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2223 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2224 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2228 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2229 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2238 /* Return a string the documents the current -m options. The caller is
2239 responsible for freeing the string. */
/* Builds a space/newline-separated list of the -m options implied by ISA
   and FLAGS, plus -march/-mtune/-mfpmath when given.  ADD_NL_P selects
   multi-line formatting (used by the debug dump).  */
2242 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2243 const char *fpmath, bool add_nl_p)
2245 struct ix86_target_opts
2247 const char *option; /* option string */
2248 int mask; /* isa mask options */
2251 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2252 preceding options while match those first. */
2253 static struct ix86_target_opts isa_opts[] =
2255 { "-m64", OPTION_MASK_ISA_64BIT },
2256 { "-msse5", OPTION_MASK_ISA_SSE5 },
2257 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2258 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2259 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2260 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2261 { "-msse3", OPTION_MASK_ISA_SSE3 },
2262 { "-msse2", OPTION_MASK_ISA_SSE2 },
2263 { "-msse", OPTION_MASK_ISA_SSE },
2264 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2265 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2266 { "-mmmx", OPTION_MASK_ISA_MMX },
2267 { "-mabm", OPTION_MASK_ISA_ABM },
2268 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2269 { "-maes", OPTION_MASK_ISA_AES },
2270 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
/* Non-ISA target_flags bits and their option spellings.  */
2274 static struct ix86_target_opts flag_opts[] =
2276 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2277 { "-m80387", MASK_80387 },
2278 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2279 { "-malign-double", MASK_ALIGN_DOUBLE },
2280 { "-mcld", MASK_CLD },
2281 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2282 { "-mieee-fp", MASK_IEEE_FP },
2283 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2284 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2285 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2286 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2287 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2288 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2289 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2290 { "-mno-red-zone", MASK_NO_RED_ZONE },
2291 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2292 { "-mrecip", MASK_RECIP },
2293 { "-mrtd", MASK_RTD },
2294 { "-msseregparm", MASK_SSEREGPARM },
2295 { "-mstack-arg-probe", MASK_STACK_PROBE },
2296 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
/* opts[][2] holds {prefix, argument} pairs; most entries use only [0].  */
2299 const char *opts[ (sizeof (isa_opts) / sizeof (isa_opts[0])
2300 + sizeof (flag_opts) / sizeof (flag_opts[0])
2304 char target_other[40];
2313 memset (opts, '\0', sizeof (opts));
2315 /* Add -march= option. */
2318 opts[num][0] = "-march=";
2319 opts[num++][1] = arch;
2322 /* Add -mtune= option. */
2325 opts[num][0] = "-mtune=";
2326 opts[num++][1] = tune;
2329 /* Pick out the options in isa options. */
/* Each matched mask is stripped from ISA so leftovers can be reported.  */
2330 for (i = 0; i < sizeof (isa_opts) / sizeof (isa_opts[0]); i++)
2332 if ((isa & isa_opts[i].mask) != 0)
2334 opts[num++][0] = isa_opts[i].option;
2335 isa &= ~ isa_opts[i].mask;
2339 if (isa && add_nl_p)
2341 opts[num++][0] = isa_other;
2342 sprintf (isa_other, "(other isa: 0x%x)", isa);
2345 /* Add flag options. */
2346 for (i = 0; i < sizeof (flag_opts) / sizeof (flag_opts[0]); i++)
2348 if ((flags & flag_opts[i].mask) != 0)
2350 opts[num++][0] = flag_opts[i].option;
2351 flags &= ~ flag_opts[i].mask;
2355 if (flags && add_nl_p)
2357 opts[num++][0] = target_other;
/* Fix: report the leftover target FLAGS bits here, not the ISA bits --
   the isa remainder was already formatted into isa_other above.  */
2358 sprintf (target_other, "(other flags: 0x%x)", flags);
2361 /* Add -fpmath= option. */
2364 opts[num][0] = "-mfpmath=";
2365 opts[num++][1] = fpmath;
2372 gcc_assert (num < sizeof (opts) / sizeof (opts[0]));
2374 /* Size the string. */
/* Separator is " \\\n" (3 chars) in multi-line mode, " " otherwise.  */
2376 sep_len = (add_nl_p) ? 3 : 1;
2377 for (i = 0; i < num; i++)
2380 for (j = 0; j < 2; j++)
2382 len += strlen (opts[i][j]);
2385 /* Build the string. */
2386 ret = ptr = (char *) xmalloc (len);
2389 for (i = 0; i < num; i++)
2393 for (j = 0; j < 2; j++)
2394 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
/* Wrap lines at ~70 columns when multi-line output was requested.  */
2401 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2409 for (j = 0; j < 2; j++)
2412 memcpy (ptr, opts[i][j], len2[j]);
2414 line_len += len2[j];
2419 gcc_assert (ret + len >= ptr);
2424 /* Function that is callable from the debugger to print the current
/* Debug helper: dumps the current target options to stderr.  */
2427 ix86_debug_options (void)
2429 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2430 ix86_arch_string, ix86_tune_string,
2431 ix86_fpmath_string, true);
2435 fprintf (stderr, "%s\n\n", opts);
2439 fprintf (stderr, "<no options>\n\n");
2444 /* Sometimes certain combinations of command options do not make
2445 sense on a particular target machine. You can define a macro
2446 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2447 defined, is executed once just after all the command options have
2450 Don't use this macro to turn on various extra optimizations for
2451 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2454 override_options (bool main_args_p)
2457 unsigned int ix86_arch_mask, ix86_tune_mask;
2462 /* Comes from final.c -- no real reason to change it. */
2463 #define MAX_CODE_ALIGN 16
2471 PTA_PREFETCH_SSE = 1 << 4,
2473 PTA_3DNOW_A = 1 << 6,
2477 PTA_POPCNT = 1 << 10,
2479 PTA_SSE4A = 1 << 12,
2480 PTA_NO_SAHF = 1 << 13,
2481 PTA_SSE4_1 = 1 << 14,
2482 PTA_SSE4_2 = 1 << 15,
2485 PTA_PCLMUL = 1 << 18,
2492 const char *const name; /* processor name or nickname. */
2493 const enum processor_type processor;
2494 const unsigned /*enum pta_flags*/ flags;
2496 const processor_alias_table[] =
2498 {"i386", PROCESSOR_I386, 0},
2499 {"i486", PROCESSOR_I486, 0},
2500 {"i586", PROCESSOR_PENTIUM, 0},
2501 {"pentium", PROCESSOR_PENTIUM, 0},
2502 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2503 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2504 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2505 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2506 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2507 {"i686", PROCESSOR_PENTIUMPRO, 0},
2508 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2509 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2510 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2511 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2512 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2513 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2514 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2515 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2516 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2517 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2518 | PTA_CX16 | PTA_NO_SAHF)},
2519 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2520 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2523 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2524 |PTA_PREFETCH_SSE)},
2525 {"k6", PROCESSOR_K6, PTA_MMX},
2526 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2527 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2528 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2529 | PTA_PREFETCH_SSE)},
2530 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2531 | PTA_PREFETCH_SSE)},
2532 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2534 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2536 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2538 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2539 | PTA_MMX | PTA_SSE | PTA_SSE2
2541 {"k8", PROCESSOR_K8, (PTA_64BIT
2542 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2543 | PTA_SSE | PTA_SSE2
2545 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2546 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2547 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2549 {"opteron", PROCESSOR_K8, (PTA_64BIT
2550 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2551 | PTA_SSE | PTA_SSE2
2553 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2554 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2555 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2557 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2558 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2559 | PTA_SSE | PTA_SSE2
2561 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2562 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2563 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2565 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2566 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2567 | PTA_SSE | PTA_SSE2
2569 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2570 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2571 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2573 | PTA_CX16 | PTA_ABM)},
2574 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2575 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2576 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2578 | PTA_CX16 | PTA_ABM)},
2579 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2580 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2583 int const pta_size = ARRAY_SIZE (processor_alias_table);
2585 /* Set up prefix/suffix so the error messages refer to either the command
2586 line argument, or the attribute(option). */
2595 prefix = "option(\"";
2600 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2601 SUBTARGET_OVERRIDE_OPTIONS;
2604 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2605 SUBSUBTARGET_OVERRIDE_OPTIONS;
2608 /* -fPIC is the default for x86_64. */
2609 if (TARGET_MACHO && TARGET_64BIT)
2612 /* Set the default values for switches whose default depends on TARGET_64BIT
2613 in case they weren't overwritten by command line options. */
2616 /* Mach-O doesn't support omitting the frame pointer for now. */
2617 if (flag_omit_frame_pointer == 2)
2618 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2619 if (flag_asynchronous_unwind_tables == 2)
2620 flag_asynchronous_unwind_tables = 1;
2621 if (flag_pcc_struct_return == 2)
2622 flag_pcc_struct_return = 0;
2626 if (flag_omit_frame_pointer == 2)
2627 flag_omit_frame_pointer = 0;
2628 if (flag_asynchronous_unwind_tables == 2)
2629 flag_asynchronous_unwind_tables = 0;
2630 if (flag_pcc_struct_return == 2)
2631 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2634 /* Need to check -mtune=generic first. */
2635 if (ix86_tune_string)
2637 if (!strcmp (ix86_tune_string, "generic")
2638 || !strcmp (ix86_tune_string, "i686")
2639 /* As special support for cross compilers we read -mtune=native
2640 as -mtune=generic. With native compilers we won't see the
2641 -mtune=native, as it was changed by the driver. */
2642 || !strcmp (ix86_tune_string, "native"))
2645 ix86_tune_string = "generic64";
2647 ix86_tune_string = "generic32";
2649 /* If this call is for setting the option attribute, allow the
2650 generic32/generic64 that was previously set. */
2651 else if (!main_args_p
2652 && (!strcmp (ix86_tune_string, "generic32")
2653 || !strcmp (ix86_tune_string, "generic64")))
2655 else if (!strncmp (ix86_tune_string, "generic", 7))
2656 error ("bad value (%s) for %stune=%s %s",
2657 ix86_tune_string, prefix, suffix, sw);
2661 if (ix86_arch_string)
2662 ix86_tune_string = ix86_arch_string;
2663 if (!ix86_tune_string)
2665 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2666 ix86_tune_defaulted = 1;
2669 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2670 need to use a sensible tune option. */
2671 if (!strcmp (ix86_tune_string, "generic")
2672 || !strcmp (ix86_tune_string, "x86-64")
2673 || !strcmp (ix86_tune_string, "i686"))
2676 ix86_tune_string = "generic64";
2678 ix86_tune_string = "generic32";
2681 if (ix86_stringop_string)
2683 if (!strcmp (ix86_stringop_string, "rep_byte"))
2684 stringop_alg = rep_prefix_1_byte;
2685 else if (!strcmp (ix86_stringop_string, "libcall"))
2686 stringop_alg = libcall;
2687 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2688 stringop_alg = rep_prefix_4_byte;
2689 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2690 stringop_alg = rep_prefix_8_byte;
2691 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2692 stringop_alg = loop_1_byte;
2693 else if (!strcmp (ix86_stringop_string, "loop"))
2694 stringop_alg = loop;
2695 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2696 stringop_alg = unrolled_loop;
2698 error ("bad value (%s) for %sstringop-strategy=%s %s",
2699 ix86_stringop_string, prefix, suffix, sw);
2701 if (!strcmp (ix86_tune_string, "x86-64"))
2702 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2703 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2704 prefix, suffix, prefix, suffix, prefix, suffix);
2706 if (!ix86_arch_string)
2707 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2709 ix86_arch_specified = 1;
2711 if (!strcmp (ix86_arch_string, "generic"))
2712 error ("generic CPU can be used only for %stune=%s %s",
2713 prefix, suffix, sw);
2714 if (!strncmp (ix86_arch_string, "generic", 7))
2715 error ("bad value (%s) for %sarch=%s %s",
2716 ix86_arch_string, prefix, suffix, sw);
2718 if (ix86_cmodel_string != 0)
2720 if (!strcmp (ix86_cmodel_string, "small"))
2721 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2722 else if (!strcmp (ix86_cmodel_string, "medium"))
2723 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2724 else if (!strcmp (ix86_cmodel_string, "large"))
2725 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2727 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2728 else if (!strcmp (ix86_cmodel_string, "32"))
2729 ix86_cmodel = CM_32;
2730 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2731 ix86_cmodel = CM_KERNEL;
2733 error ("bad value (%s) for %scmodel=%s %s",
2734 ix86_cmodel_string, prefix, suffix, sw);
2738 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2739 use of rip-relative addressing. This eliminates fixups that
2740 would otherwise be needed if this object is to be placed in a
2741 DLL, and is essentially just as efficient as direct addressing. */
2742 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2743 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2744 else if (TARGET_64BIT)
2745 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2747 ix86_cmodel = CM_32;
2749 if (ix86_asm_string != 0)
2752 && !strcmp (ix86_asm_string, "intel"))
2753 ix86_asm_dialect = ASM_INTEL;
2754 else if (!strcmp (ix86_asm_string, "att"))
2755 ix86_asm_dialect = ASM_ATT;
2757 error ("bad value (%s) for %sasm=%s %s",
2758 ix86_asm_string, prefix, suffix, sw);
2760 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2761 error ("code model %qs not supported in the %s bit mode",
2762 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2763 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2764 sorry ("%i-bit mode not compiled in",
2765 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2767 for (i = 0; i < pta_size; i++)
2768 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2770 ix86_arch = processor_alias_table[i].processor;
2771 /* Default cpu tuning to the architecture. */
2772 ix86_tune = ix86_arch;
2774 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2775 error ("CPU you selected does not support x86-64 "
2778 if (processor_alias_table[i].flags & PTA_MMX
2779 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2780 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2781 if (processor_alias_table[i].flags & PTA_3DNOW
2782 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2783 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2784 if (processor_alias_table[i].flags & PTA_3DNOW_A
2785 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2786 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2787 if (processor_alias_table[i].flags & PTA_SSE
2788 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2789 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2790 if (processor_alias_table[i].flags & PTA_SSE2
2791 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2792 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2793 if (processor_alias_table[i].flags & PTA_SSE3
2794 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2795 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2796 if (processor_alias_table[i].flags & PTA_SSSE3
2797 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2798 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2799 if (processor_alias_table[i].flags & PTA_SSE4_1
2800 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2801 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2802 if (processor_alias_table[i].flags & PTA_SSE4_2
2803 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2804 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2805 if (processor_alias_table[i].flags & PTA_AVX
2806 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2807 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2808 if (processor_alias_table[i].flags & PTA_FMA
2809 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2810 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2811 if (processor_alias_table[i].flags & PTA_SSE4A
2812 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2813 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2814 if (processor_alias_table[i].flags & PTA_SSE5
2815 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2816 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2817 if (processor_alias_table[i].flags & PTA_ABM
2818 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2819 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2820 if (processor_alias_table[i].flags & PTA_CX16
2821 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2822 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2823 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2824 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2825 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2826 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2827 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2828 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2829 if (processor_alias_table[i].flags & PTA_AES
2830 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2831 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2832 if (processor_alias_table[i].flags & PTA_PCLMUL
2833 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2834 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2835 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2836 x86_prefetch_sse = true;
2842 error ("bad value (%s) for %sarch=%s %s",
2843 ix86_arch_string, prefix, suffix, sw);
2845 ix86_arch_mask = 1u << ix86_arch;
2846 for (i = 0; i < X86_ARCH_LAST; ++i)
2847 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2849 for (i = 0; i < pta_size; i++)
2850 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2852 ix86_tune = processor_alias_table[i].processor;
2853 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2855 if (ix86_tune_defaulted)
2857 ix86_tune_string = "x86-64";
2858 for (i = 0; i < pta_size; i++)
2859 if (! strcmp (ix86_tune_string,
2860 processor_alias_table[i].name))
2862 ix86_tune = processor_alias_table[i].processor;
2865 error ("CPU you selected does not support x86-64 "
2868 /* Intel CPUs have always interpreted SSE prefetch instructions as
2869 NOPs; so, we can enable SSE prefetch instructions even when
2870 -mtune (rather than -march) points us to a processor that has them.
2871 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2872 higher processors. */
2874 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2875 x86_prefetch_sse = true;
2879 error ("bad value (%s) for %stune=%s %s",
2880 ix86_tune_string, prefix, suffix, sw);
2882 ix86_tune_mask = 1u << ix86_tune;
2883 for (i = 0; i < X86_TUNE_LAST; ++i)
2884 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
/* NOTE(review): override_options () fragment (elided braces/else-arms).
   Chooses the cost model (byte counts for -Os, otherwise the tuning CPU's
   table), validates -mregparm=, then handles the obsolete -malign-loops=.  */
2887 ix86_cost = &ix86_size_cost;
2889 ix86_cost = processor_target_table[ix86_tune].cost;
2891 /* Arrange to set up i386_stack_locals for all functions. */
2892 init_machine_status = ix86_init_machine_status;
2894 /* Validate -mregparm= value. */
2895 if (ix86_regparm_string)
/* -mregparm has no effect in 64-bit mode (register passing is the ABI
   default there), so warn instead of silently accepting it.  */
2898 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2899 i = atoi (ix86_regparm_string);
2900 if (i < 0 || i > REGPARM_MAX)
2901 error ("%sregparm=%d%s is not between 0 and %d",
2902 prefix, i, suffix, REGPARM_MAX);
/* 64-bit mode always uses the maximum register-parameter count.  */
2907 ix86_regparm = REGPARM_MAX;
2909 /* If the user has provided any of the -malign-* options,
2910 warn and use that value only if -falign-* is not set.
2911 Remove this code in GCC 3.2 or later. */
2912 if (ix86_align_loops_string)
2914 warning (0, "%salign-loops%s is obsolete, use %salign-loops%s",
2915 prefix, suffix, prefix, suffix);
2916 if (align_loops == 0)
/* The option value is an exponent: alignment becomes 1 << i bytes.  */
2918 i = atoi (ix86_align_loops_string);
2919 if (i < 0 || i > MAX_CODE_ALIGN)
2920 error ("%salign-loops=%d%s is not between 0 and %d",
2921 prefix, i, suffix, MAX_CODE_ALIGN);
2923 align_loops = 1 << i;
2927 if (ix86_align_jumps_string)
2929 warning (0, "%salign-jumps%s is obsolete, use %salign-jumps%s",
2930 prefix, suffix, prefix, suffix);
2931 if (align_jumps == 0)
2933 i = atoi (ix86_align_jumps_string);
2934 if (i < 0 || i > MAX_CODE_ALIGN)
2935 error ("%salign-loops=%d%s is not between 0 and %d",
2936 prefix, i, suffix, MAX_CODE_ALIGN);
2938 align_jumps = 1 << i;
2942 if (ix86_align_funcs_string)
2944 warning (0, "%salign-functions%s is obsolete, use %salign-functions%s",
2945 prefix, suffix, prefix, suffix);
2946 if (align_functions == 0)
2948 i = atoi (ix86_align_funcs_string);
2949 if (i < 0 || i > MAX_CODE_ALIGN)
2950 error ("%salign-loops=%d%s is not between 0 and %d",
2951 prefix, i, suffix, MAX_CODE_ALIGN);
2953 align_functions = 1 << i;
/* NOTE(review): override_options () fragment (elided braces/else-arms).
   Applies processor-table alignment defaults, validates -mbranch-cost=,
   -mlarge-data-threshold=, -mtls-dialect=, -mpc{32,64,80}, then sets the
   64-bit/32-bit subtarget defaults and the FP-related target_flags.  */
2957 /* Default align_* from the processor table. */
2958 if (align_loops == 0)
2960 align_loops = processor_target_table[ix86_tune].align_loop;
2961 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2963 if (align_jumps == 0)
2965 align_jumps = processor_target_table[ix86_tune].align_jump;
2966 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2968 if (align_functions == 0)
2970 align_functions = processor_target_table[ix86_tune].align_func;
2973 /* Validate -mbranch-cost= value, or provide default. */
2974 ix86_branch_cost = ix86_cost->branch_cost;
2975 if (ix86_branch_cost_string)
2977 i = atoi (ix86_branch_cost_string);
/* NOTE(review): the 0..5 range test sits on a line elided from this view.  */
2979 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
2981 ix86_branch_cost = i;
2983 if (ix86_section_threshold_string)
2985 i = atoi (ix86_section_threshold_string);
2987 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
2989 ix86_section_threshold = i;
/* Map the -mtls-dialect= string onto the TLS_DIALECT_* enumeration.  */
2992 if (ix86_tls_dialect_string)
2994 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2995 ix86_tls_dialect = TLS_DIALECT_GNU;
2996 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2997 ix86_tls_dialect = TLS_DIALECT_GNU2;
2998 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2999 ix86_tls_dialect = TLS_DIALECT_SUN;
3001 error ("bad value (%s) for %stls-dialect=%s %s",
3002 ix86_tls_dialect_string, prefix, suffix, sw);
/* -mpc32/-mpc64/-mpc80: x87 precision-control setting.  */
3005 if (ix87_precision_string)
3007 i = atoi (ix87_precision_string);
3008 if (i != 32 && i != 64 && i != 80)
3009 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
/* 64-bit-specific defaults follow (guard condition elided from view).  */
3014 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3016 /* Enable by default the SSE and MMX builtins. Do allow the user to
3017 explicitly disable any of these. In particular, disabling SSE and
3018 MMX for kernel code is extremely useful. */
3019 if (!ix86_arch_specified)
3021 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3022 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3025 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
/* 32-bit-specific defaults.  */
3029 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3031 if (!ix86_arch_specified)
3033 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3035 /* i386 ABI does not specify red zone. It still makes sense to use it
3036 when programmer takes care to stack from being destroyed. */
3037 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3038 target_flags |= MASK_NO_RED_ZONE;
3041 /* Keep nonleaf frame pointers. */
3042 if (flag_omit_frame_pointer)
3043 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3044 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3045 flag_omit_frame_pointer = 1;
3047 /* If we're doing fast math, we don't care about comparison order
3048 wrt NaNs. This lets us use a shorter comparison sequence. */
3049 if (flag_finite_math_only)
3050 target_flags &= ~MASK_IEEE_FP;
3052 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3053 since the insns won't need emulation. */
3054 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3055 target_flags &= ~MASK_NO_FANCY_MATH_387;
3057 /* Likewise, if the target doesn't have a 387, or we've specified
3058 software floating point, don't use 387 inline intrinsics. */
3060 target_flags |= MASK_NO_FANCY_MATH_387;
/* NOTE(review): override_options () fragment (elided braces/else-arms).
   Derives MMX/POPCNT from SSE options, validates the stack-boundary
   options, parses -mfpmath= and -mveclibabi=.  */
3062 /* Turn on MMX builtins for -msse. */
3065 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3066 x86_prefetch_sse = true;
3069 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3070 if (TARGET_SSE4_2 || TARGET_ABM)
3071 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3073 /* Validate -mpreferred-stack-boundary= value or default it to
3074 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3075 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3076 if (ix86_preferred_stack_boundary_string)
3078 i = atoi (ix86_preferred_stack_boundary_string);
/* The value is an exponent: the boundary becomes 2**i bytes.  */
3079 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3080 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3081 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3083 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3086 /* Set the default value for -mstackrealign. */
3087 if (ix86_force_align_arg_pointer == -1)
3088 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3090 /* Validate -mincoming-stack-boundary= value or default it to
3091 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3092 if (ix86_force_align_arg_pointer)
3093 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3095 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3096 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3097 if (ix86_incoming_stack_boundary_string)
3099 i = atoi (ix86_incoming_stack_boundary_string);
3100 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3101 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3102 i, TARGET_64BIT ? 4 : 2);
3105 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3106 ix86_incoming_stack_boundary
3107 = ix86_user_incoming_stack_boundary;
3111 /* Accept -msseregparm only if at least SSE support is enabled. */
3112 if (TARGET_SSEREGPARM
3114 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
/* Parse -mfpmath=: which floating-point unit(s) to generate code for.  */
3116 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3117 if (ix86_fpmath_string != 0)
3119 if (! strcmp (ix86_fpmath_string, "387"))
3120 ix86_fpmath = FPMATH_387;
3121 else if (! strcmp (ix86_fpmath_string, "sse"))
/* -mfpmath=sse without SSE available degrades to 387 with a warning.  */
3125 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3126 ix86_fpmath = FPMATH_387;
3129 ix86_fpmath = FPMATH_SSE;
3131 else if (! strcmp (ix86_fpmath_string, "387,sse")
3132 || ! strcmp (ix86_fpmath_string, "387+sse")
3133 || ! strcmp (ix86_fpmath_string, "sse,387")
3134 || ! strcmp (ix86_fpmath_string, "sse+387")
3135 || ! strcmp (ix86_fpmath_string, "both"))
3139 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3140 ix86_fpmath = FPMATH_387;
3142 else if (!TARGET_80387)
3144 warning (0, "387 instruction set disabled, using SSE arithmetics");
3145 ix86_fpmath = FPMATH_SSE;
3148 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3151 error ("bad value (%s) for %sfpmath=%s %s",
3152 ix86_fpmath_string, prefix, suffix, sw);
3155 /* If the i387 is disabled, then do not return values in it. */
3157 target_flags &= ~MASK_FLOAT_RETURNS;
3159 /* Use external vectorized library in vectorizing intrinsics. */
3160 if (ix86_veclibabi_string)
3162 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3163 ix86_veclib_handler = ix86_veclibabi_svml;
3164 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3165 ix86_veclib_handler = ix86_veclibabi_acml;
3167 error ("unknown vectorization library ABI type (%s) for "
3168 "%sveclibabi=%s %s", ix86_veclibabi_string,
3169 prefix, suffix, sw);
/* NOTE(review): final fragment of override_options () (elided braces).
   Decides -maccumulate-outgoing-args, computes the internal label prefix,
   seeds --param defaults from the cost table, and selects the 64-/32-bit
   insn generator functions.  */
3172 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3173 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3175 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3177 /* ??? Unwind info is not correct around the CFG unless either a frame
3178 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3179 unwind info generation to be aware of the CFG and propagating states
3181 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3182 || flag_exceptions || flag_non_call_exceptions)
3183 && flag_omit_frame_pointer
3184 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
/* Only warn when the user explicitly disabled M_A_O_A; otherwise force it.  */
3186 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3187 warning (0, "unwind tables currently require either a frame pointer "
3188 "or %saccumulate-outgoing-args%s for correctness",
3190 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3193 /* If stack probes are required, the space used for large function
3194 arguments on the stack must also be probed, so enable
3195 -maccumulate-outgoing-args so this happens in the prologue. */
3196 if (TARGET_STACK_PROBE
3197 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3199 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3200 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3201 "for correctness", prefix, suffix);
3202 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3205 /* For sane SSE instruction set generation we need fcomi instruction.
3206 It is safe to enable all CMOVE instructions. */
3210 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3213 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3214 p = strchr (internal_label_prefix, 'X');
3215 internal_label_prefix_len = p - internal_label_prefix;
3219 /* When scheduling description is not available, disable scheduler pass
3220 so it won't slow down the compilation and make x87 code slower. */
3221 if (!TARGET_SCHEDULE)
3222 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
/* Seed the --param defaults from the active cost table, unless the user
   set them explicitly on the command line.  */
3224 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3225 set_param_value ("simultaneous-prefetches",
3226 ix86_cost->simultaneous_prefetches);
3227 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3228 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3229 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3230 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3231 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3232 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3234 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3235 can be optimized to ap = __builtin_next_arg (0). */
3237 targetm.expand_builtin_va_start = NULL;
/* Select DImode or SImode insn generator functions once, up front,
   so the rest of the backend can call through these pointers.  */
3241 ix86_gen_leave = gen_leave_rex64;
3242 ix86_gen_pop1 = gen_popdi1;
3243 ix86_gen_add3 = gen_adddi3;
3244 ix86_gen_sub3 = gen_subdi3;
3245 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3246 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3247 ix86_gen_monitor = gen_sse3_monitor64;
3248 ix86_gen_andsp = gen_anddi3;
3252 ix86_gen_leave = gen_leave;
3253 ix86_gen_pop1 = gen_popsi1;
3254 ix86_gen_add3 = gen_addsi3;
3255 ix86_gen_sub3 = gen_subsi3;
3256 ix86_gen_sub3_carry = gen_subsi3_carry;
3257 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3258 ix86_gen_monitor = gen_sse3_monitor;
3259 ix86_gen_andsp = gen_andsi3;
3263 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3265 target_flags |= MASK_CLD & ~target_flags_explicit;
3268 /* Save the initial options in case the user does function specific options */
3270 target_option_default_node = target_option_current_node
3271 = build_target_option_node ();
3274 /* Save the current options */
3277 ix86_function_specific_save (struct cl_target_option *ptr)
3279 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3280 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3281 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3282 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3284 ptr->arch = ix86_arch;
3285 ptr->tune = ix86_tune;
3286 ptr->fpmath = ix86_fpmath;
3287 ptr->branch_cost = ix86_branch_cost;
3288 ptr->tune_defaulted = ix86_tune_defaulted;
3289 ptr->arch_specified = ix86_arch_specified;
3290 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3291 ptr->target_flags_explicit = target_flags_explicit;
3294 /* Restore the current options */
3297 ix86_function_specific_restore (struct cl_target_option *ptr)
3299 enum processor_type old_tune = ix86_tune;
3300 enum processor_type old_arch = ix86_arch;
3301 unsigned int ix86_arch_mask, ix86_tune_mask;
3304 ix86_arch = ptr->arch;
3305 ix86_tune = ptr->tune;
3306 ix86_fpmath = ptr->fpmath;
3307 ix86_branch_cost = ptr->branch_cost;
3308 ix86_tune_defaulted = ptr->tune_defaulted;
3309 ix86_arch_specified = ptr->arch_specified;
3310 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3311 target_flags_explicit = ptr->target_flags_explicit;
3313 /* Recreate the arch feature tests if the arch changed */
3314 if (old_arch != ix86_arch)
3316 ix86_arch_mask = 1u << ix86_arch;
3317 for (i = 0; i < X86_ARCH_LAST; ++i)
3318 ix86_arch_features[i]
3319 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3322 /* Recreate the tune optimization tests */
3323 if (old_tune != ix86_tune)
3325 ix86_tune_mask = 1u << ix86_tune;
3326 for (i = 0; i < X86_TUNE_LAST; ++i)
3327 ix86_tune_features[i]
3328 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3332 /* Print the current options */
/* NOTE(review): dumps the saved option state in *PTR to FILE, indented
   INDENT columns, for debugging the "option" attribute machinery.  Some
   fprintf argument lines are elided from this extraction.  */
3335 ix86_function_specific_print (FILE *file, int indent,
3336 struct cl_target_option *ptr)
/* ix86_target_string reconstructs a "-m..." command-line string; the
   result is heap-allocated and freed below.  */
3339 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3340 NULL, NULL, NULL, false);
3342 fprintf (file, "%*sarch = %d (%s)\n",
3345 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3346 ? cpu_names[ptr->arch]
3349 fprintf (file, "%*stune = %d (%s)\n",
3352 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3353 ? cpu_names[ptr->tune]
/* fpmath is a bit-set: 387, SSE, or both.  */
3356 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3357 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3358 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3359 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3363 fprintf (file, "%*s%s\n", indent, "", target_string);
3364 free (target_string);
3369 /* Inner function to process the attribute((option(...))), take an argument and
3370 set the current options from the argument. If we have a list, recursively go
/* NOTE(review): returns true on success, false on bad input.  String-valued
   options (arch=/tune=/fpmath=) are stashed in P_STRINGS for the caller.
   Several lines (struct declaration, braces) are elided from this view.  */
3374 ix86_valid_option_attribute_inner_p (tree args, char *p_strings[])
/* Table-entry constructors: S = option name, O = OPT_* enumerator,
   M = target_flags mask for yes/no style options.  */
3379 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3380 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3381 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3382 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3397 enum ix86_opt_type type;
/* ISA options accepted inside attribute((option(...))).  */
3402 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3403 IX86_ATTR_ISA ("abm", OPT_mabm),
3404 IX86_ATTR_ISA ("aes", OPT_maes),
3405 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3406 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3407 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3408 IX86_ATTR_ISA ("sse", OPT_msse),
3409 IX86_ATTR_ISA ("sse2", OPT_msse2),
3410 IX86_ATTR_ISA ("sse3", OPT_msse3),
3411 IX86_ATTR_ISA ("sse4", OPT_msse4),
3412 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3413 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3414 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3415 IX86_ATTR_ISA ("sse5", OPT_msse5),
3416 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3418 /* string options */
3419 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3420 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3421 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Flag (yes/no) options mapping onto target_flags bits.  */
3424 IX86_ATTR_YES ("cld",
3428 IX86_ATTR_NO ("fancy-math-387",
3429 OPT_mfancy_math_387,
3430 MASK_NO_FANCY_MATH_387),
3432 IX86_ATTR_NO ("fused-madd",
3434 MASK_NO_FUSED_MADD),
3436 IX86_ATTR_YES ("ieee-fp",
3440 IX86_ATTR_YES ("inline-all-stringops",
3441 OPT_minline_all_stringops,
3442 MASK_INLINE_ALL_STRINGOPS),
3444 IX86_ATTR_YES ("inline-stringops-dynamically",
3445 OPT_minline_stringops_dynamically,
3446 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3448 IX86_ATTR_NO ("align-stringops",
3449 OPT_mno_align_stringops,
3450 MASK_NO_ALIGN_STRINGOPS),
3452 IX86_ATTR_YES ("recip",
3458 /* If this is a list, recurse to get the options. */
3459 if (TREE_CODE (args) == TREE_LIST)
3463 for (; args; args = TREE_CHAIN (args))
3464 if (TREE_VALUE (args)
3465 && !ix86_valid_option_attribute_inner_p (TREE_VALUE (args), p_strings))
3471 else if (TREE_CODE (args) != STRING_CST)
3474 /* Handle multiple arguments separated by commas. */
3475 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3477 while (next_optstr && *next_optstr != '\0')
3479 char *p = next_optstr;
3481 char *comma = strchr (next_optstr, ',');
3482 const char *opt_string;
3483 size_t len, opt_len;
3488 enum ix86_opt_type type = ix86_opt_unknown;
3494 len = comma - next_optstr;
3495 next_optstr = comma + 1;
3503 /* Recognize no-xxx. */
3504 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3513 /* Find the option. */
/* Linear scan of the table; first-character check is a cheap pre-filter.  */
3516 for (i = 0; i < sizeof (attrs) / sizeof (attrs[0]); i++)
3518 type = attrs[i].type;
3519 opt_len = attrs[i].len;
3520 if (ch == attrs[i].string[0]
3521 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3522 && memcmp (p, attrs[i].string, opt_len) == 0)
3525 mask = attrs[i].mask;
3526 opt_string = attrs[i].string;
3531 /* Process the option. */
3534 error ("attribute(option(\"%s\")) is unknown", orig_p);
3538 else if (type == ix86_opt_isa)
3539 ix86_handle_option (opt, p, opt_set_p);
3541 else if (type == ix86_opt_yes || type == ix86_opt_no)
3543 if (type == ix86_opt_no)
3544 opt_set_p = !opt_set_p;
3547 target_flags |= mask;
3549 target_flags &= ~mask;
3552 else if (type == ix86_opt_str)
/* String options may only be given once per attribute.  */
3556 error ("option(\"%s\") was already specified", opt_string);
3560 p_strings[opt] = xstrdup (p + opt_len);
3570 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* NOTE(review): temporarily installs the attribute's options over the
   globals, reruns override_options, snapshots the result into a tree node,
   then restores the original strings.  Several lines are elided here.  */
3573 ix86_valid_option_attribute_tree (tree args)
/* Remember the global option strings so they can be put back afterwards.  */
3575 const char *orig_arch_string = ix86_arch_string;
3576 const char *orig_tune_string = ix86_tune_string;
3577 const char *orig_fpmath_string = ix86_fpmath_string;
3578 int orig_tune_defaulted = ix86_tune_defaulted;
3579 int orig_arch_specified = ix86_arch_specified;
3580 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3583 struct cl_target_option *def
3584 = TREE_TARGET_OPTION (target_option_default_node);
3586 /* Process each of the options on the chain. */
3587 if (! ix86_valid_option_attribute_inner_p (args, option_strings))
3590 /* If the changed options are different from the default, rerun override_options,
3591 and then save the options away. The string options are are attribute options,
3592 and will be undone when we copy the save structure. */
3593 if (ix86_isa_flags != def->ix86_isa_flags
3594 || target_flags != def->target_flags
3595 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3596 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3597 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3599 /* If we are using the default tune= or arch=, undo the string assigned,
3600 and use the default. */
3601 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3602 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3603 else if (!orig_arch_specified)
3604 ix86_arch_string = NULL;
3606 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3607 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3608 else if (orig_tune_defaulted)
3609 ix86_tune_string = NULL;
3611 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3612 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3613 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3614 else if (!TARGET_64BIT && TARGET_SSE)
3615 ix86_fpmath_string = "sse,387";
3617 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3618 override_options (false);
3620 /* Save the current options unless we are validating options for
3622 t = build_target_option_node ();
/* Put the global strings back the way we found them.  */
3624 ix86_arch_string = orig_arch_string;
3625 ix86_tune_string = orig_tune_string;
3626 ix86_fpmath_string = orig_fpmath_string;
3628 /* Free up memory allocated to hold the strings */
/* NOTE(review): the NULL guard before free() is redundant — free (NULL)
   is a no-op — but it is kept here byte-for-byte.  */
3629 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3630 if (option_strings[i])
3631 free (option_strings[i]);
3637 /* Hook to validate attribute((option("string"))). */
/* NOTE(review): builds a target-option tree for the attribute and, when
   valid, attaches it to FNDECL.  Current option state is saved around the
   call so global options are left undisturbed.  Lines elided here.  */
3640 ix86_valid_option_attribute_p (tree fndecl,
3641 tree ARG_UNUSED (name),
3643 int ARG_UNUSED (flags))
3645 struct cl_target_option cur_opts;
/* Snapshot current options; ix86_valid_option_attribute_tree clobbers them.  */
3649 cl_target_option_save (&cur_opts);
3650 new_opts = ix86_valid_option_attribute_tree (args);
3655 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_opts;
3657 cl_target_option_restore (&cur_opts);
3662 /* Hook to determine if one function can safely inline another. */
/* NOTE(review): the result assignments (ret = true/false) are on lines
   elided from this extraction; only the comparison chain is visible.  */
3665 ix86_can_inline_p (tree caller, tree callee)
3668 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3669 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3671 /* If callee has no option attributes, then it is ok to inline. */
3675 /* If caller has no option attributes, but callee does then it is not ok to
3677 else if (!caller_tree)
3682 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3683 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3685 /* Callee's isa options should a subset of the caller's, i.e. a SSE5 function
3686 can inline a SSE2 function but a SSE2 function can't inline a SSE5
/* Subset test: every ISA bit the callee needs must be set in the caller.  */
3688 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3689 != callee_opts->ix86_isa_flags)
3692 /* See if we have the same non-isa options. */
3693 else if (caller_opts->target_flags != callee_opts->target_flags)
3696 /* See if arch, tune, etc. are the same. */
3697 else if (caller_opts->arch != callee_opts->arch)
3700 else if (caller_opts->tune != callee_opts->tune)
3703 else if (caller_opts->fpmath != callee_opts->fpmath)
3706 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3717 /* Remember the last target of ix86_set_current_function. */
3718 static GTY(()) tree ix86_previous_fndecl;
3720 /* Establish appropriate back-end context for processing the function
3721 FNDECL. The argument might be NULL to indicate processing at top
3722 level, outside of any function scope. */
/* NOTE(review): implements TARGET_SET_CURRENT_FUNCTION; braces and some
   branches are elided from this extraction.  */
3724 ix86_set_current_function (tree fndecl)
3726 /* Only change the context if the function changes. This hook is called
3727 several times in the course of compiling a function, and we don't want to
3728 slow things down too much or call target_reinit when it isn't safe. */
3729 if (fndecl && fndecl != ix86_previous_fndecl)
3731 tree old_tree = (ix86_previous_fndecl
3732 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3735 tree new_tree = (fndecl
3736 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3739 ix86_previous_fndecl = fndecl;
/* Same option node as last time: nothing to re-establish.  */
3740 if (old_tree == new_tree)
/* New function carries its own option set: switch to it.  */
3745 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* Otherwise fall back to the current (command-line) option node.  */
3751 struct cl_target_option *def
3752 = TREE_TARGET_OPTION (target_option_current_node);
3754 cl_target_option_restore (def);
3761 /* Return true if this goes in large data/bss. */
/* NOTE(review): only relevant for the x86-64 medium code models; the
   return statements are on lines elided from this extraction.  */
3764 ix86_in_large_data_p (tree exp)
3766 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3769 /* Functions are never large data. */
3770 if (TREE_CODE (exp) == FUNCTION_DECL)
/* Honor an explicit section placement into .ldata/.lbss.  */
3773 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3775 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3776 if (strcmp (section, ".ldata") == 0
3777 || strcmp (section, ".lbss") == 0)
3783 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3785 /* If this is an incomplete type with size 0, then we can't put it
3786 in data because it might be too big when completed. */
3787 if (!size || size > ix86_section_threshold)
3794 /* Switch to the appropriate section for output of DECL.
3795 DECL is either a `VAR_DECL' node or a constant of some sort.
3796 RELOC indicates whether forming the initial value of DECL requires
3797 link-time relocations. */
3799 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
/* NOTE(review): break statements and some case bodies are elided here.
   Large medium-model objects are redirected into the ".l" section family
   so they can live outside the 2GB small data area.  */
3803 x86_64_elf_select_section (tree decl, int reloc,
3804 unsigned HOST_WIDE_INT align)
3806 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3807 && ix86_in_large_data_p (decl))
3809 const char *sname = NULL;
3810 unsigned int flags = SECTION_WRITE;
3811 switch (categorize_decl_for_section (decl, reloc))
3816 case SECCAT_DATA_REL:
3817 sname = ".ldata.rel";
3819 case SECCAT_DATA_REL_LOCAL:
3820 sname = ".ldata.rel.local";
3822 case SECCAT_DATA_REL_RO:
3823 sname = ".ldata.rel.ro";
3825 case SECCAT_DATA_REL_RO_LOCAL:
3826 sname = ".ldata.rel.ro.local";
3830 flags |= SECTION_BSS;
3833 case SECCAT_RODATA_MERGE_STR:
3834 case SECCAT_RODATA_MERGE_STR_INIT:
3835 case SECCAT_RODATA_MERGE_CONST:
3839 case SECCAT_SRODATA:
3846 /* We don't split these for medium model. Place them into
3847 default sections and hope for best. */
3849 case SECCAT_EMUTLS_VAR:
3850 case SECCAT_EMUTLS_TMPL:
3855 /* We might get called with string constants, but get_named_section
3856 doesn't like them as they are not DECLs. Also, we need to set
3857 flags in that case. */
3859 return get_section (sname, flags, NULL);
3860 return get_named_section (decl, sname, reloc);
/* Everything else uses the generic ELF section selection.  */
3863 return default_elf_select_section (decl, reloc, align);
3866 /* Build up a unique section name, expressed as a
3867 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
3868 RELOC indicates whether the initial value of EXP requires
3869 link-time relocations. */
3871 static void ATTRIBUTE_UNUSED
/* NOTE(review): break statements and some case arms are elided here.
   Parallels x86_64_elf_select_section: large medium-model objects get a
   per-decl section name under the ".l" prefix family.  */
3872 x86_64_elf_unique_section (tree decl, int reloc)
3874 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3875 && ix86_in_large_data_p (decl))
3877 const char *prefix = NULL;
3878 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
3879 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
3881 switch (categorize_decl_for_section (decl, reloc))
3884 case SECCAT_DATA_REL:
3885 case SECCAT_DATA_REL_LOCAL:
3886 case SECCAT_DATA_REL_RO:
3887 case SECCAT_DATA_REL_RO_LOCAL:
3888 prefix = one_only ? ".ld" : ".ldata";
3891 prefix = one_only ? ".lb" : ".lbss";
3894 case SECCAT_RODATA_MERGE_STR:
3895 case SECCAT_RODATA_MERGE_STR_INIT:
3896 case SECCAT_RODATA_MERGE_CONST:
3897 prefix = one_only ? ".lr" : ".lrodata";
3899 case SECCAT_SRODATA:
3906 /* We don't split these for medium model. Place them into
3907 default sections and hope for best. */
3909 case SECCAT_EMUTLS_VAR:
3910 prefix = targetm.emutls.var_section;
3912 case SECCAT_EMUTLS_TMPL:
3913 prefix = targetm.emutls.tmpl_section;
3918 const char *name, *linkonce;
/* Compose <linkonce><prefix>.<mangled-name> as the section name.  */
3921 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
3922 name = targetm.strip_name_encoding (name);
3924 /* If we're using one_only, then there needs to be a .gnu.linkonce
3925 prefix to the section name. */
3926 linkonce = one_only ? ".gnu.linkonce" : "";
3928 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
3930 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
/* Non-large decls take the generic unique-section path.  */
3934 default_unique_section (decl, reloc);
3937 #ifdef COMMON_ASM_OP
3938 /* This says how to output assembler code to declare an
3939 uninitialized external linkage data object.
3941 For medium model x86-64 we need to use .largecomm opcode for
3944 x86_elf_aligned_common (FILE *file,
3945 const char *name, unsigned HOST_WIDE_INT size,
3948 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3949 && size > (unsigned int)ix86_section_threshold)
3950 fprintf (file, ".largecomm\t");
3952 fprintf (file, "%s", COMMON_ASM_OP);
3953 assemble_name (file, name);
3954 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
3955 size, align / BITS_PER_UNIT);
3959 /* Utility function for targets to use in implementing
3960 ASM_OUTPUT_ALIGNED_BSS. */
3963 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
3964 const char *name, unsigned HOST_WIDE_INT size,
3967 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3968 && size > (unsigned int)ix86_section_threshold)
3969 switch_to_section (get_named_section (decl, ".lbss", 0));
3971 switch_to_section (bss_section);
3972 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
3973 #ifdef ASM_DECLARE_OBJECT_NAME
3974 last_assemble_variable_decl = decl;
3975 ASM_DECLARE_OBJECT_NAME (file, name, decl);
3977 /* Standard thing is just output label for the object. */
3978 ASM_OUTPUT_LABEL (file, name);
3979 #endif /* ASM_DECLARE_OBJECT_NAME */
3980 ASM_OUTPUT_SKIP (file, size ? size : 1);
3984 optimization_options (int level, int size ATTRIBUTE_UNUSED)
3986 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
3987 make the problem with not enough registers even worse. */
3988 #ifdef INSN_SCHEDULING
3990 flag_schedule_insns = 0;
3994 /* The Darwin libraries never set errno, so we might as well
3995 avoid calling them when that's the only reason we would. */
3996 flag_errno_math = 0;
3998 /* The default values of these switches depend on the TARGET_64BIT
3999 that is not known at this moment. Mark these values with 2 and
4000 let user the to override these. In case there is no command line option
4001 specifying them, we will set the defaults in override_options. */
4003 flag_omit_frame_pointer = 2;
4004 flag_pcc_struct_return = 2;
4005 flag_asynchronous_unwind_tables = 2;
4006 flag_vect_cost_model = 1;
4007 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4008 SUBTARGET_OPTIMIZATION_OPTIONS;
4012 /* Decide whether we can make a sibling call to a function. DECL is the
4013 declaration of the function being targeted by the call and EXP is the
4014 CALL_EXPR representing the call. */
/* NOTE(review): the return statements and some declarations are on lines
   elided from this extraction; only the decision conditions are visible.  */
4017 ix86_function_ok_for_sibcall (tree decl, tree exp)
4022 /* If we are generating position-independent code, we cannot sibcall
4023 optimize any indirect call, or a direct call to a global function,
4024 as the PLT requires %ebx be live. */
4025 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Strip the pointer-to-function layer to reach the function type.  */
4032 func = TREE_TYPE (CALL_EXPR_FN (exp));
4033 if (POINTER_TYPE_P (func))
4034 func = TREE_TYPE (func);
4037 /* Check that the return value locations are the same. Like
4038 if we are returning floats on the 80387 register stack, we cannot
4039 make a sibcall from a function that doesn't return a float to a
4040 function that does or, conversely, from a function that does return
4041 a float to a function that doesn't; the necessary stack adjustment
4042 would not be executed. This is also the place we notice
4043 differences in the return value ABI. Note that it is ok for one
4044 of the functions to have void return type as long as the return
4045 value of the other is passed in a register. */
4046 a = ix86_function_value (TREE_TYPE (exp), func, false);
4047 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4049 if (STACK_REG_P (a) || STACK_REG_P (b))
4051 if (!rtx_equal_p (a, b))
4054 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4056 else if (!rtx_equal_p (a, b))
4059 /* If this call is indirect, we'll need to be able to use a call-clobbered
4060 register for the address of the target function. Make sure that all
4061 such registers are not used for passing parameters. */
4062 if (!decl && !TARGET_64BIT)
4066 /* We're looking at the CALL_EXPR, we need the type of the function. */
4067 type = CALL_EXPR_FN (exp); /* pointer expression */
4068 type = TREE_TYPE (type); /* pointer type */
4069 type = TREE_TYPE (type); /* function type */
/* With regparm >= 3 no call-clobbered register remains free to hold the
   target address, so the sibcall must be rejected.  */
4071 if (ix86_function_regparm (type, NULL) >= 3)
4073 /* ??? Need to count the actual number of registers to be used,
4074 not the possible number of registers. Fix later. */
4079 /* Dllimport'd functions are also called indirectly. */
4080 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4081 && decl && DECL_DLLIMPORT_P (decl)
4082 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4085 /* Otherwise okay. That also includes certain types of indirect calls. */
4089 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4090 calling convention attributes;
4091 arguments as in struct attribute_spec.handler. */
/* Attribute handler for the calling-convention attributes ("cdecl",
   "stdcall", "fastcall", "regparm", "sseregparm").  Validates the
   attribute's applicability and argument, diagnoses mutually exclusive
   combinations, and sets *NO_ADD_ATTRS when the attribute must be dropped.
   Arguments follow struct attribute_spec.handler.  */
4094 ix86_handle_cconv_attribute (tree *node, tree name,
4096 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function types (or decls that
   carry one); warn and discard otherwise.  */
4099 if (TREE_CODE (*node) != FUNCTION_TYPE
4100 && TREE_CODE (*node) != METHOD_TYPE
4101 && TREE_CODE (*node) != FIELD_DECL
4102 && TREE_CODE (*node) != TYPE_DECL)
4104 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4105 IDENTIFIER_POINTER (name));
4106 *no_add_attrs = true;
4110 /* Can combine regparm with all attributes but fastcall. */
4111 if (is_attribute_p ("regparm", name))
4115 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4117 error ("fastcall and regparm attributes are not compatible");
/* regparm takes a single integer-constant argument, bounded by
   REGPARM_MAX.  */
4120 cst = TREE_VALUE (args);
4121 if (TREE_CODE (cst) != INTEGER_CST)
4123 warning (OPT_Wattributes,
4124 "%qs attribute requires an integer constant argument",
4125 IDENTIFIER_POINTER (name));
4126 *no_add_attrs = true;
4128 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4130 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4131 IDENTIFIER_POINTER (name), REGPARM_MAX);
4132 *no_add_attrs = true;
4140 /* Do not warn when emulating the MS ABI. */
4141 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4142 warning (OPT_Wattributes, "%qs attribute ignored",
4143 IDENTIFIER_POINTER (name));
4144 *no_add_attrs = true;
4148 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4149 if (is_attribute_p ("fastcall", name))
4151 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4153 error ("fastcall and cdecl attributes are not compatible");
4155 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4157 error ("fastcall and stdcall attributes are not compatible");
4159 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4161 error ("fastcall and regparm attributes are not compatible");
4165 /* Can combine stdcall with fastcall (redundant), regparm and
4167 else if (is_attribute_p ("stdcall", name))
4169 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4171 error ("stdcall and cdecl attributes are not compatible");
4173 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4175 error ("stdcall and fastcall attributes are not compatible");
4179 /* Can combine cdecl with regparm and sseregparm. */
4180 else if (is_attribute_p ("cdecl", name))
4182 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4184 error ("stdcall and cdecl attributes are not compatible");
4186 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4188 error ("fastcall and cdecl attributes are not compatible");
4192 /* Can combine sseregparm with all attributes. */
4197 /* Return 0 if the attributes for two types are incompatible, 1 if they
4198 are compatible, and 2 if they are nearly compatible (which causes a
4199 warning to be generated). */
/* TARGET_COMP_TYPE_ATTRIBUTES hook: compare calling-convention attributes
   of TYPE1 and TYPE2.  Returns nonzero when compatible (see the comment
   above for the 0/1/2 convention); non-function types compare compatible
   trivially.  */
4202 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4204 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default convention is stdcall, so the attribute whose
   *presence* signals a deviation flips between "cdecl" and "stdcall".  */
4205 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4207 if (TREE_CODE (type1) != FUNCTION_TYPE
4208 && TREE_CODE (type1) != METHOD_TYPE)
4211 /* Check for mismatched fastcall/regparm types. */
4212 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4213 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4214 || (ix86_function_regparm (type1, NULL)
4215 != ix86_function_regparm (type2, NULL)))
4218 /* Check for mismatched sseregparm types. */
4219 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4220 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4223 /* Check for mismatched return types (cdecl vs stdcall). */
4224 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4225 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4231 /* Return the regparm value for a function with the indicated TYPE and DECL.
4232 DECL may be NULL when calling function indirectly
4233 or considering a libcall. */
/* Compute how many integer registers are available for passing arguments
   to a function with the given TYPE and (possibly NULL) DECL.  Sources, in
   order: the 64-bit ABI fixed counts, an explicit "regparm" attribute,
   "fastcall" (2 registers), and -- for provably local functions -- an
   automatically raised count limited by fixed-register usage.  */
4236 ix86_function_regparm (const_tree type, const_tree decl)
4239 int regparm = ix86_regparm;
/* Static so the nested-function diagnostic below fires only once per
   compilation.  */
4241 static bool error_issued;
4245 if (ix86_function_type_abi (type) == DEFAULT_ABI)
4247 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
4250 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4254 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4256 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4258 /* We can't use regparm(3) for nested functions because
4259 these pass static chain pointer in %ecx register. */
4260 if (!error_issued && regparm == 3
4261 && decl_function_context (decl)
4262 && !DECL_NO_STATIC_CHAIN (decl))
4264 error ("nested functions are limited to 2 register parameters");
4265 error_issued = true;
4273 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4276 /* Use register calling convention for local functions when possible. */
4277 if (decl && TREE_CODE (decl) == FUNCTION_DECL
4280 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4281 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4284 int local_regparm, globals = 0, regno;
4287 /* Make sure no regparm register is taken by a
4288 fixed register variable. */
4289 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4290 if (fixed_regs[local_regparm])
4293 /* We can't use regparm(3) for nested functions as these use
4294 static chain pointer in third argument. */
4295 if (local_regparm == 3
4296 && decl_function_context (decl)
4297 && !DECL_NO_STATIC_CHAIN (decl))
4300 /* If the function realigns its stackpointer, the prologue will
4301 clobber %ecx. If we've already generated code for the callee,
4302 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4303 scanning the attributes for the self-realigning property. */
4304 f = DECL_STRUCT_FUNCTION (decl);
4305 /* Since current internal arg pointer won't conflict with
4306 parameter passing regs, so no need to change stack
4307 realignment and adjust regparm number.
4309 Each fixed register usage increases register pressure,
4310 so less registers should be used for argument passing.
4311 This functionality can be overriden by an explicit
4313 for (regno = 0; regno <= DI_REG; regno++)
4314 if (fixed_regs[regno])
/* Reduce the local regparm count by the number of globally fixed
   registers counted above; never go negative.  */
4318 = globals < local_regparm ? local_regparm - globals : 0;
4320 if (local_regparm > regparm)
4321 regparm = local_regparm;
4328 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4329 DFmode (2) arguments in SSE registers for a function with the
4330 indicated TYPE and DECL. DECL may be NULL when calling function
4331 indirectly or considering a libcall. Otherwise return 0. */
/* Return 2 if SFmode and DFmode arguments go in SSE registers, 1 if only
   SFmode does, 0 otherwise, for a 32-bit function with the given TYPE and
   (possibly NULL) DECL.  When WARN, emit errors for sseregparm requested
   without SSE enabled.  (See also the block comment above.)  */
4334 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4336 gcc_assert (!TARGET_64BIT);
4338 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4339 by the sseregparm attribute. */
4340 if (TARGET_SSEREGPARM
4341 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm demanded but SSE is off: diagnose against the decl if we
   have one, else against the type.  */
4348 error ("Calling %qD with attribute sseregparm without "
4349 "SSE/SSE2 enabled", decl);
4351 error ("Calling %qT with attribute sseregparm without "
4352 "SSE/SSE2 enabled", type);
4360 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4361 (and DFmode for SSE2) arguments in SSE registers. */
4362 if (decl && TARGET_SSE_MATH && !profile_flag)
4364 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4365 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4367 return TARGET_SSE2 ? 2 : 1;
4373 /* Return true if EAX is live at the start of the function. Used by
4374 ix86_expand_prologue to determine if we need special help before
4375 calling allocate_stack_worker. */
/* Return true if %eax (hard reg 0) is live on entry to the current
   function, per dataflow live-out info of the entry block.  Used by
   ix86_expand_prologue before calling allocate_stack_worker.  */
4378 ix86_eax_live_at_start_p (void)
4380 /* Cheat. Don't bother working forward from ix86_function_regparm
4381 to the function type to whether an actual argument is located in
4382 eax. Instead just look at cfg info, which is still close enough
4383 to correct at this point. This gives false positives for broken
4384 functions that might use uninitialized data that happens to be
4385 allocated in eax, but who cares? */
4386 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4389 /* Value is the number of bytes of arguments automatically
4390 popped when returning from a subroutine call.
4391 FUNDECL is the declaration node of the function (as a tree),
4392 FUNTYPE is the data type of the function (as a tree),
4393 or for a library call it is an identifier node for the subroutine name.
4394 SIZE is the number of bytes of arguments passed on the stack.
4396 On the 80386, the RTD insn may be used to pop them if the number
4397 of args is fixed, but if the number is variable then the caller
4398 must pop them all. RTD can't be used for library calls now
4399 because the library is compiled with the Unix compiler.
4400 Use of RTD is a selectable option, since it is incompatible with
4401 standard Unix calling sequences. If the option is not selected,
4402 the caller must always pop the args.
4404 The attribute stdcall is equivalent to RTD on a per module basis. */
/* RETURN_POPS_ARGS implementation: number of bytes of stack arguments the
   callee pops on return.  FUNDECL/FUNTYPE describe the callee (FUNTYPE may
   be an IDENTIFIER_NODE for libcalls); SIZE is the bytes of stack args.
   See the block comment above for the RTD/stdcall rationale.  */
4407 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4411 /* None of the 64-bit ABIs pop arguments. */
/* -mrtd applies only to real decls, never to libcall identifier nodes.  */
4415 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4417 /* Cdecl functions override -mrtd, and never pop the stack. */
4418 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4420 /* Stdcall and fastcall functions will pop the stack if not
4422 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4423 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* Callee-pop only works for a fixed argument list.  */
4426 if (rtd && ! stdarg_p (funtype))
4430 /* Lose any fake structure return argument if it is passed on the stack. */
4431 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4432 && !KEEP_AGGREGATE_RETURN_POINTER)
4434 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden aggregate-return pointer occupies one pointer-sized slot.  */
4436 return GET_MODE_SIZE (Pmode);
4442 /* Argument support functions. */
4444 /* Return true when register may be used to pass function parameters. */
/* Return true when hard register REGNO may be used to pass function
   parameters under some supported ABI/ISA combination.
   NOTE(review): the branch structure (32-bit vs 64-bit, SSE/MMX gating)
   is partially elided here; conditions below are kept verbatim.  */
4446 ix86_function_arg_regno_p (int regno)
4449 const int *parm_regs;
4454 return (regno < REGPARM_MAX
4455 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4457 return (regno < REGPARM_MAX
4458 || (TARGET_MMX && MMX_REGNO_P (regno)
4459 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4460 || (TARGET_SSE && SSE_REGNO_P (regno)
4461 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4466 if (SSE_REGNO_P (regno) && TARGET_SSE)
4471 if (TARGET_SSE && SSE_REGNO_P (regno)
4472 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4476 /* TODO: The function should depend on current function ABI but
4477 builtins.c would need updating then. Therefore we use the
4480 /* RAX is used as hidden argument to va_arg functions. */
4481 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
/* Scan the integer parameter-register table of the default ABI.  */
4484 if (DEFAULT_ABI == MS_ABI)
4485 parm_regs = x86_64_ms_abi_int_parameter_registers;
4487 parm_regs = x86_64_int_parameter_registers;
4488 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
4489 : X86_64_REGPARM_MAX); i++)
4490 if (regno == parm_regs[i])
4495 /* Return if we do not know how to pass TYPE solely in registers. */
/* TARGET_MUST_PASS_IN_STACK hook: return true when a value of MODE/TYPE
   cannot be passed solely in registers.  */
4498 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4500 if (must_pass_in_stack_var_size_or_pad (mode, type))
4503 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4504 The layout_type routine is crafty and tries to trick us into passing
4505 currently unsupported vector types on the stack by using TImode. */
4506 return (!TARGET_64BIT && mode == TImode
4507 && type && TREE_CODE (type) != VECTOR_TYPE);
4510 /* It returns the size, in bytes, of the area reserved for arguments passed
4511 in registers for the function represented by fndecl dependent to the used
/* Return the size, in bytes, of the register-parameter save area reserved
   for FNDECL's calling ABI (nonzero shadow space under the MS x64 ABI).
   NOTE(review): the return computation is elided from this extract.  */
4514 ix86_reg_parm_stack_space (const_tree fndecl)
4517 /* For libcalls it is possible that there is no fndecl at hand.
4518 Therefore assume for this case the default abi of the target. */
4520 call_abi = DEFAULT_ABI;
4522 call_abi = ix86_function_abi (fndecl);
4528 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* Return SYSV_ABI or MS_ABI for FNTYPE: the default ABI unless an
   "ms_abi"/"sysv_abi" attribute overrides it (64-bit only).  */
4531 ix86_function_type_abi (const_tree fntype)
4533 if (TARGET_64BIT && fntype != NULL)
4536 if (DEFAULT_ABI == SYSV_ABI)
4537 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
4539 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
/* Return the calling ABI (SYSV_ABI/MS_ABI) for FNDECL, derived from its
   function type.  NOTE(review): the NULL-fndecl path is elided here.  */
4547 ix86_function_abi (const_tree fndecl)
4551 return ix86_function_type_abi (TREE_TYPE (fndecl));
4554 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* Return the calling ABI of the function currently being compiled, as
   cached in cfun->machine->call_abi (guarded for no-cfun / 32-bit).  */
4557 ix86_cfun_abi (void)
4559 if (! cfun || ! TARGET_64BIT)
4561 return cfun->machine->call_abi;
4565 extern void init_regs (void);
4567 /* Implementation of call abi switching target hook. Specific to FNDECL
4568 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4570 To prevent redudant calls of costy function init_regs (), it checks not to
4571 reset register usage for default abi. */
/* Call-ABI-switching target hook: record FNDECL's ABI in cfun and adjust
   the call_used_regs table -- RSI/RDI are callee-saved under the MS x64
   ABI but call-clobbered under SysV.  Guards avoid redundant (costly)
   re-initialization when nothing changes; see comment above.  */
4573 ix86_call_abi_override (const_tree fndecl)
4575 if (fndecl == NULL_TREE)
4576 cfun->machine->call_abi = DEFAULT_ABI;
4578 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4579 if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
4581 if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
4583 call_used_regs[4 /*RSI*/] = 0;
4584 call_used_regs[5 /*RDI*/] = 0;
4588 else if (TARGET_64BIT)
4590 if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
4592 call_used_regs[4 /*RSI*/] = 1;
4593 call_used_regs[5 /*RDI*/] = 1;
4599 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4600 for a call to a function whose data type is FNTYPE.
4601 For a library call, FNTYPE is 0. */
/* Initialize CUM for scanning the arguments of a call to a function of
   type FNTYPE (0 for libcalls): zero the structure, then set the integer /
   SSE / MMX register budgets from the callee's ABI and its fastcall /
   regparm / sseregparm / stdarg properties.  */
4604 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4605 tree fntype, /* tree ptr for function decl */
4606 rtx libname, /* SYMBOL_REF of library name or 0 */
4609 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4610 memset (cum, 0, sizeof (*cum));
4612 cum->call_abi = ix86_function_type_abi (fntype);
4613 /* Set up the number of registers to use for passing arguments. */
4614 cum->nregs = ix86_regparm;
/* Cross-ABI 64-bit call: use the callee ABI's register counts rather
   than the host default.  */
4617 if (cum->call_abi != DEFAULT_ABI)
4618 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
4623 cum->sse_nregs = SSE_REGPARM_MAX;
4626 if (cum->call_abi != DEFAULT_ABI)
4627 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4628 : X64_SSE_REGPARM_MAX;
4632 cum->mmx_nregs = MMX_REGPARM_MAX;
4633 cum->warn_avx = true;
4634 cum->warn_sse = true;
4635 cum->warn_mmx = true;
4637 /* Because type might mismatch in between caller and callee, we need to
4638 use actual type of function for local calls.
4639 FIXME: cgraph_analyze can be told to actually record if function uses
4640 va_start so for local functions maybe_vaarg can be made aggressive
4642 FIXME: once typesytem is fixed, we won't need this code anymore. */
4644 fntype = TREE_TYPE (fndecl);
4645 cum->maybe_vaarg = (fntype
4646 ? (!prototype_p (fntype) || stdarg_p (fntype))
4651 /* If there are variable arguments, then we won't pass anything
4652 in registers in 32-bit mode. */
4653 if (stdarg_p (fntype))
4664 /* Use ecx and edx registers if function has fastcall attribute,
4665 else look for regparm information. */
4668 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4674 cum->nregs = ix86_function_regparm (fntype, fndecl);
4677 /* Set up the number of SSE registers used for passing SFmode
4678 and DFmode arguments. Warn for mismatching ABI. */
4679 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4683 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4684 But in the case of vector types, it is some vector mode.
4686 When we have only some of our vector isa extensions enabled, then there
4687 are some modes for which vector_mode_supported_p is false. For these
4688 modes, the generic vector support in gcc will choose some non-vector mode
4689 in order to implement the type. By computing the natural mode, we'll
4690 select the proper ABI location for the operand and not depend on whatever
4691 the middle-end decides to do with these vector types. */
/* Return the "natural" machine mode for TYPE.  Usually TYPE_MODE, but for
   8- or 16-byte vector types whose mode was demoted by the middle end
   (vector_mode_supported_p false), search the vector-mode tables for the
   mode matching the element mode and subpart count, so the ABI location
   is chosen independently of middle-end lowering (see comment above).  */
4693 static enum machine_mode
4694 type_natural_mode (const_tree type)
4696 enum machine_mode mode = TYPE_MODE (type);
4698 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4700 HOST_WIDE_INT size = int_size_in_bytes (type);
4701 if ((size == 8 || size == 16)
4702 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4703 && TYPE_VECTOR_SUBPARTS (type) > 1)
4705 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4707 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4708 mode = MIN_MODE_VECTOR_FLOAT;
4710 mode = MIN_MODE_VECTOR_INT;
4712 /* Get the mode which has this inner mode and number of units. */
4713 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4714 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4715 && GET_MODE_INNER (mode) == innermode)
4725 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4726 this may not agree with the mode that the type system has chosen for the
4727 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4728 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
/* Build an rtx placing a value of natural mode MODE in REGNO.  If the
   type system chose a real mode (ORIG_MODE != BLKmode) use a plain REG in
   that mode; otherwise wrap a REG of MODE in a one-entry PARALLEL at
   offset 0 (see comment above).  */
4731 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4736 if (orig_mode != BLKmode)
4737 tmp = gen_rtx_REG (orig_mode, regno);
4740 tmp = gen_rtx_REG (mode, regno);
4741 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4742 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4748 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4749 of this code is to classify each 8bytes of incoming argument by the register
4750 class and assign registers accordingly. */
4752 /* Return the union class of CLASS1 and CLASS2.
4753 See the x86-64 PS ABI for details. */
/* Merge two x86-64 argument-passing register classes per the numbered
   rules of the System V x86-64 psABI (section on parameter classification).
   The rules are applied strictly in order; see the ABI text for each.  */
4755 static enum x86_64_reg_class
4756 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4758 /* Rule #1: If both classes are equal, this is the resulting class. */
4759 if (class1 == class2)
4762 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4764 if (class1 == X86_64_NO_CLASS)
4766 if (class2 == X86_64_NO_CLASS)
4769 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4770 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4771 return X86_64_MEMORY_CLASS;
4773 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF keeps the 32-bit-wide INTEGERSI flavor; any other
   INTEGER combination widens to full INTEGER.  */
4774 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4775 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4776 return X86_64_INTEGERSI_CLASS;
4777 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4778 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4779 return X86_64_INTEGER_CLASS;
4781 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4783 if (class1 == X86_64_X87_CLASS
4784 || class1 == X86_64_X87UP_CLASS
4785 || class1 == X86_64_COMPLEX_X87_CLASS
4786 || class2 == X86_64_X87_CLASS
4787 || class2 == X86_64_X87UP_CLASS
4788 || class2 == X86_64_COMPLEX_X87_CLASS)
4789 return X86_64_MEMORY_CLASS;
4791 /* Rule #6: Otherwise class SSE is used. */
4792 return X86_64_SSE_CLASS;
4795 /* Classify the argument of type TYPE and mode MODE.
4796 CLASSES will be filled by the register class used to pass each word
4797 of the operand. The number of words is returned. In case the parameter
4798 should be passed in memory, 0 is returned. As a special case for zero
4799 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4801 BIT_OFFSET is used internally for handling records and specifies offset
4802 of the offset in bits modulo 256 to avoid overflow cases.
4804 See the x86-64 PS ABI for details.
/* Classify an argument of MODE/TYPE into per-eightbyte register classes
   (CLASSES), following the System V x86-64 psABI.  Returns the number of
   eightbyte words, or 0 for pass-in-memory; zero-sized aggregates yield
   NO_CLASS with count 1.  BIT_OFFSET is the field offset modulo 256 used
   while recursing through records (see the block comment above).
   NOTE(review): this is an elided extract -- recursion base cases,
   returns, and several switch labels are missing between the numbered
   lines; comments below describe only what is visible.  */
4808 classify_argument (enum machine_mode mode, const_tree type,
4809 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4811 HOST_WIDE_INT bytes =
4812 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Number of eightbytes, counting any leading intra-eightbyte offset.  */
4813 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4815 /* Variable sized entities are always passed/returned in memory. */
4819 if (mode != VOIDmode
4820 && targetm.calls.must_pass_in_stack (mode, type))
4823 if (type && AGGREGATE_TYPE_P (type))
4827 enum x86_64_reg_class subclasses[MAX_CLASSES];
4829 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
4833 for (i = 0; i < words; i++)
4834 classes[i] = X86_64_NO_CLASS;
4836 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
4837 signalize memory class, so handle it as special case. */
4840 classes[0] = X86_64_NO_CLASS;
4844 /* Classify each field of record and merge classes. */
4845 switch (TREE_CODE (type))
4848 /* And now merge the fields of structure. */
4849 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4851 if (TREE_CODE (field) == FIELD_DECL)
4855 if (TREE_TYPE (field) == error_mark_node)
4858 /* Bitfields are always classified as integer. Handle them
4859 early, since later code would consider them to be
4860 misaligned integers. */
4861 if (DECL_BIT_FIELD (field))
4863 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4864 i < ((int_bit_position (field) + (bit_offset % 64))
4865 + tree_low_cst (DECL_SIZE (field), 0)
4868 merge_classes (X86_64_INTEGER_CLASS,
/* Non-bitfield: classify the field's own type recursively, then merge
   its subclasses into this record's classes at the field's eightbyte.  */
4873 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4874 TREE_TYPE (field), subclasses,
4875 (int_bit_position (field)
4876 + bit_offset) % 256);
4879 for (i = 0; i < num; i++)
4882 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4884 merge_classes (subclasses[i], classes[i + pos]);
4892 /* Arrays are handled as small records. */
4895 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
4896 TREE_TYPE (type), subclasses, bit_offset);
4900 /* The partial classes are now full classes. */
4901 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
4902 subclasses[0] = X86_64_SSE_CLASS;
4903 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
4904 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element's classification across every eightbyte of
   the array.  */
4906 for (i = 0; i < words; i++)
4907 classes[i] = subclasses[i % num];
4912 case QUAL_UNION_TYPE:
4913 /* Unions are similar to RECORD_TYPE but offset is always 0.
4915 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4917 if (TREE_CODE (field) == FIELD_DECL)
4921 if (TREE_TYPE (field) == error_mark_node)
4924 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4925 TREE_TYPE (field), subclasses,
4929 for (i = 0; i < num; i++)
4930 classes[i] = merge_classes (subclasses[i], classes[i]);
4939 /* Final merger cleanup. */
4940 for (i = 0; i < words; i++)
4942 /* If one class is MEMORY, everything should be passed in
4944 if (classes[i] == X86_64_MEMORY_CLASS)
4947 /* The X86_64_SSEUP_CLASS should be always preceded by
4948 X86_64_SSE_CLASS. */
4949 if (classes[i] == X86_64_SSEUP_CLASS
4950 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
4951 classes[i] = X86_64_SSE_CLASS;
4953 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
4954 if (classes[i] == X86_64_X87UP_CLASS
4955 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
4956 classes[i] = X86_64_SSE_CLASS;
4961 /* Compute alignment needed. We align all types to natural boundaries with
4962 exception of XFmode that is aligned to 64bits. */
4963 if (mode != VOIDmode && mode != BLKmode)
4965 int mode_alignment = GET_MODE_BITSIZE (mode);
4968 mode_alignment = 128;
4969 else if (mode == XCmode)
4970 mode_alignment = 256;
4971 if (COMPLEX_MODE_P (mode))
4972 mode_alignment /= 2;
4973 /* Misaligned fields are always returned in memory. */
4974 if (bit_offset % mode_alignment)
4978 /* for V1xx modes, just use the base mode */
4979 if (VECTOR_MODE_P (mode) && mode != V1DImode
4980 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
4981 mode = GET_MODE_INNER (mode);
4983 /* Classification of atomic types. */
/* NOTE(review): the switch's case labels for the scalar/vector modes
   below are elided; each group assigns classes per the psABI tables.  */
4988 classes[0] = X86_64_SSE_CLASS;
4991 classes[0] = X86_64_SSE_CLASS;
4992 classes[1] = X86_64_SSEUP_CLASS;
5001 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5002 classes[0] = X86_64_INTEGERSI_CLASS;
5004 classes[0] = X86_64_INTEGER_CLASS;
5008 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5015 if (!(bit_offset % 64))
5016 classes[0] = X86_64_SSESF_CLASS;
5018 classes[0] = X86_64_SSE_CLASS;
5021 classes[0] = X86_64_SSEDF_CLASS;
5024 classes[0] = X86_64_X87_CLASS;
5025 classes[1] = X86_64_X87UP_CLASS;
5028 classes[0] = X86_64_SSE_CLASS;
5029 classes[1] = X86_64_SSEUP_CLASS;
5032 classes[0] = X86_64_SSE_CLASS;
5035 classes[0] = X86_64_SSEDF_CLASS;
5036 classes[1] = X86_64_SSEDF_CLASS;
5039 classes[0] = X86_64_COMPLEX_X87_CLASS;
5042 /* This modes is larger than 16 bytes. */
5050 classes[0] = X86_64_AVX_CLASS;
5058 classes[0] = X86_64_SSE_CLASS;
5059 classes[1] = X86_64_SSEUP_CLASS;
5066 classes[0] = X86_64_SSE_CLASS;
5072 gcc_assert (VECTOR_MODE_P (mode));
5077 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5079 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5080 classes[0] = X86_64_INTEGERSI_CLASS;
5082 classes[0] = X86_64_INTEGER_CLASS;
5083 classes[1] = X86_64_INTEGER_CLASS;
5084 return 1 + (bytes > 8);
5088 /* Examine the argument and return set number of register required in each
5089 class. Return 0 iff parameter should be passed in memory. */
/* Count the integer (*INT_NREGS) and SSE (*SSE_NREGS) registers needed to
   pass or return (IN_RETURN) a value of MODE/TYPE.  Returns 0 iff the
   value must go in memory; x87 classes are register-returnable but never
   register-passable.  */
5091 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5092 int *int_nregs, int *sse_nregs)
5094 enum x86_64_reg_class regclass[MAX_CLASSES];
5095 int n = classify_argument (mode, type, regclass, 0);
5101 for (n--; n >= 0; n--)
5102 switch (regclass[n])
5104 case X86_64_INTEGER_CLASS:
5105 case X86_64_INTEGERSI_CLASS:
5108 case X86_64_AVX_CLASS:
5109 case X86_64_SSE_CLASS:
5110 case X86_64_SSESF_CLASS:
5111 case X86_64_SSEDF_CLASS:
5114 case X86_64_NO_CLASS:
5115 case X86_64_SSEUP_CLASS:
5117 case X86_64_X87_CLASS:
5118 case X86_64_X87UP_CLASS:
5122 case X86_64_COMPLEX_X87_CLASS:
/* x87 values can only live in registers when being returned.  */
5123 return in_return ? 2 : 0;
5124 case X86_64_MEMORY_CLASS:
5130 /* Construct container for the argument used by GCC interface. See
5131 FUNCTION_ARG for the detailed description. */
/* Build the rtx (REG or PARALLEL) describing where a value of MODE/TYPE
   lives when passed (or returned, IN_RETURN) in registers, given the
   remaining budgets NINTREGS/NSSEREGS, the integer register list INTREG,
   and the next SSE register ordinal SSE_REGNO.  Emits once-per-compilation
   errors when the ABI demands SSE/x87 registers the user disabled.
   NOTE(review): elided extract -- several returns/braces are missing
   between the numbered lines below.  */
5134 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5135 const_tree type, int in_return, int nintregs, int nsseregs,
5136 const int *intreg, int sse_regno)
5138 /* The following variables hold the static issued_error state. */
5139 static bool issued_sse_arg_error;
5140 static bool issued_sse_ret_error;
5141 static bool issued_x87_ret_error;
5143 enum machine_mode tmpmode;
5145 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5146 enum x86_64_reg_class regclass[MAX_CLASSES];
5150 int needed_sseregs, needed_intregs;
5151 rtx exp[MAX_CLASSES];
5154 n = classify_argument (mode, type, regclass, 0);
5157 if (!examine_argument (mode, type, in_return, &needed_intregs,
5160 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5163 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5164 some less clueful developer tries to use floating-point anyway. */
5165 if (needed_sseregs && !TARGET_SSE)
5169 if (!issued_sse_ret_error)
5171 error ("SSE register return with SSE disabled");
5172 issued_sse_ret_error = true;
5175 else if (!issued_sse_arg_error)
5177 error ("SSE register argument with SSE disabled");
5178 issued_sse_arg_error = true;
5183 /* Likewise, error if the ABI requires us to return values in the
5184 x87 registers and the user specified -mno-80387. */
5185 if (!TARGET_80387 && in_return)
5186 for (i = 0; i < n; i++)
5187 if (regclass[i] == X86_64_X87_CLASS
5188 || regclass[i] == X86_64_X87UP_CLASS
5189 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5191 if (!issued_x87_ret_error)
5193 error ("x87 register return with x87 disabled");
5194 issued_x87_ret_error = true;
5199 /* First construct simple cases. Avoid SCmode, since we want to use
5200 single register to pass this type. */
5201 if (n == 1 && mode != SCmode)
5202 switch (regclass[0])
5204 case X86_64_INTEGER_CLASS:
5205 case X86_64_INTEGERSI_CLASS:
5206 return gen_rtx_REG (mode, intreg[0]);
5207 case X86_64_AVX_CLASS:
5208 case X86_64_SSE_CLASS:
5209 case X86_64_SSESF_CLASS:
5210 case X86_64_SSEDF_CLASS:
5211 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
5212 case X86_64_X87_CLASS:
5213 case X86_64_COMPLEX_X87_CLASS:
5214 return gen_rtx_REG (mode, FIRST_STACK_REG);
5215 case X86_64_NO_CLASS:
5216 /* Zero sized array, struct or class. */
/* Common two-eightbyte shapes that fit a single hard register.  */
5221 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5222 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5223 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5226 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5227 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5228 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5229 && regclass[1] == X86_64_INTEGER_CLASS
5230 && (mode == CDImode || mode == TImode || mode == TFmode)
5231 && intreg[0] + 1 == intreg[1])
5232 return gen_rtx_REG (mode, intreg[0]);
5234 /* Otherwise figure out the entries of the PARALLEL. */
5235 for (i = 0; i < n; i++)
5237 switch (regclass[i])
5239 case X86_64_NO_CLASS:
5241 case X86_64_INTEGER_CLASS:
5242 case X86_64_INTEGERSI_CLASS:
5243 /* Merge TImodes on aligned occasions here too. */
5244 if (i * 8 + 8 > bytes)
5245 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5246 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5250 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5251 if (tmpmode == BLKmode)
5253 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5254 gen_rtx_REG (tmpmode, *intreg),
5258 case X86_64_SSESF_CLASS:
5259 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5260 gen_rtx_REG (SFmode,
5261 SSE_REGNO (sse_regno)),
5265 case X86_64_SSEDF_CLASS:
5266 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5267 gen_rtx_REG (DFmode,
5268 SSE_REGNO (sse_regno)),
5272 case X86_64_SSE_CLASS:
/* A following SSEUP eightbyte means this entry covers 16 bytes.  */
5273 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
5277 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5278 gen_rtx_REG (tmpmode,
5279 SSE_REGNO (sse_regno)),
5281 if (tmpmode == TImode)
5290 /* Empty aligned struct, union or class. */
5294 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5295 for (i = 0; i < nexps; i++)
5296 XVECEXP (ret, 0, i) = exp [i];
5300 /* Update the data in CUM to advance over an argument of mode MODE
5301 and data type TYPE. (TYPE is null for libcalls where that information
5302 may not be available.) */
/* 32-bit helper for function_arg_advance: consume WORDS/BYTES of CUM's
   integer, SSE, or MMX register budget for an argument of MODE/TYPE.
   NOTE(review): the mode switch and budget-exhausted branches are elided
   in this extract.  */
5305 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5306 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-register path: advance word counts in lockstep.  */
5322 cum->words += words;
5323 cum->nregs -= words;
5324 cum->regno += words;
5326 if (cum->nregs <= 0)
5334 if (cum->float_in_sse < 2)
5337 if (cum->float_in_sse < 1)
/* SSE scalar/vector path: one SSE register per (non-aggregate) arg.  */
5355 if (!type || !AGGREGATE_TYPE_P (type))
5357 cum->sse_words += words;
5358 cum->sse_nregs -= 1;
5359 cum->sse_regno += 1;
5360 if (cum->sse_nregs <= 0)
/* MMX vector path, analogous to the SSE path above.  */
5373 if (!type || !AGGREGATE_TYPE_P (type))
5375 cum->mmx_words += words;
5376 cum->mmx_nregs -= 1;
5377 cum->mmx_regno += 1;
5378 if (cum->mmx_nregs <= 0)
/* 64-bit SysV helper for function_arg_advance: charge the argument's
   integer/SSE register needs against CUM, or account WORDS of stack when
   it does not fit in the remaining registers.  */
5389 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5390 tree type, HOST_WIDE_INT words, int named)
5392 int int_nregs, sse_nregs;
5394 /* Unnamed 256bit vector mode parameters are passed on stack. */
5395 if (!named && VALID_AVX256_REG_MODE (mode))
5398 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5399 cum->words += words;
5400 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5402 cum->nregs -= int_nregs;
5403 cum->sse_nregs -= sse_nregs;
5404 cum->regno += int_nregs;
5405 cum->sse_regno += sse_nregs;
5408 cum->words += words;
/* Advance CUM past an argument under the Microsoft x64 ABI.  Only
   1/2/4/8-byte arguments are passed directly; anything else must have
   been converted to pass-by-reference before reaching this point.  */
5412 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5413 HOST_WIDE_INT words)
5415 /* Otherwise, this should be passed indirect. */
5416 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5418 cum->words += words;
/* Dispatcher: update CUM to advance over one argument of MODE/TYPE,
   selecting the MS x64, SysV x86-64 or ia32 helper based on the
   target and the callee's ABI recorded in CUM.  */
5427 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5428 tree type, int named)
5430 HOST_WIDE_INT bytes, words;
/* BLKmode has no meaningful GET_MODE_SIZE; use the type's size.  */
5432 if (mode == BLKmode)
5433 bytes = int_size_in_bytes (type);
5435 bytes = GET_MODE_SIZE (mode);
5436 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5439 mode = type_natural_mode (type);
5441 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5442 function_arg_advance_ms_64 (cum, bytes, words);
5443 else if (TARGET_64BIT)
5444 function_arg_advance_64 (cum, mode, type, words, named);
5446 function_arg_advance_32 (cum, mode, type, bytes, words);
5449 /* Define where to put the arguments to a function.
5450 Value is zero to push the argument on the stack,
5451 or a hard register in which to store the argument.
5453 MODE is the argument's machine mode.
5454 TYPE is the data type of the argument (as a tree).
5455 This is null for libcalls where that information may not be available.
5457 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5458 the preceding args and about the function being called.
5459 NAMED is nonzero if this argument is a named parameter
5460 (otherwise it is an extra parameter matching an ellipsis). */
/* Return the register (or 0 meaning "on the stack") in which a 32-bit
   (ia32 ABI) argument of MODE/TYPE is passed.  Handles regparm/fastcall
   integer registers, SSE/AVX/MMX vector registers, and emits one-time
   warnings when a vector argument is used without the matching ISA.
   NOTE(review): interior lines are elided in this listing.  */
5463 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5464 enum machine_mode orig_mode, tree type,
5465 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* One-shot warning flags, shared across calls.  */
5467 static bool warnedavx, warnedsse, warnedmmx;
5469 /* Avoid the AL settings for the Unix64 ABI. */
5470 if (mode == VOIDmode)
5486 if (words <= cum->nregs)
5488 int regno = cum->regno;
5490 /* Fastcall allocates the first two DWORD (SImode) or
5491 smaller arguments to ECX and EDX if it isn't an
5497 || (type && AGGREGATE_TYPE_P (type)))
5500 /* ECX not EAX is the first allocated register. */
5501 if (regno == AX_REG)
5504 return gen_rtx_REG (mode, regno);
5509 if (cum->float_in_sse < 2)
5512 if (cum->float_in_sse < 1)
5516 /* In 32bit, we pass TImode in xmm registers. */
5523 if (!type || !AGGREGATE_TYPE_P (type))
5525 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5528 warning (0, "SSE vector argument without SSE enabled "
5532 return gen_reg_or_parallel (mode, orig_mode,
5533 cum->sse_regno + FIRST_SSE_REG);
5538 /* In 32bit, we pass OImode in ymm registers. */
5545 if (!type || !AGGREGATE_TYPE_P (type))
5547 if (!TARGET_AVX && !warnedavx && cum->warn_avx)
5550 warning (0, "AVX vector argument without AVX enabled "
5554 return gen_reg_or_parallel (mode, orig_mode,
5555 cum->sse_regno + FIRST_SSE_REG);
5564 if (!type || !AGGREGATE_TYPE_P (type))
5566 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5569 warning (0, "MMX vector argument without MMX enabled "
5573 return gen_reg_or_parallel (mode, orig_mode,
5574 cum->mmx_regno + FIRST_MMX_REG);
/* Return the rtx describing where an x86-64 SysV-ABI argument of
   MODE/TYPE is passed.  A VOIDmode "argument" is the hidden AL value
   carrying the number of SSE registers used by a varargs call.
   NOTE(review): interior lines are elided in this listing.  */
5583 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5584 enum machine_mode orig_mode, tree type, int named)
5586 static bool warnedavx;
5588 /* Handle a hidden AL argument containing number of registers
5589 for varargs x86-64 functions. */
5590 if (mode == VOIDmode)
5591 return GEN_INT (cum->maybe_vaarg
5592 ? (cum->sse_nregs < 0
5593 ? (cum->call_abi == DEFAULT_ABI
5595 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5596 : X64_SSE_REGPARM_MAX))
5611 /* In 64bit, we pass TImode in integer registers and OImode on
5613 if (!type || !AGGREGATE_TYPE_P (type))
5615 if (!TARGET_AVX && !warnedavx && cum->warn_avx)
5618 warning (0, "AVX vector argument without AVX enabled "
5623 /* Unnamed 256bit vector mode parameters are passed on stack. */
5629 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5631 &x86_64_int_parameter_registers [cum->regno],
/* Return the rtx for an argument under the Microsoft x64 ABI.  Each of
   the first four slots maps to one fixed integer register; SF/DFmode
   values use the corresponding SSE register instead, and unnamed float
   args are passed in BOTH (a PARALLEL of the SSE and integer reg).
   NOTE(review): interior lines are elided in this listing.  */
5636 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5637 enum machine_mode orig_mode, int named,
5638 HOST_WIDE_INT bytes)
5642 /* Avoid the AL settings for the Unix64 ABI. */
5643 if (mode == VOIDmode)
5646 /* If we've run out of registers, it goes on the stack. */
5647 if (cum->nregs == 0)
5650 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5652 /* Only floating point modes are passed in anything but integer regs. */
5653 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5656 regno = cum->regno + FIRST_SSE_REG;
5661 /* Unnamed floating parameters are passed in both the
5662 SSE and integer registers. */
5663 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5664 t2 = gen_rtx_REG (mode, regno);
5665 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5666 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5667 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5670 /* Handle aggregated types passed in register. */
5671 if (orig_mode == BLKmode)
/* Small aggregates are passed as SImode/DImode scalars.  */
5673 if (bytes > 0 && bytes <= 8)
5674 mode = (bytes > 4 ? DImode : SImode);
5675 if (mode == BLKmode)
5679 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Dispatcher for FUNCTION_ARG: compute the argument's size in bytes
   and words, canonicalize vector types to their natural mode, then
   delegate to the MS x64, SysV x86-64 or ia32 helper.  */
5683 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5684 tree type, int named)
5686 enum machine_mode mode = omode;
5687 HOST_WIDE_INT bytes, words;
5689 if (mode == BLKmode)
5690 bytes = int_size_in_bytes (type);
5692 bytes = GET_MODE_SIZE (mode);
5693 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5695 /* To simplify the code below, represent vector types with a vector mode
5696 even if MMX/SSE are not active. */
5697 if (type && TREE_CODE (type) == VECTOR_TYPE)
5698 mode = type_natural_mode (type);
5700 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5701 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5702 else if (TARGET_64BIT)
5703 return function_arg_64 (cum, mode, omode, type, named);
5705 return function_arg_32 (cum, mode, omode, type, bytes, words);
5708 /* A C expression that indicates when an argument must be passed by
5709 reference. If nonzero for an argument, a copy of that argument is
5710 made in memory and a pointer to the argument is passed instead of
5711 the argument itself. The pointer is passed in whatever way is
5712 appropriate for passing a pointer to that type. */
/* TARGET_PASS_BY_REFERENCE hook.  Under the Microsoft x64 ABI, arrays
   and aggregates whose size is not 1, 2, 4 or 8 bytes are passed by
   reference; under SysV x86-64, only variable-sized types are.
   NOTE(review): interior lines are elided in this listing.  */
5715 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5716 enum machine_mode mode ATTRIBUTE_UNUSED,
5717 const_tree type, bool named ATTRIBUTE_UNUSED)
5719 /* See Windows x64 Software Convention. */
5720 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5722 int msize = (int) GET_MODE_SIZE (mode);
5725 /* Arrays are passed by reference. */
5726 if (TREE_CODE (type) == ARRAY_TYPE)
5729 if (AGGREGATE_TYPE_P (type))
5731 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
5732 are passed by reference. */
5733 msize = int_size_in_bytes (type);
5737 /* __m128 is passed by reference. */
5739 case 1: case 2: case 4: case 8:
/* Size of -1 means variable-sized: must go by reference.  */
5745 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
5751 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* Return true if TYPE (or any field of it, recursively) is a value
   that requires 128-bit alignment for 32-bit argument passing.
   NOTE(review): interior lines are elided in this listing.  */
5754 contains_aligned_value_p (tree type)
5756 enum machine_mode mode = TYPE_MODE (type);
5757 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
5761 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
5763 if (TYPE_ALIGN (type) < 128)
5766 if (AGGREGATE_TYPE_P (type))
5768 /* Walk the aggregates recursively. */
5769 switch (TREE_CODE (type))
5773 case QUAL_UNION_TYPE:
5777 /* Walk all the structure fields. */
5778 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5780 if (TREE_CODE (field) == FIELD_DECL
5781 && contains_aligned_value_p (TREE_TYPE (field)))
5788 /* Just for use if some languages pass arrays by value. */
5789 if (contains_aligned_value_p (TREE_TYPE (type)))
5800 /* Gives the alignment boundary, in bits, of an argument with the
5801 specified mode and type. */
/* Return the alignment boundary, in bits, for an argument of
   MODE/TYPE, clamped to [PARM_BOUNDARY, BIGGEST_ALIGNMENT].
   NOTE(review): interior lines are elided in this listing.  */
5804 ix86_function_arg_boundary (enum machine_mode mode, tree type)
5809 /* Since canonical type is used for call, we convert it to
5810 canonical type if needed. */
5811 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
5812 type = TYPE_CANONICAL (type);
5813 align = TYPE_ALIGN (type);
5816 align = GET_MODE_ALIGNMENT (mode);
5817 if (align < PARM_BOUNDARY)
5818 align = PARM_BOUNDARY;
5819 /* In 32bit, only _Decimal128 and __float128 are aligned to their
5820 natural boundaries. */
5821 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
5823 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
5824 make an exception for SSE modes since these require 128bit
5827 The handling here differs from field_alignment. ICC aligns MMX
5828 arguments to 4 byte boundaries, while structure fields are aligned
5829 to 8 byte boundaries. */
5832 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
5833 align = PARM_BOUNDARY;
5837 if (!contains_aligned_value_p (type))
5838 align = PARM_BOUNDARY;
/* Never claim more than the platform's maximum alignment.  */
5841 if (align > BIGGEST_ALIGNMENT)
5842 align = BIGGEST_ALIGNMENT;
5846 /* Return true if N is a possible register number of function value. */
/* Return true if hard register REGNO can hold a function return value.
   NOTE(review): interior lines are elided in this listing.  */
5849 ix86_function_value_regno_p (int regno)
5856 case FIRST_FLOAT_REG:
5857 /* TODO: The function should depend on current function ABI but
5858 builtins.c would need updating then. Therefore we use the
/* MS x64 never returns in x87 registers.  */
5860 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
5862 return TARGET_FLOAT_RETURNS_IN_80387;
5868 if (TARGET_MACHO || TARGET_64BIT)
5876 /* Define how to find the value returned by a function.
5877 VALTYPE is the data type of the value (as a tree).
5878 If the precise function being called is known, FUNC is its FUNCTION_DECL;
5879 otherwise, FUNC is 0. */
/* Pick the return register for a value of MODE under the ia32 ABI:
   %mm0 for 8-byte vectors, %xmm0 for TImode/16-byte vectors, %st(0)
   for x87 floats, otherwise %eax — with an SSE override for SF/DF
   when sseregparm / SSE math applies.  */
5882 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
5883 const_tree fntype, const_tree fn)
5887 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
5888 we normally prevent this case when mmx is not available. However
5889 some ABIs may require the result to be returned like DImode. */
5890 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5891 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
5893 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
5894 we prevent this case when sse is not available. However some ABIs
5895 may require the result to be returned like integer TImode. */
5896 else if (mode == TImode
5897 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
5898 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
5900 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
5901 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
5902 regno = FIRST_FLOAT_REG;
5904 /* Most things go in %eax. */
5907 /* Override FP return register with %xmm0 for local functions when
5908 SSE math is enabled or for functions with sseregparm attribute. */
5909 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
5911 int sse_level = ix86_function_sseregparm (fntype, fn, false);
5912 if ((sse_level >= 1 && mode == SFmode)
5913 || (sse_level == 2 && mode == DFmode))
5914 regno = FIRST_SSE_REG;
5917 return gen_rtx_REG (orig_mode, regno);
/* Pick the return location under the x86-64 SysV ABI.  Libcalls (no
   type node) get a fixed register; typed values are classified via
   construct_container, falling back to %rax for zero-sized structs.
   NOTE(review): interior lines are elided in this listing.  */
5921 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
5926 /* Handle libcalls, which don't provide a type node. */
5927 if (valtype == NULL)
5939 return gen_rtx_REG (mode, FIRST_SSE_REG);
5942 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
5946 return gen_rtx_REG (mode, AX_REG);
5950 ret = construct_container (mode, orig_mode, valtype, 1,
5951 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
5952 x86_64_int_return_registers, 0);
5954 /* For zero sized structures, construct_container returns NULL, but we
5955 need to keep rest of compiler happy by returning meaningful value. */
5957 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Pick the return register under the Microsoft x64 ABI: %xmm0 for
   16-byte scalar/vector values and for SF/DFmode, %rax otherwise.
   NOTE(review): interior lines are elided in this listing.  */
5963 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
5965 unsigned int regno = AX_REG;
5969 switch (GET_MODE_SIZE (mode))
5972 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
5973 && !COMPLEX_MODE_P (mode))
5974 regno = FIRST_SSE_REG;
5978 if (mode == SFmode || mode == DFmode)
5979 regno = FIRST_SSE_REG;
5985 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value:
   resolve the function type from FNTYPE_OR_DECL (a decl or a type),
   then dispatch on the callee's ABI.  */
5989 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
5990 enum machine_mode orig_mode, enum machine_mode mode)
5992 const_tree fn, fntype;
5995 if (fntype_or_decl && DECL_P (fntype_or_decl))
5996 fn = fntype_or_decl;
5997 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
5999 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6000 return function_value_ms_64 (orig_mode, mode);
6001 else if (TARGET_64BIT)
6002 return function_value_64 (orig_mode, mode, valtype);
6004 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: return the rtx for a value of VALTYPE
   returned by the given function (or function type).  */
6008 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6009 bool outgoing ATTRIBUTE_UNUSED)
6011 enum machine_mode mode, orig_mode;
6013 orig_mode = TYPE_MODE (valtype);
6014 mode = type_natural_mode (valtype);
6015 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* Return the rtx for a libcall result of MODE (no type information).  */
6019 ix86_libcall_value (enum machine_mode mode)
6021 return ix86_function_value_1 (NULL, NULL, mode, mode);
6024 /* Return true iff type is returned in memory. */
/* Return nonzero if a value of TYPE/MODE is returned in memory under
   the ia32 ABI.  NOTE(review): interior lines are elided here.  */
6026 static int ATTRIBUTE_UNUSED
6027 return_in_memory_32 (const_tree type, enum machine_mode mode)
6031 if (mode == BLKmode)
6034 size = int_size_in_bytes (type);
6036 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6039 if (VECTOR_MODE_P (mode) || mode == TImode)
6041 /* User-created vectors small enough to fit in EAX. */
6045 /* MMX/3dNow values are returned in MM0,
6046 except when it doesn't exist. */
6048 return (TARGET_MMX ? 0 : 1);
6050 /* SSE values are returned in XMM0, except when it doesn't exist. */
6052 return (TARGET_SSE ? 0 : 1);
/* Return nonzero if TYPE/MODE cannot be classified into registers
   under the x86-64 SysV ABI and so must be returned in memory.  */
6063 static int ATTRIBUTE_UNUSED
6064 return_in_memory_64 (const_tree type, enum machine_mode mode)
6066 int needed_intregs, needed_sseregs;
6067 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Return nonzero if TYPE/MODE is returned in memory under the
   Microsoft x64 ABI: anything not 16-byte __m128-like and not of
   size 1, 2, 4 or 8 goes in memory.  */
6070 static int ATTRIBUTE_UNUSED
6071 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6073 HOST_WIDE_INT size = int_size_in_bytes (type);
6075 /* __m128 is returned in xmm0. */
6076 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6077 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6080 /* Otherwise, the size must be exactly in [1248]. */
6081 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: dispatch to the subtarget override or
   the MS x64 / SysV x86-64 / ia32 helper as appropriate.  */
6085 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6087 #ifdef SUBTARGET_RETURN_IN_MEMORY
6088 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6090 const enum machine_mode mode = type_natural_mode (type);
6092 if (TARGET_64BIT_MS_ABI)
6093 return return_in_memory_ms_64 (type, mode);
6094 else if (TARGET_64BIT)
6095 return return_in_memory_64 (type, mode);
6097 return return_in_memory_32 (type, mode);
6101 /* Return false iff TYPE is returned in memory. This version is used
6102 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6103 but differs notably in that when MMX is available, 8-byte vectors
6104 are returned in memory, rather than in MMX registers. */
/* Solaris 10 variant of ix86_return_in_memory; differs for 8-byte
   vectors (see the comment block above).
   NOTE(review): interior lines are elided in this listing.  */
6107 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6110 enum machine_mode mode = type_natural_mode (type);
6113 return return_in_memory_64 (type, mode);
6115 if (mode == BLKmode)
6118 size = int_size_in_bytes (type);
6120 if (VECTOR_MODE_P (mode))
6122 /* Return in memory only if MMX registers *are* available. This
6123 seems backwards, but it is consistent with the existing
6130 else if (mode == TImode)
6132 else if (mode == XFmode)
6138 /* When returning SSE vector types, we have a choice of either
6139 (1) being abi incompatible with a -march switch, or
6140 (2) generating an error.
6141 Given no good solution, I think the safest thing is one warning.
6142 The user won't be able to use -Werror, but....
6144 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6145 called in response to actually generating a caller or callee that
6146 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6147 via aggregate_value_p for general type probing from tree-ssa. */
/* STRUCT_VALUE_RTX hook; also emits the one-time SSE/MMX "vector
   return without ISA enabled" warnings (see comment block above the
   function in the original source).
   NOTE(review): interior lines are elided in this listing.  */
6150 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6152 static bool warnedsse, warnedmmx;
6154 if (!TARGET_64BIT && type)
6156 /* Look at the return type of the function, not the function type. */
6157 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6159 if (!TARGET_SSE && !warnedsse)
6162 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6165 warning (0, "SSE vector return without SSE enabled "
6170 if (!TARGET_MMX && !warnedmmx)
6172 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6175 warning (0, "MMX vector return without MMX enabled "
6185 /* Create the va_list data type. */
6187 /* Returns the calling convention specific va_list date type.
6188 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
/* Build the va_list type for ABI: a plain char* for ia32 and MS x64,
   or the four-field SysV record { gp_offset, fp_offset,
   overflow_arg_area, reg_save_area } wrapped in a one-element array.  */
6191 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6193 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6195 /* For i386 we use plain pointer to argument area. */
6196 if (!TARGET_64BIT || abi == MS_ABI)
6197 return build_pointer_type (char_type_node);
6199 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6200 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6202 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6203 unsigned_type_node);
6204 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6205 unsigned_type_node);
6206 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6208 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Record the counter fields for va_list optimizations.  */
6211 va_list_gpr_counter_field = f_gpr;
6212 va_list_fpr_counter_field = f_fpr;
6214 DECL_FIELD_CONTEXT (f_gpr) = record;
6215 DECL_FIELD_CONTEXT (f_fpr) = record;
6216 DECL_FIELD_CONTEXT (f_ovf) = record;
6217 DECL_FIELD_CONTEXT (f_sav) = record;
/* Chain the fields into the record in declaration order.  */
6219 TREE_CHAIN (record) = type_decl;
6220 TYPE_NAME (record) = type_decl;
6221 TYPE_FIELDS (record) = f_gpr;
6222 TREE_CHAIN (f_gpr) = f_fpr;
6223 TREE_CHAIN (f_fpr) = f_ovf;
6224 TREE_CHAIN (f_ovf) = f_sav;
6226 layout_type (record);
6228 /* The correct type is an array type of one element. */
6229 return build_array_type (record, build_index_type (size_zero_node));
6232 /* Setup the builtin va_list data type and for 64-bit the additional
6233 calling convention specific va_list data types. */
/* Build the default va_list type and also the ABI-specific
   sysv_va_list_type_node / ms_va_list_type_node variants.
   NOTE(review): interior lines are elided in this listing.  */
6236 ix86_build_builtin_va_list (void)
6238 tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
6240 /* Initialize abi specific va_list builtin types. */
6244 if (DEFAULT_ABI == MS_ABI)
6246 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
/* Use a distinct variant so the two ABIs' types never compare equal.  */
6247 if (TREE_CODE (t) != RECORD_TYPE)
6248 t = build_variant_type_copy (t);
6249 sysv_va_list_type_node = t;
6254 if (TREE_CODE (t) != RECORD_TYPE)
6255 t = build_variant_type_copy (t);
6256 sysv_va_list_type_node = t;
6258 if (DEFAULT_ABI != MS_ABI)
6260 t = ix86_build_builtin_va_list_abi (MS_ABI);
6261 if (TREE_CODE (t) != RECORD_TYPE)
6262 t = build_variant_type_copy (t);
6263 ms_va_list_type_node = t;
6268 if (TREE_CODE (t) != RECORD_TYPE)
6269 t = build_variant_type_copy (t);
6270 ms_va_list_type_node = t;
6277 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* Emit prologue code saving the incoming integer argument registers
   and (via the sse_prologue_save computed-jump template) the SSE
   argument registers into the register save area, for SysV x86-64
   varargs functions.
   NOTE(review): interior lines are elided in this listing.  */
6280 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6289 int regparm = ix86_regparm;
6291 if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
6292 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
/* Nothing to do if va_arg never touches the saved registers.  */
6294 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
6297 /* Indicate to allocate space on the stack for varargs save area. */
6298 ix86_save_varrargs_registers = 1;
6300 save_area = frame_pointer_rtx;
6301 set = get_varargs_alias_set ();
/* Spill the remaining unnamed integer argument registers.  */
6303 for (i = cum->regno;
6305 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6308 mem = gen_rtx_MEM (Pmode,
6309 plus_constant (save_area, i * UNITS_PER_WORD));
6310 MEM_NOTRAP_P (mem) = 1;
6311 set_mem_alias_set (mem, set);
6312 emit_move_insn (mem, gen_rtx_REG (Pmode,
6313 x86_64_int_parameter_registers[i]));
6316 if (cum->sse_nregs && cfun->va_list_fpr_size)
6318 /* Now emit code to save SSE registers. The AX parameter contains number
6319 of SSE parameter registers used to call this function. We use
6320 sse_prologue_save insn template that produces computed jump across
6321 SSE saves. We need some preparation work to get this working. */
6323 label = gen_label_rtx ();
6324 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6326 /* Compute address to jump to :
6327 label - eax*4 + nnamed_sse_arguments*4 Or
6328 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6329 tmp_reg = gen_reg_rtx (Pmode);
6330 nsse_reg = gen_reg_rtx (Pmode);
6331 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6332 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6333 gen_rtx_MULT (Pmode, nsse_reg,
6336 /* vmovaps is one byte longer than movaps. */
6338 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6339 gen_rtx_PLUS (Pmode, tmp_reg,
6345 gen_rtx_CONST (DImode,
6346 gen_rtx_PLUS (DImode,
6348 GEN_INT (cum->sse_regno
6349 * (TARGET_AVX ? 5 : 4)))));
6351 emit_move_insn (nsse_reg, label_ref);
6352 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6354 /* Compute address of memory block we save into. We always use pointer
6355 pointing 127 bytes after first byte to store - this is needed to keep
6356 instruction size limited by 4 bytes (5 bytes for AVX) with one
6357 byte displacement. */
6358 tmp_reg = gen_reg_rtx (Pmode);
6359 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6360 plus_constant (save_area,
6361 8 * X86_64_REGPARM_MAX + 127)));
6362 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6363 MEM_NOTRAP_P (mem) = 1;
6364 set_mem_alias_set (mem, set);
6365 set_mem_align (mem, BITS_PER_WORD);
6367 /* And finally do the dirty job! */
6368 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6369 GEN_INT (cum->sse_regno), label));
/* MS x64 varargs prologue: spill each remaining unnamed integer
   argument register into its fixed home slot in the caller-allocated
   shadow space above the incoming arguments.  */
6374 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6376 alias_set_type set = get_varargs_alias_set ();
6379 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6383 mem = gen_rtx_MEM (Pmode,
6384 plus_constant (virtual_incoming_args_rtx,
6385 i * UNITS_PER_WORD));
6386 MEM_NOTRAP_P (mem) = 1;
6387 set_mem_alias_set (mem, set);
6389 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6390 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: advance a copy of CUM past the
   last named argument, then dispatch to the ABI-specific save-area
   setup.  NOTE(review): interior lines are elided in this listing.  */
6395 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6396 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6399 CUMULATIVE_ARGS next_cum;
6402 /* This argument doesn't appear to be used anymore. Which is good,
6403 because the old code here didn't suppress rtl generation. */
6404 gcc_assert (!no_rtl);
6409 fntype = TREE_TYPE (current_function_decl);
6411 /* For varargs, we do not want to skip the dummy va_dcl argument.
6412 For stdargs, we do want to skip the last named argument. */
6414 if (stdarg_p (fntype))
6415 function_arg_advance (&next_cum, mode, type, 1);
6417 if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
6418 setup_incoming_varargs_ms_64 (&next_cum);
6420 setup_incoming_varargs_64 (&next_cum);
6423 /* Checks if TYPE is of kind va_list char *. */
/* Return true if TYPE is the plain char* flavor of va_list (always
   true for 32-bit; for 64-bit, true when its canonical type is the
   MS va_list).  NOTE(review): interior lines are elided here.  */
6426 is_va_list_char_pointer (tree type)
6430 /* For 32-bit it is always true. */
6433 canonic = ix86_canonical_va_list_type (type);
6434 return (canonic == ms_va_list_type_node
6435 || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
6438 /* Implement va_start. */
/* Expand va_start: initialize the SysV x86-64 va_list record from the
   current function's argument counters (gp_offset, fp_offset), the
   overflow area pointer and the register save area.  32-bit and
   char*-style va_lists fall back to the generic expander.
   NOTE(review): interior lines are elided in this listing.  */
6441 ix86_va_start (tree valist, rtx nextarg)
6443 HOST_WIDE_INT words, n_gpr, n_fpr;
6444 tree f_gpr, f_fpr, f_ovf, f_sav;
6445 tree gpr, fpr, ovf, sav, t;
6448 /* Only 64bit target needs something special. */
6449 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6451 std_expand_builtin_va_start (valist, nextarg);
/* Pull the four fields out of the va_list record type.  */
6455 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6456 f_fpr = TREE_CHAIN (f_gpr);
6457 f_ovf = TREE_CHAIN (f_fpr);
6458 f_sav = TREE_CHAIN (f_ovf);
6460 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6461 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6462 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6463 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6464 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6466 /* Count number of gp and fp argument registers used. */
6467 words = crtl->args.info.words;
6468 n_gpr = crtl->args.info.regno;
6469 n_fpr = crtl->args.info.sse_regno;
6471 if (cfun->va_list_gpr_size)
6473 type = TREE_TYPE (gpr);
6474 t = build2 (MODIFY_EXPR, type,
6475 gpr, build_int_cst (type, n_gpr * 8));
6476 TREE_SIDE_EFFECTS (t) = 1;
6477 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6480 if (cfun->va_list_fpr_size)
6482 type = TREE_TYPE (fpr);
6483 t = build2 (MODIFY_EXPR, type, fpr,
6484 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6485 TREE_SIDE_EFFECTS (t) = 1;
6486 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6489 /* Find the overflow area. */
6490 type = TREE_TYPE (ovf);
6491 t = make_tree (type, crtl->args.internal_arg_pointer);
6493 t = build2 (POINTER_PLUS_EXPR, type, t,
6494 size_int (words * UNITS_PER_WORD));
6495 t = build2 (MODIFY_EXPR, type, ovf, t);
6496 TREE_SIDE_EFFECTS (t) = 1;
6497 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6499 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
6501 /* Find the register save area.
6502 Prologue of the function save it right above stack frame. */
6503 type = TREE_TYPE (sav);
6504 t = make_tree (type, frame_pointer_rtx);
6505 t = build2 (MODIFY_EXPR, type, sav, t);
6506 TREE_SIDE_EFFECTS (t) = 1;
6507 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6511 /* Implement va_arg. */
/* Gimplify va_arg for the SysV x86-64 ABI: try to fetch the value from
   the register save area (integer and/or SSE portions, possibly via a
   temporary when the pieces are not contiguous), falling back to the
   overflow (stack) area, with on-stack alignment handling.
   NOTE(review): many interior lines are elided in this listing; the
   visible code is a fragment of the full function.  */
6514 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6517 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6518 tree f_gpr, f_fpr, f_ovf, f_sav;
6519 tree gpr, fpr, ovf, sav, t;
6521 tree lab_false, lab_over = NULL_TREE;
6526 enum machine_mode nat_mode;
6529 /* Only 64bit target needs something special. */
6530 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6531 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* Pull the four fields out of the va_list record type.  */
6533 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6534 f_fpr = TREE_CHAIN (f_gpr);
6535 f_ovf = TREE_CHAIN (f_fpr);
6536 f_sav = TREE_CHAIN (f_ovf);
6538 valist = build_va_arg_indirect_ref (valist);
6539 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6540 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6541 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6542 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Pass-by-reference arguments are fetched as pointers.  */
6544 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6546 type = build_pointer_type (type);
6547 size = int_size_in_bytes (type);
6548 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6550 nat_mode = type_natural_mode (type);
6559 /* Unnamed 256bit vector mode parameters are passed on stack. */
6560 if (ix86_cfun_abi () == SYSV_ABI)
6567 container = construct_container (nat_mode, TYPE_MODE (type),
6568 type, 0, X86_64_REGPARM_MAX,
6569 X86_64_SSE_REGPARM_MAX, intreg,
6574 /* Pull the value out of the saved registers. */
6576 addr = create_tmp_var (ptr_type_node, "addr");
6577 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6581 int needed_intregs, needed_sseregs;
6583 tree int_addr, sse_addr;
6585 lab_false = create_artificial_label ();
6586 lab_over = create_artificial_label ();
6588 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary is needed when over-aligned pieces cannot be read
   directly out of the save area.  */
6590 need_temp = (!REG_P (container)
6591 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6592 || TYPE_ALIGN (type) > 128));
6594 /* In case we are passing structure, verify that it is consecutive block
6595 on the register save area. If not we need to do moves. */
6596 if (!need_temp && !REG_P (container))
6598 /* Verify that all registers are strictly consecutive */
6599 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6603 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6605 rtx slot = XVECEXP (container, 0, i);
6606 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6607 || INTVAL (XEXP (slot, 1)) != i * 16)
6615 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6617 rtx slot = XVECEXP (container, 0, i);
6618 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6619 || INTVAL (XEXP (slot, 1)) != i * 8)
6631 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6632 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6633 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6634 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6637 /* First ensure that we fit completely in registers. */
6640 t = build_int_cst (TREE_TYPE (gpr),
6641 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6642 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6643 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6644 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6645 gimplify_and_add (t, pre_p);
6649 t = build_int_cst (TREE_TYPE (fpr),
6650 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6651 + X86_64_REGPARM_MAX * 8);
6652 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6653 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6654 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6655 gimplify_and_add (t, pre_p);
6658 /* Compute index to start of area used for integer regs. */
6661 /* int_addr = gpr + sav; */
6662 t = fold_convert (sizetype, gpr);
6663 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6664 gimplify_assign (int_addr, t, pre_p);
6668 /* sse_addr = fpr + sav; */
6669 t = fold_convert (sizetype, fpr);
6670 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6671 gimplify_assign (sse_addr, t, pre_p);
/* Non-contiguous pieces: copy each register slot into a temporary.  */
6676 tree temp = create_tmp_var (type, "va_arg_tmp");
6679 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6680 gimplify_assign (addr, t, pre_p);
6682 for (i = 0; i < XVECLEN (container, 0); i++)
6684 rtx slot = XVECEXP (container, 0, i);
6685 rtx reg = XEXP (slot, 0);
6686 enum machine_mode mode = GET_MODE (reg);
6687 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6688 tree addr_type = build_pointer_type (piece_type);
6691 tree dest_addr, dest;
6693 if (SSE_REGNO_P (REGNO (reg)))
6695 src_addr = sse_addr;
6696 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
6700 src_addr = int_addr;
6701 src_offset = REGNO (reg) * 8;
6703 src_addr = fold_convert (addr_type, src_addr);
6704 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
6705 size_int (src_offset));
6706 src = build_va_arg_indirect_ref (src_addr);
6708 dest_addr = fold_convert (addr_type, addr);
6709 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
6710 size_int (INTVAL (XEXP (slot, 1))));
6711 dest = build_va_arg_indirect_ref (dest_addr);
6713 gimplify_assign (dest, src, pre_p);
/* Advance the gp_offset / fp_offset counters past what we consumed.  */
6719 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
6720 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
6721 gimplify_assign (gpr, t, pre_p);
6726 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
6727 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
6728 gimplify_assign (fpr, t, pre_p);
6731 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
6733 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
6736 /* ... otherwise out of the overflow area. */
6738 /* When we align parameter on stack for caller, if the parameter
6739 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
6740 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
6741 here with caller. */
6742 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
6743 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
6744 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
6746 /* Care for on-stack alignment if needed. */
6747 if (arg_boundary <= 64
6748 || integer_zerop (TYPE_SIZE (type)))
/* Round the overflow pointer up to the required alignment.  */
6752 HOST_WIDE_INT align = arg_boundary / 8;
6753 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
6754 size_int (align - 1));
6755 t = fold_convert (sizetype, t);
6756 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6758 t = fold_convert (TREE_TYPE (ovf), t);
6760 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
6761 gimplify_assign (addr, t, pre_p);
6763 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
6764 size_int (rsize * UNITS_PER_WORD));
6765 gimplify_assign (unshare_expr (ovf), t, pre_p);
6768 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
6770 ptrtype = build_pointer_type (type);
6771 addr = fold_convert (ptrtype, addr);
/* Extra dereference for pass-by-reference arguments.  */
6774 addr = build_va_arg_indirect_ref (addr);
6775 return build_va_arg_indirect_ref (addr);
6778 /* Return nonzero if OPNUM's MEM should be matched
6779 in movabs* patterns. */
6782 ix86_check_movabs (rtx insn, int opnum)
6786 set = PATTERN (insn);
6787 if (GET_CODE (set) == PARALLEL)
6788 set = XVECEXP (set, 0, 0);
6789 gcc_assert (GET_CODE (set) == SET);
6790 mem = XEXP (set, opnum);
6791 while (GET_CODE (mem) == SUBREG)
6792 mem = SUBREG_REG (mem);
6793 gcc_assert (MEM_P (mem));
6794 return (volatile_ok || !MEM_VOLATILE_P (mem));
6797 /* Initialize the table of extra 80387 mathematical constants. */
6800 init_ext_80387_constants (void)
6802 static const char * cst[5] =
6804 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
6805 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
6806 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
6807 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
6808 "3.1415926535897932385128089594061862044", /* 4: fldpi */
6812 for (i = 0; i < 5; i++)
6814 real_from_string (&ext_80387_constants_table[i], cst[i]);
6815 /* Ensure each constant is rounded to XFmode precision. */
6816 real_convert (&ext_80387_constants_table[i],
6817 XFmode, &ext_80387_constants_table[i]);
6820 ext_80387_constants_init = 1;
6823 /* Return true if the constant is something that can be loaded with
6824 a special instruction. */
6827 standard_80387_constant_p (rtx x)
6829 enum machine_mode mode = GET_MODE (x);
6833 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
6836 if (x == CONST0_RTX (mode))
6838 if (x == CONST1_RTX (mode))
6841 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6843 /* For XFmode constants, try to find a special 80387 instruction when
6844 optimizing for size or on those CPUs that benefit from them. */
6846 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
6850 if (! ext_80387_constants_init)
6851 init_ext_80387_constants ();
6853 for (i = 0; i < 5; i++)
6854 if (real_identical (&r, &ext_80387_constants_table[i]))
6858 /* Load of the constant -0.0 or -1.0 will be split as
6859 fldz;fchs or fld1;fchs sequence. */
6860 if (real_isnegzero (&r))
6862 if (real_identical (&r, &dconstm1))
6868 /* Return the opcode of the special instruction to be used to load
6872 standard_80387_constant_opcode (rtx x)
6874 switch (standard_80387_constant_p (x))
6898 /* Return the CONST_DOUBLE representing the 80387 constant that is
6899 loaded by the specified special instruction. The argument IDX
6900 matches the return value from standard_80387_constant_p. */
6903 standard_80387_constant_rtx (int idx)
6907 if (! ext_80387_constants_init)
6908 init_ext_80387_constants ();
6924 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
6928 /* Return 1 if mode is a valid mode for sse. */
6930 standard_sse_mode_p (enum machine_mode mode)
6947 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
6948 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
6949 modes and AVX is enabled. */
6952 standard_sse_constant_p (rtx x)
6954 enum machine_mode mode = GET_MODE (x);
6956 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
6958 if (vector_all_ones_operand (x, mode))
6960 if (standard_sse_mode_p (mode))
6961 return TARGET_SSE2 ? 2 : -2;
6962 else if (VALID_AVX256_REG_MODE (mode))
6963 return TARGET_AVX ? 3 : -3;
6969 /* Return the opcode of the special instruction to be used to load
6973 standard_sse_constant_opcode (rtx insn, rtx x)
6975 switch (standard_sse_constant_p (x))
6978 switch (get_attr_mode (insn))
6981 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
6983 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
6985 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
6987 return "vxorps\t%x0, %x0, %x0";
6989 return "vxorpd\t%x0, %x0, %x0";
6991 return "vpxor\t%x0, %x0, %x0";
6997 switch (get_attr_mode (insn))
7002 return "vpcmpeqd\t%0, %0, %0";
7008 return "pcmpeqd\t%0, %0";
7013 /* Returns 1 if OP contains a symbol reference */
7016 symbolic_reference_mentioned_p (rtx op)
7021 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7024 fmt = GET_RTX_FORMAT (GET_CODE (op));
7025 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7031 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7032 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7036 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7043 /* Return 1 if it is appropriate to emit `ret' instructions in the
7044 body of a function. Do this only if the epilogue is simple, needing a
7045 couple of insns. Prior to reloading, we can't tell how many registers
7046 must be saved, so return 0 then. Return 0 if there is no frame
7047 marker to de-allocate. */
7050 ix86_can_use_return_insn_p (void)
7052 struct ix86_frame frame;
7054 if (! reload_completed || frame_pointer_needed)
7057 /* Don't allow more than 32 pop, since that's all we can do
7058 with one instruction. */
7059 if (crtl->args.pops_args
7060 && crtl->args.size >= 32768)
7063 ix86_compute_frame_layout (&frame);
7064 return frame.to_allocate == 0 && frame.nregs == 0;
7067 /* Value should be nonzero if functions must have frame pointers.
7068 Zero means the frame pointer need not be set up (and parms may
7069 be accessed via the stack pointer) in functions that seem suitable. */
7072 ix86_frame_pointer_required (void)
7074 /* If we accessed previous frames, then the generated code expects
7075 to be able to access the saved ebp value in our frame. */
7076 if (cfun->machine->accesses_prev_frame)
7079 /* Several x86 os'es need a frame pointer for other reasons,
7080 usually pertaining to setjmp. */
7081 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7084 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7085 the frame pointer by default. Turn it back on now if we've not
7086 got a leaf function. */
7087 if (TARGET_OMIT_LEAF_FRAME_POINTER
7088 && (!current_function_is_leaf
7089 || ix86_current_function_calls_tls_descriptor))
7098 /* Record that the current function accesses previous call frames. */
7101 ix86_setup_frame_addresses (void)
7103 cfun->machine->accesses_prev_frame = 1;
7106 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7107 # define USE_HIDDEN_LINKONCE 1
7109 # define USE_HIDDEN_LINKONCE 0
7112 static int pic_labels_used;
7114 /* Fills in the label name that should be used for a pc thunk for
7115 the given register. */
7118 get_pc_thunk_name (char name[32], unsigned int regno)
7120 gcc_assert (!TARGET_64BIT);
7122 if (USE_HIDDEN_LINKONCE)
7123 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7125 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7129 /* This function generates code for -fpic that loads %ebx with
7130 the return address of the caller and then returns. */
7133 ix86_file_end (void)
7138 for (regno = 0; regno < 8; ++regno)
7142 if (! ((pic_labels_used >> regno) & 1))
7145 get_pc_thunk_name (name, regno);
7150 switch_to_section (darwin_sections[text_coal_section]);
7151 fputs ("\t.weak_definition\t", asm_out_file);
7152 assemble_name (asm_out_file, name);
7153 fputs ("\n\t.private_extern\t", asm_out_file);
7154 assemble_name (asm_out_file, name);
7155 fputs ("\n", asm_out_file);
7156 ASM_OUTPUT_LABEL (asm_out_file, name);
7160 if (USE_HIDDEN_LINKONCE)
7164 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7166 TREE_PUBLIC (decl) = 1;
7167 TREE_STATIC (decl) = 1;
7168 DECL_ONE_ONLY (decl) = 1;
7170 (*targetm.asm_out.unique_section) (decl, 0);
7171 switch_to_section (get_named_section (decl, NULL, 0));
7173 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7174 fputs ("\t.hidden\t", asm_out_file);
7175 assemble_name (asm_out_file, name);
7176 fputc ('\n', asm_out_file);
7177 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7181 switch_to_section (text_section);
7182 ASM_OUTPUT_LABEL (asm_out_file, name);
7185 xops[0] = gen_rtx_REG (Pmode, regno);
7186 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7187 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7188 output_asm_insn ("ret", xops);
7191 if (NEED_INDICATE_EXEC_STACK)
7192 file_end_indicate_exec_stack ();
7195 /* Emit code for the SET_GOT patterns. */
7198 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7204 if (TARGET_VXWORKS_RTP && flag_pic)
7206 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7207 xops[2] = gen_rtx_MEM (Pmode,
7208 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7209 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7211 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7212 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7213 an unadorned address. */
7214 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7215 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7216 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7220 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7222 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7224 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7227 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7229 output_asm_insn ("call\t%a2", xops);
7232 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7233 is what will be referenced by the Mach-O PIC subsystem. */
7235 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
7238 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7239 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7242 output_asm_insn ("pop%z0\t%0", xops);
7247 get_pc_thunk_name (name, REGNO (dest));
7248 pic_labels_used |= 1 << REGNO (dest);
7250 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7251 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7252 output_asm_insn ("call\t%X2", xops);
7253 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7254 is what will be referenced by the Mach-O PIC subsystem. */
7257 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
7259 targetm.asm_out.internal_label (asm_out_file, "L",
7260 CODE_LABEL_NUMBER (label));
7267 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7268 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7270 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7275 /* Generate an "push" pattern for input ARG. */
7280 return gen_rtx_SET (VOIDmode,
7282 gen_rtx_PRE_DEC (Pmode,
7283 stack_pointer_rtx)),
7287 /* Return >= 0 if there is an unused call-clobbered register available
7288 for the entire function. */
7291 ix86_select_alt_pic_regnum (void)
7293 if (current_function_is_leaf && !crtl->profile
7294 && !ix86_current_function_calls_tls_descriptor)
7297 /* Can't use the same register for both PIC and DRAP. */
7299 drap = REGNO (crtl->drap_reg);
7302 for (i = 2; i >= 0; --i)
7303 if (i != drap && !df_regs_ever_live_p (i))
7307 return INVALID_REGNUM;
7310 /* Return 1 if we need to save REGNO. */
7312 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7314 if (pic_offset_table_rtx
7315 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7316 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7318 || crtl->calls_eh_return
7319 || crtl->uses_const_pool))
7321 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7326 if (crtl->calls_eh_return && maybe_eh_return)
7331 unsigned test = EH_RETURN_DATA_REGNO (i);
7332 if (test == INVALID_REGNUM)
7340 && regno == REGNO (crtl->drap_reg))
7343 return (df_regs_ever_live_p (regno)
7344 && !call_used_regs[regno]
7345 && !fixed_regs[regno]
7346 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7349 /* Return number of registers to be saved on the stack. */
7352 ix86_nsaved_regs (void)
7357 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7358 if (ix86_save_reg (regno, true))
7363 /* Given FROM and TO register numbers, say whether this elimination is
7364 allowed. If stack alignment is needed, we can only replace argument
7365 pointer with hard frame pointer, or replace frame pointer with stack
7366 pointer. Otherwise, frame pointer elimination is automatically
7367 handled and all other eliminations are valid. */
7370 ix86_can_eliminate (int from, int to)
7372 if (stack_realign_fp)
7373 return ((from == ARG_POINTER_REGNUM
7374 && to == HARD_FRAME_POINTER_REGNUM)
7375 || (from == FRAME_POINTER_REGNUM
7376 && to == STACK_POINTER_REGNUM));
7378 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7381 /* Return the offset between two registers, one to be eliminated, and the other
7382 its replacement, at the start of a routine. */
7385 ix86_initial_elimination_offset (int from, int to)
7387 struct ix86_frame frame;
7388 ix86_compute_frame_layout (&frame);
7390 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7391 return frame.hard_frame_pointer_offset;
7392 else if (from == FRAME_POINTER_REGNUM
7393 && to == HARD_FRAME_POINTER_REGNUM)
7394 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7397 gcc_assert (to == STACK_POINTER_REGNUM);
7399 if (from == ARG_POINTER_REGNUM)
7400 return frame.stack_pointer_offset;
7402 gcc_assert (from == FRAME_POINTER_REGNUM);
7403 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7407 /* Fill structure ix86_frame about frame of currently computed function. */
/* NOTE(review): this listing appears to have interior lines elided (the
   embedded original line numbers jump); code is kept byte-for-byte as
   found.  Verify structure (braces, else branches) against full source.  */
7410 ix86_compute_frame_layout (struct ix86_frame *frame)
7412 HOST_WIDE_INT total_size;
7413 unsigned int stack_alignment_needed;
7414 HOST_WIDE_INT offset;
7415 unsigned int preferred_alignment;
7416 HOST_WIDE_INT size = get_frame_size ();
7418 frame->nregs = ix86_nsaved_regs ();
/* Both alignments are taken from crtl and converted from bits to bytes.  */
7421 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7422 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7424 gcc_assert (!size || stack_alignment_needed);
7425 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7426 gcc_assert (preferred_alignment <= stack_alignment_needed);
7428 /* During reload iteration the amount of registers saved can change.
7429 Recompute the value as needed. Do not recompute when amount of registers
7430 didn't change as reload does multiple calls to the function and does not
7431 expect the decision to change within single iteration. */
7432 if (!optimize_function_for_size_p (cfun)
7433 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7435 int count = frame->nregs;
7437 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7438 /* The fast prologue uses move instead of push to save registers. This
7439 is significantly longer, but also executes faster as modern hardware
7440 can execute the moves in parallel, but can't do that for push/pop.
7442 Be careful about choosing what prologue to emit: When function takes
7443 many instructions to execute we may use slow version as well as in
7444 case function is known to be outside hot spot (this is known with
7445 feedback only). Weight the size of function by number of registers
7446 to save as it is cheap to use one or two push instructions but very
7447 slow to use many of them. */
7449 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7450 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7451 || (flag_branch_probabilities
7452 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7453 cfun->machine->use_fast_prologue_epilogue = false;
7455 cfun->machine->use_fast_prologue_epilogue
7456 = !expensive_function_p (count);
7458 if (TARGET_PROLOGUE_USING_MOVE
7459 && cfun->machine->use_fast_prologue_epilogue)
7460 frame->save_regs_using_mov = true;
7462 frame->save_regs_using_mov = false;
7465 /* Skip return address and saved base pointer. */
7466 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7468 frame->hard_frame_pointer_offset = offset;
7470 /* Set offset to aligned because the realigned frame starts from
7472 if (stack_realign_fp)
7473 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7475 /* Register save area */
7476 offset += frame->nregs * UNITS_PER_WORD;
/* x86-64 varargs register-save area, when used.  */
7479 if (ix86_save_varrargs_registers)
7481 offset += X86_64_VARARGS_SIZE;
7482 frame->va_arg_size = X86_64_VARARGS_SIZE;
7485 frame->va_arg_size = 0;
7487 /* Align start of frame for local function. */
7488 frame->padding1 = ((offset + stack_alignment_needed - 1)
7489 & -stack_alignment_needed) - offset;
7491 offset += frame->padding1;
7493 /* Frame pointer points here. */
7494 frame->frame_pointer_offset = offset;
7498 /* Add outgoing arguments area. Can be skipped if we eliminated
7499 all the function calls as dead code.
7500 Skipping is however impossible when function calls alloca. Alloca
7501 expander assumes that last crtl->outgoing_args_size
7502 of stack frame are unused. */
7503 if (ACCUMULATE_OUTGOING_ARGS
7504 && (!current_function_is_leaf || cfun->calls_alloca
7505 || ix86_current_function_calls_tls_descriptor))
7507 offset += crtl->outgoing_args_size;
7508 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7511 frame->outgoing_arguments_size = 0;
7513 /* Align stack boundary. Only needed if we're calling another function
7515 if (!current_function_is_leaf || cfun->calls_alloca
7516 || ix86_current_function_calls_tls_descriptor)
7517 frame->padding2 = ((offset + preferred_alignment - 1)
7518 & -preferred_alignment) - offset;
7520 frame->padding2 = 0;
7522 offset += frame->padding2;
7524 /* We've reached end of stack frame. */
7525 frame->stack_pointer_offset = offset;
7527 /* Size prologue needs to allocate. */
7528 frame->to_allocate =
7529 (size + frame->padding1 + frame->padding2
7530 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Large 64-bit frames and trivial frames fall back to push-based saves.  */
7532 if ((!frame->to_allocate && frame->nregs <= 1)
7533 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7534 frame->save_regs_using_mov = false;
/* Red zone: usable only for leaf functions on non-MS ABIs whose stack
   pointer never moves; capped at RED_ZONE_SIZE - RED_ZONE_RESERVE.  */
7536 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7537 && current_function_is_leaf
7538 && !ix86_current_function_calls_tls_descriptor)
7540 frame->red_zone_size = frame->to_allocate;
7541 if (frame->save_regs_using_mov)
7542 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7543 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7544 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7547 frame->red_zone_size = 0;
7548 frame->to_allocate -= frame->red_zone_size;
7549 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout — presumably guarded by a
   conditional elided from this listing; confirm against full source.  */
7551 fprintf (stderr, "\n");
7552 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7553 fprintf (stderr, "size: %ld\n", (long)size);
7554 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7555 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7556 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7557 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7558 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7559 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7560 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7561 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7562 (long)frame->hard_frame_pointer_offset);
7563 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7564 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7565 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7566 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7570 /* Emit code to save registers in the prologue. */
7573 ix86_emit_save_regs (void)
7578 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
7579 if (ix86_save_reg (regno, true))
7581 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7582 RTX_FRAME_RELATED_P (insn) = 1;
7586 /* Emit code to save registers using MOV insns. First register
7587 is restored from POINTER + OFFSET. */
7589 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7594 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7595 if (ix86_save_reg (regno, true))
7597 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7599 gen_rtx_REG (Pmode, regno));
7600 RTX_FRAME_RELATED_P (insn) = 1;
7601 offset += UNITS_PER_WORD;
7605 /* Expand prologue or epilogue stack adjustment.
7606 The pattern exist to put a dependency on all ebp-based memory accesses.
7607 STYLE should be negative if instructions should be marked as frame related,
7608 zero if %r11 register is live and cannot be freely used and positive
7612 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7617 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7618 else if (x86_64_immediate_operand (offset, DImode))
7619 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7623 /* r11 is used by indirect sibcall return as well, set before the
7624 epilogue and used after the epilogue. ATM indirect sibcall
7625 shouldn't be used together with huge frame sizes in one
7626 function because of the frame_size check in sibcall.c. */
7628 r11 = gen_rtx_REG (DImode, R11_REG);
7629 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
7631 RTX_FRAME_RELATED_P (insn) = 1;
7632 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
7636 RTX_FRAME_RELATED_P (insn) = 1;
7639 /* Find an available register to be used as dynamic realign argument
7640 pointer regsiter. Such a register will be written in prologue and
7641 used in begin of body, so it must not be
7642 1. parameter passing register.
7644 We reuse static-chain register if it is available. Otherwise, we
7645 use DI for i386 and R13 for x86-64. We chose R13 since it has
7648 Return: the regno of chosen register. */
7651 find_drap_reg (void)
7653 tree decl = cfun->decl;
7657 /* Use R13 for nested function or function need static chain.
7658 Since function with tail call may use any caller-saved
7659 registers in epilogue, DRAP must not use caller-saved
7660 register in such case. */
7661 if ((decl_function_context (decl)
7662 && !DECL_NO_STATIC_CHAIN (decl))
7663 || crtl->tail_call_emit)
7670 /* Use DI for nested function or function need static chain.
7671 Since function with tail call may use any caller-saved
7672 registers in epilogue, DRAP must not use caller-saved
7673 register in such case. */
7674 if ((decl_function_context (decl)
7675 && !DECL_NO_STATIC_CHAIN (decl))
7676 || crtl->tail_call_emit)
7679 /* Reuse static chain register if it isn't used for parameter
7681 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
7682 && !lookup_attribute ("fastcall",
7683 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
7690 /* Update incoming stack boundary and estimated stack alignment. */
7693 ix86_update_stack_boundary (void)
7695 /* Prefer the one specified at command line. */
7696 ix86_incoming_stack_boundary
7697 = (ix86_user_incoming_stack_boundary
7698 ? ix86_user_incoming_stack_boundary
7699 : ix86_default_incoming_stack_boundary);
7701 /* Incoming stack alignment can be changed on individual functions
7702 via force_align_arg_pointer attribute. We use the smallest
7703 incoming stack boundary. */
7704 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
7705 && lookup_attribute (ix86_force_align_arg_pointer_string,
7706 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7707 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
7709 /* Stack at entrance of main is aligned by runtime. We use the
7710 smallest incoming stack boundary. */
7711 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
7712 && DECL_NAME (current_function_decl)
7713 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7714 && DECL_FILE_SCOPE_P (current_function_decl))
7715 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7717 /* x86_64 vararg needs 16byte stack alignment for register save
7721 && crtl->stack_alignment_estimated < 128)
7722 crtl->stack_alignment_estimated = 128;
7725 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7726 needed or an rtx for DRAP otherwise. */
7729 ix86_get_drap_rtx (void)
7731 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
7732 crtl->need_drap = true;
7734 if (stack_realign_drap)
7736 /* Assign DRAP to vDRAP and returns vDRAP */
7737 unsigned int regno = find_drap_reg ();
7742 arg_ptr = gen_rtx_REG (Pmode, regno);
7743 crtl->drap_reg = arg_ptr;
7746 drap_vreg = copy_to_reg (arg_ptr);
7750 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7751 RTX_FRAME_RELATED_P (insn) = 1;
7758 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7761 ix86_internal_arg_pointer (void)
7763 return virtual_incoming_args_rtx;
7766 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
7767 This is called from dwarf2out.c to emit call frame instructions
7768 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
7770 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
7772 rtx unspec = SET_SRC (pattern);
7773 gcc_assert (GET_CODE (unspec) == UNSPEC);
7777 case UNSPEC_REG_SAVE:
7778 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
7779 SET_DEST (pattern));
7781 case UNSPEC_DEF_CFA:
7782 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
7783 INTVAL (XVECEXP (unspec, 0, 0)));
7790 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
7791 to be generated in correct form. */
7793 ix86_finalize_stack_realign_flags (void)
7795 /* Check if stack realign is really needed after reload, and
7796 stores result in cfun */
7797 unsigned int incoming_stack_boundary
7798 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7799 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
7800 unsigned int stack_realign = (incoming_stack_boundary
7801 < (current_function_is_leaf
7802 ? crtl->max_used_stack_slot_alignment
7803 : crtl->stack_alignment_needed));
7805 if (crtl->stack_realign_finalized)
7807 /* After stack_realign_needed is finalized, we can't no longer
7809 gcc_assert (crtl->stack_realign_needed == stack_realign);
7813 crtl->stack_realign_needed = stack_realign;
7814 crtl->stack_realign_finalized = true;
7818 /* Expand the prologue into a bunch of separate insns. */
/* NOTE(review): this listing has interior lines elided (embedded original
   line numbers jump); code is kept byte-for-byte as found.  Local
   declarations, braces and else-branches must be checked against the
   full source before editing.  */
7821 ix86_expand_prologue (void)
7825 struct ix86_frame frame;
7826 HOST_WIDE_INT allocate;
7828 ix86_finalize_stack_realign_flags ();
7830 /* DRAP should not coexist with stack_realign_fp */
7831 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
7833 ix86_compute_frame_layout (&frame);
7835 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
7836 of DRAP is needed and stack realignment is really needed after reload */
7837 if (crtl->drap_reg && crtl->stack_realign_needed)
7840 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7841 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
7842 ? 0 : UNITS_PER_WORD);
7844 gcc_assert (stack_realign_drap);
7846 /* Grab the argument pointer. */
7847 x = plus_constant (stack_pointer_rtx,
7848 (UNITS_PER_WORD + param_ptr_offset));
7851 /* Only need to push parameter pointer reg if it is caller
7853 if (!call_used_regs[REGNO (crtl->drap_reg)])
7855 /* Push arg pointer reg */
7856 insn = emit_insn (gen_push (y));
7857 RTX_FRAME_RELATED_P (insn) = 1;
7860 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
7861 RTX_FRAME_RELATED_P (insn) = 1;
7863 /* Align the stack. */
7864 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
7866 GEN_INT (-align_bytes)));
7867 RTX_FRAME_RELATED_P (insn) = 1;
7869 /* Replicate the return address on the stack so that return
7870 address can be reached via (argp - 1) slot. This is needed
7871 to implement macro RETURN_ADDR_RTX and intrinsic function
7872 expand_builtin_return_addr etc. */
7874 x = gen_frame_mem (Pmode,
7875 plus_constant (x, -UNITS_PER_WORD));
7876 insn = emit_insn (gen_push (x));
7877 RTX_FRAME_RELATED_P (insn) = 1;
7880 /* Note: AT&T enter does NOT have reversed args. Enter is probably
7881 slower on all targets. Also sdb doesn't like it. */
/* Standard frame-pointer setup: push %ebp; mov %esp, %ebp.  */
7883 if (frame_pointer_needed)
7885 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
7886 RTX_FRAME_RELATED_P (insn) = 1;
7888 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
7889 RTX_FRAME_RELATED_P (insn) = 1;
7892 if (stack_realign_fp)
7894 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7895 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
7897 /* Align the stack. */
7898 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
7900 GEN_INT (-align_bytes)));
7901 RTX_FRAME_RELATED_P (insn) = 1;
7904 allocate = frame.to_allocate;
7906 if (!frame.save_regs_using_mov)
7907 ix86_emit_save_regs ();
7909 allocate += frame.nregs * UNITS_PER_WORD;
7911 /* When using red zone we may start register saving before allocating
7912 the stack frame saving one cycle of the prologue. However I will
7913 avoid doing this if I am going to have to probe the stack since
7914 at least on x86_64 the stack probe can turn into a call that clobbers
7915 a red zone location */
7916 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
7917 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
7918 ix86_emit_save_regs_using_mov ((frame_pointer_needed
7919 && !crtl->stack_realign_needed)
7920 ? hard_frame_pointer_rtx
7921 : stack_pointer_rtx,
7922 -frame.nregs * UNITS_PER_WORD);
/* Frame allocation: plain sub when small enough, otherwise a probing
   stack-allocation worker (required when stack probing is enabled).  */
7926 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
7927 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7928 GEN_INT (-allocate), -1);
7931 /* Only valid for Win32. */
7932 rtx eax = gen_rtx_REG (Pmode, AX_REG);
7936 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
7938 if (cfun->machine->call_abi == MS_ABI)
7941 eax_live = ix86_eax_live_at_start_p ();
/* eax carries the allocation size into the worker; preserve the
   caller's eax around the call if it is live at entry.  */
7945 emit_insn (gen_push (eax));
7946 allocate -= UNITS_PER_WORD;
7949 emit_move_insn (eax, GEN_INT (allocate));
7952 insn = gen_allocate_stack_worker_64 (eax);
7954 insn = gen_allocate_stack_worker_32 (eax);
7955 insn = emit_insn (insn);
7956 RTX_FRAME_RELATED_P (insn) = 1;
7957 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
7958 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
7959 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
7960 t, REG_NOTES (insn));
7964 if (frame_pointer_needed)
7965 t = plus_constant (hard_frame_pointer_rtx,
7968 - frame.nregs * UNITS_PER_WORD);
7970 t = plus_constant (stack_pointer_rtx, allocate);
7971 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* MOV-based register saves when not already emitted before the
   frame allocation above.  */
7975 if (frame.save_regs_using_mov
7976 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
7977 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
7979 if (!frame_pointer_needed
7980 || !frame.to_allocate
7981 || crtl->stack_realign_needed)
7982 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
7985 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
7986 -frame.nregs * UNITS_PER_WORD);
/* Set up the PIC register when the function needs it, possibly using
   an otherwise-unused call-clobbered register instead of %ebx.  */
7989 pic_reg_used = false;
7990 if (pic_offset_table_rtx
7991 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7994 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
7996 if (alt_pic_reg_used != INVALID_REGNUM)
7997 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
7999 pic_reg_used = true;
8006 if (ix86_cmodel == CM_LARGE_PIC)
8008 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8009 rtx label = gen_label_rtx ();
8011 LABEL_PRESERVE_P (label) = 1;
8012 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8013 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8014 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8015 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8016 pic_offset_table_rtx, tmp_reg));
8019 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8022 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8025 /* Prevent function calls from being scheduled before the call to mcount.
8026 In the pic_reg_used case, make sure that the got load isn't deleted. */
8030 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8031 emit_insn (gen_blockage ());
8034 if (crtl->drap_reg && !crtl->stack_realign_needed)
8036 /* vDRAP is setup but after reload it turns out stack realign
8037 isn't necessary, here we will emit prologue to setup DRAP
8038 without stack realign adjustment */
8039 int drap_bp_offset = UNITS_PER_WORD * 2;
8040 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8041 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8044 /* Emit cld instruction if stringops are used in the function. */
8045 if (TARGET_CLD && ix86_current_function_needs_cld)
8046 emit_insn (gen_cld ());
8049 /* Emit code to restore saved registers using MOV insns.  First register
8050 is restored from POINTER + OFFSET.  */
/* Each hard register that ix86_save_reg reports as saved is reloaded
   with a word-sized MOV from successive stack slots at POINTER+OFFSET,
   OFFSET advancing by UNITS_PER_WORD per register.  MAYBE_EH_RETURN is
   forwarded to ix86_save_reg so the eh_return-only registers are
   included when restoring along an eh_return path.  */
8052 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8053 int maybe_eh_return)
8056 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8058 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8059 if (ix86_save_reg (regno, maybe_eh_return))
8061 /* Ensure that adjust_address won't be forced to produce pointer
8062 out of range allowed by x86-64 instruction set.  */
8063 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* OFFSET does not fit a signed 32-bit displacement: materialize
   POINTER+OFFSET in r11 and address relative to that instead.  */
8067 r11 = gen_rtx_REG (DImode, R11_REG);
8068 emit_move_insn (r11, GEN_INT (offset));
8069 emit_insn (gen_adddi3 (r11, r11, pointer));
8070 base_address = gen_rtx_MEM (Pmode, r11);
8073 emit_move_insn (gen_rtx_REG (Pmode, regno),
8074 adjust_address (base_address, Pmode, offset));
8075 offset += UNITS_PER_WORD;
8079 /* Restore function stack, frame, and registers. */
/* Expand the epilogue RTL for the current function.  STYLE selects the
   epilogue flavor; from the uses below, STYLE == 2 is the eh_return
   path (registers marked saved for eh_return are not restored on the
   normal path), and the sibcall path suppresses the return insn —
   NOTE(review): exact encoding of the other STYLE values is not
   visible in this chunk; confirm against the callers.  */
8082 ix86_expand_epilogue (int style)
8086 struct ix86_frame frame;
8087 HOST_WIDE_INT offset;
8089 ix86_finalize_stack_realign_flags ();
8091 /* When stack is realigned, SP must be valid. */
8092 sp_valid = (!frame_pointer_needed
8093 || current_function_sp_is_unchanging
8094 || stack_realign_fp);
8096 ix86_compute_frame_layout (&frame);
8098 /* Calculate start of saved registers relative to ebp. Special care
8099 must be taken for the normal return case of a function using
8100 eh_return: the eax and edx registers are marked as saved, but not
8101 restored along this path. */
8102 offset = frame.nregs;
8103 if (crtl->calls_eh_return && style != 2)
8105 offset *= -UNITS_PER_WORD;
8107 /* If we're only restoring one register and sp is not valid then
8108 using a move instruction to restore the register since it's
8109 less work than reloading sp and popping the register.
8111 The default code result in stack adjustment using add/lea instruction,
8112 while this code results in LEAVE instruction (or discrete equivalent),
8113 so it is profitable in some other cases as well. Especially when there
8114 are no registers to restore. We also use this code when TARGET_USE_LEAVE
8115 and there is exactly one register to pop. This heuristic may need some
8116 tuning in future. */
8117 if ((!sp_valid && frame.nregs <= 1)
8118 || (TARGET_EPILOGUE_USING_MOVE
8119 && cfun->machine->use_fast_prologue_epilogue
8120 && (frame.nregs > 1 || frame.to_allocate))
8121 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
8122 || (frame_pointer_needed && TARGET_USE_LEAVE
8123 && cfun->machine->use_fast_prologue_epilogue
8124 && frame.nregs == 1)
8125 || crtl->calls_eh_return)
8127 /* Restore registers. We can use ebp or esp to address the memory
8128 locations. If both are available, default to ebp, since offsets
8129 are known to be small. Only exception is esp pointing directly
8130 to the end of block of saved registers, where we may simplify
8133 If we are realigning stack with bp and sp, regs restore can't
8134 be addressed by bp. sp must be used instead. */
8136 if (!frame_pointer_needed
8137 || (sp_valid && !frame.to_allocate)
8138 || stack_realign_fp)
8139 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8140 frame.to_allocate, style == 2)
8142 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8143 offset, style == 2);
8145 /* eh_return epilogues need %ecx added to the stack pointer. */
8148 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8150 /* Stack align doesn't work with eh_return. */
8151 gcc_assert (!crtl->stack_realign_needed);
8153 if (frame_pointer_needed)
/* Frame-pointer case: compute the new SP into SA, reload the saved
   frame pointer from its slot, then adjust SP by SA.  */
8155 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8156 tmp = plus_constant (tmp, UNITS_PER_WORD);
8157 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8159 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8160 emit_move_insn (hard_frame_pointer_rtx, tmp);
8162 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8167 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8168 tmp = plus_constant (tmp, (frame.to_allocate
8169 + frame.nregs * UNITS_PER_WORD));
8170 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8173 else if (!frame_pointer_needed)
8174 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8175 GEN_INT (frame.to_allocate
8176 + frame.nregs * UNITS_PER_WORD),
8178 /* If not an i386, mov & pop is faster than "leave". */
8179 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8180 || !cfun->machine->use_fast_prologue_epilogue)
8181 emit_insn ((*ix86_gen_leave) ());
8184 pro_epilogue_adjust_stack (stack_pointer_rtx,
8185 hard_frame_pointer_rtx,
8188 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8193 /* First step is to deallocate the stack frame so that we can
8196 If we realign stack with frame pointer, then stack pointer
8197 won't be able to recover via lea $offset(%bp), %sp, because
8198 there is a padding area between bp and sp for realign.
8199 "add $to_allocate, %sp" must be used instead. */
8202 gcc_assert (frame_pointer_needed);
8203 gcc_assert (!stack_realign_fp);
8204 pro_epilogue_adjust_stack (stack_pointer_rtx,
8205 hard_frame_pointer_rtx,
8206 GEN_INT (offset), style);
8208 else if (frame.to_allocate)
8209 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8210 GEN_INT (frame.to_allocate), style);
/* Default path: restore saved registers with POP insns.  */
8212 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8213 if (ix86_save_reg (regno, false))
8214 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8215 if (frame_pointer_needed)
8217 /* Leave results in shorter dependency chains on CPUs that are
8218 able to grok it fast. */
8219 if (TARGET_USE_LEAVE)
8220 emit_insn ((*ix86_gen_leave) ());
8223 /* For stack realigned really happens, recover stack
8224 pointer to hard frame pointer is a must, if not using
8226 if (stack_realign_fp)
8227 pro_epilogue_adjust_stack (stack_pointer_rtx,
8228 hard_frame_pointer_rtx,
8230 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* If a DRAP register was used for dynamic realignment, pop it and
   undo the extra slot it occupied.  */
8235 if (crtl->drap_reg && crtl->stack_realign_needed)
8237 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8238 ? 0 : UNITS_PER_WORD);
8239 gcc_assert (stack_realign_drap);
8240 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8242 GEN_INT (-(UNITS_PER_WORD
8243 + param_ptr_offset))));
8244 if (!call_used_regs[REGNO (crtl->drap_reg)])
8245 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8249 /* Sibcall epilogues don't want a return instruction. */
8253 if (crtl->args.pops_args && crtl->args.size)
8255 rtx popc = GEN_INT (crtl->args.pops_args);
8257 /* i386 can only pop 64K bytes. If asked to pop more, pop
8258 return address, do explicit add, and jump indirectly to the
8261 if (crtl->args.pops_args >= 65536)
8263 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8265 /* There is no "pascal" calling convention in any 64bit ABI. */
8266 gcc_assert (!TARGET_64BIT);
8268 emit_insn (gen_popsi1 (ecx));
8269 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8270 emit_jump_insn (gen_return_indirect_internal (ecx));
8273 emit_jump_insn (gen_return_pop_internal (popc));
8276 emit_jump_insn (gen_return_internal ());
8279 /* Reset from the function's potential modifications. */
/* TARGET_ASM_FUNCTION_EPILOGUE hook: restore the PIC register's real
   hard-register number (the prologue may have switched it to an
   alternate register), and on Mach-O emit a trailing NOP when the
   function would otherwise end with a label.  */
8282 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8283 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8285 if (pic_offset_table_rtx)
8286 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8288 /* Mach-O doesn't support labels at the end of objects, so if
8289 it looks like we might want one, insert a NOP. */
8291 rtx insn = get_last_insn ();
/* Walk backwards over trailing notes to find the last real insn.  */
8294 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8295 insn = PREV_INSN (insn);
8299 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8300 fputs ("\tnop\n", file);
8306 /* Extract the parts of an RTL expression that is a valid memory address
8307 for an instruction. Return 0 if the structure of the address is
8308 grossly off. Return -1 if the address contains ASHIFT, so it is not
8309 strictly valid, but still used for computing length of lea instruction. */
/* Fills *OUT with base, index, scale, displacement and segment
   (base + index*scale + disp, optional %fs/%gs segment for TLS).  */
8312 ix86_decompose_address (rtx addr, struct ix86_address *out)
8314 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8315 rtx base_reg, index_reg;
8316 HOST_WIDE_INT scale = 1;
8317 rtx scale_rtx = NULL_RTX;
8319 enum ix86_address_seg seg = SEG_DEFAULT;
8321 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8323 else if (GET_CODE (addr) == PLUS)
/* Flatten the PLUS tree into a list of addends, then classify each.  */
8333 addends[n++] = XEXP (op, 1);
8336 while (GET_CODE (op) == PLUS);
8341 for (i = n; i >= 0; --i)
8344 switch (GET_CODE (op))
8349 index = XEXP (op, 0);
8350 scale_rtx = XEXP (op, 1);
/* UNSPEC_TP selects a TLS segment override (%fs on 64-bit, %gs on
   32-bit) when direct segment references are enabled.  */
8354 if (XINT (op, 1) == UNSPEC_TP
8355 && TARGET_TLS_DIRECT_SEG_REFS
8356 && seg == SEG_DEFAULT)
8357 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8386 else if (GET_CODE (addr) == MULT)
8388 index = XEXP (addr, 0); /* index*scale */
8389 scale_rtx = XEXP (addr, 1);
8391 else if (GET_CODE (addr) == ASHIFT)
8395 /* We're called for lea too, which implements ashift on occasion. */
8396 index = XEXP (addr, 0);
8397 tmp = XEXP (addr, 1);
8398 if (!CONST_INT_P (tmp))
8400 scale = INTVAL (tmp);
8401 if ((unsigned HOST_WIDE_INT) scale > 3)
8407 disp = addr; /* displacement */
8409 /* Extract the integral value of scale. */
8412 if (!CONST_INT_P (scale_rtx))
8414 scale = INTVAL (scale_rtx);
8417 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8418 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8420 /* Allow arg pointer and stack pointer as index if there is not scaling. */
8421 if (base_reg && index_reg && scale == 1
8422 && (index_reg == arg_pointer_rtx
8423 || index_reg == frame_pointer_rtx
8424 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8427 tmp = base, base = index, index = tmp;
8428 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8431 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8432 if ((base_reg == hard_frame_pointer_rtx
8433 || base_reg == frame_pointer_rtx
8434 || base_reg == arg_pointer_rtx) && !disp)
8437 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8438 Avoid this by transforming to [%esi+0].
8439 Reload calls address legitimization without cfun defined, so we need
8440 to test cfun for being non-NULL. */
8441 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8442 && base_reg && !index_reg && !disp
8444 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8447 /* Special case: encode reg+reg instead of reg*2. */
8448 if (!base && index && scale && scale == 2)
8449 base = index, base_reg = index_reg, scale = 1;
8451 /* Special case: scaling cannot be encoded without base or displacement. */
8452 if (!base && !disp && index && scale != 1)
8464 /* Return cost of the memory address x.
8465 For i386, it is better to use a complex address than let gcc copy
8466 the address into a reg and make a new pseudo. But not if the address
8467 requires to two regs - that would mean more pseudos with longer
/* Heuristic address cost: penalizes addresses that tie up two hard
   registers, and adds a K6-specific penalty for address forms its
   predecoder handles badly.  */
8470 ix86_address_cost (rtx x)
8472 struct ix86_address parts;
8474 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so the register tests below see the inner REG.  */
8478 if (parts.base && GET_CODE (parts.base) == SUBREG)
8479 parts.base = SUBREG_REG (parts.base);
8480 if (parts.index && GET_CODE (parts.index) == SUBREG)
8481 parts.index = SUBREG_REG (parts.index);
8483 /* Attempt to minimize number of registers in the address. */
8485 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8487 && (!REG_P (parts.index)
8488 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8492 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8494 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8495 && parts.base != parts.index)
8498 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
8499 since it's predecode logic can't detect the length of instructions
8500 and it degenerates to vector decoded. Increase cost of such
8501 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
8502 to split such addresses or even refuse such addresses at all.
8504 Following addressing modes are affected:
8509 The first and last case may be avoidable by explicitly coding the zero in
8510 memory address, but I don't have AMD-K6 machine handy to check this
8514 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8515 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8516 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8522 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8523 this is used for to form addresses to local data when -fPIC is in
/* Recognizes a MINUS whose second operand is the literal Mach-O
   "<pic base>" symbol, i.e. a label/symbol offset from the PIC base.  */
8527 darwin_local_data_pic (rtx disp)
8529 if (GET_CODE (disp) == MINUS)
8531 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
8532 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
8533 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
8535 const char *sym_name = XSTR (XEXP (disp, 1), 0);
8536 if (! strcmp (sym_name, "<pic base>"))
8544 /* Determine if a given RTX is a valid constant. We already know this
8545 satisfies CONSTANT_P. */
/* Rejects TLS symbols, dllimport symbols, and unspecs other than the
   few explicitly allowed below; everything else is handled by the
   move patterns.  */
8548 legitimate_constant_p (rtx x)
8550 switch (GET_CODE (x))
8555 if (GET_CODE (x) == PLUS)
8557 if (!CONST_INT_P (XEXP (x, 1)))
8562 if (TARGET_MACHO && darwin_local_data_pic (x))
8565 /* Only some unspecs are valid as "constants". */
8566 if (GET_CODE (x) == UNSPEC)
8567 switch (XINT (x, 1))
8572 return TARGET_64BIT;
/* TLS offsets are constant only for the matching TLS model.  */
8575 x = XVECEXP (x, 0, 0);
8576 return (GET_CODE (x) == SYMBOL_REF
8577 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8579 x = XVECEXP (x, 0, 0);
8580 return (GET_CODE (x) == SYMBOL_REF
8581 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
8586 /* We must have drilled down to a symbol. */
8587 if (GET_CODE (x) == LABEL_REF)
8589 if (GET_CODE (x) != SYMBOL_REF)
8594 /* TLS symbols are never valid. */
8595 if (SYMBOL_REF_TLS_MODEL (x))
8598 /* DLLIMPORT symbols are never valid. */
8599 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
8600 && SYMBOL_REF_DLLIMPORT_P (x))
8605 if (GET_MODE (x) == TImode
8606 && x != CONST0_RTX (TImode)
8612 if (x == CONST0_RTX (GET_MODE (x)))
8620 /* Otherwise we handle everything else in the move patterns. */
8624 /* Determine if it's legal to put X into the constant pool. This
8625 is not possible for the address of thread-local symbols, which
8626 is checked above. */
/* Inverse of legitimate_constant_p for the non-trivial codes: X may
   not be forced into memory exactly when it is not a legitimate
   constant.  */
8629 ix86_cannot_force_const_mem (rtx x)
8631 /* We can always put integral constants and vectors in memory. */
8632 switch (GET_CODE (x))
8642 return !legitimate_constant_p (x);
8645 /* Determine if a given RTX is a valid constant address. */
/* A constant address must both satisfy CONSTANT_P and pass the strict
   legitimate-address check.  */
8648 constant_address_p (rtx x)
8650 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
8653 /* Nonzero if the constant value X is a legitimate general operand
8654 when generating PIC code. It is given that flag_pic is on and
8655 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
8658 legitimate_pic_operand_p (rtx x)
8662 switch (GET_CODE (x))
8665 inner = XEXP (x, 0);
/* Strip an outer constant offset before inspecting the unspec.  */
8666 if (GET_CODE (inner) == PLUS
8667 && CONST_INT_P (XEXP (inner, 1)))
8668 inner = XEXP (inner, 0);
8670 /* Only some unspecs are valid as "constants". */
8671 if (GET_CODE (inner) == UNSPEC)
8672 switch (XINT (inner, 1))
8677 return TARGET_64BIT;
8679 x = XVECEXP (inner, 0, 0);
8680 return (GET_CODE (x) == SYMBOL_REF
8681 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
/* Symbols and labels defer to the PIC displacement check.  */
8689 return legitimate_pic_address_disp_p (x);
8696 /* Determine if a given CONST RTX is a valid memory displacement
/* in PIC mode.  Accepts GOT/GOTOFF/PLTOFF unspecs (with a bounded
   constant offset) and the TLS unspecs whose TLS model matches.  */
8700 legitimate_pic_address_disp_p (rtx disp)
8704 /* In 64bit mode we can allow direct addresses of symbols and labels
8705 when they are not dynamic symbols. */
8708 rtx op0 = disp, op1;
8710 switch (GET_CODE (disp))
8716 if (GET_CODE (XEXP (disp, 0)) != PLUS)
8718 op0 = XEXP (XEXP (disp, 0), 0);
8719 op1 = XEXP (XEXP (disp, 0), 1);
/* Offsets must stay within +/-16MB of the symbol.  */
8720 if (!CONST_INT_P (op1)
8721 || INTVAL (op1) >= 16*1024*1024
8722 || INTVAL (op1) < -16*1024*1024)
8724 if (GET_CODE (op0) == LABEL_REF)
8726 if (GET_CODE (op0) != SYMBOL_REF)
8731 /* TLS references should always be enclosed in UNSPEC. */
8732 if (SYMBOL_REF_TLS_MODEL (op0))
8734 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
8735 && ix86_cmodel != CM_LARGE_PIC)
8743 if (GET_CODE (disp) != CONST)
8745 disp = XEXP (disp, 0);
8749 /* We are unsafe to allow PLUS expressions. This limit allowed distance
8750 of GOT tables. We should not need these anyway. */
8751 if (GET_CODE (disp) != UNSPEC
8752 || (XINT (disp, 1) != UNSPEC_GOTPCREL
8753 && XINT (disp, 1) != UNSPEC_GOTOFF
8754 && XINT (disp, 1) != UNSPEC_PLTOFF))
8757 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
8758 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
8764 if (GET_CODE (disp) == PLUS)
8766 if (!CONST_INT_P (XEXP (disp, 1)))
8768 disp = XEXP (disp, 0);
8772 if (TARGET_MACHO && darwin_local_data_pic (disp))
8775 if (GET_CODE (disp) != UNSPEC)
8778 switch (XINT (disp, 1))
8783 /* We need to check for both symbols and labels because VxWorks loads
8784 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
8786 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8787 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
8789 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
8790 While ABI specify also 32bit relocation but we don't produce it in
8791 small PIC model at all. */
8792 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8793 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
8795 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
8797 case UNSPEC_GOTTPOFF:
8798 case UNSPEC_GOTNTPOFF:
8799 case UNSPEC_INDNTPOFF:
/* Each TLS unspec is valid only for a symbol of the matching
   TLS model.  */
8802 disp = XVECEXP (disp, 0, 0);
8803 return (GET_CODE (disp) == SYMBOL_REF
8804 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
8806 disp = XVECEXP (disp, 0, 0);
8807 return (GET_CODE (disp) == SYMBOL_REF
8808 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
8810 disp = XVECEXP (disp, 0, 0);
8811 return (GET_CODE (disp) == SYMBOL_REF
8812 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
8818 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
8819 memory address for an instruction. The MODE argument is the machine mode
8820 for the MEM expression that wants to use this address.
8822 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
8823 convert common non-canonical forms to canonical form so that they will
/* Validates base, index, scale and displacement in turn; REASON
   records the failure cause for debug output.  STRICT selects the
   strict (hard regs only) register predicates.  */
8827 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
8828 rtx addr, int strict)
8830 struct ix86_address parts;
8831 rtx base, index, disp;
8832 HOST_WIDE_INT scale;
8833 const char *reason = NULL;
8834 rtx reason_rtx = NULL_RTX;
8836 if (ix86_decompose_address (addr, &parts) <= 0)
8838 reason = "decomposition failed";
8843 index = parts.index;
8845 scale = parts.scale;
8847 /* Validate base register.
8849 Don't allow SUBREG's that span more than a word here. It can lead to spill
8850 failures when the base is one word out of a two word structure, which is
8851 represented internally as a DImode int. */
8860 else if (GET_CODE (base) == SUBREG
8861 && REG_P (SUBREG_REG (base))
8862 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
8864 reg = SUBREG_REG (base);
8867 reason = "base is not a register";
8871 if (GET_MODE (base) != Pmode)
8873 reason = "base is not in Pmode";
8877 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
8878 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
8880 reason = "base is not valid";
8885 /* Validate index register.
8887 Don't allow SUBREG's that span more than a word here -- same as above. */
8896 else if (GET_CODE (index) == SUBREG
8897 && REG_P (SUBREG_REG (index))
8898 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
8900 reg = SUBREG_REG (index);
8903 reason = "index is not a register";
8907 if (GET_MODE (index) != Pmode)
8909 reason = "index is not in Pmode";
8913 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
8914 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
8916 reason = "index is not valid";
8921 /* Validate scale factor. */
8924 reason_rtx = GEN_INT (scale);
8927 reason = "scale without index";
/* Hardware supports only scale factors 1, 2, 4 and 8.  */
8931 if (scale != 2 && scale != 4 && scale != 8)
8933 reason = "scale is not a valid multiplier";
8938 /* Validate displacement. */
8943 if (GET_CODE (disp) == CONST
8944 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
8945 switch (XINT (XEXP (disp, 0), 1))
8947 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
8948 used. While ABI specify also 32bit relocations, we don't produce
8949 them at all and use IP relative instead. */
8952 gcc_assert (flag_pic);
8954 goto is_legitimate_pic;
8955 reason = "64bit address unspec";
8958 case UNSPEC_GOTPCREL:
8959 gcc_assert (flag_pic);
8960 goto is_legitimate_pic;
8962 case UNSPEC_GOTTPOFF:
8963 case UNSPEC_GOTNTPOFF:
8964 case UNSPEC_INDNTPOFF:
8970 reason = "invalid address unspec";
8974 else if (SYMBOLIC_CONST (disp)
8978 && MACHOPIC_INDIRECT
8979 && !machopic_operand_p (disp)
8985 if (TARGET_64BIT && (index || base))
8987 /* foo@dtpoff(%rX) is ok. */
8988 if (GET_CODE (disp) != CONST
8989 || GET_CODE (XEXP (disp, 0)) != PLUS
8990 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
8991 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
8992 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
8993 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
8995 reason = "non-constant pic memory reference";
8999 else if (! legitimate_pic_address_disp_p (disp))
9001 reason = "displacement is an invalid pic construct";
9005 /* This code used to verify that a symbolic pic displacement
9006 includes the pic_offset_table_rtx register.
9008 While this is good idea, unfortunately these constructs may
9009 be created by "adds using lea" optimization for incorrect
9018 This code is nonsensical, but results in addressing
9019 GOT table with pic_offset_table_rtx base. We can't
9020 just refuse it easily, since it gets matched by
9021 "addsi3" pattern, that later gets split to lea in the
9022 case output register differs from input. While this
9023 can be handled by separate addsi pattern for this case
9024 that never results in lea, this seems to be easier and
9025 correct fix for crash to disable this test. */
9027 else if (GET_CODE (disp) != LABEL_REF
9028 && !CONST_INT_P (disp)
9029 && (GET_CODE (disp) != CONST
9030 || !legitimate_constant_p (disp))
9031 && (GET_CODE (disp) != SYMBOL_REF
9032 || !legitimate_constant_p (disp)))
9034 reason = "displacement is not constant";
9037 else if (TARGET_64BIT
9038 && !x86_64_immediate_operand (disp, VOIDmode))
9040 reason = "displacement is out of range";
9045 /* Everything looks valid. */
9052 /* Return a unique alias set for the GOT. */
/* Lazily allocated on first call and cached in a function-static.  */
9054 static alias_set_type
9055 ix86_GOT_alias_set (void)
9057 static alias_set_type set = -1;
9059 set = new_alias_set ();
9063 /* Return a legitimate reference for ORIG (an address) using the
9064 register REG. If REG is 0, a new pseudo is generated.
9066 There are two types of references that must be handled:
9068 1. Global data references must load the address from the GOT, via
9069 the PIC reg. An insn is emitted to do this load, and the reg is
9072 2. Static data references, constant pool addresses, and code labels
9073 compute the address as an offset from the GOT, whose base is in
9074 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9075 differentiate them from global data objects. The returned
9076 address is the PIC reg + an unspec constant.
9078 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9079 reg also appears in the address. */
9082 legitimize_pic_address (rtx orig, rtx reg)
9089 if (TARGET_MACHO && !TARGET_64BIT)
9092 reg = gen_reg_rtx (Pmode);
9093 /* Use the generic Mach-O PIC machinery. */
9094 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9098 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9100 else if (TARGET_64BIT
9101 && ix86_cmodel != CM_SMALL_PIC
9102 && gotoff_operand (addr, Pmode))
9105 /* This symbol may be referenced via a displacement from the PIC
9106 base address (@GOTOFF). */
/* 64-bit @GOTOFF case: build the unspec, then add the PIC register.  */
9108 if (reload_in_progress)
9109 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9110 if (GET_CODE (addr) == CONST)
9111 addr = XEXP (addr, 0);
9112 if (GET_CODE (addr) == PLUS)
9114 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9116 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9119 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9120 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9122 tmpreg = gen_reg_rtx (Pmode);
9125 emit_move_insn (tmpreg, new_rtx);
9129 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9130 tmpreg, 1, OPTAB_DIRECT);
9133 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9135 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9137 /* This symbol may be referenced via a displacement from the PIC
9138 base address (@GOTOFF). */
/* 32-bit @GOTOFF case: same construction, result is PIC reg + unspec.  */
9140 if (reload_in_progress)
9141 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9142 if (GET_CODE (addr) == CONST)
9143 addr = XEXP (addr, 0);
9144 if (GET_CODE (addr) == PLUS)
9146 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9148 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9151 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9152 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9153 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9157 emit_move_insn (reg, new_rtx);
9161 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9162 /* We can't use @GOTOFF for text labels on VxWorks;
9163 see gotoff_operand. */
9164 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)
9166 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
/* dllimport symbols get their own __imp_ indirection.  */
9168 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9169 return legitimize_dllimport_symbol (addr, true);
9170 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9171 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9172 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9174 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9175 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
9179 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
/* 64-bit small PIC: RIP-relative load through @GOTPCREL.  */
9181 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9182 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9183 new_rtx = gen_const_mem (Pmode, new_rtx);
9184 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9187 reg = gen_reg_rtx (Pmode);
9188 /* Use directly gen_movsi, otherwise the address is loaded
9189 into register for CSE. We don't want to CSE this addresses,
9190 instead we CSE addresses from the GOT table, so skip this. */
9191 emit_insn (gen_movsi (reg, new_rtx));
9196 /* This symbol must be referenced via a load from the
9197 Global Offset Table (@GOT). */
9199 if (reload_in_progress)
9200 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9201 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9202 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9204 new_rtx = force_reg (Pmode, new_rtx);
9205 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9206 new_rtx = gen_const_mem (Pmode, new_rtx);
9207 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9210 reg = gen_reg_rtx (Pmode);
9211 emit_move_insn (reg, new_rtx);
9217 if (CONST_INT_P (addr)
9218 && !x86_64_immediate_operand (addr, VOIDmode))
9222 emit_move_insn (reg, addr);
9226 new_rtx = force_reg (Pmode, addr);
9228 else if (GET_CODE (addr) == CONST)
9230 addr = XEXP (addr, 0);
9232 /* We must match stuff we generate before. Assume the only
9233 unspecs that can get here are ours. Not that we could do
9234 anything with them anyway.... */
9235 if (GET_CODE (addr) == UNSPEC
9236 || (GET_CODE (addr) == PLUS
9237 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9239 gcc_assert (GET_CODE (addr) == PLUS);
9241 if (GET_CODE (addr) == PLUS)
9243 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9245 /* Check first to see if this is a constant offset from a @GOTOFF
9246 symbol reference. */
9247 if (gotoff_operand (op0, Pmode)
9248 && CONST_INT_P (op1))
9252 if (reload_in_progress)
9253 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9254 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9256 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9257 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9258 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9262 emit_move_insn (reg, new_rtx);
9268 if (INTVAL (op1) < -16*1024*1024
9269 || INTVAL (op1) >= 16*1024*1024)
9271 if (!x86_64_immediate_operand (op1, Pmode))
9272 op1 = force_reg (Pmode, op1);
9273 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves recursively and recombine.  */
9279 base = legitimize_pic_address (XEXP (addr, 0), reg);
9280 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9281 base == reg ? NULL_RTX : reg);
9283 if (CONST_INT_P (new_rtx))
9284 new_rtx = plus_constant (base, INTVAL (new_rtx));
9287 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9289 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9290 new_rtx = XEXP (new_rtx, 1);
9292 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9300 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* The thread pointer is represented as an UNSPEC_TP; when TO_REG,
   a SET into a fresh pseudo is emitted and that register returned.  */
9303 get_thread_pointer (int to_reg)
9307 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9311 reg = gen_reg_rtx (Pmode);
9312 insn = gen_rtx_SET (VOIDmode, reg, tp);
9313 insn = emit_insn (insn);
9318 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
9319 false if we expect this to be used for a memory address and true if
9320 we expect to load the address into a register. */
/* Expands the access sequence for TLS symbol X according to MODEL
   (global-dynamic, local-dynamic, initial-exec, local-exec), with
   separate 32/64-bit and GNU/GNU2 TLS dialect variants.  */
9323 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9325 rtx dest, base, off, pic, tp;
9330 case TLS_MODEL_GLOBAL_DYNAMIC:
9331 dest = gen_reg_rtx (Pmode);
9332 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9334 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
/* 64-bit classic GNU TLS: call __tls_get_addr, result in %rax,
   wrapped in a const libcall block for CSE.  */
9336 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9339 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9340 insns = get_insns ();
9343 RTL_CONST_CALL_P (insns) = 1;
9344 emit_libcall_block (insns, dest, rax, x);
9346 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9347 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9349 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9351 if (TARGET_GNU2_TLS)
9353 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9355 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9359 case TLS_MODEL_LOCAL_DYNAMIC:
9360 base = gen_reg_rtx (Pmode);
9361 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9363 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9365 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9368 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9369 insns = get_insns ();
9372 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9373 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9374 RTL_CONST_CALL_P (insns) = 1;
9375 emit_libcall_block (insns, base, rax, note);
9377 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9378 emit_insn (gen_tls_local_dynamic_base_64 (base));
9380 emit_insn (gen_tls_local_dynamic_base_32 (base));
9382 if (TARGET_GNU2_TLS)
9384 rtx x = ix86_tls_module_base ();
9386 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9387 gen_rtx_MINUS (Pmode, x, tp));
/* Add the @DTPOFF offset of X to the module base.  */
9390 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9391 off = gen_rtx_CONST (Pmode, off);
9393 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9395 if (TARGET_GNU2_TLS)
9397 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9399 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9404 case TLS_MODEL_INITIAL_EXEC:
9408 type = UNSPEC_GOTNTPOFF;
9412 if (reload_in_progress)
9413 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9414 pic = pic_offset_table_rtx;
9415 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9417 else if (!TARGET_ANY_GNU_TLS)
9419 pic = gen_reg_rtx (Pmode);
9420 emit_insn (gen_set_got (pic));
9421 type = UNSPEC_GOTTPOFF;
9426 type = UNSPEC_INDNTPOFF;
/* Load the TP offset of X from the GOT.  */
9429 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9430 off = gen_rtx_CONST (Pmode, off);
9432 off = gen_rtx_PLUS (Pmode, pic, off);
9433 off = gen_const_mem (Pmode, off);
9434 set_mem_alias_set (off, ix86_GOT_alias_set ());
9436 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9438 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9439 off = force_reg (Pmode, off);
9440 return gen_rtx_PLUS (Pmode, base, off);
9444 base = get_thread_pointer (true);
9445 dest = gen_reg_rtx (Pmode);
9446 emit_insn (gen_subsi3 (dest, base, off));
9450 case TLS_MODEL_LOCAL_EXEC:
9451 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9452 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9453 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9454 off = gen_rtx_CONST (Pmode, off);
9456 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9458 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9459 return gen_rtx_PLUS (Pmode, base, off);
9463 base = get_thread_pointer (true);
9464 dest = gen_reg_rtx (Pmode);
9465 emit_insn (gen_subsi3 (dest, base, off));
9476 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* GC-managed hash table mapping a dllimport'd DECL to its synthesized
   __imp_ pointer VAR_DECL (see get_dllimport_decl below).  */
9479 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9480 htab_t dllimport_map;
/* Return (creating on first use) the artificial VAR_DECL whose RTL is a
   load of the __imp_DECL import-table slot for DECL.  Results are cached
   in dllimport_map so each decl gets exactly one import symbol.
   NOTE(review): listing is elided; some declarations and braces are not
   visible here.  */
9483 get_dllimport_decl (tree decl)
9485 struct tree_map *h, in;
9489 size_t namelen, prefixlen;
/* Lazily create the cache table on first call.  */
9495 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9497 in.hash = htab_hash_pointer (decl);
9498 in.base.from = decl;
9499 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9500 h = (struct tree_map *) *loc;
/* Cache miss: build a fresh external, read-only pointer decl.  */
9504 *loc = h = GGC_NEW (struct tree_map);
9506 h->base.from = decl;
9507 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9508 DECL_ARTIFICIAL (to) = 1;
9509 DECL_IGNORED_P (to) = 1;
9510 DECL_EXTERNAL (to) = 1;
9511 TREE_READONLY (to) = 1;
9513 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9514 name = targetm.strip_name_encoding (name);
/* Fastcall names and no-underscore targets get "__imp_"; otherwise the
   user-label underscore is folded into "__imp__".  The leading '*'
   suppresses further prefixing.  */
9515 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9516 ? "*__imp_" : "*__imp__";
9517 namelen = strlen (name);
9518 prefixlen = strlen (prefix);
9519 imp_name = (char *) alloca (namelen + prefixlen + 1);
9520 memcpy (imp_name, prefix, prefixlen);
9521 memcpy (imp_name + prefixlen, name, namelen + 1);
/* Intern the name in GC memory and make the decl's RTL a const load of
   the import slot, aliased with other GOT-like loads.  */
9523 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9524 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9525 SET_SYMBOL_REF_DECL (rtl, to);
9526 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9528 rtl = gen_const_mem (Pmode, rtl);
9529 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9531 SET_DECL_RTL (to, rtl);
9532 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9537 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9538 true if we require the result be a register. */
/* Replace SYMBOL (a dllimport SYMBOL_REF with an associated decl) by the
   RTL of its __imp_ indirection decl; force it into a register when
   WANT_REG.  */
9541 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
9546 gcc_assert (SYMBOL_REF_DECL (symbol));
9547 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
9549 x = DECL_RTL (imp_decl);
9551 x = force_reg (Pmode, x);
9555 /* Try machine-dependent ways of modifying an illegitimate address
9556 to be legitimate. If we find one, return the new, valid address.
9557 This macro is used in only one place: `memory_address' in explow.c.
9559 OLDX is the address as it was before break_out_memory_refs was called.
9560 In some cases it is useful to look at this to decide what needs to be done.
9562 MODE and WIN are passed so that this macro can use
9563 GO_IF_LEGITIMATE_ADDRESS.
9565 It is always safe for this macro to do nothing. It exists to recognize
9566 opportunities to optimize the output.
9568 For the 80386, we handle X+REG by loading X into a register R and
9569 using R+REG. R will go in a general reg and indexing will be used.
9570 However, if REG is a broken-out memory address or multiplication,
9571 nothing needs to be done because REG can certainly go in a general reg.
9573 When -fpic is used, special handling is needed for symbolic references.
9574 See comments by legitimize_pic_address in i386.c for details. */
/* Legitimize address X for MODE: dispatch TLS and dllimport symbols to
   their helpers, handle PIC symbolic constants, then canonicalize
   shift/plus combinations into the base+index*scale+disp form the x86
   addressing modes accept.
   NOTE(review): listing is elided -- several braces, declarations (e.g.
   'changed', 'log') and fallthrough paths are not visible here.  */
9577 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
/* TLS symbols, bare or inside (const (plus sym const_int)).  */
9582 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
9584 return legitimize_tls_address (x, (enum tls_model) log, false);
9585 if (GET_CODE (x) == CONST
9586 && GET_CODE (XEXP (x, 0)) == PLUS
9587 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9588 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
9590 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
9591 (enum tls_model) log, false);
9592 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* Same two shapes for dllimport references.  */
9595 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9597 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
9598 return legitimize_dllimport_symbol (x, true);
9599 if (GET_CODE (x) == CONST
9600 && GET_CODE (XEXP (x, 0)) == PLUS
9601 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9602 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
9604 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
9605 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
9609 if (flag_pic && SYMBOLIC_CONST (x))
9610 return legitimize_pic_address (x, 0);
/* x86 addresses scale by multiply, not shift; rewrite small shifts.  */
9612 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
9613 if (GET_CODE (x) == ASHIFT
9614 && CONST_INT_P (XEXP (x, 1))
9615 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
9618 log = INTVAL (XEXP (x, 1));
9619 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
9620 GEN_INT (1 << log));
9623 if (GET_CODE (x) == PLUS)
9625 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
9627 if (GET_CODE (XEXP (x, 0)) == ASHIFT
9628 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9629 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
9632 log = INTVAL (XEXP (XEXP (x, 0), 1));
9633 XEXP (x, 0) = gen_rtx_MULT (Pmode,
9634 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
9635 GEN_INT (1 << log));
9638 if (GET_CODE (XEXP (x, 1)) == ASHIFT
9639 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9640 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
9643 log = INTVAL (XEXP (XEXP (x, 1), 1));
9644 XEXP (x, 1) = gen_rtx_MULT (Pmode,
9645 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
9646 GEN_INT (1 << log));
9649 /* Put multiply first if it isn't already. */
9650 if (GET_CODE (XEXP (x, 1)) == MULT)
9652 rtx tmp = XEXP (x, 0);
9653 XEXP (x, 0) = XEXP (x, 1);
9658 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
9659 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
9660 created by virtual register instantiation, register elimination, and
9661 similar optimizations. */
9662 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
9665 x = gen_rtx_PLUS (Pmode,
9666 gen_rtx_PLUS (Pmode, XEXP (x, 0),
9667 XEXP (XEXP (x, 1), 0)),
9668 XEXP (XEXP (x, 1), 1));
9672 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
9673 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
9674 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
9675 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9676 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
9677 && CONSTANT_P (XEXP (x, 1)))
9680 rtx other = NULL_RTX;
/* Pick whichever of the two constants is a CONST_INT; OTHER is the
   remaining addend.  */
9682 if (CONST_INT_P (XEXP (x, 1)))
9684 constant = XEXP (x, 1);
9685 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
9687 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
9689 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
9690 other = XEXP (x, 1);
9698 x = gen_rtx_PLUS (Pmode,
9699 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
9700 XEXP (XEXP (XEXP (x, 0), 1), 0)),
9701 plus_constant (other, INTVAL (constant)));
/* If any rewrite above produced a valid address, stop early.  */
9705 if (changed && legitimate_address_p (mode, x, FALSE))
/* Otherwise force the MULT halves into operands.  */
9708 if (GET_CODE (XEXP (x, 0)) == MULT)
9711 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
9714 if (GET_CODE (XEXP (x, 1)) == MULT)
9717 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
9721 && REG_P (XEXP (x, 1))
9722 && REG_P (XEXP (x, 0)))
9725 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
9728 x = legitimize_pic_address (x, 0);
9731 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: materialize one side into a temp register.  */
9734 if (REG_P (XEXP (x, 0)))
9736 rtx temp = gen_reg_rtx (Pmode);
9737 rtx val = force_operand (XEXP (x, 1), temp);
9739 emit_move_insn (temp, val);
9745 else if (REG_P (XEXP (x, 1)))
9747 rtx temp = gen_reg_rtx (Pmode);
9748 rtx val = force_operand (XEXP (x, 0), temp);
9750 emit_move_insn (temp, val);
9760 /* Print an integer constant expression in assembler syntax. Addition
9761 and subtraction are the only arithmetic that may appear in these
9762 expressions. FILE is the stdio stream to write to, X is the rtx, and
9763 CODE is the operand print code from the output string. */
/* Emit X (a PIC address constant) to FILE in assembler syntax, recursing
   through PLUS/MINUS/CONST and printing the relocation suffix (@GOT,
   @GOTOFF, @TPOFF, ...) implied by any UNSPEC wrapper.  CODE is the
   operand print code ('P' requests @PLT on non-local symbols).
   NOTE(review): listing is elided -- case labels, breaks and braces are
   partly missing from this view.  */
9766 output_pic_addr_const (FILE *file, rtx x, int code)
9770 switch (GET_CODE (x))
9773 gcc_assert (flag_pic);
9778 if (! TARGET_MACHO || TARGET_64BIT)
9779 output_addr_const (file, x);
9782 const char *name = XSTR (x, 0);
9784 /* Mark the decl as referenced so that cgraph will
9785 output the function. */
9786 if (SYMBOL_REF_DECL (x))
9787 mark_decl_referenced (SYMBOL_REF_DECL (x));
/* Darwin indirection: undefined functions go through a stub.  */
9790 if (MACHOPIC_INDIRECT
9791 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
9792 name = machopic_indirection_name (x, /*stub_p=*/true);
9794 assemble_name (file, name);
9796 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
9797 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
9798 fputs ("@PLT", file);
9805 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
9806 assemble_name (asm_out_file, buf);
9810 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9814 /* This used to output parentheses around the expression,
9815 but that does not work on the 386 (either ATT or BSD assembler). */
9816 output_pic_addr_const (file, XEXP (x, 0), code);
9820 if (GET_MODE (x) == VOIDmode)
9822 /* We can use %d if the number is <32 bits and positive. */
9823 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
9824 fprintf (file, "0x%lx%08lx",
9825 (unsigned long) CONST_DOUBLE_HIGH (x),
9826 (unsigned long) CONST_DOUBLE_LOW (x));
9828 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
9831 /* We can't handle floating point constants;
9832 PRINT_OPERAND must handle them. */
9833 output_operand_lossage ("floating constant misused");
9837 /* Some assemblers need integer constants to appear first. */
9838 if (CONST_INT_P (XEXP (x, 0)))
9840 output_pic_addr_const (file, XEXP (x, 0), code);
9842 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: second operand must be a CONST_INT; print it first (see the
   note above about assemblers).  */
9846 gcc_assert (CONST_INT_P (XEXP (x, 1)));
9847 output_pic_addr_const (file, XEXP (x, 1), code);
9849 output_pic_addr_const (file, XEXP (x, 0), code);
/* Grouping brackets differ between AT&T and Intel dialects.  */
9855 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
9856 output_pic_addr_const (file, XEXP (x, 0), code);
9858 output_pic_addr_const (file, XEXP (x, 1), code);
9860 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC wrappers map one-to-one onto relocation suffixes.  */
9864 gcc_assert (XVECLEN (x, 0) == 1);
9865 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
9866 switch (XINT (x, 1))
9869 fputs ("@GOT", file);
9872 fputs ("@GOTOFF", file);
9875 fputs ("@PLTOFF", file);
9877 case UNSPEC_GOTPCREL:
9878 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9879 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
9881 case UNSPEC_GOTTPOFF:
9882 /* FIXME: This might be @TPOFF in Sun ld too. */
9883 fputs ("@GOTTPOFF", file);
9886 fputs ("@TPOFF", file);
9890 fputs ("@TPOFF", file);
9892 fputs ("@NTPOFF", file);
9895 fputs ("@DTPOFF", file);
9897 case UNSPEC_GOTNTPOFF:
9899 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9900 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
9902 fputs ("@GOTNTPOFF", file);
9904 case UNSPEC_INDNTPOFF:
9905 fputs ("@INDNTPOFF", file);
9908 output_operand_lossage ("invalid UNSPEC as operand");
9914 output_operand_lossage ("invalid expression as operand");
9918 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9919 We need to emit DTP-relative relocations. */
/* Emit a DTP-relative (module-offset) relocation for X into the DWARF
   debug sections: ".long x@DTPOFF" plus, presumably for SIZE == 8 on the
   elided path, a zero upper word.  SIZE is the requested byte size.  */
9921 static void ATTRIBUTE_UNUSED
9922 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
9924 fputs (ASM_LONG, file);
9925 output_addr_const (file, x);
9926 fputs ("@DTPOFF", file);
/* NOTE(review): elided branch above -- this ", 0" looks like the upper
   half of an 8-byte entry; confirm against the full source.  */
9932 fputs (", 0", file);
9939 /* In the name of slightly smaller debug output, and to cater to
9940 general assembler lossage, recognize PIC+GOTOFF and turn it back
9941 into a direct symbol reference.
9943 On Darwin, this is necessary to avoid a crash, because Darwin
9944 has a different PIC label for each routine but the DWARF debugging
9945 information is not associated with any particular routine, so it's
9946 necessary to remove references to the PIC label from RTL stored by
9947 the DWARF output code. */
/* Undo PIC legitimization for debug output: given ORIG_X, strip the
   pic-register addend and GOT/GOTOFF/GOTPCREL unspec wrappers, and
   return the underlying symbol (plus any constant and register
   addends), or the original address if it doesn't match.
   NOTE(review): listing is elided -- initial assignment of X and some
   early-return paths are not visible here.  */
9950 ix86_delegitimize_address (rtx orig_x)
9953 /* reg_addend is NULL or a multiple of some register. */
9954 rtx reg_addend = NULL_RTX;
9955 /* const_addend is NULL or a const_int. */
9956 rtx const_addend = NULL_RTX;
9957 /* This is the result, or NULL. */
9958 rtx result = NULL_RTX;
/* 64-bit case: only (const (unspec [sym] GOTPCREL)) is recognized.  */
9965 if (GET_CODE (x) != CONST
9966 || GET_CODE (XEXP (x, 0)) != UNSPEC
9967 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
9970 return XVECEXP (XEXP (x, 0), 0, 0);
9973 if (GET_CODE (x) != PLUS
9974 || GET_CODE (XEXP (x, 1)) != CONST)
9977 if (REG_P (XEXP (x, 0))
9978 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
9979 /* %ebx + GOT/GOTOFF */
9981 else if (GET_CODE (XEXP (x, 0)) == PLUS)
9983 /* %ebx + %reg * scale + GOT/GOTOFF */
9984 reg_addend = XEXP (x, 0);
/* Strip the pic register out of whichever side holds it.  */
9985 if (REG_P (XEXP (reg_addend, 0))
9986 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
9987 reg_addend = XEXP (reg_addend, 1);
9988 else if (REG_P (XEXP (reg_addend, 1))
9989 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
9990 reg_addend = XEXP (reg_addend, 0);
9993 if (!REG_P (reg_addend)
9994 && GET_CODE (reg_addend) != MULT
9995 && GET_CODE (reg_addend) != ASHIFT)
/* Descend into the CONST and peel off a trailing constant offset.  */
10001 x = XEXP (XEXP (x, 1), 0);
10002 if (GET_CODE (x) == PLUS
10003 && CONST_INT_P (XEXP (x, 1)))
10005 const_addend = XEXP (x, 1);
/* GOT wrappers only make sense for memory references; GOTOFF only for
   non-memory uses.  */
10009 if (GET_CODE (x) == UNSPEC
10010 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10011 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10012 result = XVECEXP (x, 0, 0);
10014 if (TARGET_MACHO && darwin_local_data_pic (x)
10015 && !MEM_P (orig_x))
10016 result = XEXP (x, 0);
/* Re-attach the stripped addends to the recovered symbol.  */
10022 result = gen_rtx_PLUS (Pmode, result, const_addend);
10024 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10028 /* If X is a machine specific address (i.e. a symbol or label being
10029 referenced as a displacement from the GOT implemented using an
10030 UNSPEC), then return the base term. Otherwise return X. */
/* Return the base term of a machine-specific address X: for a 64-bit
   (const (unspec [sym] GOTPCREL)) possibly plus an offset, return the
   wrapped symbol; otherwise delegate to ix86_delegitimize_address.
   Returns X itself when no symbol/label can be extracted (elided paths).  */
10033 ix86_find_base_term (rtx x)
10039 if (GET_CODE (x) != CONST)
10041 term = XEXP (x, 0);
/* Skip over a "+ const" wrapper around the unspec.  */
10042 if (GET_CODE (term) == PLUS
10043 && (CONST_INT_P (XEXP (term, 1))
10044 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10045 term = XEXP (term, 0);
10046 if (GET_CODE (term) != UNSPEC
10047 || XINT (term, 1) != UNSPEC_GOTPCREL)
10050 term = XVECEXP (term, 0, 0);
10052 if (GET_CODE (term) != SYMBOL_REF
10053 && GET_CODE (term) != LABEL_REF)
/* Non-64-bit (or non-GOTPCREL) path: use the delegitimizer.  */
10059 term = ix86_delegitimize_address (x);
10061 if (GET_CODE (term) != SYMBOL_REF
10062 && GET_CODE (term) != LABEL_REF)
/* Write the condition-code suffix (e.g. "e", "a", "np") for comparison
   CODE under flags mode MODE to FILE.  REVERSE inverts the condition;
   FP selects the fcmov-style spellings ("nbe"/"nb"/"u"/...).
   NOTE(review): listing is elided -- most case labels and suffix
   assignments are missing from this view.  */
10069 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10070 int fp, FILE *file)
10072 const char *suffix;
10074 if (mode == CCFPmode || mode == CCFPUmode)
10076 enum rtx_code second_code, bypass_code;
/* FP compares must already be reduced to a single integer-style
   condition; assert no split/bypass condition remains.  */
10077 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10078 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10079 code = ix86_fp_compare_code_to_integer (code);
10083 code = reverse_condition (code);
10134 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10138 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10139 Those same assemblers have the same but opposite lossage on cmov. */
10140 if (mode == CCmode)
10141 suffix = fp ? "nbe" : "a";
10142 else if (mode == CCCmode)
10145 gcc_unreachable ();
10161 gcc_unreachable ();
10165 gcc_assert (mode == CCmode || mode == CCCmode);
10182 gcc_unreachable ();
10186 /* ??? As above. */
10187 gcc_assert (mode == CCmode || mode == CCCmode);
10188 suffix = fp ? "nb" : "ae";
10191 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10195 /* ??? As above. */
10196 if (mode == CCmode)
10198 else if (mode == CCCmode)
10199 suffix = fp ? "nb" : "ae";
10201 gcc_unreachable ();
/* Parity-flag conditions for unordered FP results.  */
10204 suffix = fp ? "u" : "p";
10207 suffix = fp ? "nu" : "np";
10210 gcc_unreachable ();
10212 fputs (suffix, file);
10215 /* Print the name of register X to FILE based on its machine mode and number.
10216 If CODE is 'w', pretend the mode is HImode.
10217 If CODE is 'b', pretend the mode is QImode.
10218 If CODE is 'k', pretend the mode is SImode.
10219 If CODE is 'q', pretend the mode is DImode.
10220 If CODE is 'x', pretend the mode is V4SFmode.
10221 If CODE is 't', pretend the mode is V8SFmode.
10222 If CODE is 'h', pretend the reg is the 'high' byte register.
10223 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10224 If CODE is 'd', duplicate the operand for AVX instruction.
/* Print the assembler name of register X to FILE, sized/renamed per the
   print CODE documented in the comment above.  With CODE 'd' and AVX,
   the register is printed twice (duplicated operand).
   NOTE(review): listing is elided -- the %-prefix output, several size
   branches and the case labels of the size switch are not visible.  */
10228 print_reg (rtx x, int code, FILE *file)
10231 bool duplicated = code == 'd' && TARGET_AVX;
/* Only real hard registers (or pc) may reach the output phase.  */
10233 gcc_assert (x == pc_rtx
10234 || (REGNO (x) != ARG_POINTER_REGNUM
10235 && REGNO (x) != FRAME_POINTER_REGNUM
10236 && REGNO (x) != FLAGS_REG
10237 && REGNO (x) != FPSR_REG
10238 && REGNO (x) != FPCR_REG));
10240 if (ASSEMBLER_DIALECT == ASM_ATT)
10245 gcc_assert (TARGET_64BIT);
10246 fputs ("rip", file);
/* Translate the print code into an operand byte size.  */
10250 if (code == 'w' || MMX_REG_P (x))
10252 else if (code == 'b')
10254 else if (code == 'k')
10256 else if (code == 'q')
10258 else if (code == 'y')
10260 else if (code == 'h')
10262 else if (code == 'x')
10264 else if (code == 't')
10267 code = GET_MODE_SIZE (GET_MODE (x));
10269 /* Irritatingly, AMD extended registers use different naming convention
10270 from the normal registers. */
10271 if (REX_INT_REG_P (x))
10273 gcc_assert (TARGET_64BIT);
10277 error ("extended registers have no high halves");
/* r8..r15 with b/w/d suffix per operand size.  */
10280 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10283 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10286 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10289 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10292 error ("unsupported operand size for extended register");
10302 if (STACK_TOP_P (x))
/* 4/8-byte integer regs get the 'e'/'r' prefix (eax/rax).  */
10311 if (! ANY_FP_REG_P (x))
10312 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10317 reg = hi_reg_name[REGNO (x)];
10320 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10322 reg = qi_reg_name[REGNO (x)];
10325 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10327 reg = qi_high_reg_name[REGNO (x)];
/* 2-byte names reuse hi_reg_name minus its size prefix.  */
10332 gcc_assert (!duplicated);
10334 fputs (hi_reg_name[REGNO (x)] + 1, file);
10339 gcc_unreachable ();
/* AVX 'd': emit the register a second time, dialect-appropriately.  */
10345 if (ASSEMBLER_DIALECT == ASM_ATT)
10346 fprintf (file, ", %%%s", reg);
10348 fprintf (file, ", %s", reg);
10352 /* Locate some local-dynamic symbol still in use by this function
10353 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: if *PX is a local-dynamic TLS SYMBOL_REF,
   record its name in cfun->machine->some_ld_name (stopping the walk on
   the elided return path).  */
10357 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10361 if (GET_CODE (x) == SYMBOL_REF
10362 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10364 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return (and cache) the name of some local-dynamic TLS symbol used in
   the current function, scanning insn patterns on first call.  Aborts
   if none exists -- callers (the '&' operand code) must only ask when
   one is guaranteed.  */
static const char *
10372 get_some_local_dynamic_name (void)
10376 if (cfun->machine->some_ld_name)
10377 return cfun->machine->some_ld_name;
10379 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10381 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10382 return cfun->machine->some_ld_name;
10384 gcc_unreachable ();
10387 /* Meaning of CODE:
10388 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10389 C -- print opcode suffix for set/cmov insn.
10390 c -- like C, but print reversed condition
10391 E,e -- likewise, but for compare-and-branch fused insn.
10392 F,f -- likewise, but for floating-point.
10393 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10395 R -- print the prefix for register names.
10396 z -- print the opcode suffix for the size of the current operand.
10397 * -- print a star (in certain assembler syntax)
10398 A -- print an absolute memory reference.
10399 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10400 s -- print a shift double count, followed by the assemblers argument
10402 b -- print the QImode name of the register for the indicated operand.
10403 %b0 would print %al if operands[0] is reg 0.
10404 w -- likewise, print the HImode name of the register.
10405 k -- likewise, print the SImode name of the register.
10406 q -- likewise, print the DImode name of the register.
10407 x -- likewise, print the V4SFmode name of the register.
10408 t -- likewise, print the V8SFmode name of the register.
10409 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10410 y -- print "st(0)" instead of "st" as a register.
10411 d -- print duplicated register operand for AVX instruction.
10412 D -- print condition for SSE cmp instruction.
10413 P -- if PIC, print an @PLT suffix.
10414 X -- don't print any sort of PIC '@' suffix for a symbol.
10415 & -- print some in-use local-dynamic symbol name.
10416 H -- print a memory address offset by 8; used for sse high-parts
10417 Y -- print condition for SSE5 com* instruction.
10418 + -- print a branch hint as 'cs' or 'ds' prefix
10419 ; -- print a semicolon (after prefixes due to bug in older gas).
/* Main operand-printing hook: emit operand X to FILE, interpreted per
   the print CODE documented in the table above, handling registers,
   memory, constants, and dialect (AT&T vs Intel) differences.
   NOTE(review): listing is heavily elided -- most case labels, breaks
   and some putc calls are missing from this view.  */
10423 print_operand (FILE *file, rtx x, int code)
10430 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': print an in-use local-dynamic TLS symbol name.  */
10435 assemble_name (file, get_some_local_dynamic_name ());
/* 'A': absolute memory reference; form depends on dialect.  */
10439 switch (ASSEMBLER_DIALECT)
10446 /* Intel syntax. For absolute addresses, registers should not
10447 be surrounded by braces. */
10451 PRINT_OPERAND (file, x, 0);
10458 gcc_unreachable ();
10461 PRINT_OPERAND (file, x, 0);
/* L/W/B/Q/S/T: explicit opcode size suffixes, AT&T only.  */
10466 if (ASSEMBLER_DIALECT == ASM_ATT)
10471 if (ASSEMBLER_DIALECT == ASM_ATT)
10476 if (ASSEMBLER_DIALECT == ASM_ATT)
10481 if (ASSEMBLER_DIALECT == ASM_ATT)
10486 if (ASSEMBLER_DIALECT == ASM_ATT)
10491 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z': size suffix derived from the operand's own mode.  */
10496 /* 387 opcodes don't get size suffixes if the operands are
10498 if (STACK_REG_P (x))
10501 /* Likewise if using Intel opcodes. */
10502 if (ASSEMBLER_DIALECT == ASM_INTEL)
10505 /* This is the size of op from size of operand. */
10506 switch (GET_MODE_SIZE (GET_MODE (x)))
10515 #ifdef HAVE_GAS_FILDS_FISTS
10525 if (GET_MODE (x) == SFmode)
10540 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10544 #ifdef GAS_MNEMONICS
10559 gcc_unreachable ();
/* 's': shift-double count followed by separator.  */
10576 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
10578 PRINT_OPERAND (file, x, 0);
10579 fputs (", ", file);
10584 /* Little bit of braindamage here. The SSE compare instructions
10585 does use completely different names for the comparisons that the
10586 fp conditional moves. */
/* 'D': SSE cmp predicate name; the first switch looks like the AVX
   (extended-predicate) spelling set, the second the legacy one.  */
10589 switch (GET_CODE (x))
10592 fputs ("eq", file);
10595 fputs ("eq_us", file);
10598 fputs ("lt", file);
10601 fputs ("nge", file);
10604 fputs ("le", file);
10607 fputs ("ngt", file);
10610 fputs ("unord", file);
10613 fputs ("neq", file);
10616 fputs ("neq_oq", file);
10619 fputs ("ge", file);
10622 fputs ("nlt", file);
10625 fputs ("gt", file);
10628 fputs ("nle", file);
10631 fputs ("ord", file);
10634 gcc_unreachable ();
10639 switch (GET_CODE (x))
10643 fputs ("eq", file);
10647 fputs ("lt", file);
10651 fputs ("le", file);
10654 fputs ("unord", file);
10658 fputs ("neq", file);
10662 fputs ("nlt", file);
10666 fputs ("nle", file);
10669 fputs ("ord", file);
10672 gcc_unreachable ();
/* 'O': Sun-assembler cmov size suffix, then fall into 'C'/'F' style
   condition printing via put_condition_code.  */
10677 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10678 if (ASSEMBLER_DIALECT == ASM_ATT)
10680 switch (GET_MODE (x))
10682 case HImode: putc ('w', file); break;
10684 case SFmode: putc ('l', file); break;
10686 case DFmode: putc ('q', file); break;
10687 default: gcc_unreachable ();
10694 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
10697 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10698 if (ASSEMBLER_DIALECT == ASM_ATT)
10701 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
10704 /* Like above, but reverse condition */
10706 /* Check to see if argument to %c is really a constant
10707 and not a condition code which needs to be reversed. */
10708 if (!COMPARISON_P (x))
10710 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
10713 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
10716 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10717 if (ASSEMBLER_DIALECT == ASM_ATT)
10720 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'E'/'e': compare-and-branch fused conditions in CCmode.  */
10724 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
10728 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
/* 'H': address offset by 8, for SSE high parts.  */
10732 /* It doesn't actually matter what mode we use here, as we're
10733 only going to use this for printing. */
10734 x = adjust_address_nv (x, DImode, 8);
/* '+': emit ds/cs branch-hint prefixes from REG_BR_PROB notes, but
   only when the static prediction disagrees with the CPU default.  */
10742 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
10745 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
10748 int pred_val = INTVAL (XEXP (x, 0));
10750 if (pred_val < REG_BR_PROB_BASE * 45 / 100
10751 || pred_val > REG_BR_PROB_BASE * 55 / 100)
10753 int taken = pred_val > REG_BR_PROB_BASE / 2;
10754 int cputaken = final_forward_branch_p (current_output_insn) == 0;
10756 /* Emit hints only in the case default branch prediction
10757 heuristics would fail. */
10758 if (taken != cputaken)
10760 /* We use 3e (DS) prefix for taken branches and
10761 2e (CS) prefix for not taken branches. */
10763 fputs ("ds ; ", file);
10765 fputs ("cs ; ", file);
/* 'Y': SSE5 com* predicate names.  */
10773 switch (GET_CODE (x))
10776 fputs ("neq", file);
10779 fputs ("eq", file);
10783 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
10787 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
10791 fputs ("le", file);
10795 fputs ("lt", file);
10798 fputs ("unord", file);
10801 fputs ("ord", file);
10804 fputs ("ueq", file);
10807 fputs ("nlt", file);
10810 fputs ("nle", file);
10813 fputs ("ule", file);
10816 fputs ("ult", file);
10819 fputs ("une", file);
10822 gcc_unreachable ();
/* ';': insn separator (works around old gas prefix bug).  */
10828 fputs (" ; ", file);
10835 output_operand_lossage ("invalid operand code '%c'", code);
/* No (or consumed) print code: dispatch on the operand's form.  */
10840 print_reg (x, code, file);
10842 else if (MEM_P (x))
10844 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
10845 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
10846 && GET_MODE (x) != BLKmode)
10849 switch (GET_MODE_SIZE (GET_MODE (x)))
10851 case 1: size = "BYTE"; break;
10852 case 2: size = "WORD"; break;
10853 case 4: size = "DWORD"; break;
10854 case 8: size = "QWORD"; break;
10855 case 12: size = "XWORD"; break;
10857 if (GET_MODE (x) == XFmode)
10863 gcc_unreachable ();
10866 /* Check for explicit size override (codes 'b', 'w' and 'k') */
10869 else if (code == 'w')
10871 else if (code == 'k')
10874 fputs (size, file);
10875 fputs (" PTR ", file);
10879 /* Avoid (%rip) for call operands. */
10880 if (CONSTANT_ADDRESS_P (x) && code == 'P'
10881 && !CONST_INT_P (x))
10882 output_addr_const (file, x);
10883 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
10884 output_operand_lossage ("invalid constraints for operand");
10886 output_address (x);
/* SFmode immediates print as their 32-bit target-image hex.  */
10889 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
10894 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10895 REAL_VALUE_TO_TARGET_SINGLE (r, l);
10897 if (ASSEMBLER_DIALECT == ASM_ATT)
10899 fprintf (file, "0x%08lx", (long unsigned int) l);
10902 /* These float cases don't actually occur as immediate operands. */
10903 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
10907 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
10908 fprintf (file, "%s", dstr);
10911 else if (GET_CODE (x) == CONST_DOUBLE
10912 && GET_MODE (x) == XFmode)
10916 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
10917 fprintf (file, "%s", dstr);
10922 /* We have patterns that allow zero sets of memory, for instance.
10923 In 64-bit mode, we should probably support all 8-byte vectors,
10924 since we can in fact encode that into an immediate. */
10925 if (GET_CODE (x) == CONST_VECTOR)
10927 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediates: '$' in AT&T; symbolics get OFFSET FLAT: in Intel.  */
10933 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
10935 if (ASSEMBLER_DIALECT == ASM_ATT)
10938 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
10939 || GET_CODE (x) == LABEL_REF)
10941 if (ASSEMBLER_DIALECT == ASM_ATT)
10944 fputs ("OFFSET FLAT:", file);
10947 if (CONST_INT_P (x))
10948 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10950 output_pic_addr_const (file, x, code);
10952 output_addr_const (file, x);
10956 /* Print a memory operand whose address is ADDR. */
/* Print the memory address ADDR to FILE, after decomposing it into
   base/index/displacement/scale/segment parts; handles both AT&T
   ("disp(base,index,scale)") and Intel ("[base+index*scale+disp]")
   syntaxes plus %rip-relative forms on 64-bit.
   NOTE(review): listing is elided -- bracket/plus emission and several
   branches are not visible in this view.  */
10959 print_operand_address (FILE *file, rtx addr)
10961 struct ix86_address parts;
10962 rtx base, index, disp;
10964 int ok = ix86_decompose_address (addr, &parts);
10969 index = parts.index;
10971 scale = parts.scale;
/* Non-default segments get an explicit fs:/gs: override.  */
10979 if (ASSEMBLER_DIALECT == ASM_ATT)
10981 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
10984 gcc_unreachable ();
10987 /* Use one byte shorter RIP relative addressing for 64bit mode. */
10988 if (TARGET_64BIT && !base && !index)
/* Peel a constant offset to find the underlying symbol/label; only
   non-TLS symbols qualify for %rip addressing.  */
10992 if (GET_CODE (disp) == CONST
10993 && GET_CODE (XEXP (disp, 0)) == PLUS
10994 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
10995 symbol = XEXP (XEXP (disp, 0), 0);
10997 if (GET_CODE (symbol) == LABEL_REF
10998 || (GET_CODE (symbol) == SYMBOL_REF
10999 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11002 if (!base && !index)
11004 /* Displacement only requires special attention. */
11006 if (CONST_INT_P (disp))
/* Bare constant address: Intel syntax needs an explicit ds: to keep it
   from reading as an immediate.  */
11008 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11009 fputs ("ds:", file);
11010 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11013 output_pic_addr_const (file, disp, 0);
11015 output_addr_const (file, disp);
/* AT&T: disp(base,index,scale).  */
11019 if (ASSEMBLER_DIALECT == ASM_ATT)
11024 output_pic_addr_const (file, disp, 0);
11025 else if (GET_CODE (disp) == LABEL_REF)
11026 output_asm_label (disp);
11028 output_addr_const (file, disp);
11033 print_reg (base, 0, file);
11037 print_reg (index, 0, file);
11039 fprintf (file, ",%d", scale);
/* Intel: [base + index*scale + disp], symbol printed first.  */
11045 rtx offset = NULL_RTX;
11049 /* Pull out the offset of a symbol; print any symbol itself. */
11050 if (GET_CODE (disp) == CONST
11051 && GET_CODE (XEXP (disp, 0)) == PLUS
11052 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11054 offset = XEXP (XEXP (disp, 0), 1);
11055 disp = gen_rtx_CONST (VOIDmode,
11056 XEXP (XEXP (disp, 0), 0));
11060 output_pic_addr_const (file, disp, 0);
11061 else if (GET_CODE (disp) == LABEL_REF)
11062 output_asm_label (disp);
11063 else if (CONST_INT_P (disp))
11066 output_addr_const (file, disp);
11072 print_reg (base, 0, file);
11075 if (INTVAL (offset) >= 0)
11077 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11081 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11088 print_reg (index, 0, file);
11090 fprintf (file, "*%d", scale);
/* Target hook: print TLS UNSPEC address constants that the generic
   output_addr_const cannot handle, appending the matching relocation
   suffix.  Returns false (elided path) for non-UNSPEC or unknown
   unspec numbers so the generic code can report the failure.  */
11098 output_addr_const_extra (FILE *file, rtx x)
11102 if (GET_CODE (x) != UNSPEC)
11105 op = XVECEXP (x, 0, 0);
11106 switch (XINT (x, 1))
11108 case UNSPEC_GOTTPOFF:
11109 output_addr_const (file, op);
11110 /* FIXME: This might be @TPOFF in Sun ld. */
11111 fputs ("@GOTTPOFF", file);
11114 output_addr_const (file, op);
11115 fputs ("@TPOFF", file);
/* NTPOFF spells @TPOFF on the elided (presumably 64-bit) branch,
   @NTPOFF otherwise.  */
11117 case UNSPEC_NTPOFF:
11118 output_addr_const (file, op);
11120 fputs ("@TPOFF", file);
11122 fputs ("@NTPOFF", file);
11124 case UNSPEC_DTPOFF:
11125 output_addr_const (file, op);
11126 fputs ("@DTPOFF", file);
11128 case UNSPEC_GOTNTPOFF:
11129 output_addr_const (file, op);
11131 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11132 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11134 fputs ("@GOTNTPOFF", file);
11136 case UNSPEC_INDNTPOFF:
11137 output_addr_const (file, op);
11138 fputs ("@INDNTPOFF", file);
11148 /* Split one or more DImode RTL references into pairs of SImode
11149 references.  The RTL can be REG, offsettable MEM, integer constant, or
11150 CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
11151 split and "num" is its length.  lo_half and hi_half are output arrays
11152 that parallel "operands". */
11155 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
/* NOTE(review): the loop over NUM and the MEM test guarding the two
   branches below are elided in this view.  */
11159 rtx op = operands[num];
11161 /* simplify_subreg refuse to split volatile memory addresses,
11162 but we still have to handle it.  */
/* Memory case: low word at offset 0, high word at offset 4 (bytes).  */
11165 lo_half[num] = adjust_address (op, SImode, 0);
11166 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-memory case: a VOIDmode op is an integer constant, so treat its
   mode as DImode for the subreg split.  */
11170 lo_half[num] = simplify_gen_subreg (SImode, op,
11171 GET_MODE (op) == VOIDmode
11172 ? DImode : GET_MODE (op), 0);
11173 hi_half[num] = simplify_gen_subreg (SImode, op,
11174 GET_MODE (op) == VOIDmode
11175 ? DImode : GET_MODE (op), 4);
11179 /* Split one or more TImode RTL references into pairs of DImode
11180 references.  The RTL can be REG, offsettable MEM, integer constant, or
11181 CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
11182 split and "num" is its length.  lo_half and hi_half are output arrays
11183 that parallel "operands". */
11186 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
/* NOTE(review): mirrors split_di with doubled widths (DImode halves,
   8-byte high offset); the loop and MEM test are elided in this view.  */
11190 rtx op = operands[num];
11192 /* simplify_subreg refuse to split volatile memory addresses, but we
11193 still have to handle it.  */
11196 lo_half[num] = adjust_address (op, DImode, 0);
11197 hi_half[num] = adjust_address (op, DImode, 8);
11201 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11202 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11207 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11208 MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
11209 is the expression of the binary operation.  The output may either be
11210 emitted here, or returned to the caller, like all output_* functions.
11212 There is no guarantee that the operands are the same mode, as they
11213 might be within FLOAT or FLOAT_EXTEND expressions. */
11215 #ifndef SYSV386_COMPAT
11216 /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
11217 wants to fix the assemblers because that causes incompatibility
11218 with gcc.  No-one wants to fix gcc because that causes
11219 incompatibility with assemblers...  You can use the option of
11220 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11221 #define SYSV386_COMPAT 1
/* Returns (or emits) the assembler template for the operation; the result
   lives in a static buffer, so it is only valid until the next call.
   NOTE(review): many interior lines are elided in this view — the opcode
   selection strings (p/ssep assignments per rtx code), several breaks and
   closing braces, and the final return.  The visible structure is:
   SSE path builds "op{ss,sd}" templates; the x87 path picks a template
   keyed on which operand is top-of-stack and whether it dies.  */
11225 output_387_binary_op (rtx insn, rtx *operands)
11227 static char buf[40];
11230 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11232 #ifdef ENABLE_CHECKING
11233 /* Even if we do not want to check the inputs, this documents input
11234 constraints.  Which helps in understanding the following code. */
11235 if (STACK_REG_P (operands[0])
11236 && ((REG_P (operands[1])
11237 && REGNO (operands[0]) == REGNO (operands[1])
11238 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11239 || (REG_P (operands[2])
11240 && REGNO (operands[0]) == REGNO (operands[2])
11241 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11242 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* If the x87 constraints above don't hold, the insn must be SSE.  */
11245 gcc_assert (is_sse);
/* Choose the base mnemonic from the operation code; integer-mode source
   operands select the fi* (integer-operand) variants.  */
11248 switch (GET_CODE (operands[3]))
11251 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11252 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11260 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11261 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11269 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11270 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11278 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11279 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11287 gcc_unreachable ();
/* SSE path: append ss/sd suffix by scalar mode.  The three-operand
   template form is presumably the AVX encoding — confirm against the
   elided condition selecting it.  */
11294 strcpy (buf, ssep);
11295 if (GET_MODE (operands[0]) == SFmode)
11296 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11298 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11302 strcpy (buf, ssep + 1);
11303 if (GET_MODE (operands[0]) == SFmode)
11304 strcat (buf, "ss\t{%2, %0|%0, %2}");
11306 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: pick the operand-order/pop suffix template P.  */
11312 switch (GET_CODE (operands[3]))
/* For commutative ops, canonicalize so operands[0] == operands[1].  */
11316 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11318 rtx temp = operands[2];
11319 operands[2] = operands[1];
11320 operands[1] = temp;
11323 /* know operands[0] == operands[1].  */
11325 if (MEM_P (operands[2]))
11331 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11333 if (STACK_TOP_P (operands[0]))
11334 /* How is it that we are storing to a dead operand[2]?
11335 Well, presumably operands[1] is dead too.  We can't
11336 store the result to st(0) as st(0) gets popped on this
11337 instruction.  Instead store to operands[2] (which I
11338 think has to be st(1)).  st(1) will be popped later.
11339 gcc <= 2.8.1 didn't have this check and generated
11340 assembly code that the Unixware assembler rejected. */
11341 p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
11343 p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
11347 if (STACK_TOP_P (operands[0]))
11348 p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
11350 p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): operand order matters, and the
   AT&T reversed-direction quirk below comes into play.  */
11355 if (MEM_P (operands[1]))
11361 if (MEM_P (operands[2]))
11367 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11370 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11371 derived assemblers, confusingly reverse the direction of
11372 the operation for fsub{r} and fdiv{r} when the
11373 destination register is not st(0).  The Intel assembler
11374 doesn't have this brain damage.  Read !SYSV386_COMPAT to
11375 figure out what the hardware really does. */
11376 if (STACK_TOP_P (operands[0]))
11377 p = "{p\t%0, %2|rp\t%2, %0}";
11379 p = "{rp\t%2, %0|p\t%0, %2}";
11381 if (STACK_TOP_P (operands[0]))
11382 /* As above for fmul/fadd, we can't store to st(0). */
11383 p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
11385 p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
11390 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11393 if (STACK_TOP_P (operands[0]))
11394 p = "{rp\t%0, %1|p\t%1, %0}";
11396 p = "{p\t%1, %0|rp\t%0, %1}";
11398 if (STACK_TOP_P (operands[0]))
11399 p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
11401 p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
11406 if (STACK_TOP_P (operands[0]))
11408 if (STACK_TOP_P (operands[1]))
11409 p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
11411 p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
11414 else if (STACK_TOP_P (operands[1]))
11417 p = "{\t%1, %0|r\t%0, %1}";
11419 p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
11425 p = "{r\t%2, %0|\t%0, %2}";
11427 p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
11433 gcc_unreachable ();
11440 /* Return needed mode for entity in optimize_mode_switching pass. */
/* INSN is examined to decide which i387 control-word mode it requires.
   Calls/asms force I387_CW_UNINITIALIZED (the CW may have been clobbered);
   unrecognizable insns need nothing (I387_CW_ANY); otherwise the insn's
   i387_cw attribute is consulted.
   NOTE(review): the ENTITY switch and the per-mode TARGET_* gating between
   the attribute checks are elided in this view.  */
11443 ix86_mode_needed (int entity, rtx insn)
11445 enum attr_i387_cw mode;
11447 /* The mode UNINITIALIZED is used to store control word after a
11448 function call or ASM pattern.  The mode ANY specify that function
11449 has no requirements on the control word and make no changes in the
11450 bits we are interested in.  */
11453 || (NONJUMP_INSN_P (insn)
11454 && (asm_noperands (PATTERN (insn)) >= 0
11455 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11456 return I387_CW_UNINITIALIZED;
11458 if (recog_memoized (insn) < 0)
11459 return I387_CW_ANY;
11461 mode = get_attr_i387_cw (insn);
11466 if (mode == I387_CW_TRUNC)
11471 if (mode == I387_CW_FLOOR)
11476 if (mode == I387_CW_CEIL)
11481 if (mode == I387_CW_MASK_PM)
11486 gcc_unreachable ();
11489 return I387_CW_ANY;
11492 /* Output code to initialize control word copies used by trunc?f?i and
11493 rounding patterns.  CURRENT_MODE is set to current control word,
11494 while NEW_MODE is set to new control word. */
/* Stores the current x87 control word (fnstcw) into a stack slot, derives
   a modified copy in a pseudo according to MODE, and stores that copy into
   the per-mode stack slot so fldcw can load it later.  Two code paths:
   HImode or/and on targets where partial-register writes are cheap/needed,
   else a movsi_insv_1 bitfield insert of the 2 rounding-control bits.  */
11497 emit_i387_cw_initialization (int mode)
11499 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
11502 enum ix86_stack_slot slot;
11504 rtx reg = gen_reg_rtx (HImode);
11506 emit_insn (gen_x86_fnstcw_1 (stored_mode));
11507 emit_move_insn (reg, copy_rtx (stored_mode));
/* HImode logic-op path: avoids partial-register inserts on targets that
   stall on them, and is smaller when optimizing for size.  */
11509 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
11510 || optimize_function_for_size_p (cfun))
11514 case I387_CW_TRUNC:
11515 /* round toward zero (truncate) */
/* RC bits (11:10) = 11b.  */
11516 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
11517 slot = SLOT_CW_TRUNC;
11520 case I387_CW_FLOOR:
11521 /* round down toward -oo */
/* Clear RC, then set RC = 01b.  */
11522 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11523 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
11524 slot = SLOT_CW_FLOOR;
11528 /* round up toward +oo */
/* Clear RC, then set RC = 10b.  */
11529 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11530 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
11531 slot = SLOT_CW_CEIL;
11534 case I387_CW_MASK_PM:
11535 /* mask precision exception for nearbyint() */
11536 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11537 slot = SLOT_CW_MASK_PM;
11541 gcc_unreachable ();
/* Bitfield-insert path: write the 2-bit RC field directly.  */
11548 case I387_CW_TRUNC:
11549 /* round toward zero (truncate) */
11550 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
11551 slot = SLOT_CW_TRUNC;
11554 case I387_CW_FLOOR:
11555 /* round down toward -oo */
11556 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
11557 slot = SLOT_CW_FLOOR;
11561 /* round up toward +oo */
11562 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
11563 slot = SLOT_CW_CEIL;
11566 case I387_CW_MASK_PM:
11567 /* mask precision exception for nearbyint() */
11568 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11569 slot = SLOT_CW_MASK_PM;
11573 gcc_unreachable ();
11577 gcc_assert (slot < MAX_386_STACK_LOCALS);
11579 new_mode = assign_386_stack_local (HImode, slot);
11580 emit_move_insn (new_mode, reg);
11583 /* Output code for INSN to convert a float to a signed int.  OPERANDS
11584 are the insn operands.  The output may be [HSD]Imode and the input
11585 operand may be [SDX]Fmode. */
/* FISTTP is the SSE3 truncating store (no control-word dance needed);
   otherwise we swap in the truncation control word around fistp/fist.
   Operand 2/3 are presumably the saved/new control-word slots — confirm
   against the insn pattern.  */
11588 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
11590 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11591 int dimode_p = GET_MODE (operands[0]) == DImode;
11592 int round_mode = get_attr_i387_cw (insn);
11594 /* Jump through a hoop or two for DImode, since the hardware has no
11595 non-popping instruction.  We used to do this a different way, but
11596 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate st(0) so the mandatory pop leaves the value live.  */
11597 if ((dimode_p || fisttp) && !stack_top_dies)
11598 output_asm_insn ("fld\t%y1", operands);
11600 gcc_assert (STACK_TOP_P (operands[1]));
11601 gcc_assert (MEM_P (operands[0]));
11602 gcc_assert (GET_MODE (operands[1]) != TFmode);
11605 output_asm_insn ("fisttp%z0\t%0", operands);
/* Non-FISTTP path: load truncation CW, convert, restore original CW.  */
11608 if (round_mode != I387_CW_ANY)
11609 output_asm_insn ("fldcw\t%3", operands);
11610 if (stack_top_dies || dimode_p)
11611 output_asm_insn ("fistp%z0\t%0", operands);
11613 output_asm_insn ("fist%z0\t%0", operands);
11614 if (round_mode != I387_CW_ANY)
11615 output_asm_insn ("fldcw\t%2", operands);
11621 /* Output code for x87 ffreep insn.  The OPNO argument, which may only
11622 have the values zero or one, indicates the ffreep insn's operand
11623 from the OPERANDS array. */
11625 static const char *
11626 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
11628 if (TARGET_USE_FFREEP)
11629 #if HAVE_AS_IX86_FFREEP
/* Assembler knows the mnemonic: emit it directly.  */
11630 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit the raw opcode 0xdf 0xc0+i.  The '_' at
   retval[9] is patched below with the stack-register digit.  Note the
   .word spells the two bytes in little-endian order.  */
11633 static char retval[] = ".word\t0xc_df";
11634 int regno = REGNO (operands[opno]);
11636 gcc_assert (FP_REGNO_P (regno));
11638 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not beneficial: plain popping store.  */
11643 return opno ? "fstp\t%y1" : "fstp\t%y0";
11647 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
11648 should be used.  UNORDERED_P is true when fucom should be used. */
/* Returns the assembler template for an FP compare.  SSE compares use
   (v)ucomis*/(v)comis*; x87 compares select among ftst/fcompp/fcom
   variants, popping dead stack operands where possible.
   NOTE(review): several interior lines (the eflags_p branch selecting
   cmp_op0/1, some closing braces, the final return alt[mask]) are elided
   in this view.  */
11651 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
11653 int stack_top_dies;
11654 rtx cmp_op0, cmp_op1;
11655 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
11659 cmp_op0 = operands[0];
11660 cmp_op1 = operands[1];
11664 cmp_op0 = operands[1];
11665 cmp_op1 = operands[2];
/* SSE path: index 0 keeps the AVX 'v' prefix, index 1 skips it.  */
11670 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
11671 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
11672 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
11673 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
11675 if (GET_MODE (operands[0]) == SFmode)
11677 return &ucomiss[TARGET_AVX ? 0 : 1];
11679 return &comiss[TARGET_AVX ? 0 : 1];
11682 return &ucomisd[TARGET_AVX ? 0 : 1];
11684 return &comisd[TARGET_AVX ? 0 : 1];
/* x87 path from here on: first operand must be st(0).  */
11687 gcc_assert (STACK_TOP_P (cmp_op0));
11689 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, freeing st(0) afterwards if it dies.  */
11691 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
11693 if (stack_top_dies)
11695 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
11696 return output_387_ffreep (operands, 1);
11699 return "ftst\n\tfnstsw\t%0";
11702 if (STACK_REG_P (cmp_op1)
11704 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
11705 && REGNO (cmp_op1) != FIRST_STACK_REG)
11707 /* If both the top of the 387 stack dies, and the other operand
11708 is also a stack register that dies, then this must be a
11709 `fcompp' float compare */
11713 /* There is no double popping fcomi variant.  Fortunately,
11714 eflags is immune from the fstp's cc clobbering. */
11716 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
11718 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
11719 return output_387_ffreep (operands, 0);
11724 return "fucompp\n\tfnstsw\t%0";
11726 return "fcompp\n\tfnstsw\t%0";
11731 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
11733 static const char * const alt[16] =
11735 "fcom%z2\t%y2\n\tfnstsw\t%0",
11736 "fcomp%z2\t%y2\n\tfnstsw\t%0",
11737 "fucom%z2\t%y2\n\tfnstsw\t%0",
11738 "fucomp%z2\t%y2\n\tfnstsw\t%0",
11740 "ficom%z2\t%y2\n\tfnstsw\t%0",
11741 "ficomp%z2\t%y2\n\tfnstsw\t%0",
11745 "fcomi\t{%y1, %0|%0, %y1}",
11746 "fcomip\t{%y1, %0|%0, %y1}",
11747 "fucomi\t{%y1, %0|%0, %y1}",
11748 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into ALT per the encoding comment above.  */
11759 mask  = eflags_p << 3;
11760 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
11761 mask |= unordered_p << 1;
11762 mask |= stack_top_dies;
11764 gcc_assert (mask < 16);
/* Emit one element of a jump-table address vector: an absolute reference
   to local label VALUE, using .quad in 64-bit mode (per the elided
   condition) and .long otherwise.  */
11773 ix86_output_addr_vec_elt (FILE *file, int value)
11775 const char *directive = ASM_LONG;
11779 directive = ASM_QUAD;
11781 gcc_assert (!TARGET_64BIT);
11784 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a PIC jump table: a relative reference from label
   REL (or the GOT/function base) to label VALUE, chosen per target:
   label difference, @GOTOFF, Mach-O function-base difference, or a
   GOT-relative expression as the last resort.  */
11788 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
11790 const char *directive = ASM_LONG;
11793 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
11794 directive = ASM_QUAD;
11796 gcc_assert (!TARGET_64BIT);
11798 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
11799 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
11800 fprintf (file, "%s%s%d-%s%d\n",
11801 directive, LPREFIX, value, LPREFIX, rel);
11802 else if (HAVE_AS_GOTOFF_IN_DATA)
11803 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
11805 else if (TARGET_MACHO)
11807 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
11808 machopic_output_function_base_name (file);
11809 fprintf(file, "\n");
/* Fallback: express the entry as GOT_SYMBOL + (label - here).  */
11813 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
11814 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
11817 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Emits the zeroing insn for DEST (post-reload only); the xor form adds a
   FLAGS_REG clobber to the pattern.  The final emit_insn is elided from
   this view.  */
11821 ix86_expand_clear (rtx dest)
11825 /* We play register width games, which are only valid after reload. */
11826 gcc_assert (reload_completed);
11828 /* Avoid HImode and its attendant prefix byte. */
11829 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
/* Widening to SImode is safe post-reload: same hard register.  */
11830 dest = gen_rtx_REG (SImode, REGNO (dest));
11831 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
11833 /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
11834 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
11836 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11837 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
11843 /* X is an unchanging MEM.  If it is a constant pool reference, return
11844 the constant pool rtx, else NULL. */
11847 maybe_get_pool_constant (rtx x)
/* Strip PIC/GOT wrapping from the address before testing it.  */
11849 x = ix86_delegitimize_address (XEXP (x, 0));
11851 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
11852 return get_pool_constant (x);
/* Expander for the mov<mode> patterns on scalar modes: legitimizes TLS and
   dllimport symbols, handles PIC addresses, forces awkward immediates and
   FP constants into registers/memory, then emits the final SET.
   NOTE(review): the op0/op1 initializations and several condition lines
   are elided in this view.  */
11858 ix86_expand_move (enum machine_mode mode, rtx operands[])
11861 enum tls_model model;
11866 if (GET_CODE (op1) == SYMBOL_REF)
11868 model = SYMBOL_REF_TLS_MODEL (op1);
/* TLS symbol: rewrite into the access sequence for its model.  */
11871 op1 = legitimize_tls_address (op1, model, true);
11872 op1 = force_operand (op1, op0);
11876 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11877 && SYMBOL_REF_DLLIMPORT_P (op1))
11878 op1 = legitimize_dllimport_symbol (op1, false);
/* symbol + constant offset: legitimize the symbol, then re-add the
   addend.  */
11880 else if (GET_CODE (op1) == CONST
11881 && GET_CODE (XEXP (op1, 0)) == PLUS
11882 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
11884 rtx addend = XEXP (XEXP (op1, 0), 1);
11885 rtx symbol = XEXP (XEXP (op1, 0), 0);
11888 model = SYMBOL_REF_TLS_MODEL (symbol);
11890 tmp = legitimize_tls_address (symbol, model, true);
11891 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11892 && SYMBOL_REF_DLLIMPORT_P (symbol))
11893 tmp = legitimize_dllimport_symbol (symbol, true);
11897 tmp = force_operand (tmp, NULL);
11898 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
11899 op0, 1, OPTAB_DIRECT);
/* PIC: symbolic operands need to go through the PIC register.  */
11905 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
11907 if (TARGET_MACHO && !TARGET_64BIT)
11912 rtx temp = ((reload_in_progress
11913 || ((op0 && REG_P (op0))
11915 ? op0 : gen_reg_rtx (Pmode));
11916 op1 = machopic_indirect_data_reference (op1, temp);
11917 op1 = machopic_legitimize_pic_address (op1, mode,
11918 temp == op1 ? 0 : temp);
11920 else if (MACHOPIC_INDIRECT)
11921 op1 = machopic_indirect_data_reference (op1, 0);
11929 op1 = force_reg (Pmode, op1);
11930 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
/* Without pseudos (post-reload) reuse op0 as the scratch.  */
11932 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
11933 op1 = legitimize_pic_address (op1, reg);
11942 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
11943 || !push_operand (op0, mode))
11945 op1 = force_reg (mode, op1);
11947 if (push_operand (op0, mode)
11948 && ! general_no_elim_operand (op1, mode))
11949 op1 = copy_to_mode_reg (mode, op1);
11951 /* Force large constants in 64bit compilation into register
11952 to get them CSEed.  */
11953 if (can_create_pseudo_p ()
11954 && (mode == DImode) && TARGET_64BIT
11955 && immediate_operand (op1, mode)
11956 && !x86_64_zext_immediate_operand (op1, VOIDmode)
11957 && !register_operand (op0, mode)
11959 op1 = copy_to_mode_reg (mode, op1);
11961 if (can_create_pseudo_p ()
11962 && FLOAT_MODE_P (mode)
11963 && GET_CODE (op1) == CONST_DOUBLE)
11965 /* If we are loading a floating point constant to a register,
11966 force the value to memory now, since we'll get better code
11967 out the back end.  */
11969 op1 = validize_mem (force_const_mem (mode, op1));
11970 if (!register_operand (op0, mode))
/* mem <- constant-pool mem needs an intermediate register.  */
11972 rtx temp = gen_reg_rtx (mode);
11973 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
11974 emit_move_insn (op0, temp);
11980 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expander for vector mov<mode> patterns: forces non-trivial vector
   constants to the constant pool, routes under-aligned SSE memory operands
   through the misaligned-move expander, and otherwise emits a plain SET.  */
11984 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
11986 rtx op0 = operands[0], op1 = operands[1];
11987 unsigned int align = GET_MODE_ALIGNMENT (mode);
11989 /* Force constants other than zero into memory.  We do not know how
11990 the instructions used to build constants modify the upper 64 bits
11991 of the register, once we have that information we may be able
11992 to handle some of them more efficiently. */
/* standard_sse_constant_p > 0 means the constant (e.g. all-zeros) can be
   materialized directly and need not be spilled to the pool.  */
11993 if (can_create_pseudo_p ()
11994 && register_operand (op0, mode)
11995 && (CONSTANT_P (op1)
11996 || (GET_CODE (op1) == SUBREG
11997 && CONSTANT_P (SUBREG_REG (op1))))
11998 && standard_sse_constant_p (op1) <= 0)
11999 op1 = validize_mem (force_const_mem (mode, op1));
12001 /* We need to check memory alignment for SSE mode since attribute
12002 can make operands unaligned.  */
12003 if (can_create_pseudo_p ()
12004 && SSE_REG_MODE_P (mode)
12005 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12006 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12010 /* ix86_expand_vector_move_misalign() does not like constants ... */
12011 if (CONSTANT_P (op1)
12012 || (GET_CODE (op1) == SUBREG
12013 && CONSTANT_P (SUBREG_REG (op1))))
12014 op1 = validize_mem (force_const_mem (mode, op1));
12016 /* ... nor both arguments in memory.  */
12017 if (!register_operand (op0, mode)
12018 && !register_operand (op1, mode))
12019 op1 = force_reg (mode, op1);
12021 tmp[0] = op0; tmp[1] = op1;
12022 ix86_expand_vector_move_misalign (mode, tmp);
12026 /* Make operand1 a register if it isn't already.  */
/* Avoid mem-to-mem moves: go through a fresh register.  */
12027 if (can_create_pseudo_p ()
12028 && !register_operand (op0, mode)
12029 && !register_operand (op1, mode))
12031 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12035 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12038 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
12039 straight to ix86_expand_vector_move.  */
12040 /* Code generation for scalar reg-reg moves of single and double precision data:
12041 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12045 if (x86_sse_partial_reg_dependency == true)
12050 Code generation for scalar loads of double precision data:
12051 if (x86_sse_split_regs == true)
12052 movlpd mem, reg      (gas syntax)
12056 Code generation for unaligned packed loads of single precision data
12057 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12058 if (x86_sse_unaligned_move_optimal)
12061 if (x86_sse_partial_reg_dependency == true)
12073 Code generation for unaligned packed loads of double precision data
12074 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12075 if (x86_sse_unaligned_move_optimal)
12078 if (x86_sse_split_regs == true)
/* Emits an unaligned vector move, choosing among movdqu/movups/movupd,
   AVX variants, and split half-loads/half-stores depending on tuning.
   NOTE(review): several guarding conditions (the TARGET_AVX switch entry,
   MEM_P (op1) load branch head, some breaks/returns) are elided in this
   view; the structure below is AVX first, then load side, then store
   side.  */
12091 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
12100 switch (GET_MODE_CLASS (mode))
12102 case MODE_VECTOR_INT:
12104 switch (GET_MODE_SIZE (mode))
/* 16-byte integer vectors: vmovdqu via a V16QI view.  */
12107 op0 = gen_lowpart (V16QImode, op0);
12108 op1 = gen_lowpart (V16QImode, op1);
12109 emit_insn (gen_avx_movdqu (op0, op1));
/* 32-byte integer vectors: 256-bit vmovdqu.  */
12112 op0 = gen_lowpart (V32QImode, op0);
12113 op1 = gen_lowpart (V32QImode, op1);
12114 emit_insn (gen_avx_movdqu256 (op0, op1));
12117 gcc_unreachable ();
12120 case MODE_VECTOR_FLOAT:
12121 op0 = gen_lowpart (mode, op0);
12122 op1 = gen_lowpart (mode, op1);
/* Float vectors: pick vmovups/vmovupd by element type and width.  */
12127 emit_insn (gen_avx_movups (op0, op1));
12130 emit_insn (gen_avx_movups256 (op0, op1));
12133 emit_insn (gen_avx_movupd (op0, op1));
12136 emit_insn (gen_avx_movupd256 (op0, op1));
12139 gcc_unreachable ();
12144 gcc_unreachable ();
/* Non-AVX load side (op1 in memory).  */
12152 /* If we're optimizing for size, movups is the smallest.  */
12153 if (optimize_insn_for_size_p ())
12155 op0 = gen_lowpart (V4SFmode, op0);
12156 op1 = gen_lowpart (V4SFmode, op1);
12157 emit_insn (gen_sse_movups (op0, op1));
12161 /* ??? If we have typed data, then it would appear that using
12162 movdqu is the only way to get unaligned data loaded with
12164 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12166 op0 = gen_lowpart (V16QImode, op0);
12167 op1 = gen_lowpart (V16QImode, op1);
12168 emit_insn (gen_sse2_movdqu (op0, op1));
12172 if (TARGET_SSE2 && mode == V2DFmode)
12176 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12178 op0 = gen_lowpart (V2DFmode, op0);
12179 op1 = gen_lowpart (V2DFmode, op1);
12180 emit_insn (gen_sse2_movupd (op0, op1));
12184 /* When SSE registers are split into halves, we can avoid
12185 writing to the top half twice.  */
12186 if (TARGET_SSE_SPLIT_REGS)
12188 emit_clobber (op0);
12193 /* ??? Not sure about the best option for the Intel chips.
12194 The following would seem to satisfy; the register is
12195 entirely cleared, breaking the dependency chain.  We
12196 then store to the upper half, with a dependency depth
12197 of one.  A rumor has it that Intel recommends two movsd
12198 followed by an unpacklpd, but this is unconfirmed.  And
12199 given that the dependency depth of the unpacklpd would
12200 still be one, I'm not sure why this would be better.  */
12201 zero = CONST0_RTX (V2DFmode);
/* Split V2DF load: low half then high half (offsets 0 and 8).  */
12204 m = adjust_address (op1, DFmode, 0);
12205 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12206 m = adjust_address (op1, DFmode, 8);
12207 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12211 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12213 op0 = gen_lowpart (V4SFmode, op0);
12214 op1 = gen_lowpart (V4SFmode, op1);
12215 emit_insn (gen_sse_movups (op0, op1));
/* Split V4SF load: clear or clobber first to break the dependency,
   then movlps/movhps halves.  */
12219 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12220 emit_move_insn (op0, CONST0_RTX (mode));
12222 emit_clobber (op0);
12224 if (mode != V4SFmode)
12225 op0 = gen_lowpart (V4SFmode, op0);
12226 m = adjust_address (op1, V2SFmode, 0);
12227 emit_insn (gen_sse_loadlps (op0, op0, m));
12228 m = adjust_address (op1, V2SFmode, 8);
12229 emit_insn (gen_sse_loadhps (op0, op0, m));
12232 else if (MEM_P (op0))
12234 /* If we're optimizing for size, movups is the smallest.  */
12235 if (optimize_insn_for_size_p ())
12237 op0 = gen_lowpart (V4SFmode, op0);
12238 op1 = gen_lowpart (V4SFmode, op1);
12239 emit_insn (gen_sse_movups (op0, op1));
12243 /* ??? Similar to above, only less clear because of quote
12244 typeless stores unquote.  */
12245 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12246 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12248 op0 = gen_lowpart (V16QImode, op0);
12249 op1 = gen_lowpart (V16QImode, op1);
12250 emit_insn (gen_sse2_movdqu (op0, op1));
12254 if (TARGET_SSE2 && mode == V2DFmode)
/* Split V2DF store: storelpd/storehpd halves.  */
12256 m = adjust_address (op0, DFmode, 0);
12257 emit_insn (gen_sse2_storelpd (m, op1));
12258 m = adjust_address (op0, DFmode, 8);
12259 emit_insn (gen_sse2_storehpd (m, op1));
/* Split V4SF store: storelps/storehps halves.  */
12263 if (mode != V4SFmode)
12264 op1 = gen_lowpart (V4SFmode, op1);
12265 m = adjust_address (op0, V2SFmode, 0);
12266 emit_insn (gen_sse_storelps (m, op1));
12267 m = adjust_address (op0, V2SFmode, 8);
12268 emit_insn (gen_sse_storehps (m, op1));
12272 gcc_unreachable ();
12275 /* Expand a push in MODE.  This is some mode for which we do not support
12276 proper push instructions, at least from the registers that we expect
12277 the value to live in. */
/* Emulates push: decrement the stack pointer by the mode size, then store
   X at the new top of stack.  */
12280 ix86_expand_push (enum machine_mode mode, rtx x)
12284 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12285 GEN_INT (-GET_MODE_SIZE (mode)),
12286 stack_pointer_rtx, 1, OPTAB_DIRECT);
/* expand_simple_binop may have used a scratch; copy back if so.  */
12287 if (tmp != stack_pointer_rtx)
12288 emit_move_insn (stack_pointer_rtx, tmp);
12290 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12291 emit_move_insn (tmp, x);
12294 /* Helper function of ix86_fixup_binary_operands to canonicalize
12295 operand order.  Returns true if the operands should be swapped. */
/* Priority: dst==src1 match beats everything; then immediates should be
   src2; then memory operands should be src2.  Only commutative codes may
   be swapped at all.  (Several return statements are elided here.)  */
12298 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12301 rtx dst = operands[0];
12302 rtx src1 = operands[1];
12303 rtx src2 = operands[2];
12305 /* If the operation is not commutative, we can't do anything.  */
12306 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12309 /* Highest priority is that src1 should match dst.  */
12310 if (rtx_equal_p (dst, src1))
12312 if (rtx_equal_p (dst, src2))
12315 /* Next highest priority is that immediate constants come second.  */
12316 if (immediate_operand (src2, mode))
12318 if (immediate_operand (src1, mode))
12321 /* Lowest priority is that memory references should come second.  */
12331 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
12332 destination to use for the operation.  If different from the true
12333 destination in operands[0], a copy operation will be required. */
12336 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12339 rtx dst = operands[0];
12340 rtx src1 = operands[1];
12341 rtx src2 = operands[2];
12343 /* Canonicalize operand order.  */
12344 if (ix86_swap_binary_operands_p (code, mode, operands))
/* The swap of src1/src2 itself is elided from this view.  */
12348 /* It is invalid to swap operands of different modes.  */
12349 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12356 /* Both source operands cannot be in memory.  */
12357 if (MEM_P (src1) && MEM_P (src2))
12359 /* Optimization: Only read from memory once.  */
12360 if (rtx_equal_p (src1, src2))
12362 src2 = force_reg (mode, src2);
/* src1 = src2 (the shared register) — assignment elided here.  */
12366 src2 = force_reg (mode, src2);
12369 /* If the destination is memory, and we do not have matching source
12370 operands, do things in registers.  */
12371 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12372 dst = gen_reg_rtx (mode);
12374 /* Source 1 cannot be a constant.  */
12375 if (CONSTANT_P (src1))
12376 src1 = force_reg (mode, src1);
12378 /* Source 1 cannot be a non-matching memory.  */
12379 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12380 src1 = force_reg (mode, src1);
/* Write the (possibly replaced) sources back; DST is returned, and the
   caller emits a copy to operands[0] when they differ.  */
12382 operands[1] = src1;
12383 operands[2] = src2;
12387 /* Similarly, but assume that the destination has already been
12388 set up properly.  */
/* Same fixups as ix86_fixup_binary_operands, but asserts that no
   destination copy is needed (fixup returned operands[0] itself).  */
12391 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12392 enum machine_mode mode, rtx operands[])
12394 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12395 gcc_assert (dst == operands[0]);
12398 /* Attempt to expand a binary operator.  Make the expansion closer to the
12399 actual machine, then just general_operand, which will allow 3 separate
12400 memory references (one output, two input) in a single insn. */
12403 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12406 rtx src1, src2, dst, op, clob;
/* Canonicalize operands first; DST may be a fresh register if the real
   destination is an unmatched MEM.  */
12408 dst = ix86_fixup_binary_operands (code, mode, operands);
12409 src1 = operands[1];
12410 src2 = operands[2];
12412 /* Emit the instruction.  */
12414 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12415 if (reload_in_progress)
12417 /* Reload doesn't know about the flags register, and doesn't know that
12418 it doesn't want to clobber it.  We can only do this with PLUS.  */
12419 gcc_assert (code == PLUS);
/* Normal case: wrap the SET with a FLAGS_REG clobber, as the insn
   patterns require.  */
12424 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12425 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12428 /* Fix up the destination if needed.  */
12429 if (dst != operands[0])
12430 emit_move_insn (operands[0], dst);
12433 /* Return TRUE or FALSE depending on whether the binary operator meets the
12434 appropriate constraints. */
/* Predicate mirror of ix86_fixup_binary_operands: rejects operand
   combinations the fixup routine would have to rewrite.  (The early
   returns between the checks are elided in this view.)  */
12437 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12440 rtx dst = operands[0];
12441 rtx src1 = operands[1];
12442 rtx src2 = operands[2];
12444 /* Both source operands cannot be in memory.  */
12445 if (MEM_P (src1) && MEM_P (src2))
12448 /* Canonicalize operand order for commutative operators.  */
12449 if (ix86_swap_binary_operands_p (code, mode, operands))
12456 /* If the destination is memory, we must have a matching source operand.  */
12457 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12460 /* Source 1 cannot be a constant.  */
12461 if (CONSTANT_P (src1))
12464 /* Source 1 cannot be a non-matching memory.  */
12465 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12471 /* Attempt to expand a unary operator.  Make the expansion closer to the
12472 actual machine, then just general_operand, which will allow 2 separate
12473 memory references (one output, one input) in a single insn. */
12476 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
12479 int matching_memory;
12480 rtx src, dst, op, clob;
/* dst/src initialization from operands[] is elided in this view.  */
12485 /* If the destination is memory, and we do not have matching source
12486 operands, do things in registers.  */
12487 matching_memory = 0;
12490 if (rtx_equal_p (dst, src))
12491 matching_memory = 1;
12493 dst = gen_reg_rtx (mode);
12496 /* When source operand is memory, destination must match.  */
12497 if (MEM_P (src) && !matching_memory)
12498 src = force_reg (mode, src);
12500 /* Emit the instruction.  */
12502 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
/* NOT does not clobber flags, so no clobber is added for it (and reload
   cannot cope with one).  */
12503 if (reload_in_progress || code == NOT)
12505 /* Reload doesn't know about the flags register, and doesn't know that
12506 it doesn't want to clobber it.  */
12507 gcc_assert (code == NOT);
12512 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12513 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12516 /* Fix up the destination if needed.  */
12517 if (dst != operands[0])
12518 emit_move_insn (operands[0], dst);
12521 /* Return TRUE or FALSE depending on whether the unary operator meets the
12522 appropriate constraints. */
12525 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
12526 enum machine_mode mode ATTRIBUTE_UNUSED,
12527 rtx operands[2] ATTRIBUTE_UNUSED)
12529 /* If one of operands is memory, source and destination must match. */
/* x86 unary insns are one-address: a memory operand must be both the
   source and the destination of the operation.  */
12530 if ((MEM_P (operands[0])
12531 || MEM_P (operands[1]))
12532 && ! rtx_equal_p (operands[0], operands[1]))
12537 /* Post-reload splitter for converting an SF or DFmode value in an
12538 SSE register into an unsigned SImode. */
/* Strategy visible below: values >= 2^31 have 2^31 subtracted before the
   signed cvttps2dq/cvttpd2dq conversion, and the integer result then has
   its sign bit restored with an XOR against a computed 0x80000000 mask.  */
12541 ix86_split_convert_uns_si_sse (rtx operands[])
12543 enum machine_mode vecmode;
12544 rtx value, large, zero_or_two31, input, two31, x;
12546 large = operands[1];
12547 zero_or_two31 = operands[2];
12548 input = operands[3];
12549 two31 = operands[4];
12550 vecmode = GET_MODE (large);
12551 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
12553 /* Load up the value into the low element. We must ensure that the other
12554 elements are valid floats -- zero is the easiest such value. */
12557 if (vecmode == V4SFmode)
12558 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
12560 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
12564 input = gen_rtx_REG (vecmode, REGNO (input));
12565 emit_move_insn (value, CONST0_RTX (vecmode));
12566 if (vecmode == V4SFmode)
12567 emit_insn (gen_sse_movss (value, value, input));
12569 emit_insn (gen_sse2_movsd (value, value, input));
12572 emit_move_insn (large, two31);
12573 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (2^31 <= value) ? all-ones : 0 -- an element-wise compare mask.  */
12575 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
12576 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* zero_or_two31 = 2^31 where the input was large, else 0.  */
12578 x = gen_rtx_AND (vecmode, zero_or_two31, large);
12579 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
12581 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
12582 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the compare mask into just the integer sign bit (1 << 31).  */
12584 large = gen_rtx_REG (V4SImode, REGNO (large));
12585 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
12587 x = gen_rtx_REG (V4SImode, REGNO (value));
12588 if (vecmode == V4SFmode)
12589 emit_insn (gen_sse2_cvttps2dq (x, value));
12591 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* Re-add 2^31 to the converted result by flipping the sign bit.  */
12594 emit_insn (gen_xorv4si3 (value, value, large));
12597 /* Convert an unsigned DImode value into a DFmode, using only SSE.
12598 Expects the 64-bit DImode to be supplied in a pair of integral
12599 registers. Requires SSE2; will use SSE3 if available. For x86_32,
12600 -mfpmath=sse, !optimize_size only. */
12603 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
12605 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
12606 rtx int_xmm, fp_xmm;
12607 rtx biases, exponents;
/* Get the 64-bit integer into the low half of an XMM register, choosing
   the cheapest path the target supports.  */
12610 int_xmm = gen_reg_rtx (V4SImode);
12611 if (TARGET_INTER_UNIT_MOVES)
12612 emit_insn (gen_movdi_to_sse (int_xmm, input));
12613 else if (TARGET_SSE_SPLIT_REGS)
12615 emit_clobber (int_xmm);
12616 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
12620 x = gen_reg_rtx (V2DImode);
12621 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
12622 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Constant vector of the two double-precision exponent words used to
   splice the integer halves into biased doubles.  */
12625 x = gen_rtx_CONST_VECTOR (V4SImode,
12626 gen_rtvec (4, GEN_INT (0x43300000UL),
12627 GEN_INT (0x45300000UL),
12628 const0_rtx, const0_rtx));
12629 exponents = validize_mem (force_const_mem (V4SImode, x));
12631 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
12632 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
12634 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
12635 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
12636 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
12637 (0x1.0p84 + double(fp_value_hi_xmm)).
12638 Note these exponents differ by 32. */
12640 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
12642 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
12643 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
12644 real_ldexp (&bias_lo_rvt, &dconst1, 52);
12645 real_ldexp (&bias_hi_rvt, &dconst1, 84);
12646 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
12647 x = const_double_from_real_value (bias_hi_rvt, DFmode);
12648 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
12649 biases = validize_mem (force_const_mem (V2DFmode, biases));
12650 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
12652 /* Add the upper and lower DFmode values together. */
/* SSE3 horizontal add does this in one insn; otherwise shuffle the high
   element down and use an ordinary vector add.  */
12654 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
12657 x = copy_to_mode_reg (V2DFmode, fp_xmm);
12658 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
12659 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
12662 ix86_expand_vector_extract (false, target, fp_xmm, 0);
12665 /* Not used, but eases macroization of patterns. */
/* Stub only: the SImode->XFmode-via-SSE combination never occurs, so any
   call here is a compiler bug.  */
12667 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
12668 rtx input ATTRIBUTE_UNUSED)
12670 gcc_unreachable ();
12673 /* Convert an unsigned SImode value into a DFmode. Only currently used
12674 for SSE, but applicable anywhere. */
12677 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
12679 REAL_VALUE_TYPE TWO31r;
/* Bias the input by -2^31 (wrapping) so it fits a *signed* SImode,
   convert with the signed cvtsi2sd path, then add 2^31 back as a
   DFmode constant; DFmode holds all 32-bit values exactly.  */
12682 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
12683 NULL, 1, OPTAB_DIRECT);
12685 fp = gen_reg_rtx (DFmode);
12686 emit_insn (gen_floatsidf2 (fp, x));
12688 real_ldexp (&TWO31r, &dconst1, 31);
12689 x = const_double_from_real_value (TWO31r, DFmode);
12691 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
12693 emit_move_insn (target, x);
12696 /* Convert a signed DImode value into a DFmode. Only used for SSE in
12697 32-bit mode; otherwise we have a direct convert instruction. */
/* Computes (double) hi * 2^32 + (double) (unsigned) lo, converting each
   32-bit half separately: the high part signed, the low part unsigned.  */
12700 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
12702 REAL_VALUE_TYPE TWO32r;
12703 rtx fp_lo, fp_hi, x;
12705 fp_lo = gen_reg_rtx (DFmode);
12706 fp_hi = gen_reg_rtx (DFmode);
12708 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
12710 real_ldexp (&TWO32r, &dconst1, 32);
12711 x = const_double_from_real_value (TWO32r, DFmode);
12712 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
12714 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
12716 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
12719 emit_move_insn (target, x);
12722 /* Convert an unsigned SImode value into a SFmode, using only SSE.
12723 For x86_32, -mfpmath=sse, !optimize_size only. */
/* SFmode cannot represent all 32-bit integers exactly, so split into two
   16-bit halves (each exact in SFmode) and combine as hi * 2^16 + lo.  */
12725 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
12727 REAL_VALUE_TYPE ONE16r;
12728 rtx fp_hi, fp_lo, int_hi, int_lo, x;
12730 real_ldexp (&ONE16r, &dconst1, 16);
12731 x = const_double_from_real_value (ONE16r, SFmode);
12732 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
12733 NULL, 0, OPTAB_DIRECT);
12734 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
12735 NULL, 0, OPTAB_DIRECT);
12736 fp_hi = gen_reg_rtx (SFmode);
12737 fp_lo = gen_reg_rtx (SFmode);
12738 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
12739 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
12740 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
12742 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
12744 if (!rtx_equal_p (target, fp_hi))
12745 emit_move_insn (target, fp_hi);
12748 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
12749 then replicate the value for all elements of the vector
/* Otherwise (VECT false, FP modes) only element 0 gets VALUE and the rest
   are zero; integer modes always replicate, as the branches below show.  */
12753 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
12760 v = gen_rtvec (4, value, value, value, value);
12761 return gen_rtx_CONST_VECTOR (V4SImode, v);
12765 v = gen_rtvec (2, value, value);
12766 return gen_rtx_CONST_VECTOR (V2DImode, v);
12770 v = gen_rtvec (4, value, value, value, value);
12772 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
12773 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
12774 return gen_rtx_CONST_VECTOR (V4SFmode, v);
12778 v = gen_rtvec (2, value, value);
12780 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
12781 return gen_rtx_CONST_VECTOR (V2DFmode, v);
12784 gcc_unreachable ();
12788 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
12789 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
12790 for an SSE register. If VECT is true, then replicate the mask for
12791 all elements of the vector register. If INVERT is true, then create
12792 a mask excluding the sign bit. */
12795 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
12797 enum machine_mode vec_mode, imode;
12798 HOST_WIDE_INT hi, lo;
12803 /* Find the sign bit, sign extended to 2*HWI. */
12809 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
12810 lo = 0x80000000, hi = lo < 0;
12816 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
/* 64-bit sign bit: fits in LO only when the host wide int is >= 64 bits,
   otherwise it lives in the HI half of the (lo,hi) pair.  */
12817 if (HOST_BITS_PER_WIDE_INT >= 64)
12818 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
12820 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* No SSE vector mode applies here; the mask is returned in MODE itself
   (see the vec_mode == VOIDmode path at the end).  */
12825 vec_mode = VOIDmode;
12826 if (HOST_BITS_PER_WIDE_INT >= 64)
12829 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
12836 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* INVERT: complement to get everything-but-the-sign-bit.  */
12840 lo = ~lo, hi = ~hi;
12846 mask = immed_double_const (lo, hi, imode);
12848 vec = gen_rtvec (2, v, mask);
12849 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
12850 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
12857 gcc_unreachable ();
12861 lo = ~lo, hi = ~hi;
12863 /* Force this value into the low part of a fp vector constant. */
12864 mask = immed_double_const (lo, hi, imode);
12865 mask = gen_lowpart (mode, mask);
12867 if (vec_mode == VOIDmode)
12868 return force_reg (mode, mask);
12870 v = ix86_build_const_vector (mode, vect, mask);
12871 return force_reg (vec_mode, v);
12874 /* Generate code for floating point ABS or NEG. */
12877 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
12880 rtx mask, set, use, clob, dst, src;
12881 bool use_sse = false;
12882 bool vector_mode = VECTOR_MODE_P (mode);
12883 enum machine_mode elt_mode = mode;
12887 elt_mode = GET_MODE_INNER (mode);
12890 else if (mode == TFmode)
12892 else if (TARGET_SSE_MATH)
12893 use_sse = SSE_FLOAT_MODE_P (mode);
12895 /* NEG and ABS performed with SSE use bitwise mask operations.
12896 Create the appropriate mask now. */
12898 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
/* SSE: NEG flips the sign bit (XOR with sign mask), ABS clears it
   (AND with the inverted mask built above).  */
12907 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
12908 set = gen_rtx_SET (VOIDmode, dst, set);
/* x87 path: emit the plain unary rtx.  */
12913 set = gen_rtx_fmt_e (code, mode, src);
12914 set = gen_rtx_SET (VOIDmode, dst, set);
/* Scalar SSE form keeps the mask alive via USE and records the FLAGS
   clobber in a 3-element PARALLEL.  */
12917 use = gen_rtx_USE (VOIDmode, mask);
12918 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12919 emit_insn (gen_rtx_PARALLEL (VOIDmode,
12920 gen_rtvec (3, set, use, clob)));
12927 /* Expand a copysign operation. Special case operand 0 being a constant. */
12930 ix86_expand_copysign (rtx operands[])
12932 enum machine_mode mode;
12933 rtx dest, op0, op1, mask, nmask;
12935 dest = operands[0];
12939 mode = GET_MODE (dest);
/* Constant magnitude: strip its sign up front and use the cheaper
   single-mask "_const" insn variants.  */
12941 if (GET_CODE (op0) == CONST_DOUBLE)
12943 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
12945 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
12946 op0 = simplify_unary_operation (ABS, mode, op0, mode);
12948 if (mode == SFmode || mode == DFmode)
12950 enum machine_mode vmode;
12952 vmode = mode == SFmode ? V4SFmode : V2DFmode;
12954 if (op0 == CONST0_RTX (mode))
12955 op0 = CONST0_RTX (vmode);
/* Non-zero constant: build a vector with the magnitude in element 0
   and zeros elsewhere, matching the vector insn's operand shape.  */
12960 if (mode == SFmode)
12961 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
12962 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
12964 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
12966 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
12969 else if (op0 != CONST0_RTX (mode))
12970 op0 = force_reg (mode, op0);
12972 mask = ix86_build_signbit_mask (mode, 0, 0);
12974 if (mode == SFmode)
12975 copysign_insn = gen_copysignsf3_const;
12976 else if (mode == DFmode)
12977 copysign_insn = gen_copysigndf3_const;
12979 copysign_insn = gen_copysigntf3_const;
12981 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: need both the sign mask and its complement, plus
   a scratch register, handled by the "_var" insn variants.  */
12985 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
12987 nmask = ix86_build_signbit_mask (mode, 0, 1);
12988 mask = ix86_build_signbit_mask (mode, 0, 0);
12990 if (mode == SFmode)
12991 copysign_insn = gen_copysignsf3_var;
12992 else if (mode == DFmode)
12993 copysign_insn = gen_copysigndf3_var;
12995 copysign_insn = gen_copysigntf3_var;
12997 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13001 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13002 be a constant, and so has already been expanded into a vector constant. */
13005 ix86_split_copysign_const (rtx operands[])
13007 enum machine_mode mode, vmode;
13008 rtx dest, op0, op1, mask, x;
13010 dest = operands[0];
13013 mask = operands[3];
13015 mode = GET_MODE (dest);
13016 vmode = GET_MODE (mask);
/* dest = (sign of op1) | magnitude-of-constant: first isolate op1's sign
   bit with AND, then OR in the (already sign-free) constant magnitude.  */
13018 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13019 x = gen_rtx_AND (vmode, dest, mask);
13020 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13022 if (op0 != CONST0_RTX (vmode))
13024 x = gen_rtx_IOR (vmode, dest, op0);
13025 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13029 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13030 so we have to do two masks. */
/* The numbered "alternatives" below correspond to which hard register the
   register allocator assigned to each operand of the _var pattern; each
   branch asserts the tie it relies on.  */
13033 ix86_split_copysign_var (rtx operands[])
13035 enum machine_mode mode, vmode;
13036 rtx dest, scratch, op0, op1, mask, nmask, x;
13038 dest = operands[0];
13039 scratch = operands[1];
13042 nmask = operands[4];
13043 mask = operands[5];
13045 mode = GET_MODE (dest);
13046 vmode = GET_MODE (mask);
13048 if (rtx_equal_p (op0, op1))
13050 /* Shouldn't happen often (it's useless, obviously), but when it does
13051 we'd generate incorrect code if we continue below. */
13052 emit_move_insn (dest, op0);
13056 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13058 gcc_assert (REGNO (op1) == REGNO (scratch));
/* scratch = op1 & signmask; dest = ~mask & op0; merged by the final IOR.  */
13060 x = gen_rtx_AND (vmode, scratch, mask);
13061 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13064 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13065 x = gen_rtx_NOT (vmode, dest);
13066 x = gen_rtx_AND (vmode, x, op0);
13067 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13071 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13073 x = gen_rtx_AND (vmode, scratch, mask);
13075 else /* alternative 2,4 */
13077 gcc_assert (REGNO (mask) == REGNO (scratch));
13078 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13079 x = gen_rtx_AND (vmode, scratch, op1);
13081 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13083 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13085 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13086 x = gen_rtx_AND (vmode, dest, nmask);
13088 else /* alternative 3,4 */
13090 gcc_assert (REGNO (nmask) == REGNO (dest));
13092 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13093 x = gen_rtx_AND (vmode, dest, op0);
13095 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine sign (in scratch) and magnitude (in dest).  */
13098 x = gen_rtx_IOR (vmode, dest, scratch);
13099 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13102 /* Return TRUE or FALSE depending on whether the first SET in INSN
13103 has source and destination with matching CC modes, and that the
13104 CC mode is at least as constrained as REQ_MODE. */
13107 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13110 enum machine_mode set_mode;
13112 set = PATTERN (insn);
13113 if (GET_CODE (set) == PARALLEL)
13114 set = XVECEXP (set, 0, 0);
13115 gcc_assert (GET_CODE (set) == SET);
13116 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13118 set_mode = GET_MODE (SET_DEST (set));
/* CCmode is the fully general mode: acceptable only when comparing
   against zero is not assumed (compare operand 1 must not be const0).  */
13122 if (req_mode != CCNOmode
13123 && (req_mode != CCmode
13124 || XEXP (SET_SRC (set), 1) != const0_rtx))
13128 if (req_mode == CCGCmode)
13132 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13136 if (req_mode == CCZmode)
13147 gcc_unreachable ();
13150 return (GET_MODE (SET_SRC (set)) == set_mode);
13153 /* Generate insn patterns to do an integer compare of OPERANDS. */
13156 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13158 enum machine_mode cmpmode;
13161 cmpmode = SELECT_CC_MODE (code, op0, op1);
13162 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13164 /* This is very simple, but making the interface the same as in the
13165 FP case makes the rest of the code easier. */
13166 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13167 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13169 /* Return the test that should be put into the flags user, i.e.
13170 the bcc, scc, or cmov instruction. */
13171 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13174 /* Figure out whether to use ordered or unordered fp comparisons.
13175 Return the appropriate mode to use. */
13178 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13180 /* ??? In order to make all comparisons reversible, we do all comparisons
13181 non-trapping when compiling for IEEE. Once gcc is able to distinguish
13182 all forms trapping and nontrapping comparisons, we can make inequality
13183 comparisons trapping again, since it results in better code when using
13184 FCOM based compares. */
13185 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the least-constrained condition-code mode that still lets CODE's
   comparison of OP0 with OP1 be tested; looser modes give the combiner
   more freedom to merge flag-setting instructions.  */
13189 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13191 enum machine_mode mode = GET_MODE (op0);
13193 if (SCALAR_FLOAT_MODE_P (mode))
13195 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13196 return ix86_fp_compare_mode (code);
13201 /* Only zero flag is needed. */
13202 case EQ: /* ZF=0 */
13203 case NE: /* ZF!=0 */
13205 /* Codes needing carry flag. */
13206 case GEU: /* CF=0 */
13207 case LTU: /* CF=1 */
13208 /* Detect overflow checks. They need just the carry flag. */
13209 if (GET_CODE (op0) == PLUS
13210 && rtx_equal_p (op1, XEXP (op0, 0)))
13214 case GTU: /* CF=0 & ZF=0 */
13215 case LEU: /* CF=1 | ZF=1 */
13216 /* Detect overflow checks. They need just the carry flag. */
13217 if (GET_CODE (op0) == MINUS
13218 && rtx_equal_p (op1, XEXP (op0, 0)))
13222 /* Codes possibly doable only with sign flag when
13223 comparing against zero. */
13224 case GE: /* SF=OF or SF=0 */
13225 case LT: /* SF<>OF or SF=1 */
13226 if (op1 == const0_rtx)
13229 /* For other cases Carry flag is not required. */
13231 /* Codes doable only with sign flag when comparing
13232 against zero, but we miss jump instruction for it
13233 so we need to use relational tests against overflow
13234 that thus needs to be zero. */
13235 case GT: /* ZF=0 & SF=OF */
13236 case LE: /* ZF=1 | SF<>OF */
13237 if (op1 == const0_rtx)
13241 /* strcmp pattern do (use flags) and combine may ask us for proper
13246 gcc_unreachable ();
13250 /* Return the fixed registers used for condition codes. */
/* NOTE(review): body elided in this excerpt; presumably fills *P1/*P2 with
   the flags register number(s) -- confirm against the full source.  */
13253 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13260 /* If two condition code modes are compatible, return a condition code
13261 mode which is compatible with both. Otherwise, return
13264 static enum machine_mode
13265 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Non-CC modes have no compatibility notion here.  */
13270 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC unify: CCGC is the more constrained of the pair.  */
13273 if ((m1 == CCGCmode && m2 == CCGOCmode)
13274 || (m1 == CCGOCmode && m2 == CCGCmode))
13280 gcc_unreachable ();
13310 /* These are only compatible with themselves, which we already
13316 /* Split comparison code CODE into comparisons we can do using branch
13317 instructions. BYPASS_CODE is comparison code for branch that will
13318 branch around FIRST_CODE and SECOND_CODE. If some of branches
13319 is not required, set value to UNKNOWN.
13320 We never require more than two branches. */
13323 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
13324 enum rtx_code *first_code,
13325 enum rtx_code *second_code)
13327 *first_code = code;
13328 *bypass_code = UNKNOWN;
13329 *second_code = UNKNOWN;
13331 /* The fcomi comparison sets flags as follows:
/* These codes already map directly onto one flags test after fcomi/sahf;
   no extra branch needed.  */
13341 case GT: /* GTU - CF=0 & ZF=0 */
13342 case GE: /* GEU - CF=0 */
13343 case ORDERED: /* PF=0 */
13344 case UNORDERED: /* PF=1 */
13345 case UNEQ: /* EQ - ZF=1 */
13346 case UNLT: /* LTU - CF=1 */
13347 case UNLE: /* LEU - CF=1 | ZF=1 */
13348 case LTGT: /* EQ - ZF=0 */
/* Ordered codes whose flags test would wrongly succeed on NaN: guard
   them with an UNORDERED bypass branch around the real test.  */
13350 case LT: /* LTU - CF=1 - fails on unordered */
13351 *first_code = UNLT;
13352 *bypass_code = UNORDERED;
13354 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
13355 *first_code = UNLE;
13356 *bypass_code = UNORDERED;
13358 case EQ: /* EQ - ZF=1 - fails on unordered */
13359 *first_code = UNEQ;
13360 *bypass_code = UNORDERED;
/* Codes needing a second branch taken when the operands are unordered.  */
13362 case NE: /* NE - ZF=0 - fails on unordered */
13363 *first_code = LTGT;
13364 *second_code = UNORDERED;
13366 case UNGE: /* GEU - CF=0 - fails on unordered */
13368 *second_code = UNORDERED;
13370 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
13372 *second_code = UNORDERED;
13375 gcc_unreachable ();
/* Without IEEE conformance NaNs need not be honored: drop the extra
   branches entirely.  */
13377 if (!TARGET_IEEE_FP)
13379 *second_code = UNKNOWN;
13380 *bypass_code = UNKNOWN;
13384 /* Return cost of comparison done fcom + arithmetics operations on AX.
13385 All following functions do use number of instructions as a cost metrics.
13386 In future this should be tweaked to compute bytes for optimize_size and
13387 take into account performance of various instructions on various CPUs. */
13389 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
/* Without IEEE conformance the fnstsw/test sequence is short; the
   per-code costs (elided here) cover the NaN-aware variants.  */
13391 if (!TARGET_IEEE_FP)
13393 /* The cost of code output by ix86_expand_fp_compare. */
13417 gcc_unreachable ();
13421 /* Return cost of comparison done using fcomi operation.
13422 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13424 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
13426 enum rtx_code bypass_code, first_code, second_code;
13427 /* Return arbitrarily high cost when instruction is not supported - this
13428 prevents gcc from using it. */
/* Base cost 2 (fcomi + branch), plus 1 if a bypass or second branch
   is required for this comparison code.  */
13431 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13432 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
13435 /* Return cost of comparison done using sahf operation.
13436 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13438 ix86_fp_comparison_sahf_cost (enum rtx_code code)
13440 enum rtx_code bypass_code, first_code, second_code;
13441 /* Return arbitrarily high cost when instruction is not preferred - this
13442 avoids gcc from using it. */
13443 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
/* Base cost 3 (fcom + fnstsw + sahf), plus 1 for any extra branch.  */
13445 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13446 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
13449 /* Compute cost of the comparison done using any method.
13450 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Returns the minimum of the fcomi, sahf and fcom+arith strategies.  */
13452 ix86_fp_comparison_cost (enum rtx_code code)
13454 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
13457 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
13458 sahf_cost = ix86_fp_comparison_sahf_cost (code);
13460 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
13461 if (min > sahf_cost)
13463 if (min > fcomi_cost)
13468 /* Return true if we should use an FCOMI instruction for this
/* True when fcomi is the cheapest strategy either for CODE itself or for
   its swapped form (operands may still be swapped later).  */
13472 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
13474 enum rtx_code swapped_code = swap_condition (code);
13476 return ((ix86_fp_comparison_cost (code)
13477 == ix86_fp_comparison_fcomi_cost (code))
13478 || (ix86_fp_comparison_cost (swapped_code)
13479 == ix86_fp_comparison_fcomi_cost (swapped_code)));
13482 /* Swap, force into registers, or otherwise massage the two operands
13483 to a fp comparison. The operands are updated in place; the new
13484 comparison code is returned. */
13486 static enum rtx_code
13487 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
13489 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
13490 rtx op0 = *pop0, op1 = *pop1;
13491 enum machine_mode op_mode = GET_MODE (op0);
13492 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
13494 /* All of the unordered compare instructions only work on registers.
13495 The same is true of the fcomi compare instructions. The XFmode
13496 compare instructions require registers except when comparing
13497 against zero or when converting operand 1 from fixed point to
13501 && (fpcmp_mode == CCFPUmode
13502 || (op_mode == XFmode
13503 && ! (standard_80387_constant_p (op0) == 1
13504 || standard_80387_constant_p (op1) == 1)
13505 && GET_CODE (op1) != FLOAT)
13506 || ix86_use_fcomi_compare (code)))
13508 op0 = force_reg (op_mode, op0);
13509 op1 = force_reg (op_mode, op1);
13513 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
13514 things around if they appear profitable, otherwise force op0
13515 into a register. */
/* standard_80387_constant_p: a special constant (0.0/1.0 etc.) can be
   loaded with fldz/fld1; prefer keeping such a constant as op1.  */
13517 if (standard_80387_constant_p (op0) == 0
13519 && ! (standard_80387_constant_p (op1) == 0
13523 tmp = op0, op0 = op1, op1 = tmp;
13524 code = swap_condition (code);
13528 op0 = force_reg (op_mode, op0);
13530 if (CONSTANT_P (op1))
13532 int tmp = standard_80387_constant_p (op1);
/* Non-special constants are spilled to the constant pool so the fcom
   memory form can reach them.  */
13534 op1 = validize_mem (force_const_mem (op_mode, op1));
13538 op1 = force_reg (op_mode, op1);
13541 op1 = force_reg (op_mode, op1);
13545 /* Try to rearrange the comparison to make it cheaper. */
13546 if (ix86_fp_comparison_cost (code)
13547 > ix86_fp_comparison_cost (swap_condition (code))
13548 && (REG_P (op1) || can_create_pseudo_p ()))
13551 tmp = op0, op0 = op1, op1 = tmp;
13552 code = swap_condition (code);
13554 op0 = force_reg (op_mode, op0);
13562 /* Convert comparison codes we use to represent FP comparison to integer
13563 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): mapping body elided in this excerpt -- see full source.  */
13567 ix86_fp_compare_code_to_integer (enum rtx_code code)
13596 /* Generate insn patterns to do a floating point compare of OPERANDS. */
13599 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
13600 rtx *second_test, rtx *bypass_test)
13602 enum machine_mode fpcmp_mode, intcmp_mode;
13604 int cost = ix86_fp_comparison_cost (code);
13605 enum rtx_code bypass_code, first_code, second_code;
13607 fpcmp_mode = ix86_fp_compare_mode (code);
13608 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
13611 *second_test = NULL_RTX;
13613 *bypass_test = NULL_RTX;
13615 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13617 /* Do fcomi/sahf based test when profitable. */
/* This fast path is only usable when the caller can accept the extra
   bypass/second tests the code may require.  */
13618 if (ix86_fp_comparison_arithmetics_cost (code) > cost
13619 && (bypass_code == UNKNOWN || bypass_test)
13620 && (second_code == UNKNOWN || second_test))
13622 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13623 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
13629 gcc_assert (TARGET_SAHF);
/* sahf variant needs an HImode scratch for the fnstsw result.  */
13632 scratch = gen_reg_rtx (HImode);
13633 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
13635 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
13638 /* The FP codes work out to act like unsigned. */
13639 intcmp_mode = fpcmp_mode;
13641 if (bypass_code != UNKNOWN)
13642 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
13643 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13645 if (second_code != UNKNOWN)
13646 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
13647 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Slow path: fcom + fnstsw into SCRATCH (AH-style bit tests follow).  */
13652 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
13653 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13654 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
13656 scratch = gen_reg_rtx (HImode);
13657 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
13659 /* In the unordered case, we have to check C2 for NaN's, which
13660 doesn't happen to work out to anything nice combination-wise.
13661 So do some bit twiddling on the value we've got in AH to come
13662 up with an appropriate set of condition codes. */
13664 intcmp_mode = CCNOmode;
/* Masks below select FP status bits: 0x45 = C0|C2|C3, 0x05 = C0|C2,
   0x40 = C3, 0x04 = C2, 0x01 = C0.  */
13669 if (code == GT || !TARGET_IEEE_FP)
13671 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13676 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13677 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13678 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
13679 intcmp_mode = CCmode;
13685 if (code == LT && TARGET_IEEE_FP)
13687 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13688 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
13689 intcmp_mode = CCmode;
13694 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
13700 if (code == GE || !TARGET_IEEE_FP)
13702 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
13707 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13708 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13715 if (code == LE && TARGET_IEEE_FP)
13717 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13718 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13719 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13720 intcmp_mode = CCmode;
13725 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13731 if (code == EQ && TARGET_IEEE_FP)
13733 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13734 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13735 intcmp_mode = CCmode;
13740 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13747 if (code == NE && TARGET_IEEE_FP)
13749 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13750 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13756 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13762 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13766 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13771 gcc_unreachable ();
13775 /* Return the test that should be put into the flags user, i.e.
13776 the bcc, scc, or cmov instruction. */
13777 return gen_rtx_fmt_ee (code, VOIDmode,
13778 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the pending comparison (ix86_compare_op0/op1, or an already
   emitted flags result in ix86_compare_emitted) into a flags-user test,
   dispatching between the FP and integer compare expanders.  */
13783 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
13786 op0 = ix86_compare_op0;
13787 op1 = ix86_compare_op1;
13790 *second_test = NULL_RTX;
13792 *bypass_test = NULL_RTX;
/* If a compare was already emitted, just wrap its flags result; the
   global is consumed (cleared) here.  */
13794 if (ix86_compare_emitted)
13796 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
13797 ix86_compare_emitted = NULL_RTX;
13799 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
13801 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
13802 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
13803 second_test, bypass_test);
13806 ret = ix86_expand_int_compare (code, op0, op1);
13811 /* Return true if the CODE will result in nontrivial jump sequence. */
/* "Nontrivial" = the FP branch needs a bypass or a second conditional
   jump, per ix86_fp_comparison_codes.  */
13813 ix86_fp_jump_nontrivial_p (enum rtx_code code)
13815 enum rtx_code bypass_code, first_code, second_code;
13818 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13819 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Emit a conditional branch on comparison CODE to LABEL, using the global
   compare operands (ix86_compare_op0/op1) or an already-emitted compare
   (ix86_compare_emitted).  Dispatches on the mode of the comparison.  */
13823 ix86_expand_branch (enum rtx_code code, rtx label)
13827 /* If we have emitted a compare insn, go straight to simple.
13828 ix86_expand_compare won't emit anything if ix86_compare_emitted
13830 if (ix86_compare_emitted)
13833 switch (GET_MODE (ix86_compare_op0))
/* Integer modes: a plain compare + conditional jump suffices.  */
13839 tmp = ix86_expand_compare (code, NULL, NULL);
13840 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13841 gen_rtx_LABEL_REF (VOIDmode, label),
13843 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Floating-point modes: may need several jumps (bypass/second tests).  */
13852 enum rtx_code bypass_code, first_code, second_code;
13854 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
13855 &ix86_compare_op1);
13857 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13859 /* Check whether we will use the natural sequence with one jump. If
13860 so, we can expand jump early. Otherwise delay expansion by
13861 creating compound insn to not confuse optimizers. */
13862 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
13864 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
13865 gen_rtx_LABEL_REF (VOIDmode, label),
13866 pc_rtx, NULL_RTX, NULL_RTX);
/* Otherwise emit one compound jump insn wrapped in a PARALLEL with the
   clobbers a later split will need (FPSR, FLAGS, and, when fcomi is not
   available, a scratch HImode reg for fnstsw/sahf).  */
13870 tmp = gen_rtx_fmt_ee (code, VOIDmode,
13871 ix86_compare_op0, ix86_compare_op1);
13872 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13873 gen_rtx_LABEL_REF (VOIDmode, label),
13875 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
13877 use_fcomi = ix86_use_fcomi_compare (code);
13878 vec = rtvec_alloc (3 + !use_fcomi);
13879 RTVEC_ELT (vec, 0) = tmp;
13881 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
13883 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
13886 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
13888 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
13897 /* Expand DImode branch into multiple compare+branch. */
13899 rtx lo[2], hi[2], label2;
13900 enum rtx_code code1, code2, code3;
13901 enum machine_mode submode;
/* Canonicalize: constant operand goes second, swapping the condition.  */
13903 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
13905 tmp = ix86_compare_op0;
13906 ix86_compare_op0 = ix86_compare_op1;
13907 ix86_compare_op1 = tmp;
13908 code = swap_condition (code);
/* Split each double-word operand into low/high word halves.  */
13910 if (GET_MODE (ix86_compare_op0) == DImode)
13912 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
13913 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
13918 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
13919 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
13923 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
13924 avoid two branches. This costs one extra insn, so disable when
13925 optimizing for size. */
13927 if ((code == EQ || code == NE)
13928 && (!optimize_insn_for_size_p ()
13929 || hi[1] == const0_rtx || lo[1] == const0_rtx))
13934 if (hi[1] != const0_rtx)
13935 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
13936 NULL_RTX, 0, OPTAB_WIDEN);
13939 if (lo[1] != const0_rtx)
13940 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
13941 NULL_RTX, 0, OPTAB_WIDEN);
13943 tmp = expand_binop (submode, ior_optab, xor1, xor0,
13944 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse with the single-word OR-of-XORs compared against zero.  */
13946 ix86_compare_op0 = tmp;
13947 ix86_compare_op1 = const0_rtx;
13948 ix86_expand_branch (code, label);
13952 /* Otherwise, if we are doing less-than or greater-or-equal-than,
13953 op1 is a constant and the low word is zero, then we can just
13954 examine the high word. Similarly for low word -1 and
13955 less-or-equal-than or greater-than. */
13957 if (CONST_INT_P (hi[1]))
13960 case LT: case LTU: case GE: case GEU:
13961 if (lo[1] == const0_rtx)
13963 ix86_compare_op0 = hi[0];
13964 ix86_compare_op1 = hi[1];
13965 ix86_expand_branch (code, label);
13969 case LE: case LEU: case GT: case GTU:
13970 if (lo[1] == constm1_rtx)
13972 ix86_compare_op0 = hi[0];
13973 ix86_compare_op1 = hi[1];
13974 ix86_expand_branch (code, label);
13982 /* Otherwise, we need two or three jumps. */
13984 label2 = gen_label_rtx ();
/* code1/code2 branch on the high word; code3 on the (unsigned) low word.  */
13987 code2 = swap_condition (code);
13988 code3 = unsigned_condition (code);
13992 case LT: case GT: case LTU: case GTU:
13995 case LE: code1 = LT; code2 = GT; break;
13996 case GE: code1 = GT; code2 = LT; break;
13997 case LEU: code1 = LTU; code2 = GTU; break;
13998 case GEU: code1 = GTU; code2 = LTU; break;
14000 case EQ: code1 = UNKNOWN; code2 = NE; break;
14001 case NE: code2 = UNKNOWN; break;
14004 gcc_unreachable ();
14009 * if (hi(a) < hi(b)) goto true;
14010 * if (hi(a) > hi(b)) goto false;
14011 * if (lo(a) < lo(b)) goto true;
14015 ix86_compare_op0 = hi[0];
14016 ix86_compare_op1 = hi[1];
14018 if (code1 != UNKNOWN)
14019 ix86_expand_branch (code1, label);
14020 if (code2 != UNKNOWN)
14021 ix86_expand_branch (code2, label2);
14023 ix86_compare_op0 = lo[0];
14024 ix86_compare_op1 = lo[1];
14025 ix86_expand_branch (code3, label);
14027 if (code2 != UNKNOWN)
14028 emit_label (label2);
14033 gcc_unreachable ();
14037 /* Split branch based on floating point condition. */
/* Emit the real jump sequence for an FP comparison: an optional bypass jump
   (around the main test), the main conditional jump, and an optional second
   jump.  TARGET1/TARGET2 are the taken/not-taken destinations; TMP is a
   scratch rtx or NULL; PUSHED, when non-null, is an operand previously
   pushed to memory that must be freed after the compare.  */
14039 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14040 rtx target1, rtx target2, rtx tmp, rtx pushed)
14042 rtx second, bypass;
14043 rtx label = NULL_RTX;
14045 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so TARGET2 is the fall-through (pc_rtx), reversing CODE.  */
14048 if (target2 != pc_rtx)
14051 code = reverse_condition_maybe_unordered (code);
14056 condition = ix86_expand_fp_compare (code, op1, op2,
14057 tmp, &second, &bypass);
14059 /* Remove pushed operand from stack. */
14061 ix86_free_from_memory (GET_MODE (pushed));
14063 if (split_branch_probability >= 0)
14065 /* Distribute the probabilities across the jumps.
14066 Assume the BYPASS and SECOND to be always test
14068 probability = split_branch_probability;
14070 /* Value of 1 is low enough to make no need for probability
14071 to be updated. Later we may run some experiments and see
14072 if unordered values are more frequent in practice. */
14074 bypass_probability = 1;
14076 second_probability = 1;
/* Bypass jump skips the main test via a local label.  */
14078 if (bypass != NULL_RTX)
14080 label = gen_label_rtx ();
14081 i = emit_jump_insn (gen_rtx_SET
14083 gen_rtx_IF_THEN_ELSE (VOIDmode,
14085 gen_rtx_LABEL_REF (VOIDmode,
14088 if (bypass_probability >= 0)
14090 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14091 GEN_INT (bypass_probability),
/* Main conditional jump, annotated with its branch probability.  */
14094 i = emit_jump_insn (gen_rtx_SET
14096 gen_rtx_IF_THEN_ELSE (VOIDmode,
14097 condition, target1, target2)));
14098 if (probability >= 0)
14100 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14101 GEN_INT (probability),
14103 if (second != NULL_RTX)
14105 i = emit_jump_insn (gen_rtx_SET
14107 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14109 if (second_probability >= 0)
14111 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14112 GEN_INT (second_probability),
14115 if (label != NULL_RTX)
14116 emit_label (label);
/* Expand a setcc of comparison CODE into DEST (a QImode register).
   Returns 0 on failure (double-word compares), 1 on success.  */
14120 ix86_expand_setcc (enum rtx_code code, rtx dest)
14122 rtx ret, tmp, tmpreg, equiv;
14123 rtx second_test, bypass_test;
/* Double-word compares go through the branch path, not setcc.  */
14125 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14126 return 0; /* FAIL */
14128 gcc_assert (GET_MODE (dest) == QImode);
14130 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14131 PUT_MODE (ret, QImode);
14136 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* FP compares may need a second setcc combined with AND (second test) or
   a reversed bypass setcc combined with OR.  */
14137 if (bypass_test || second_test)
14139 rtx test = second_test;
14141 rtx tmp2 = gen_reg_rtx (QImode);
14144 gcc_assert (!second_test);
14145 test = bypass_test;
14147 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14149 PUT_MODE (test, QImode);
14150 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14153 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14155 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14158 /* Attach a REG_EQUAL note describing the comparison result. */
14159 if (ix86_compare_op0 && ix86_compare_op1)
14161 equiv = simplify_gen_relational (code, QImode,
14162 GET_MODE (ix86_compare_op0),
14163 ix86_compare_op0, ix86_compare_op1);
14164 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14167 return 1; /* DONE */
14170 /* Expand comparison setting or clearing carry flag. Return true when
14171 successful and set pop for the operation. */
/* On success *POP is an LTU/GEU rtx testing the carry flag, suitable for
   sbb/adc based conditional-move sequences.  */
14173 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14175 enum machine_mode mode =
14176 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14178 /* Do not handle DImode compares that go through special path. */
14179 if (mode == (TARGET_64BIT ? TImode : DImode))
14182 if (SCALAR_FLOAT_MODE_P (mode))
14184 rtx second_test = NULL, bypass_test = NULL;
14185 rtx compare_op, compare_seq;
14187 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14189 /* Shortcut: following common codes never translate
14190 into carry flag compares. */
14191 if (code == EQ || code == NE || code == UNEQ || code == LTGT
14192 || code == ORDERED || code == UNORDERED)
14195 /* These comparisons require zero flag; swap operands so they won't. */
14196 if ((code == GT || code == UNLE || code == LE || code == UNGT)
14197 && !TARGET_IEEE_FP)
14202 code = swap_condition (code);
14205 /* Try to expand the comparison and verify that we end up with
14206 carry flag based comparison. This fails to be true only when
14207 we decide to expand comparison using arithmetic that is not
14208 too common scenario. */
14210 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14211 &second_test, &bypass_test);
14212 compare_seq = get_insns ();
/* A multi-jump FP compare cannot be reduced to a single carry test.  */
14215 if (second_test || bypass_test)
14218 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14219 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14220 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
14222 code = GET_CODE (compare_op);
14224 if (code != LTU && code != GEU)
14227 emit_insn (compare_seq);
14232 if (!INTEGRAL_MODE_P (mode))
/* Integer path: rewrite CODE/OP1 so the final compare is LTU or GEU.  */
14241 /* Convert a==0 into (unsigned)a<1. */
14244 if (op1 != const0_rtx)
14247 code = (code == EQ ? LTU : GEU);
14250 /* Convert a>b into b<a or a>=b-1. */
14253 if (CONST_INT_P (op1))
14255 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
14256 /* Bail out on overflow. We still can swap operands but that
14257 would force loading of the constant into register. */
14258 if (op1 == const0_rtx
14259 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
14261 code = (code == GTU ? GEU : LTU);
14268 code = (code == GTU ? LTU : GEU);
14272 /* Convert a>=0 into (unsigned)a<0x80000000. */
14275 if (mode == DImode || op1 != const0_rtx)
14277 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14278 code = (code == LT ? GEU : LTU);
14282 if (mode == DImode || op1 != constm1_rtx)
14284 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14285 code = (code == LE ? GEU : LTU);
14291 /* Swapping operands may cause constant to appear as first operand. */
14292 if (!nonimmediate_operand (op0, VOIDmode))
14294 if (!can_create_pseudo_p ())
14296 op0 = force_reg (mode, op0);
14298 ix86_compare_op0 = op0;
14299 ix86_compare_op1 = op1;
14300 *pop = ix86_expand_compare (code, NULL, NULL);
14301 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move: operands[0] = operands[1](cmp)
   ? operands[2] : operands[3].  Tries branch-free sbb/setcc/lea sequences
   for constant arms before falling back to cmov.  Returns 1 on success,
   0 on failure (caller then emits a branchy sequence).  */
14306 ix86_expand_int_movcc (rtx operands[])
14308 enum rtx_code code = GET_CODE (operands[1]), compare_code;
14309 rtx compare_seq, compare_op;
14310 rtx second_test, bypass_test;
14311 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): stray second ';' below — harmless, but should be removed.  */
14312 bool sign_bit_compare_p = false;;
14315 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14316 compare_seq = get_insns ();
14319 compare_code = GET_CODE (compare_op);
/* x<0 / x>=0 (and x>-1 / x<=-1) can be done with a sign-bit shift.  */
14321 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
14322 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
14323 sign_bit_compare_p = true;
14325 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
14326 HImode insns, we'd be swallowed in word prefix ops. */
14328 if ((mode != HImode || TARGET_FAST_PREFIX)
14329 && (mode != (TARGET_64BIT ? TImode : DImode))
14330 && CONST_INT_P (operands[2])
14331 && CONST_INT_P (operands[3]))
14333 rtx out = operands[0];
14334 HOST_WIDE_INT ct = INTVAL (operands[2]);
14335 HOST_WIDE_INT cf = INTVAL (operands[3]);
14336 HOST_WIDE_INT diff;
14339 /* Sign bit compares are better done using shifts than we do by using
14341 if (sign_bit_compare_p
14342 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14343 ix86_compare_op1, &compare_op))
14345 /* Detect overlap between destination and compare sources. */
14348 if (!sign_bit_compare_p)
14350 bool fpcmp = false;
14352 compare_code = GET_CODE (compare_op);
14354 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14355 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14358 compare_code = ix86_fp_compare_code_to_integer (compare_code);
14361 /* To simplify rest of code, restrict to the GEU case. */
14362 if (compare_code == LTU)
14364 HOST_WIDE_INT tmp = ct;
14367 compare_code = reverse_condition (compare_code);
14368 code = reverse_condition (code);
14373 PUT_CODE (compare_op,
14374 reverse_condition_maybe_unordered
14375 (GET_CODE (compare_op)));
14377 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14381 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
14382 || reg_overlap_mentioned_p (out, ix86_compare_op1))
14383 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag via sbb.  */
14385 if (mode == DImode)
14386 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
14388 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
14392 if (code == GT || code == GE)
14393 code = reverse_condition (code);
14396 HOST_WIDE_INT tmp = ct;
/* Sign-bit path: store-flag with -1/0 result.  */
14401 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
14402 ix86_compare_op1, VOIDmode, 0, -1);
/* Fold the 0/-1 mask into the two constants with add/or/not/and.  */
14415 tmp = expand_simple_binop (mode, PLUS,
14417 copy_rtx (tmp), 1, OPTAB_DIRECT);
14428 tmp = expand_simple_binop (mode, IOR,
14430 copy_rtx (tmp), 1, OPTAB_DIRECT);
14432 else if (diff == -1 && ct)
14442 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14444 tmp = expand_simple_binop (mode, PLUS,
14445 copy_rtx (tmp), GEN_INT (cf),
14446 copy_rtx (tmp), 1, OPTAB_DIRECT);
14454 * andl cf - ct, dest
14464 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14467 tmp = expand_simple_binop (mode, AND,
14469 gen_int_mode (cf - ct, mode),
14470 copy_rtx (tmp), 1, OPTAB_DIRECT);
14472 tmp = expand_simple_binop (mode, PLUS,
14473 copy_rtx (tmp), GEN_INT (ct),
14474 copy_rtx (tmp), 1, OPTAB_DIRECT);
14477 if (!rtx_equal_p (tmp, out))
14478 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
14480 return 1; /* DONE */
/* diff < 0: swap the constants and reverse the condition.  */
14485 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14488 tmp = ct, ct = cf, cf = tmp;
14491 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14493 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14495 /* We may be reversing unordered compare to normal compare, that
14496 is not valid in general (we may convert non-trapping condition
14497 to trapping one), however on i386 we currently emit all
14498 comparisons unordered. */
14499 compare_code = reverse_condition_maybe_unordered (compare_code);
14500 code = reverse_condition_maybe_unordered (code);
14504 compare_code = reverse_condition (compare_code);
14505 code = reverse_condition (code);
14509 compare_code = UNKNOWN;
14510 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
14511 && CONST_INT_P (ix86_compare_op1))
14513 if (ix86_compare_op1 == const0_rtx
14514 && (code == LT || code == GE))
14515 compare_code = code;
14516 else if (ix86_compare_op1 == constm1_rtx)
14520 else if (code == GT)
14525 /* Optimize dest = (op0 < 0) ? -1 : cf. */
14526 if (compare_code != UNKNOWN
14527 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
14528 && (cf == -1 || ct == -1))
14530 /* If lea code below could be used, only optimize
14531 if it results in a 2 insn sequence. */
14533 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
14534 || diff == 3 || diff == 5 || diff == 9)
14535 || (compare_code == LT && ct == -1)
14536 || (compare_code == GE && cf == -1))
14539 * notl op1 (if necessary)
14547 code = reverse_condition (code);
14550 out = emit_store_flag (out, code, ix86_compare_op0,
14551 ix86_compare_op1, VOIDmode, 0, -1);
14553 out = expand_simple_binop (mode, IOR,
14555 out, 1, OPTAB_DIRECT);
14556 if (out != operands[0])
14557 emit_move_insn (operands[0], out);
14559 return 1; /* DONE */
/* lea path: setcc 0/1, then scale/offset into {cf, ct} via lea.  */
14564 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
14565 || diff == 3 || diff == 5 || diff == 9)
14566 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
14568 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
14574 * lea cf(dest*(ct-cf)),dest
14578 * This also catches the degenerate setcc-only case.
14584 out = emit_store_flag (out, code, ix86_compare_op0,
14585 ix86_compare_op1, VOIDmode, 0, 1);
14588 /* On x86_64 the lea instruction operates on Pmode, so we need
14589 to get arithmetics done in proper mode to match. */
14591 tmp = copy_rtx (out);
14595 out1 = copy_rtx (out);
14596 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
14600 tmp = gen_rtx_PLUS (mode, tmp, out1);
14606 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
14609 if (!rtx_equal_p (tmp, out))
14612 out = force_operand (tmp, copy_rtx (out));
14614 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
14616 if (!rtx_equal_p (out, operands[0]))
14617 emit_move_insn (operands[0], copy_rtx (out));
14619 return 1; /* DONE */
14623 * General case: Jumpful:
14624 * xorl dest,dest cmpl op1, op2
14625 * cmpl op1, op2 movl ct, dest
14626 * setcc dest jcc 1f
14627 * decl dest movl cf, dest
14628 * andl (cf-ct),dest 1:
14631 * Size 20. Size 14.
14633 * This is reasonably steep, but branch mispredict costs are
14634 * high on modern cpus, so consider failing only if optimizing
14638 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14639 && BRANCH_COST >= 2)
14643 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14648 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14650 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14652 /* We may be reversing unordered compare to normal compare,
14653 that is not valid in general (we may convert non-trapping
14654 condition to trapping one), however on i386 we currently
14655 emit all comparisons unordered. */
14656 code = reverse_condition_maybe_unordered (code);
14660 code = reverse_condition (code);
14661 if (compare_code != UNKNOWN)
14662 compare_code = reverse_condition (compare_code);
14666 if (compare_code != UNKNOWN)
14668 /* notl op1 (if needed)
14673 For x < 0 (resp. x <= -1) there will be no notl,
14674 so if possible swap the constants to get rid of the
14676 True/false will be -1/0 while code below (store flag
14677 followed by decrement) is 0/-1, so the constants need
14678 to be exchanged once more. */
14680 if (compare_code == GE || !cf)
14682 code = reverse_condition (code);
14687 HOST_WIDE_INT tmp = cf;
14692 out = emit_store_flag (out, code, ix86_compare_op0,
14693 ix86_compare_op1, VOIDmode, 0, -1);
14697 out = emit_store_flag (out, code, ix86_compare_op0,
14698 ix86_compare_op1, VOIDmode, 0, 1);
14700 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
14701 copy_rtx (out), 1, OPTAB_DIRECT);
14704 out = expand_simple_binop (mode, AND, copy_rtx (out),
14705 gen_int_mode (cf - ct, mode),
14706 copy_rtx (out), 1, OPTAB_DIRECT);
14708 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
14709 copy_rtx (out), 1, OPTAB_DIRECT);
14710 if (!rtx_equal_p (out, operands[0]))
14711 emit_move_insn (operands[0], copy_rtx (out));
14713 return 1; /* DONE */
14717 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14719 /* Try a few things more with specific constants and a variable. */
14722 rtx var, orig_out, out, tmp;
14724 if (BRANCH_COST <= 2)
14725 return 0; /* FAIL */
14727 /* If one of the two operands is an interesting constant, load a
14728 constant with the above and mask it in with a logical operation. */
14730 if (CONST_INT_P (operands[2]))
14733 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
14734 operands[3] = constm1_rtx, op = and_optab;
14735 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
14736 operands[3] = const0_rtx, op = ior_optab;
14738 return 0; /* FAIL */
14740 else if (CONST_INT_P (operands[3]))
14743 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
14744 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): the guard below tests operands[3], but by symmetry with the
   operands[2] branch above it looks like operands[2] != const0_rtx was
   intended (operands[3] == -1 here, so the test is trivially true) —
   verify against upstream before changing.  */
14745 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
14746 operands[2] = const0_rtx, op = ior_optab;
14748 return 0; /* FAIL */
14751 return 0; /* FAIL */
14753 orig_out = operands[0];
14754 tmp = gen_reg_rtx (mode);
14757 /* Recurse to get the constant loaded. */
14758 if (ix86_expand_int_movcc (operands) == 0)
14759 return 0; /* FAIL */
14761 /* Mask in the interesting variable. */
14762 out = expand_binop (mode, op, var, tmp, orig_out, 0,
14764 if (!rtx_equal_p (out, orig_out))
14765 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
14767 return 1; /* DONE */
14771 * For comparison with above,
/* cmov fallback: force both arms into registers as needed, handle overlap
   with the destination, then emit up to three IF_THEN_ELSE sets (main,
   bypass, second) for FP conditions.  */
14781 if (! nonimmediate_operand (operands[2], mode))
14782 operands[2] = force_reg (mode, operands[2]);
14783 if (! nonimmediate_operand (operands[3], mode))
14784 operands[3] = force_reg (mode, operands[3]);
14786 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
14788 rtx tmp = gen_reg_rtx (mode);
14789 emit_move_insn (tmp, operands[3]);
14792 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
14794 rtx tmp = gen_reg_rtx (mode);
14795 emit_move_insn (tmp, operands[2]);
14799 if (! register_operand (operands[2], VOIDmode)
14801 || ! register_operand (operands[3], VOIDmode)))
14802 operands[2] = force_reg (mode, operands[2]);
14805 && ! register_operand (operands[3], VOIDmode))
14806 operands[3] = force_reg (mode, operands[3]);
14808 emit_insn (compare_seq);
14809 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14810 gen_rtx_IF_THEN_ELSE (mode,
14811 compare_op, operands[2],
14814 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
14815 gen_rtx_IF_THEN_ELSE (mode,
14817 copy_rtx (operands[3]),
14818 copy_rtx (operands[0]))));
14820 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
14821 gen_rtx_IF_THEN_ELSE (mode,
14823 copy_rtx (operands[2]),
14824 copy_rtx (operands[0]))));
14826 return 1; /* DONE */
14829 /* Swap, force into registers, or otherwise massage the two operands
14830 to an sse comparison with a mask result. Thus we differ a bit from
14831 ix86_prepare_fp_compare_args which expects to produce a flags result.
14833 The DEST operand exists to help determine whether to commute commutative
14834 operators. The POP0/POP1 operands are updated in place. The new
14835 comparison code is returned, or UNKNOWN if not implementable. */
14837 static enum rtx_code
14838 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
14839 rtx *pop0, rtx *pop1)
14847 /* We have no LTGT as an operator. We could implement it with
14848 NE & ORDERED, but this requires an extra temporary. It's
14849 not clear that it's worth it. */
14856 /* These are supported directly. */
14863 /* For commutative operators, try to canonicalize the destination
14864 operand to be first in the comparison - this helps reload to
14865 avoid extra moves. */
14866 if (!dest || !rtx_equal_p (dest, *pop1))
14874 /* These are not supported directly. Swap the comparison operands
14875 to transform into something that is supported. */
14879 code = swap_condition (code);
14883 gcc_unreachable ();
14889 /* Detect conditional moves that exactly match min/max operational
14890 semantics. Note that this is IEEE safe, as long as we don't
14891 interchange the operands.
14893 Returns FALSE if this conditional move doesn't match a MIN/MAX,
14894 and TRUE if the operation is successful and instructions are emitted. */
14897 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
14898 rtx cmp_op1, rtx if_true, rtx if_false)
14900 enum machine_mode mode;
/* Normalize GE/UNGE to the LT-style pattern by swapping the arms.  */
14906 else if (code == UNGE)
14909 if_true = if_false;
/* MIN when the compare operands line up with (if_true, if_false) in
   order; MAX when they line up swapped.  */
14915 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
14917 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
14922 mode = GET_MODE (dest);
14924 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
14925 but MODE may be a vector mode and thus not appropriate. */
14926 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-careful path: emit an UNSPEC so the operand order (and thus
   NaN/signed-zero behavior of the hardware min/max) is preserved.  */
14928 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
14931 if_true = force_reg (mode, if_true);
14932 v = gen_rtvec (2, if_true, if_false);
14933 tmp = gen_rtx_UNSPEC (mode, v, u);
14937 code = is_min ? SMIN : SMAX;
14938 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
14941 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
14945 /* Expand an sse vector comparison. Return the register with the result. */
/* Emits DEST = (CMP_OP0 code CMP_OP1) as an all-ones/all-zeros mask.
   OP_TRUE/OP_FALSE are only used to detect overlap with DEST; a fresh
   register is used when DEST overlaps any input.  */
14948 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
14949 rtx op_true, rtx op_false)
14951 enum machine_mode mode = GET_MODE (dest);
14954 cmp_op0 = force_reg (mode, cmp_op0);
14955 if (!nonimmediate_operand (cmp_op1, mode))
14956 cmp_op1 = force_reg (mode, cmp_op1);
14959 || reg_overlap_mentioned_p (dest, op_true)
14960 || reg_overlap_mentioned_p (dest, op_false))
14961 dest = gen_reg_rtx (mode);
14963 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
14964 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14969 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
14970 operations. This is used for both scalar and vector conditional moves. */
14973 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
14975 enum machine_mode mode = GET_MODE (dest);
/* Special case: false arm is zero -> DEST = CMP & OP_TRUE.  */
14978 if (op_false == CONST0_RTX (mode))
14980 op_true = force_reg (mode, op_true);
14981 x = gen_rtx_AND (mode, cmp, op_true);
14982 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Special case: true arm is zero -> DEST = ~CMP & OP_FALSE.  */
14984 else if (op_true == CONST0_RTX (mode))
14986 op_false = force_reg (mode, op_false);
14987 x = gen_rtx_NOT (mode, cmp);
14988 x = gen_rtx_AND (mode, x, op_false);
14989 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a real vector cmov (pcmov).  */
14991 else if (TARGET_SSE5)
14993 rtx pcmov = gen_rtx_SET (mode, dest,
14994 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* Generic blend: (OP_TRUE & CMP) | (OP_FALSE & ~CMP).  */
15001 op_true = force_reg (mode, op_true);
15002 op_false = force_reg (mode, op_false);
15004 t2 = gen_reg_rtx (mode);
15006 t3 = gen_reg_rtx (mode);
15010 x = gen_rtx_AND (mode, op_true, cmp);
15011 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15013 x = gen_rtx_NOT (mode, cmp);
15014 x = gen_rtx_AND (mode, x, op_false);
15015 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15017 x = gen_rtx_IOR (mode, t3, t2);
15018 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15022 /* Expand a floating-point conditional move. Return true if successful. */
15025 ix86_expand_fp_movcc (rtx operands[])
15027 enum machine_mode mode = GET_MODE (operands[0]);
15028 enum rtx_code code = GET_CODE (operands[1]);
15029 rtx tmp, compare_op, second_test, bypass_test;
/* SSE path: no cmov on SSE regs, so build a compare mask + blend.  */
15031 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15033 enum machine_mode cmode;
15035 /* Since we've no cmove for sse registers, don't force bad register
15036 allocation just to gain access to it. Deny movcc when the
15037 comparison mode doesn't match the move mode. */
15038 cmode = GET_MODE (ix86_compare_op0);
15039 if (cmode == VOIDmode)
15040 cmode = GET_MODE (ix86_compare_op1);
15044 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15046 &ix86_compare_op1);
15047 if (code == UNKNOWN)
/* Prefer hardware min/max when the cmov matches those semantics.  */
15050 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15051 ix86_compare_op1, operands[2],
15055 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15056 ix86_compare_op1, operands[2], operands[3]);
15057 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15061 /* The floating point conditional move instructions don't directly
15062 support conditions resulting from a signed integer comparison. */
15064 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15066 /* The floating point conditional move instructions don't directly
15067 support signed integer comparisons. */
/* If the condition is not fcmov-compatible, materialize it as a QImode
   0/1 value via setcc and compare that against zero instead.  */
15069 if (!fcmov_comparison_operator (compare_op, VOIDmode))
15071 gcc_assert (!second_test && !bypass_test);
15072 tmp = gen_reg_rtx (QImode);
15073 ix86_expand_setcc (code, tmp);
15075 ix86_compare_op0 = tmp;
15076 ix86_compare_op1 = const0_rtx;
15077 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15079 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15081 tmp = gen_reg_rtx (mode);
15082 emit_move_insn (tmp, operands[3]);
15085 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15087 tmp = gen_reg_rtx (mode);
15088 emit_move_insn (tmp, operands[2]);
/* Main fcmov, then optional bypass/second fcmovs for FP conditions.  */
15092 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15093 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15094 operands[2], operands[3])));
15096 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15097 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15098 operands[3], operands[0])));
15100 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15101 gen_rtx_IF_THEN_ELSE (mode, second_test,
15102 operands[2], operands[0])));
15107 /* Expand a floating-point vector conditional move; a vcond operation
15108 rather than a movcc operation. */
/* operands[0] = (operands[4] <op3> operands[5]) ? operands[1] : operands[2];
   tries min/max first, then falls back to compare-mask + blend.  */
15111 ix86_expand_fp_vcond (rtx operands[])
15113 enum rtx_code code = GET_CODE (operands[3]);
15116 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15117 &operands[4], &operands[5]);
15118 if (code == UNKNOWN)
15121 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15122 operands[5], operands[1], operands[2]))
15125 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15126 operands[1], operands[2]);
15127 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15131 /* Expand a signed/unsigned integral vector conditional move. */
15134 ix86_expand_int_vcond (rtx operands[])
15136 enum machine_mode mode = GET_MODE (operands[0]);
15137 enum rtx_code code = GET_CODE (operands[3]);
/* NEGATE records that the condition was inverted, so the true/false arms
   must be swapped when emitting the final blend (operands[1+negate]).  */
15138 bool negate = false;
15141 cop0 = operands[4];
15142 cop1 = operands[5];
15144 /* SSE5 supports all of the comparisons on all vector int types. */
15147 /* Canonicalize the comparison to EQ, GT, GTU. */
15158 code = reverse_condition (code);
15164 code = reverse_condition (code);
15170 code = swap_condition (code);
15171 x = cop0, cop0 = cop1, cop1 = x;
15175 gcc_unreachable ();
15178 /* Only SSE4.1/SSE4.2 supports V2DImode. */
15179 if (mode == V2DImode)
15184 /* SSE4.1 supports EQ. */
15185 if (!TARGET_SSE4_1)
15191 /* SSE4.2 supports GT/GTU. */
15192 if (!TARGET_SSE4_2)
15197 gcc_unreachable ();
15201 /* Unsigned parallel compare is not supported by the hardware. Play some
15202 tricks to turn this into a signed comparison against 0. */
15205 cop0 = force_reg (mode, cop0);
15214 /* Perform a parallel modulo subtraction. */
15215 t1 = gen_reg_rtx (mode);
15216 emit_insn ((mode == V4SImode
15218 : gen_subv2di3) (t1, cop0, cop1));
15220 /* Extract the original sign bit of op0. */
15221 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
15223 t2 = gen_reg_rtx (mode);
15224 emit_insn ((mode == V4SImode
15226 : gen_andv2di3) (t2, cop0, mask));
15228 /* XOR it back into the result of the subtraction. This results
15229 in the sign bit set iff we saw unsigned underflow. */
15230 x = gen_reg_rtx (mode);
15231 emit_insn ((mode == V4SImode
15233 : gen_xorv2di3) (x, t1, t2));
15241 /* Perform a parallel unsigned saturating subtraction. */
15242 x = gen_reg_rtx (mode);
15243 emit_insn (gen_rtx_SET (VOIDmode, x,
15244 gen_rtx_US_MINUS (mode, cop0, cop1)));
15251 gcc_unreachable ();
/* After the unsigned trick the compare reduces to a signed test vs 0.  */
15255 cop1 = CONST0_RTX (mode);
15259 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
15260 operands[1+negate], operands[2-negate]);
15262 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
15263 operands[2-negate]);
15267 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
15268 true if we should do zero extension, else sign extension. HIGH_P is
15269 true if we want the N/2 high elements, else the low elements. */
15272 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15274 enum machine_mode imode = GET_MODE (operands[1]);
15275 rtx (*unpack)(rtx, rtx, rtx);
/* Pick the interleave insn for the element width and half requested.  */
15282 unpack = gen_vec_interleave_highv16qi;
15284 unpack = gen_vec_interleave_lowv16qi;
15288 unpack = gen_vec_interleave_highv8hi;
15290 unpack = gen_vec_interleave_lowv8hi;
15294 unpack = gen_vec_interleave_highv4si;
15296 unpack = gen_vec_interleave_lowv4si;
15299 gcc_unreachable ();
15302 dest = gen_lowpart (imode, operands[0]);
/* The second interleave input supplies the extension bits: zero for
   unsigned, or a (0 > x) all-ones/zeros mask for sign extension.  */
15305 se = force_reg (imode, CONST0_RTX (imode));
15307 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
15308 operands[1], pc_rtx, pc_rtx);
15310 emit_insn (unpack (dest, operands[1], se));
15313 /* This function performs the same task as ix86_expand_sse_unpack,
15314 but with SSE4.1 instructions. */
15317 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15319 enum machine_mode imode = GET_MODE (operands[1]);
15320 rtx (*unpack)(rtx, rtx);
/* SSE4.1 pmovzx/pmovsx extend the LOW half directly.  */
15327 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
15329 unpack = gen_sse4_1_extendv8qiv8hi2;
15333 unpack = gen_sse4_1_zero_extendv4hiv4si2;
15335 unpack = gen_sse4_1_extendv4hiv4si2;
15339 unpack = gen_sse4_1_zero_extendv2siv2di2;
15341 unpack = gen_sse4_1_extendv2siv2di2;
15344 gcc_unreachable ();
15347 dest = operands[0];
/* For the high half, first move it into the low half via a 64-bit
   logical right shift of the whole 128-bit register.  */
15350 /* Shift higher 8 bytes to lower 8 bytes. */
15351 src = gen_reg_rtx (imode);
15352 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
15353 gen_lowpart (TImode, operands[1]),
15359 emit_insn (unpack (dest, src));
15362 /* This function performs the same task as ix86_expand_sse_unpack,
15363    but with sse5 instructions.  */
/* NOTE(review): elided excerpt -- the switch on element width and some
   declarations are not visible here.  Each visible arm builds a 16-byte
   PPERM selector vector V (and a parallel VS used for the sign/zero
   variant) and emits an sse5_pperm_{zero,sign} pattern.  */
15366 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15368   enum machine_mode imode = GET_MODE (operands[1]);
15369   int pperm_bytes[16];
/* H is the byte offset of the half being unpacked (8 = high half).  */
15371   int h = (high_p) ? 8 : 0;
15374   rtvec v = rtvec_alloc (16);
15377   rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: each source byte is followed by a zero/sign byte.  */
15382       vs = rtvec_alloc (8);
15383       h2 = (high_p) ? 8 : 0;
15384       for (i = 0; i < 8; i++)
15386 	  pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
15387 	  pperm_bytes[2*i+1] = ((unsigned_p)
15389 				: PPERM_SIGN | PPERM_SRC2 | i | h);
15392       for (i = 0; i < 16; i++)
15393 	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15395       for (i = 0; i < 8; i++)
15396 	RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15398       p = gen_rtx_PARALLEL (VOIDmode, vs);
15399       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15401 	emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
15403 	emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two source bytes then two sign/zero bytes per element.  */
15407       vs = rtvec_alloc (4);
15408       h2 = (high_p) ? 4 : 0;
15409       for (i = 0; i < 4; i++)
15411 	  sign_extend = ((unsigned_p)
15413 			 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
15414 	  pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
15415 	  pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
15416 	  pperm_bytes[4*i+2] = sign_extend;
15417 	  pperm_bytes[4*i+3] = sign_extend;
15420       for (i = 0; i < 16; i++)
15421 	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15423       for (i = 0; i < 4; i++)
15424 	RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15426       p = gen_rtx_PARALLEL (VOIDmode, vs);
15427       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15429 	emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
15431 	emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four source bytes then four sign/zero bytes per element.  */
15435       vs = rtvec_alloc (2);
15436       h2 = (high_p) ? 2 : 0;
15437       for (i = 0; i < 2; i++)
15439 	  sign_extend = ((unsigned_p)
15441 			 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
15442 	  pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
15443 	  pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
15444 	  pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
15445 	  pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
15446 	  pperm_bytes[8*i+4] = sign_extend;
15447 	  pperm_bytes[8*i+5] = sign_extend;
15448 	  pperm_bytes[8*i+6] = sign_extend;
15449 	  pperm_bytes[8*i+7] = sign_extend;
15452       for (i = 0; i < 16; i++)
15453 	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15455       for (i = 0; i < 2; i++)
15456 	RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15458       p = gen_rtx_PARALLEL (VOIDmode, vs);
15459       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15461 	emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
15463 	emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
15467       gcc_unreachable ();
15473 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
15474    next narrower integer vector type */
/* NOTE(review): elided excerpt -- the switch on IMODE and braces are not
   visible.  Each arm builds a PPERM byte-selector that takes the even
   (low) bytes of each element, SRC1 into the low 8 result bytes and SRC2
   into the high 8, then emits an sse5_pperm_pack pattern.  */
15476 ix86_expand_sse5_pack (rtx operands[3])
15478   enum machine_mode imode = GET_MODE (operands[0]);
15479   int pperm_bytes[16];
15481   rtvec v = rtvec_alloc (16);
15483   rtx op0 = operands[0];
15484   rtx op1 = operands[1];
15485   rtx op2 = operands[2];
/* V8HI -> V16QI: keep byte 0 of each 2-byte element.  */
15490       for (i = 0; i < 8; i++)
15492 	  pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
15493 	  pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
15496       for (i = 0; i < 16; i++)
15497 	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15499       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15500       emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V4SI -> V8HI: keep bytes 0-1 of each 4-byte element.  */
15504       for (i = 0; i < 4; i++)
15506 	  pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
15507 	  pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
15508 	  pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
15509 	  pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
15512       for (i = 0; i < 16; i++)
15513 	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15515       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15516       emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V2DI -> V4SI: keep bytes 0-3 of each 8-byte element.  */
15520       for (i = 0; i < 2; i++)
15522 	  pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
15523 	  pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
15524 	  pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
15525 	  pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
15526 	  pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
15527 	  pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
15528 	  pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
15529 	  pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
15532       for (i = 0; i < 16; i++)
15533 	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15535       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15536       emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
15540       gcc_unreachable ();
15546 /* Expand conditional increment or decrement using adc/sbb instructions.
15547    The default case using setcc followed by the conditional move can be
15548    done by generic code.  */
/* NOTE(review): elided excerpt -- early-return paths, the VAL adjustment,
   and switch labels are missing from this view.  */
15550 ix86_expand_int_addcc (rtx operands[])
15552   enum rtx_code code = GET_CODE (operands[1]);
15554   rtx val = const0_rtx;
15556   enum machine_mode mode = GET_MODE (operands[0]);
/* Only +1/-1 adjustments can be expressed as a carry add/subtract.  */
15558   if (operands[3] != const1_rtx
15559       && operands[3] != constm1_rtx)
/* Rewrite the comparison so the condition lands in the carry flag.  */
15561   if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15562 				       ix86_compare_op1, &compare_op))
15564   code = GET_CODE (compare_op);
15566   if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15567       || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15570       code = ix86_fp_compare_code_to_integer (code);
/* Reverse the condition in place; FP compares must preserve the
   unordered case when reversed.  */
15577 	PUT_CODE (compare_op,
15578 		  reverse_condition_maybe_unordered
15579 		    (GET_CODE (compare_op)));
15581 	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15583   PUT_MODE (compare_op, mode);
15585   /* Construct either adc or sbb insn.  */
15586   if ((code == LTU) == (operands[3] == constm1_rtx))
15588       switch (GET_MODE (operands[0]))
15591 	  emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
15594 	  emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
15597 	  emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
15600 	  emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15603 	  gcc_unreachable ();
15608       switch (GET_MODE (operands[0]))
15611 	  emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
15614 	  emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
15617 	  emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
15620 	  emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15623 	  gcc_unreachable ();
15626   return 1; /* DONE */
15630 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
15631    works for floating pointer parameters and nonoffsetable memories.
15632    For pushes, it returns just stack offsets; the values will be saved
15633    in the right order.  Maximally three parts are generated.  */
/* NOTE(review): elided excerpt -- the 32-bit/64-bit branch structure,
   some declarations, and the final return are not visible here.  */
15636 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Part count: XFmode needs 3 SImode words on 32-bit; otherwise
   size/4 (32-bit) or size rounded to DImode words (64-bit).  */
15641     size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
15643     size = (GET_MODE_SIZE (mode) + 4) / 8;
15645   gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
15646   gcc_assert (size >= 2 && size <= 4);
15648   /* Optimize constant pool reference to immediates.  This is used by fp
15649      moves, that force all constants to memory to allow combining.  */
15650   if (MEM_P (operand) && MEM_READONLY_P (operand))
15652       rtx tmp = maybe_get_pool_constant (operand);
15657   if (MEM_P (operand) && !offsettable_memref_p (operand))
15659       /* The only non-offsetable memories we handle are pushes.  */
15660       int ok = push_operand (operand, VOIDmode);
/* For a push, all parts alias the same pre-decrement address.  */
15664       operand = copy_rtx (operand);
15665       PUT_MODE (operand, Pmode);
15666       parts[0] = parts[1] = parts[2] = parts[3] = operand;
15670   if (GET_CODE (operand) == CONST_VECTOR)
15672       enum machine_mode imode = int_mode_for_mode (mode);
15673       /* Caution: if we looked through a constant pool memory above,
15674 	 the operand may actually have a different mode now.  That's
15675 	 ok, since we want to pun this all the way back to an integer.  */
15676       operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
15677       gcc_assert (operand != NULL);
15683       if (mode == DImode)
15684 	split_di (&operand, 1, &parts[0], &parts[1]);
/* Register case: parts are consecutive hard registers (post-reload).  */
15689 	  if (REG_P (operand))
15691 	      gcc_assert (reload_completed);
15692 	      for (i = 0; i < size; i++)
15693 		parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
15695 	  else if (offsettable_memref_p (operand))
15697 	      operand = adjust_address (operand, SImode, 0);
15698 	      parts[0] = operand;
15699 	      for (i = 1; i < size; i++)
15700 		parts[i] = adjust_address (operand, SImode, 4 * i);
15702 	  else if (GET_CODE (operand) == CONST_DOUBLE)
15707 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* Decompose the FP constant into target-format 32-bit words.  */
15711 		  real_to_target (l, &r, mode);
15712 		  parts[3] = gen_int_mode (l[3], SImode);
15713 		  parts[2] = gen_int_mode (l[2], SImode);
15716 		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
15717 		  parts[2] = gen_int_mode (l[2], SImode);
15720 		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15723 		  gcc_unreachable ();
15725 	      parts[1] = gen_int_mode (l[1], SImode);
15726 	      parts[0] = gen_int_mode (l[0], SImode);
15729 	    gcc_unreachable ();
/* 64-bit target: parts are DImode (plus SImode/DImode upper for XF/TF).  */
15734       if (mode == TImode)
15735 	split_ti (&operand, 1, &parts[0], &parts[1]);
15736       if (mode == XFmode || mode == TFmode)
15738 	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
15739 	  if (REG_P (operand))
15741 	      gcc_assert (reload_completed);
15742 	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
15743 	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
15745 	  else if (offsettable_memref_p (operand))
15747 	      operand = adjust_address (operand, DImode, 0);
15748 	      parts[0] = operand;
15749 	      parts[1] = adjust_address (operand, upper_mode, 8);
15751 	  else if (GET_CODE (operand) == CONST_DOUBLE)
15756 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
15757 	      real_to_target (l, &r, mode);
15759 	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
15760 	      if (HOST_BITS_PER_WIDE_INT >= 64)
15763 		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
15764 		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
15767 		parts[0] = immed_double_const (l[0], l[1], DImode);
15769 	      if (upper_mode == SImode)
15770 	        parts[1] = gen_int_mode (l[2], SImode);
15771 	      else if (HOST_BITS_PER_WIDE_INT >= 64)
15774 		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
15775 		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
15778 		parts[1] = immed_double_const (l[2], l[3], DImode);
15781 	    gcc_unreachable ();
15788 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
15789    Return false when normal moves are needed; true when all required
15790    insns have been emitted.  Operands 2-4 contain the input values
15791    int the correct order; operands 5-7 contain the output values.  */
/* NOTE(review): elided excerpt -- declarations, some braces, the push
   expansion and the return statements are missing from this view.  */
15794 ix86_split_long_move (rtx operands[])
15799   int collisions = 0;
15800   enum machine_mode mode = GET_MODE (operands[0]);
15801   bool collisionparts[4];
15803   /* The DFmode expanders may ask us to move double.
15804      For 64bit target this is single move.  By hiding the fact
15805      here we simplify i386.md splitters.  */
15806   if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
15808       /* Optimize constant pool reference to immediates.  This is used by
15809 	 fp moves, that force all constants to memory to allow combining.  */
15811       if (MEM_P (operands[1])
15812 	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
15813 	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
15814 	operands[1] = get_pool_constant (XEXP (operands[1], 0));
15815       if (push_operand (operands[0], VOIDmode))
15817 	  operands[0] = copy_rtx (operands[0]);
15818 	  PUT_MODE (operands[0], Pmode);
/* 8-byte value on a 64-bit target: do it as one DImode move.  */
15821 	  operands[0] = gen_lowpart (DImode, operands[0]);
15822 	  operands[1] = gen_lowpart (DImode, operands[1]);
15823 	  emit_move_insn (operands[0], operands[1]);
15827   /* The only non-offsettable memory we handle is push.  */
15828   if (push_operand (operands[0], VOIDmode))
15831     gcc_assert (!MEM_P (operands[0])
15832 		|| offsettable_memref_p (operands[0]));
15834   nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
15835   ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
15837   /* When emitting push, take care for source operands on the stack.  */
15838   if (push && MEM_P (operands[1])
15839       && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
15840     for (i = 0; i < nparts - 1; i++)
15841       part[1][i] = change_address (part[1][i],
15842 				   GET_MODE (part[1][i]),
15843 				   XEXP (part[1][i + 1], 0));
15845   /* We need to do copy in the right order in case an address register
15846      of the source overlaps the destination.  */
15847   if (REG_P (part[0][0]) && MEM_P (part[1][0]))
15851       for (i = 0; i < nparts; i++)
15854 	    = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
15855 	  if (collisionparts[i])
15859       /* Collision in the middle part can be handled by reordering.  */
15860       if (collisions == 1 && nparts == 3 && collisionparts [1])
15862 	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
15863 	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
15865       else if (collisions == 1
15867 	       && (collisionparts [1] || collisionparts [2]))
15869 	  if (collisionparts [1])
15871 	      tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
15872 	      tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
15876 	      tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
15877 	      tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
15881       /* If there are more collisions, we can't handle it by reordering.
15882 	 Do an lea to the last part and use only one colliding move.  */
15883       else if (collisions > 1)
15889 	  base = part[0][nparts - 1];
15891 	  /* Handle the case when the last part isn't valid for lea.
15892 	     Happens in 64-bit mode storing the 12-byte XFmode.  */
15893 	  if (GET_MODE (base) != Pmode)
15894 	    base = gen_rtx_REG (Pmode, REGNO (base));
15896 	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
15897 	  part[1][0] = replace_equiv_address (part[1][0], base);
15898 	  for (i = 1; i < nparts; i++)
15900 	      tmp = plus_constant (base, UNITS_PER_WORD * i);
15901 	      part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push expansion (partially elided): emit parts highest-address first.  */
15912 	  if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
15913 	    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
15914 	  emit_move_insn (part[0][2], part[1][2]);
15916       else if (nparts == 4)
15918 	  emit_move_insn (part[0][3], part[1][3]);
15919 	  emit_move_insn (part[0][2], part[1][2]);
15924 	  /* In 64bit mode we don't have 32bit push available.  In case this is
15925 	     register, it is OK - we will just use larger counterpart.  We also
15926 	     retype memory - these comes from attempt to avoid REX prefix on
15927 	     moving of second half of TFmode value.  */
15928 	  if (GET_MODE (part[1][1]) == SImode)
15930 	      switch (GET_CODE (part[1][1]))
15933 		  part[1][1] = adjust_address (part[1][1], DImode, 0);
15937 		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
15941 		  gcc_unreachable ();
15944 	      if (GET_MODE (part[1][0]) == SImode)
15945 		part[1][0] = part[1][1];
15948       emit_move_insn (part[0][1], part[1][1]);
15949       emit_move_insn (part[0][0], part[1][0]);
15953   /* Choose correct order to not overwrite the source before it is copied.  */
15954   if ((REG_P (part[0][0])
15955        && REG_P (part[1][1])
15956        && (REGNO (part[0][0]) == REGNO (part[1][1])
15958 	       && REGNO (part[0][0]) == REGNO (part[1][2]))
15960 	       && REGNO (part[0][0]) == REGNO (part[1][3]))))
15962 	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Overlap detected: copy parts in reverse order.  */
15964       for (i = 0, j = nparts - 1; i < nparts; i++, j--)
15966 	  operands[2 + i] = part[0][j];
15967 	  operands[6 + i] = part[1][j];
15972       for (i = 0; i < nparts; i++)
15974 	  operands[2 + i] = part[0][i];
15975 	  operands[6 + i] = part[1][i];
15979   /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
15980   if (optimize_insn_for_size_p ())
15982       for (j = 0; j < nparts - 1; j++)
15983 	if (CONST_INT_P (operands[6 + j])
15984 	    && operands[6 + j] != const0_rtx
15985 	    && REG_P (operands[2 + j]))
15986 	  for (i = j; i < nparts - 1; i++)
15987 	    if (CONST_INT_P (operands[7 + i])
15988 		&& INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
15989 	      operands[7 + i] = operands[2 + j];
15992   for (i = 0; i < nparts; i++)
15993     emit_move_insn (operands[2 + i], operands[6 + i]);
15998 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
15999    left shift by a constant, either using a single shift or
16000    a sequence of add instructions.  */
/* NOTE(review): elided excerpt -- the conditions guarding each branch
   are missing; visible code shows three strategies: one add (count==1,
   presumably), a run of adds when cheaper than a constant shift, else
   a single shift insn.  */
16003 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16007       emit_insn ((mode == DImode
16009 		  : gen_adddi3) (operand, operand, operand));
16011   else if (!optimize_insn_for_size_p ()
16012 	   && count * ix86_cost->add <= ix86_cost->shift_const)
16015       for (i=0; i<count; i++)
16017 	  emit_insn ((mode == DImode
16019 		      : gen_adddi3) (operand, operand, operand));
16023     emit_insn ((mode == DImode
16025 		: gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word (DImode on 32-bit, TImode on 64-bit) left shift
   into single-word operations.  NOTE(review): elided excerpt -- return
   type, braces and some alternate generator names are missing.  */
16029 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16031   rtx low[2], high[2];
16033   const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count: resolve everything at expand time.  */
16035   if (CONST_INT_P (operands[2]))
16037       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16038       count = INTVAL (operands[2]) & (single_width * 2 - 1);
16040       if (count >= single_width)
/* Shift by a full word or more: low word becomes zero.  */
16042 	  emit_move_insn (high[0], low[1]);
16043 	  emit_move_insn (low[0], const0_rtx);
16045 	  if (count > single_width)
16046 	    ix86_expand_ashl_const (high[0], count - single_width, mode);
16050 	  if (!rtx_equal_p (operands[0], operands[1]))
16051 	    emit_move_insn (operands[0], operands[1]);
/* Sub-word shift: shld for the high word, plain shift for the low.  */
16052 	  emit_insn ((mode == DImode
16054 		      : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16055 	  ix86_expand_ashl_const (low[0], count, mode);
16060   (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16062   if (operands[1] == const1_rtx)
16064       /* Assuming we've chosen a QImode capable registers, then 1 << N
16065 	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
16066       if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16068 	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16070 	  ix86_expand_clear (low[0]);
16071 	  ix86_expand_clear (high[0]);
16072 	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
/* setcc into the low byte of each half: low = (N < width), high = !that.  */
16074 	  d = gen_lowpart (QImode, low[0]);
16075 	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16076 	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
16077 	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
16079 	  d = gen_lowpart (QImode, high[0]);
16080 	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16081 	  s = gen_rtx_NE (QImode, flags, const0_rtx);
16082 	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
16085       /* Otherwise, we can get the same results by manually performing
16086 	 a bit extract operation on bit 5/6, and then performing the two
16087 	 shifts.  The two methods of getting 0/1 into low/high are exactly
16088 	 the same size.  Avoiding the shift in the bit extract case helps
16089 	 pentium4 a bit; no one else seems to care much either way.  */
16094 	  if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16095 	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16097 	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16098 	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) / bit 6 (TImode) of the count into high[0].  */
16100 	  emit_insn ((mode == DImode
16102 		      : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16103 	  emit_insn ((mode == DImode
16105 		      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16106 	  emit_move_insn (low[0], high[0]);
16107 	  emit_insn ((mode == DImode
16109 		      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
/* Finish by shifting both halves by the (masked) variable count.  */
16112       emit_insn ((mode == DImode
16114 		  : gen_ashldi3) (low[0], low[0], operands[2]));
16115       emit_insn ((mode == DImode
16117 		  : gen_ashldi3) (high[0], high[0], operands[2]));
16121   if (operands[1] == constm1_rtx)
16123       /* For -1 << N, we can avoid the shld instruction, because we
16124 	 know that we're shifting 0...31/63 ones into a -1.  */
16125       emit_move_insn (low[0], constm1_rtx);
16126       if (optimize_insn_for_size_p ())
16127 	emit_move_insn (high[0], low[0]);
16129 	emit_move_insn (high[0], constm1_rtx);
16133       if (!rtx_equal_p (operands[0], operands[1]))
16134 	emit_move_insn (operands[0], operands[1]);
16136       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16137       emit_insn ((mode == DImode
16139 		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
16142   emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
/* Fix up the >= single_width case: with cmove use the adjustment
   pattern with a cleared scratch, otherwise the branching variant.  */
16144   if (TARGET_CMOVE && scratch)
16146       ix86_expand_clear (scratch);
16147       emit_insn ((mode == DImode
16148 		  ? gen_x86_shift_adj_1
16149 		  : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16153     emit_insn ((mode == DImode
16154 		? gen_x86_shift_adj_2
16155 		: gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into single-word
   operations.  NOTE(review): elided excerpt -- return type and braces
   are missing from this view.  */
16159 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16161   rtx low[2], high[2];
16163   const int single_width = mode == DImode ? 32 : 64;
16165   if (CONST_INT_P (operands[2]))
16167       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16168       count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shift by all-but-one bits: both halves become the sign mask.  */
16170       if (count == single_width * 2 - 1)
16172 	  emit_move_insn (high[0], high[1]);
16173 	  emit_insn ((mode == DImode
16175 		      : gen_ashrdi3) (high[0], high[0],
16176 				      GEN_INT (single_width - 1)));
16177 	  emit_move_insn (low[0], high[0]);
/* Shift by a full word or more: low = old high shifted; high = sign fill.  */
16180       else if (count >= single_width)
16182 	  emit_move_insn (low[0], high[1]);
16183 	  emit_move_insn (high[0], low[0]);
16184 	  emit_insn ((mode == DImode
16186 		      : gen_ashrdi3) (high[0], high[0],
16187 				      GEN_INT (single_width - 1)));
16188 	  if (count > single_width)
16189 	    emit_insn ((mode == DImode
16191 			: gen_ashrdi3) (low[0], low[0],
16192 					GEN_INT (count - single_width)));
/* Sub-word shift: shrd for the low word, arithmetic shift for the high.  */
16196 	  if (!rtx_equal_p (operands[0], operands[1]))
16197 	    emit_move_insn (operands[0], operands[1]);
16198 	  emit_insn ((mode == DImode
16200 		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16201 	  emit_insn ((mode == DImode
16203 		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable shift count.  */
16208       if (!rtx_equal_p (operands[0], operands[1]))
16209 	emit_move_insn (operands[0], operands[1]);
16211       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16213       emit_insn ((mode == DImode
16215 		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16216       emit_insn ((mode == DImode
16218 		  : gen_ashrdi3) (high[0], high[0], operands[2]));
/* Fix up counts >= single_width: scratch holds the sign fill for cmove,
   otherwise use the branching adjustment pattern.  */
16220       if (TARGET_CMOVE && scratch)
16222 	  emit_move_insn (scratch, high[0]);
16223 	  emit_insn ((mode == DImode
16225 		      : gen_ashrdi3) (scratch, scratch,
16226 				      GEN_INT (single_width - 1)));
16227 	  emit_insn ((mode == DImode
16228 		      ? gen_x86_shift_adj_1
16229 		      : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16233 	emit_insn ((mode == DImode
16234 		    ? gen_x86_shift_adj_3
16235 		    : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into single-word operations.
   NOTE(review): elided excerpt -- return type and braces are missing
   from this view.  */
16240 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
16242   rtx low[2], high[2];
16244   const int single_width = mode == DImode ? 32 : 64;
16246   if (CONST_INT_P (operands[2]))
16248       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16249       count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shift by a full word or more: high word becomes zero.  */
16251       if (count >= single_width)
16253 	  emit_move_insn (low[0], high[1]);
16254 	  ix86_expand_clear (high[0]);
16256 	  if (count > single_width)
16257 	    emit_insn ((mode == DImode
16259 			: gen_lshrdi3) (low[0], low[0],
16260 					GEN_INT (count - single_width)));
/* Sub-word shift: shrd for the low word, logical shift for the high.  */
16264 	  if (!rtx_equal_p (operands[0], operands[1]))
16265 	    emit_move_insn (operands[0], operands[1]);
16266 	  emit_insn ((mode == DImode
16268 		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16269 	  emit_insn ((mode == DImode
16271 		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable shift count.  */
16276       if (!rtx_equal_p (operands[0], operands[1]))
16277 	emit_move_insn (operands[0], operands[1]);
16279       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16281       emit_insn ((mode == DImode
16283 		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16284       emit_insn ((mode == DImode
16286 		  : gen_lshrdi3) (high[0], high[0], operands[2]));
16288       /* Heh.  By reversing the arguments, we can reuse this pattern.  */
16289       if (TARGET_CMOVE && scratch)
16291 	  ix86_expand_clear (scratch);
16292 	  emit_insn ((mode == DImode
16293 		      ? gen_x86_shift_adj_1
16294 		      : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16298 	emit_insn ((mode == DImode
16299 		    ? gen_x86_shift_adj_2
16300 		    : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
16304 /* Predict just emitted jump instruction to be taken with probability PROB.  */
/* NOTE(review): elided excerpt -- attaches a REG_BR_PROB note to the
   last emitted insn, which must be a jump.  */
16306 predict_jump (int prob)
16308   rtx insn = get_last_insn ();
16309   gcc_assert (JUMP_P (insn));
16311     = gen_rtx_EXPR_LIST (REG_BR_PROB,
16316 /* Helper function for the string operations below.  Dest VARIABLE whether
16317    it is aligned to VALUE bytes.  If true, jump to the label.  */
/* NOTE(review): elided excerpt -- the return of LABEL is not visible.
   Emits VARIABLE & VALUE into a temp, compares against zero, and jumps;
   EPILOGUE presumably selects the 50% vs 90% branch prediction.  */
16319 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
16321   rtx label = gen_label_rtx ();
16322   rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
16323   if (GET_MODE (variable) == DImode)
16324     emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
16326     emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
16327   emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
16330     predict_jump (REG_BR_PROB_BASE * 50 / 100);
16332     predict_jump (REG_BR_PROB_BASE * 90 / 100);
16336 /* Adjust COUNTER by the VALUE.  */
/* Subtracts VALUE from COUNTREG using the add pattern matching the
   register's mode (DImode or SImode).  */
16338 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
16340   if (GET_MODE (countreg) == DImode)
16341     emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
16343     emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
16346 /* Zero extend possibly SImode EXP to Pmode register.  */
/* Constants (VOIDmode) are simply forced into a Pmode register;
   values already in Pmode are copied; otherwise an SImode value is
   zero-extended into a fresh DImode/Pmode register.  */
16348 ix86_zero_extend_to_Pmode (rtx exp)
16351   if (GET_MODE (exp) == VOIDmode)
16352     return force_reg (Pmode, exp);
16353   if (GET_MODE (exp) == Pmode)
16354     return copy_to_mode_reg (Pmode, exp);
16355   r = gen_reg_rtx (Pmode);
16356   emit_insn (gen_zero_extendsidi2 (r, exp));
16360 /* Divide COUNTREG by SCALE.  */
/* NOTE(review): elided excerpt -- the scale==1 early return and the
   final return are not visible.  Constant counts are divided at compile
   time; register counts get a logical right shift by log2(SCALE), so
   SCALE is presumably a power of two.  */
16362 scale_counter (rtx countreg, int scale)
16365   rtx piece_size_mask;
16369   if (CONST_INT_P (countreg))
16370     return GEN_INT (INTVAL (countreg) / scale);
16371   gcc_assert (REG_P (countreg));
16373   piece_size_mask = GEN_INT (scale - 1);
16374   sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
16375 			    GEN_INT (exact_log2 (scale)),
16376 			    NULL, 1, OPTAB_DIRECT);
16380 /* Return mode for the memcpy/memset loop counter.  Prefer SImode over
16381    DImode for constant loop counts.  */
/* NOTE(review): elided excerpt -- the SImode/DImode return statements
   are not visible, only the dispatching conditions.  */
16383 static enum machine_mode
16384 counter_mode (rtx count_exp)
16386   if (GET_MODE (count_exp) != VOIDmode)
16387     return GET_MODE (count_exp);
16388   if (GET_CODE (count_exp) != CONST_INT)
/* Constants exceeding 32 bits on 64-bit targets need DImode.  */
16390   if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
16395 /* When SRCPTR is non-NULL, output simple loop to move memory
16396    pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
16397    overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
16398    equivalent loop to set memory by VALUE (supposed to be in MODE).
16400    The size is rounded down to whole number of chunk size moved at once.
16401    SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.  */
/* NOTE(review): elided excerpt -- declarations, some braces, and a few
   conditions are missing from this view.  */
16405 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
16406 			       rtx destptr, rtx srcptr, rtx value,
16407 			       rtx count, enum machine_mode mode, int unroll,
16410   rtx out_label, top_label, iter, tmp;
16411   enum machine_mode iter_mode = counter_mode (count);
16412   rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
16413   rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
16419   top_label = gen_label_rtx ();
16420   out_label = gen_label_rtx ();
16421   iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the unrolled chunk size.  */
16423   size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
16424 			      NULL, 1, OPTAB_DIRECT);
16425   /* Those two should combine.  */
16426   if (piece_size == const1_rtx)
16428       emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
16430       predict_jump (REG_BR_PROB_BASE * 10 / 100);
16432   emit_move_insn (iter, const0_rtx);
16434   emit_label (top_label);
/* Address each chunk as base pointer + ITER.  */
16436   tmp = convert_modes (Pmode, iter_mode, iter, true);
16437   x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
16438   destmem = change_address (destmem, mode, x_addr);
16442       y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
16443       srcmem = change_address (srcmem, mode, y_addr);
16445       /* When unrolling for chips that reorder memory reads and writes,
16446 	 we can save registers by using single temporary.
16447 	 Also using 4 temporaries is overkill in 32bit mode.  */
/* The "&& 0" disables this single-temporary path unconditionally.  */
16448       if (!TARGET_64BIT && 0)
16450 	  for (i = 0; i < unroll; i++)
16455 		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16457 		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16459 	      emit_move_insn (destmem, srcmem);
/* Default path: load all chunks into temporaries, then store them.  */
16465 	  gcc_assert (unroll <= 4);
16466 	  for (i = 0; i < unroll; i++)
16468 	      tmpreg[i] = gen_reg_rtx (mode);
16472 		      adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16474 	      emit_move_insn (tmpreg[i], srcmem);
16476 	  for (i = 0; i < unroll; i++)
16481 		      adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16483 	      emit_move_insn (destmem, tmpreg[i]);
/* Memset variant: store VALUE into each chunk.  */
16488     for (i = 0; i < unroll; i++)
16492 	      adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16493 	emit_move_insn (destmem, value);
16496   tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
16497 			     true, OPTAB_LIB_WIDEN);
16499     emit_move_insn (iter, tmp);
16501   emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Back-edge probability derived from the expected trip count.  */
16503   if (expected_size != -1)
16505       expected_size /= GET_MODE_SIZE (mode) * unroll;
16506       if (expected_size == 0)
16508       else if (expected_size > REG_BR_PROB_BASE)
16509 	predict_jump (REG_BR_PROB_BASE - 1);
16511 	predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
16514     predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the pointers past the copied region for the caller.  */
16515   iter = ix86_zero_extend_to_Pmode (iter);
16516   tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
16517 			     true, OPTAB_LIB_WIDEN);
16518   if (tmp != destptr)
16519     emit_move_insn (destptr, tmp);
16522       tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
16523 				 true, OPTAB_LIB_WIDEN);
16525 	emit_move_insn (srcptr, tmp);
16527   emit_label (out_label);
16530 /* Output "rep; mov" instruction.
16531    Arguments have same meaning as for previous function */
/* NOTE(review): elided excerpt -- declarations, the mode-widening for
   known-aligned counts, and closing braces are missing from this view.  */
16533 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
16534 			   rtx destptr, rtx srcptr,
16536 			   enum machine_mode mode)
16542   /* If the size is known, it is shorter to use rep movs.  */
16543   if (mode == QImode && CONST_INT_P (count)
16544       && !(INTVAL (count) & 3))
/* Normalize MEMs to BLKmode at the pointer so gen_rep_mov gets
   consistent aliasing info.  */
16547   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16548     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16549   if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
16550     srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
16551   countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* DEST/SRC expressions describe the final pointer values:
   ptr + countreg * element_size.  */
16552   if (mode != QImode)
16554       destexp = gen_rtx_ASHIFT (Pmode, countreg,
16555 				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16556       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16557       srcexp = gen_rtx_ASHIFT (Pmode, countreg,
16558 			       GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16559       srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
16563       destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16564       srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
16566   emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
16570 /* Output "rep; stos" instruction.
16571    Arguments have same meaning as for previous function */
/* NOTE(review): elided excerpt -- declarations and closing braces are
   missing from this view.  Mirrors expand_movmem_via_rep_mov but stores
   VALUE instead of copying from a source.  */
16573 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
16575 			    enum machine_mode mode)
16580   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16581     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16582   value = force_reg (mode, gen_lowpart (mode, value));
16583   countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* DESTEXP is the final destination pointer: destptr + count * size.  */
16584   if (mode != QImode)
16586       destexp = gen_rtx_ASHIFT (Pmode, countreg,
16587 				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16588       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16591     destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16592   emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single MODE-sized move from SRCMEM+OFFSET to DESTMEM+OFFSET
   using the strmov pattern, which also advances DESTPTR and SRCPTR.
   NOTE(review): the storage-class line and braces are missing from this
   excerpt.  */
16596 emit_strmov (rtx destmem, rtx srcmem,
16597 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
16599 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
16600 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
16601 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16604 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
/* NOTE(review): excerpt is missing lines.  Three visible strategies:
   (1) COUNT is a compile-time constant: emit straight-line moves for
   each set bit of the residual count (16, 8, 4, 2, 1 bytes);
   (2) TARGET_SINGLE_STRINGOP: conditional single strmov per power of two,
   guarded by ix86_expand_aligntest labels;
   (3) otherwise: keep a separate OFFSET register (starting at zero) and
   use plain moves with base+offset addressing to save code size.  */
16606 expand_movmem_epilogue (rtx destmem, rtx srcmem,
16607 rtx destptr, rtx srcptr, rtx count, int max_size)
16610 if (CONST_INT_P (count))
16612 HOST_WIDE_INT countval = INTVAL (count);
/* Constant count: expand each residual power-of-two chunk inline.  */
16615 if ((countval & 0x10) && max_size > 16)
16619 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
16620 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
16623 gcc_unreachable ();
16626 if ((countval & 0x08) && max_size > 8)
16629 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
/* 32-bit targets copy 8 bytes as two SImode moves.  */
16632 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16633 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
16637 if ((countval & 0x04) && max_size > 4)
16639 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16642 if ((countval & 0x02) && max_size > 2)
16644 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
16647 if ((countval & 0x01) && max_size > 1)
16649 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large residual: fall back to a byte-copy loop over
   count & (max_size - 1).  */
16656 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
16657 count, 1, OPTAB_DIRECT);
16658 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
16659 count, QImode, 1, 4);
16663 /* When there are stringops, we can cheaply increase dest and src pointers.
16664 Otherwise we save code size by maintaining offset (zero is readily
16665 available from preceding rep operation) and using x86 addressing modes.
16667 if (TARGET_SINGLE_STRINGOP)
16671 rtx label = ix86_expand_aligntest (count, 4, true);
16672 src = change_address (srcmem, SImode, srcptr);
16673 dest = change_address (destmem, SImode, destptr);
16674 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16675 emit_label (label);
16676 LABEL_NUSES (label) = 1;
16680 rtx label = ix86_expand_aligntest (count, 2, true);
16681 src = change_address (srcmem, HImode, srcptr);
16682 dest = change_address (destmem, HImode, destptr);
16683 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16684 emit_label (label);
16685 LABEL_NUSES (label) = 1;
16689 rtx label = ix86_expand_aligntest (count, 1, true);
16690 src = change_address (srcmem, QImode, srcptr);
16691 dest = change_address (destmem, QImode, destptr);
16692 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16693 emit_label (label);
16694 LABEL_NUSES (label) = 1;
/* No single-insn stringops: track the running offset in a register.  */
16699 rtx offset = force_reg (Pmode, const0_rtx);
16704 rtx label = ix86_expand_aligntest (count, 4, true);
16705 src = change_address (srcmem, SImode, srcptr);
16706 dest = change_address (destmem, SImode, destptr);
16707 emit_move_insn (dest, src);
16708 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
16709 true, OPTAB_LIB_WIDEN);
16711 emit_move_insn (offset, tmp);
16712 emit_label (label);
16713 LABEL_NUSES (label) = 1;
16717 rtx label = ix86_expand_aligntest (count, 2, true);
16718 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16719 src = change_address (srcmem, HImode, tmp);
16720 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16721 dest = change_address (destmem, HImode, tmp);
16722 emit_move_insn (dest, src);
16723 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
16724 true, OPTAB_LIB_WIDEN);
16726 emit_move_insn (offset, tmp);
16727 emit_label (label);
16728 LABEL_NUSES (label) = 1;
/* Last possible byte: no need to update OFFSET afterwards.  */
16732 rtx label = ix86_expand_aligntest (count, 1, true);
16733 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16734 src = change_address (srcmem, QImode, tmp);
16735 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16736 dest = change_address (destmem, QImode, tmp);
16737 emit_move_insn (dest, src);
16738 emit_label (label);
16739 LABEL_NUSES (label) = 1;
16744 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Masks COUNT down to the residual byte count and emits a QImode
   store loop for it.  Used when the promoted wide VALUE is not
   available in the epilogue (see force_loopy_epilogue).  */
16746 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
16747 rtx count, int max_size)
16750 expand_simple_binop (counter_mode (count), AND, count,
16751 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
16752 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
16753 gen_lowpart (QImode, value), count, QImode,
16757 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* NOTE(review): excerpt is missing lines.  Structure parallels
   expand_movmem_epilogue: straight-line strset sequences for a constant
   COUNT, otherwise aligntest-guarded conditional stores per power of two.
   VALUE is assumed already promoted (0xXY replicated) for the wide
   stores; narrower tails take gen_lowpart of it.  */
16759 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
16763 if (CONST_INT_P (count))
16765 HOST_WIDE_INT countval = INTVAL (count);
16768 if ((countval & 0x10) && max_size > 16)
16772 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
16773 emit_insn (gen_strset (destptr, dest, value));
16774 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
16775 emit_insn (gen_strset (destptr, dest, value));
16778 gcc_unreachable ();
16781 if ((countval & 0x08) && max_size > 8)
16785 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
16786 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit path: two SImode stores for the 8-byte chunk.  */
16790 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
16791 emit_insn (gen_strset (destptr, dest, value));
16792 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
16793 emit_insn (gen_strset (destptr, dest, value));
16797 if ((countval & 0x04) && max_size > 4)
16799 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
16800 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
16803 if ((countval & 0x02) && max_size > 2)
16805 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
16806 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
16809 if ((countval & 0x01) && max_size > 1)
16811 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
16812 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Large residual with non-constant count: defer to the byte loop.  */
16819 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
16824 rtx label = ix86_expand_aligntest (count, 16, true);
16827 dest = change_address (destmem, DImode, destptr);
16828 emit_insn (gen_strset (destptr, dest, value));
16829 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit path: 16 bytes as four SImode strsets.  */
16833 dest = change_address (destmem, SImode, destptr);
16834 emit_insn (gen_strset (destptr, dest, value));
16835 emit_insn (gen_strset (destptr, dest, value));
16836 emit_insn (gen_strset (destptr, dest, value));
16837 emit_insn (gen_strset (destptr, dest, value));
16839 emit_label (label);
16840 LABEL_NUSES (label) = 1;
16844 rtx label = ix86_expand_aligntest (count, 8, true);
16847 dest = change_address (destmem, DImode, destptr);
16848 emit_insn (gen_strset (destptr, dest, value));
16852 dest = change_address (destmem, SImode, destptr);
16853 emit_insn (gen_strset (destptr, dest, value));
16854 emit_insn (gen_strset (destptr, dest, value));
16856 emit_label (label);
16857 LABEL_NUSES (label) = 1;
16861 rtx label = ix86_expand_aligntest (count, 4, true);
16862 dest = change_address (destmem, SImode, destptr);
16863 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
16864 emit_label (label);
16865 LABEL_NUSES (label) = 1;
16869 rtx label = ix86_expand_aligntest (count, 2, true);
16870 dest = change_address (destmem, HImode, destptr);
16871 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
16872 emit_label (label);
16873 LABEL_NUSES (label) = 1;
16877 rtx label = ix86_expand_aligntest (count, 1, true);
16878 dest = change_address (destmem, QImode, destptr);
16879 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
16880 emit_label (label);
16881 LABEL_NUSES (label) = 1;
16885 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
16886 DESIRED_ALIGNMENT. */
/* For each alignment step (1, 2, 4 bytes) that may be needed, emit a
   test of the low bits of DESTPTR and a conditional single move, then
   decrement COUNT by the bytes consumed.  Only the destination is
   aligned; the source may stay misaligned.  */
16888 expand_movmem_prologue (rtx destmem, rtx srcmem,
16889 rtx destptr, rtx srcptr, rtx count,
16890 int align, int desired_alignment)
16892 if (align <= 1 && desired_alignment > 1)
16894 rtx label = ix86_expand_aligntest (destptr, 1, false);
16895 srcmem = change_address (srcmem, QImode, srcptr);
16896 destmem = change_address (destmem, QImode, destptr);
16897 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16898 ix86_adjust_counter (count, 1);
16899 emit_label (label);
16900 LABEL_NUSES (label) = 1;
16902 if (align <= 2 && desired_alignment > 2)
16904 rtx label = ix86_expand_aligntest (destptr, 2, false);
16905 srcmem = change_address (srcmem, HImode, srcptr);
16906 destmem = change_address (destmem, HImode, destptr);
16907 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16908 ix86_adjust_counter (count, 2);
16909 emit_label (label);
16910 LABEL_NUSES (label) = 1;
16912 if (align <= 4 && desired_alignment > 4)
16914 rtx label = ix86_expand_aligntest (destptr, 4, false);
16915 srcmem = change_address (srcmem, SImode, srcptr);
16916 destmem = change_address (destmem, SImode, destptr);
16917 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16918 ix86_adjust_counter (count, 4);
16919 emit_label (label);
16920 LABEL_NUSES (label) = 1;
/* Alignments beyond 8 are never requested by decide_alignment.  */
16922 gcc_assert (desired_alignment <= 8);
16925 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
16926 DESIRED_ALIGNMENT. */
/* memset analogue of expand_movmem_prologue: conditionally store 1, 2
   or 4 bytes of the promoted VALUE until DESTPTR reaches the desired
   alignment, decrementing COUNT accordingly.  */
16928 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
16929 int align, int desired_alignment)
16931 if (align <= 1 && desired_alignment > 1)
16933 rtx label = ix86_expand_aligntest (destptr, 1, false);
16934 destmem = change_address (destmem, QImode, destptr);
16935 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
16936 ix86_adjust_counter (count, 1);
16937 emit_label (label);
16938 LABEL_NUSES (label) = 1;
16940 if (align <= 2 && desired_alignment > 2)
16942 rtx label = ix86_expand_aligntest (destptr, 2, false);
16943 destmem = change_address (destmem, HImode, destptr);
16944 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
16945 ix86_adjust_counter (count, 2);
16946 emit_label (label);
16947 LABEL_NUSES (label) = 1;
16949 if (align <= 4 && desired_alignment > 4)
16951 rtx label = ix86_expand_aligntest (destptr, 4, false);
16952 destmem = change_address (destmem, SImode, destptr);
16953 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
16954 ix86_adjust_counter (count, 4);
16955 emit_label (label);
16956 LABEL_NUSES (label) = 1;
/* Alignments beyond 8 are never requested by decide_alignment.  */
16958 gcc_assert (desired_alignment <= 8);
16961 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
/* Returns the stringop_alg to use for a memcpy (MEMSET false) or memset
   (MEMSET true) of COUNT bytes (0 when unknown) with profile-estimated
   EXPECTED_SIZE (-1 when unknown).  *DYNAMIC_CHECK is set to a size
   threshold when a runtime size check + libcall fallback should be
   emitted, else -1.
   FIX(review): both table-scanning loops below iterated to
   NAX_STRINGOP_ALGS, a corruption of the i386.h constant
   MAX_STRINGOP_ALGS (the length of stringop_algs.size[]); corrected.  */
16962 static enum stringop_alg
16963 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
16964 int *dynamic_check)
16966 const struct stringop_algs * algs;
16967 /* Algorithms using the rep prefix want at least edi and ecx;
16968 additionally, memset wants eax and memcpy wants esi. Don't
16969 consider such algorithms if the user has appropriated those
16970 registers for their own purposes. */
16971 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
16973 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
16975 #define ALG_USABLE_P(alg) (rep_prefix_usable \
16976 || (alg != rep_prefix_1_byte \
16977 && alg != rep_prefix_4_byte \
16978 && alg != rep_prefix_8_byte))
16979 const struct processor_costs *cost;
/* Size-optimized compilation uses the size cost table.  */
16981 cost = optimize_insn_for_size_p () ? &ix86_size_cost : ix86_cost;
16983 *dynamic_check = -1;
16985 algs = &cost->memset[TARGET_64BIT != 0];
16987 algs = &cost->memcpy[TARGET_64BIT != 0];
/* An explicit -mstringop-strategy wins when usable.  */
16988 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
16989 return stringop_alg;
16990 /* rep; movq or rep; movl is the smallest variant. */
16991 else if (optimize_insn_for_size_p ())
16993 if (!count || (count & 3))
16994 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
16996 return rep_prefix_usable ? rep_prefix_4_byte : loop;
16998 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
17000 else if (expected_size != -1 && expected_size < 4)
17001 return loop_1_byte;
17002 else if (expected_size != -1)
/* Scan the per-size algorithm table for the first usable entry that
   covers EXPECTED_SIZE.  */
17005 enum stringop_alg alg = libcall;
17006 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17008 /* We get here if the algorithms that were not libcall-based
17009 were rep-prefix based and we are unable to use rep prefixes
17010 based on global register usage. Break out of the loop and
17011 use the heuristic below. */
17012 if (algs->size[i].max == 0)
17014 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17016 enum stringop_alg candidate = algs->size[i].alg;
17018 if (candidate != libcall && ALG_USABLE_P (candidate))
17020 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17021 last non-libcall inline algorithm. */
17022 if (TARGET_INLINE_ALL_STRINGOPS)
17024 /* When the current size is best to be copied by a libcall,
17025 but we are still forced to inline, run the heuristic below
17026 that will pick code for medium sized blocks. */
17027 if (alg != libcall)
17031 else if (ALG_USABLE_P (candidate))
17035 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17037 /* When asked to inline the call anyway, try to pick meaningful choice.
17038 We look for maximal size of block that is faster to copy by hand and
17039 take blocks of at most of that size guessing that average size will
17040 be roughly half of the block.
17042 If this turns out to be bad, we might simply specify the preferred
17043 choice in ix86_costs. */
17044 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17045 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
17048 enum stringop_alg alg;
17050 bool any_alg_usable_p = true;
17052 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17054 enum stringop_alg candidate = algs->size[i].alg;
17055 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
17057 if (candidate != libcall && candidate
17058 && ALG_USABLE_P (candidate))
17059 max = algs->size[i].max;
17061 /* If there aren't any usable algorithms, then recursing on
17062 smaller sizes isn't going to find anything. Just return the
17063 simple byte-at-a-time copy loop. */
17064 if (!any_alg_usable_p)
17066 /* Pick something reasonable. */
17067 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17068 *dynamic_check = 128;
17069 return loop_1_byte;
/* Recurse with half the largest hand-optimized size as the guess.  */
17073 alg = decide_alg (count, max / 2, memset, dynamic_check);
17074 gcc_assert (*dynamic_check == -1);
17075 gcc_assert (alg != libcall);
17076 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17077 *dynamic_check = max;
17080 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
17081 #undef ALG_USABLE_P
17084 /* Decide on alignment. We know that the operand is already aligned to ALIGN
17085 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* NOTE(review): excerpt is missing lines (switch header, several cases,
   return type).  Visible: a switch over ALG picking a desired alignment,
   then clamped so we never return less than the known ALIGN, and tiny
   expected sizes skip the alignment prologue entirely.  */
17087 decide_alignment (int align,
17088 enum stringop_alg alg,
17091 int desired_align = 0;
17095 gcc_unreachable ();
17097 case unrolled_loop:
17098 desired_align = GET_MODE_SIZE (Pmode);
17100 case rep_prefix_8_byte:
17103 case rep_prefix_4_byte:
17104 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
17105 copying whole cacheline at once. */
17106 if (TARGET_PENTIUMPRO)
17111 case rep_prefix_1_byte:
17112 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
17113 copying whole cacheline at once. */
17114 if (TARGET_PENTIUMPRO)
/* Never lower the alignment we already have.  */
17128 if (desired_align < align)
17129 desired_align = align;
/* Blocks expected to be smaller than 4 bytes gain nothing from an
   alignment prologue.  */
17130 if (expected_size != -1 && expected_size < 4)
17131 desired_align = align;
17132 return desired_align;
17135 /* Return the smallest power of 2 greater than VAL. */
/* NOTE(review): the body of this function is entirely missing from this
   excerpt; only the header comment and part of the signature remain.  */
17137 smallest_pow2_greater_than (int val)
17145 /* Expand string move (memcpy) operation. Use i386 string operations when
17146 profitable. expand_setmem contains similar code. The code depends upon
17147 architecture, block size and alignment, but always has the same
17150 1) Prologue guard: Conditional that jumps up to epilogues for small
17151 blocks that can be handled by epilogue alone. This is faster but
17152 also needed for correctness, since prologue assume the block is larger
17153 than the desired alignment.
17155 Optional dynamic check for size and libcall for large
17156 blocks is emitted here too, with -minline-stringops-dynamically.
17158 2) Prologue: copy first few bytes in order to get destination aligned
17159 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
17160 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
17161 We emit either a jump tree on power of two sized blocks, or a byte loop.
17163 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
17164 with specified algorithm.
17166 4) Epilogue: code copying tail of the block that is too small to be
17167 handled by main body (or up to size guarded by prologue guard). */
/* NOTE(review): excerpt is missing lines throughout (braces, some
   declarations, switch bodies).  Returns nonzero on success -- the
   failure-return lines are not visible here; confirm against upstream.  */
17170 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
17171 rtx expected_align_exp, rtx expected_size_exp)
17177 rtx jump_around_label = NULL;
17178 HOST_WIDE_INT align = 1;
17179 unsigned HOST_WIDE_INT count = 0;
17180 HOST_WIDE_INT expected_size = -1;
17181 int size_needed = 0, epilogue_size_needed;
17182 int desired_align = 0;
17183 enum stringop_alg alg;
17186 if (CONST_INT_P (align_exp))
17187 align = INTVAL (align_exp);
17188 /* i386 can do misaligned access on reasonably increased cost. */
17189 if (CONST_INT_P (expected_align_exp)
17190 && INTVAL (expected_align_exp) > align)
17191 align = INTVAL (expected_align_exp);
17192 if (CONST_INT_P (count_exp))
17193 count = expected_size = INTVAL (count_exp);
17194 if (CONST_INT_P (expected_size_exp) && count == 0)
17195 expected_size = INTVAL (expected_size_exp);
17197 /* Make sure we don't need to care about overflow later on. */
17198 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17201 /* Step 0: Decide on preferred algorithm, desired alignment and
17202 size of chunks to be copied by main loop. */
17204 alg = decide_alg (count, expected_size, false, &dynamic_check);
17205 desired_align = decide_alignment (align, alg, expected_size);
17207 if (!TARGET_ALIGN_STRINGOPS)
17208 align = desired_align;
17210 if (alg == libcall)
17212 gcc_assert (alg != no_stringop);
17214 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
17215 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17216 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* SIZE_NEEDED = bytes moved per main-loop iteration for ALG.  */
17221 gcc_unreachable ();
17223 size_needed = GET_MODE_SIZE (Pmode);
17225 case unrolled_loop:
17226 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
17228 case rep_prefix_8_byte:
17231 case rep_prefix_4_byte:
17234 case rep_prefix_1_byte:
17240 epilogue_size_needed = size_needed;
17242 /* Step 1: Prologue guard. */
17244 /* Alignment code needs count to be in register. */
17245 if (CONST_INT_P (count_exp) && desired_align > align)
17246 count_exp = force_reg (counter_mode (count_exp), count_exp);
17247 gcc_assert (desired_align >= 1 && align >= 1);
17249 /* Ensure that alignment prologue won't copy past end of block. */
17250 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17252 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17253 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17254 Make sure it is power of 2. */
17255 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17257 if (CONST_INT_P (count_exp))
17259 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
/* Non-constant count: branch to the epilogue for small blocks.  */
17264 label = gen_label_rtx ();
17265 emit_cmp_and_jump_insns (count_exp,
17266 GEN_INT (epilogue_size_needed),
17267 LTU, 0, counter_mode (count_exp), 1, label);
17268 if (expected_size == -1 || expected_size < epilogue_size_needed)
17269 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17271 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17275 /* Emit code to decide on runtime whether library call or inline should be
17277 if (dynamic_check != -1)
17279 if (CONST_INT_P (count_exp))
17281 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
17283 emit_block_move_via_libcall (dst, src, count_exp, false);
17284 count_exp = const0_rtx;
/* Runtime size check: big blocks go to the library.  */
17290 rtx hot_label = gen_label_rtx ();
17291 jump_around_label = gen_label_rtx ();
17292 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17293 LEU, 0, GET_MODE (count_exp), 1, hot_label);
17294 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17295 emit_block_move_via_libcall (dst, src, count_exp, false);
17296 emit_jump (jump_around_label);
17297 emit_label (hot_label);
17301 /* Step 2: Alignment prologue. */
17303 if (desired_align > align)
17305 /* Except for the first move in epilogue, we no longer know
17306 constant offset in aliasing info. It don't seems to worth
17307 the pain to maintain it for the first move, so throw away
17309 src = change_address (src, BLKmode, srcreg);
17310 dst = change_address (dst, BLKmode, destreg);
17311 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
17314 if (label && size_needed == 1)
17316 emit_label (label);
17317 LABEL_NUSES (label) = 1;
17321 /* Step 3: Main loop. */
17327 gcc_unreachable ();
17329 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17330 count_exp, QImode, 1, expected_size);
17333 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17334 count_exp, Pmode, 1, expected_size);
17336 case unrolled_loop:
17337 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
17338 registers for 4 temporaries anyway. */
17339 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17340 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
17343 case rep_prefix_8_byte:
17344 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17347 case rep_prefix_4_byte:
17348 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17351 case rep_prefix_1_byte:
17352 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17356 /* Adjust properly the offset of src and dest memory for aliasing. */
17357 if (CONST_INT_P (count_exp))
17359 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
17360 (count / size_needed) * size_needed);
17361 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17362 (count / size_needed) * size_needed);
17366 src = change_address (src, BLKmode, srcreg);
17367 dst = change_address (dst, BLKmode, destreg);
17370 /* Step 4: Epilogue to copy the remaining bytes. */
17374 /* When the main loop is done, COUNT_EXP might hold original count,
17375 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
17376 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
17377 bytes. Compensate if needed. */
17379 if (size_needed < epilogue_size_needed)
17382 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17383 GEN_INT (size_needed - 1), count_exp, 1,
17385 if (tmp != count_exp)
17386 emit_move_insn (count_exp, tmp);
17388 emit_label (label);
17389 LABEL_NUSES (label) = 1;
17392 if (count_exp != const0_rtx && epilogue_size_needed > 1)
17393 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
17394 epilogue_size_needed);
17395 if (jump_around_label)
17396 emit_label (jump_around_label);
17400 /* Helper function for memcpy. For QImode value 0xXY produce
17401 0xXYXYXYXY of wide specified by MODE. This is essentially
17402 a * 0x10101010, but we can do slightly better than
17403 synth_mult by unwinding the sequence by hand on CPUs with
/* NOTE(review): excerpt is missing lines (end of the comment, return
   type, several braces, and at least one `return reg;` -- the
   `if (mode == SImode)` before the 32-bit shift below matches upstream
   only with that missing early return; do not "fix" the condition).  */
17406 promote_duplicated_reg (enum machine_mode mode, rtx val)
17408 enum machine_mode valmode = GET_MODE (val);
17410 int nops = mode == DImode ? 3 : 2;
17412 gcc_assert (mode == SImode || mode == DImode);
17413 if (val == const0_rtx)
17414 return copy_to_mode_reg (mode, const0_rtx);
17415 if (CONST_INT_P (val))
/* Constant byte: replicate it at compile time.  */
17417 HOST_WIDE_INT v = INTVAL (val) & 255;
17421 if (mode == DImode)
17422 v |= (v << 16) << 16;
17423 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
17426 if (valmode == VOIDmode)
17428 if (valmode != QImode)
17429 val = gen_lowpart (QImode, val);
17430 if (mode == QImode)
17432 if (!TARGET_PARTIAL_REG_STALL)
/* Cost comparison: multiply-by-0x01010101 versus the shift/or
   unwinding sequence below.  */
17434 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
17435 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
17436 <= (ix86_cost->shift_const + ix86_cost->add) * nops
17437 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
17439 rtx reg = convert_modes (mode, QImode, val, true);
17440 tmp = promote_duplicated_reg (mode, const1_rtx);
17441 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
17446 rtx reg = convert_modes (mode, QImode, val, true);
17448 if (!TARGET_PARTIAL_REG_STALL)
17449 if (mode == SImode)
17450 emit_insn (gen_movsi_insv_1 (reg, reg));
17452 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
/* Partial-register-stall targets build the pattern with explicit
   shift/or steps: 8, then 16, then (DImode only) 32 bits.  */
17455 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
17456 NULL, 1, OPTAB_DIRECT);
17458 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17460 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
17461 NULL, 1, OPTAB_DIRECT);
17462 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17463 if (mode == SImode)
17465 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
17466 NULL, 1, OPTAB_DIRECT);
17467 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17472 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
17473 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
17474 alignment from ALIGN to DESIRED_ALIGN. */
/* Picks the widest mode (DI/SI/HI) actually required by either the main
   loop chunk size or the alignment prologue; VAL is returned unpromoted
   when single bytes suffice.  NOTE(review): the DImode guard's first
   line (presumably a TARGET_64BIT test) is missing from this excerpt.  */
17476 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
17481 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
17482 promoted_val = promote_duplicated_reg (DImode, val);
17483 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
17484 promoted_val = promote_duplicated_reg (SImode, val);
17485 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
17486 promoted_val = promote_duplicated_reg (HImode, val);
17488 promoted_val = val;
17490 return promoted_val;
17493 /* Expand string clear operation (bzero). Use i386 string operations when
17494 profitable. See expand_movmem comment for explanation of individual
17495 steps performed. */
/* NOTE(review): excerpt is missing lines throughout (braces, some
   declarations, switch bodies, return statements).  Same four-step
   structure as ix86_expand_movmem, plus promotion of the fill byte
   VAL_EXP to a wide replicated value (cheaply before the guard when
   constant, after it otherwise).  */
17497 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
17498 rtx expected_align_exp, rtx expected_size_exp)
17503 rtx jump_around_label = NULL;
17504 HOST_WIDE_INT align = 1;
17505 unsigned HOST_WIDE_INT count = 0;
17506 HOST_WIDE_INT expected_size = -1;
17507 int size_needed = 0, epilogue_size_needed;
17508 int desired_align = 0;
17509 enum stringop_alg alg;
17510 rtx promoted_val = NULL;
17511 bool force_loopy_epilogue = false;
17514 if (CONST_INT_P (align_exp))
17515 align = INTVAL (align_exp);
17516 /* i386 can do misaligned access on reasonably increased cost. */
17517 if (CONST_INT_P (expected_align_exp)
17518 && INTVAL (expected_align_exp) > align)
17519 align = INTVAL (expected_align_exp);
17520 if (CONST_INT_P (count_exp))
17521 count = expected_size = INTVAL (count_exp);
17522 if (CONST_INT_P (expected_size_exp) && count == 0)
17523 expected_size = INTVAL (expected_size_exp);
17525 /* Make sure we don't need to care about overflow later on. */
17526 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17529 /* Step 0: Decide on preferred algorithm, desired alignment and
17530 size of chunks to be copied by main loop. */
17532 alg = decide_alg (count, expected_size, true, &dynamic_check);
17533 desired_align = decide_alignment (align, alg, expected_size);
17535 if (!TARGET_ALIGN_STRINGOPS)
17536 align = desired_align;
17538 if (alg == libcall)
17540 gcc_assert (alg != no_stringop);
17542 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
17543 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* SIZE_NEEDED = bytes stored per main-loop iteration for ALG.  */
17548 gcc_unreachable ();
17550 size_needed = GET_MODE_SIZE (Pmode);
17552 case unrolled_loop:
17553 size_needed = GET_MODE_SIZE (Pmode) * 4;
17555 case rep_prefix_8_byte:
17558 case rep_prefix_4_byte:
17561 case rep_prefix_1_byte:
17566 epilogue_size_needed = size_needed;
17568 /* Step 1: Prologue guard. */
17570 /* Alignment code needs count to be in register. */
17571 if (CONST_INT_P (count_exp) && desired_align > align)
17573 enum machine_mode mode = SImode;
17574 if (TARGET_64BIT && (count & ~0xffffffff))
17576 count_exp = force_reg (mode, count_exp);
17578 /* Do the cheap promotion to allow better CSE across the
17579 main loop and epilogue (ie one load of the big constant in the
17580 front of all code. */
17581 if (CONST_INT_P (val_exp))
17582 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
17583 desired_align, align);
17584 /* Ensure that alignment prologue won't copy past end of block. */
17585 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17587 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17588 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17589 Make sure it is power of 2. */
17590 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17592 /* To improve performance of small blocks, we jump around the VAL
17593 promoting mode. This mean that if the promoted VAL is not constant,
17594 we might not use it in the epilogue and have to use byte
17596 if (epilogue_size_needed > 2 && !promoted_val)
17597 force_loopy_epilogue = true;
17598 label = gen_label_rtx ();
17599 emit_cmp_and_jump_insns (count_exp,
17600 GEN_INT (epilogue_size_needed),
17601 LTU, 0, counter_mode (count_exp), 1, label);
17602 if (GET_CODE (count_exp) == CONST_INT)
17604 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
17605 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17607 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Runtime size check: big blocks go to the library memset.  */
17609 if (dynamic_check != -1)
17611 rtx hot_label = gen_label_rtx ();
17612 jump_around_label = gen_label_rtx ();
17613 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17614 LEU, 0, counter_mode (count_exp), 1, hot_label);
17615 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17616 set_storage_via_libcall (dst, count_exp, val_exp, false);
17617 emit_jump (jump_around_label);
17618 emit_label (hot_label);
17621 /* Step 2: Alignment prologue. */
17623 /* Do the expensive promotion once we branched off the small blocks. */
17625 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
17626 desired_align, align);
17627 gcc_assert (desired_align >= 1 && align >= 1);
17629 if (desired_align > align)
17631 /* Except for the first move in epilogue, we no longer know
17632 constant offset in aliasing info. It don't seems to worth
17633 the pain to maintain it for the first move, so throw away
17635 dst = change_address (dst, BLKmode, destreg);
17636 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
17639 if (label && size_needed == 1)
17641 emit_label (label);
17642 LABEL_NUSES (label) = 1;
17646 /* Step 3: Main loop. */
17652 gcc_unreachable ();
17654 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17655 count_exp, QImode, 1, expected_size);
17658 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17659 count_exp, Pmode, 1, expected_size);
17661 case unrolled_loop:
17662 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17663 count_exp, Pmode, 4, expected_size);
17665 case rep_prefix_8_byte:
17666 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17669 case rep_prefix_4_byte:
17670 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17673 case rep_prefix_1_byte:
17674 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17678 /* Adjust properly the offset of src and dest memory for aliasing. */
17679 if (CONST_INT_P (count_exp))
17680 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17681 (count / size_needed) * size_needed);
17683 dst = change_address (dst, BLKmode, destreg);
17685 /* Step 4: Epilogue to copy the remaining bytes. */
17689 /* When the main loop is done, COUNT_EXP might hold original count,
17690 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
17691 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
17692 bytes. Compensate if needed. */
17694 if (size_needed < desired_align - align)
17697 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17698 GEN_INT (size_needed - 1), count_exp, 1,
17700 size_needed = desired_align - align + 1;
17701 if (tmp != count_exp)
17702 emit_move_insn (count_exp, tmp);
17704 emit_label (label);
17705 LABEL_NUSES (label) = 1;
17707 if (count_exp != const0_rtx && epilogue_size_needed > 1)
/* Without a promoted value the epilogue must fall back to the
   byte-store loop.  */
17709 if (force_loopy_epilogue)
17710 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
17713 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
17716 if (jump_around_label)
17717 emit_label (jump_around_label);
17721 /* Expand the appropriate insns for doing strlen if not just doing
17724 out = result, initialized with the start address
17725 align_rtx = alignment of the address.
17726 scratch = scratch register, initialized with the startaddress when
17727 not aligned, otherwise undefined
17729 This is just the body. It needs the initializations mentioned above and
17730 some address computing at the end. These things are done in i386.md. */
/* Body of the unrolled SImode strlen expansion: first align OUT to a
   4-byte boundary with byte-wise null checks, then scan a word at a time
   using the (x - 0x01010101) & ~x & 0x80808080 zero-byte trick, and
   finally back OUT up to point at the terminating zero byte.
   NOTE(review): this chunk has interior lines elided (file line numbers
   skip); comments below describe only what the visible code shows.  */
17733 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
17737 rtx align_2_label = NULL_RTX;
17738 rtx align_3_label = NULL_RTX;
17739 rtx align_4_label = gen_label_rtx ();
17740 rtx end_0_label = gen_label_rtx ();
17742 rtx tmpreg = gen_reg_rtx (SImode);
17743 rtx scratch = gen_reg_rtx (SImode);
/* If the alignment is a compile-time constant, use it to skip some of
   the byte-wise alignment checks below.  */
17747 if (CONST_INT_P (align_rtx))
17748 align = INTVAL (align_rtx);
17750 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
17752 /* Is there a known alignment and is it less than 4? */
17755 rtx scratch1 = gen_reg_rtx (Pmode);
17756 emit_move_insn (scratch1, out);
17757 /* Is there a known alignment and is it not 2? */
17760 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
17761 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
17763 /* Leave just the 3 lower bits. */
17764 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
17765 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (address & 3): 0 -> already aligned, 2 -> two bytes to
   check, 3 -> one byte, otherwise fall through for up to three.  */
17767 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
17768 Pmode, 1, align_4_label);
17769 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
17770 Pmode, 1, align_2_label);
17771 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
17772 Pmode, 1, align_3_label);
17776 /* Since the alignment is 2, we have to check 2 or 0 bytes;
17777 check if is aligned to 4 - byte. */
17779 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
17780 NULL_RTX, 0, OPTAB_WIDEN);
17782 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
17783 Pmode, 1, align_4_label);
17786 mem = change_address (src, QImode, out);
17788 /* Now compare the bytes. */
17790 /* Compare the first n unaligned byte on a byte per byte basis. */
17791 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
17792 QImode, 1, end_0_label);
17794 /* Increment the address. */
17795 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17797 /* Not needed with an alignment of 2 */
17800 emit_label (align_2_label);
17802 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
17805 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17807 emit_label (align_3_label);
17810 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
17813 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17816 /* Generate loop to check 4 bytes at a time. It is not a good idea to
17817 align this loop. It gives only huge programs, but does not help to
17819 emit_label (align_4_label);
/* Main loop: load one aligned SImode word and post-increment OUT.  */
17821 mem = change_address (src, SImode, out);
17822 emit_move_insn (scratch, mem);
17823 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
17825 /* This formula yields a nonzero result iff one of the bytes is zero.
17826 This saves three branches inside loop and many cycles. */
17828 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)))

17829 emit_insn (gen_one_cmplsi2 (scratch, scratch));
17830 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
17831 emit_insn (gen_andsi3 (tmpreg, tmpreg,
17832 gen_int_mode (0x80808080, SImode)));
17833 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found; TMPREG has 0x80 set in the position of each
   zero byte.  The branch-free variant below uses conditional moves to
   narrow the search to the right halfword; presumably taken when cmove
   is available — TODO confirm against the elided condition.  */
17838 rtx reg = gen_reg_rtx (SImode);
17839 rtx reg2 = gen_reg_rtx (Pmode);
17840 emit_move_insn (reg, tmpreg);
17841 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
17843 /* If zero is not in the first two bytes, move two bytes forward. */
17844 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
17845 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17846 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17847 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
17848 gen_rtx_IF_THEN_ELSE (SImode, tmp,
17851 /* Emit lea manually to avoid clobbering of flags. */
17852 emit_insn (gen_rtx_SET (SImode, reg2,
17853 gen_rtx_PLUS (Pmode, out, const2_rtx)));
17855 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17856 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17857 emit_insn (gen_rtx_SET (VOIDmode, out,
17858 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Branching variant of the same halfword narrowing, used when the
   conditional-move path above is not taken.  */
17865 rtx end_2_label = gen_label_rtx ();
17866 /* Is zero in the first two bytes? */
17868 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
17869 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17870 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
17871 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17872 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
17874 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17875 JUMP_LABEL (tmp) = end_2_label;
17877 /* Not in the first two. Move two bytes forward. */
17878 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
17879 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
17881 emit_label (end_2_label);
17885 /* Avoid branch in fixing the byte. */
/* Doubling the low byte of TMPREG shifts its 0x80 marker into the carry
   flag; the subtract-with-borrow then backs OUT up by 3 or 4 bytes past
   the post-increment, landing on the zero byte itself.  */
17886 tmpreg = gen_lowpart (QImode, tmpreg);
17887 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
17888 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
17889 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
17891 emit_label (end_0_label);
17894 /* Expand strlen.  */
/* Expand the strlen builtin: OUT receives the length of the string at
   SRC, terminated by EOSCHAR, with known alignment ALIGN.  Chooses
   between the unrolled SImode scanner (ix86_expand_strlensi_unroll_1)
   and a repne-scasb sequence; the elided return paths presumably report
   success/failure to the caller — TODO confirm against full source.  */
17897 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
17899 rtx addr, scratch1, scratch2, scratch3, scratch4;
17901 /* The generic case of strlen expander is long. Avoid it's
17902 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
/* Both strategies require a zero terminator, -O2+, and not optimizing
   for size; the unrolled path also wants unknown or small alignment.  */
17904 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
17905 && !TARGET_INLINE_ALL_STRINGOPS
17906 && !optimize_insn_for_size_p ()
17907 && (!CONST_INT_P (align) || INTVAL (align) < 4))
17910 addr = force_reg (Pmode, XEXP (src, 0));
17911 scratch1 = gen_reg_rtx (Pmode);
17913 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
17914 && !optimize_insn_for_size_p ())
17916 /* Well it seems that some optimizer does not combine a call like
17917 foo(strlen(bar), strlen(bar));
17918 when the move and the subtraction is done here. It does calculate
17919 the length just once when these instructions are done inside of
17920 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
17921 often used and I use one fewer register for the lifetime of
17922 output_strlen_unroll() this is better. */
17924 emit_move_insn (out, addr);
17926 ix86_expand_strlensi_unroll_1 (out, src, align);
17928 /* strlensi_unroll_1 returns the address of the zero at the end of
17929 the string, like memchr(), so compute the length by subtracting
17930 the start address. */
17931 emit_insn ((*ix86_gen_sub3) (out, out, addr));
/* Fallback: repne scasb via the strlenqi_1 pattern.  scasb implicitly
   uses eax (the search byte), ecx (count) and edi (pointer), so bail
   out if the user has fixed any of them.  */
17937 /* Can't use this if the user has appropriated eax, ecx, or edi. */
17938 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
17941 scratch2 = gen_reg_rtx (Pmode);
17942 scratch3 = gen_reg_rtx (Pmode);
17943 scratch4 = force_reg (Pmode, constm1_rtx);
17945 emit_move_insn (scratch3, addr);
17946 eoschar = force_reg (QImode, eoschar);
17948 src = replace_equiv_address_nv (src, scratch3);
17950 /* If .md starts supporting :P, this can be done in .md. */
17951 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
17952 scratch4), UNSPEC_SCAS);
17953 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* scasb leaves the pointer one past the terminator and the (negated)
   count off by one; ~scratch1 - 1 yields the length.  */
17954 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
17955 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
17960 /* For given symbol (function) construct code to compute address of it's PLT
17961 entry in large x86-64 PIC model.  */
/* Returns a fresh pseudo holding pic_offset_table + @PLTOFF(symbol).
   Only valid in the large PIC code model (asserted below); gen_adddi3 is
   safe here because that model is 64-bit only.  The `return tmp;` line
   is elided from this chunk.  */
17963 construct_plt_address (rtx symbol)
17965 rtx tmp = gen_reg_rtx (Pmode);
17966 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
17968 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
17969 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
17971 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
17972 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx))

/* Expand a call (or sibcall) insn.  RETVAL is the value destination or
   NULL for a void call; FNADDR is a MEM wrapping the callee address;
   CALLARG1 is the argument-bytes rtx for the CALL pattern; CALLARG2 on
   64-bit carries the number of vector registers used by a varargs call
   (loaded into AL per the SysV ABI); POP is the callee-pop byte count
   or NULL; SIBCALL is nonzero for a tail call.  */
17977 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
17978 rtx callarg2 ATTRIBUTE_UNUSED,
17979 rtx pop, int sibcall)
17981 rtx use = NULL, call;
17983 if (pop == const0_rtx)
/* 64-bit ABIs never use callee-pop.  */
17985 gcc_assert (!TARGET_64BIT || !pop);
17987 if (TARGET_MACHO && !TARGET_64BIT)
17990 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
17991 fnaddr = machopic_indirect_call_target (fnaddr);
17996 /* Static functions and indirect calls don't need the pic register. */
17997 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
17998 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
17999 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
18000 use_reg (&use, pic_offset_table_rtx);
/* x86-64 varargs: AL tells the callee how many SSE registers hold
   arguments; record it as used so it survives until the call.  */
18003 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
18005 rtx al = gen_rtx_REG (QImode, AX_REG);
18006 emit_move_insn (al, callarg2);
18007 use_reg (&use, al);
/* Large PIC model: non-local symbols must go through a computed PLT
   address; otherwise force non-call_insn_operand addresses into a
   register-indirect MEM.  */
18010 if (ix86_cmodel == CM_LARGE_PIC
18011 && GET_CODE (fnaddr) == MEM
18012 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18013 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
18014 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
18015 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
18017 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18018 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls to a non-constant address must go through r11: the
   argument registers are live, and r11 is the only call-clobbered
   register not used for parameter passing.  */
18020 if (sibcall && TARGET_64BIT
18021 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
18024 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18025 fnaddr = gen_rtx_REG (Pmode, R11_REG);
18026 emit_move_insn (fnaddr, addr);
18027 fnaddr = gen_rtx_MEM (QImode, fnaddr);
18030 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
18032 call = gen_rtx_SET (VOIDmode, retval, call);
/* Callee-pop: bundle the stack-pointer adjustment into the call's
   PARALLEL so the stack bookkeeping stays attached to the call insn.  */
18035 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
18036 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
18037 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
18040 call = emit_call_insn (call);
18042 CALL_INSN_FUNCTION_USAGE (call) = use;
18046 /* Clear stack slot assignments remembered from previous functions.
18047 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and zero-initialize the per-function machine_function record
   (GGC_CNEW is garbage-collected, zeroing allocation).  The `return f;`
   line is elided from this chunk.  */
18050 static struct machine_function *
18051 ix86_init_machine_status (void)
18053 struct machine_function *f;
18055 f = GGC_CNEW (struct machine_function);
/* -1 means "not yet computed" for the fast prologue/epilogue heuristic.  */
18056 f->use_fast_prologue_epilogue_nregs = -1;
18057 f->tls_descriptor_call_expanded_p = 0;
18058 f->call_abi = DEFAULT_ABI;
18063 /* Return a MEM corresponding to a stack slot with mode MODE.
18064 Allocate a new slot if necessary.
18066 The RTL for a function can have several slots available: N is
18067 which slot to use.  */
18070 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
18072 struct stack_local_entry *s;
18074 gcc_assert (n < MAX_386_STACK_LOCALS);
18076 /* Virtual slot is valid only before vregs are instantiated. */
18077 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse an existing slot with the same (mode, n) key; copy_rtx so each
   caller gets a distinct MEM it may modify.  */
18079 for (s = ix86_stack_locals; s; s = s->next)
18080 if (s->mode == mode && s->n == n)
18081 return copy_rtx (s->rtl);
/* Not found: allocate a new entry and push it on the per-function list
   (ix86_stack_locals is cleared between functions).  */
18083 s = (struct stack_local_entry *)
18084 ggc_alloc (sizeof (struct stack_local_entry));
18087 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
18089 s->next = ix86_stack_locals;
18090 ix86_stack_locals = s;
18094 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
18096 static GTY(()) rtx ix86_tls_symbol;
/* Lazily create and cache the SYMBOL_REF for the TLS resolver.  GNU TLS
   uses the triple-underscore ___tls_get_addr; the other-branch condition
   is elided from this chunk.  The cached rtx is GC-rooted via GTY.  */
18098 ix86_tls_get_addr (void)
18101 if (!ix86_tls_symbol)
18103 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
18104 (TARGET_ANY_GNU_TLS
18106 ? "___tls_get_addr"
18107 : "__tls_get_addr");
18110 return ix86_tls_symbol;
18113 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
18115 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily create and cache the _TLS_MODULE_BASE_ SYMBOL_REF used by the
   TLS local-dynamic model, marking it with the global-dynamic TLS model
   flag so it is emitted with the proper relocation.  */
18117 ix86_tls_module_base (void)
18120 if (!ix86_tls_module_base_symbol)
18122 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
18123 "_TLS_MODULE_BASE_");
18124 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
18125 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
18128 return ix86_tls_module_base_symbol;
18131 /* Calculate the length of the memory address in the instruction
18132 encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
/* Returns the number of extra bytes (SIB + displacement) the x86
   addressing form of ADDR occupies.  Several return statements and the
   running `len` accumulator lines are elided from this chunk.  */
18135 memory_address_length (rtx addr)
18137 struct ix86_address parts;
18138 rtx base, index, disp;
/* Auto-modified addresses encode no explicit address bytes.  */
18142 if (GET_CODE (addr) == PRE_DEC
18143 || GET_CODE (addr) == POST_INC
18144 || GET_CODE (addr) == PRE_MODIFY
18145 || GET_CODE (addr) == POST_MODIFY)
18148 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the register identity checks below work.  */
18151 if (parts.base && GET_CODE (parts.base) == SUBREG)
18152 parts.base = SUBREG_REG (parts.base);
18153 if (parts.index && GET_CODE (parts.index) == SUBREG)
18154 parts.index = SUBREG_REG (parts.index);
18157 index = parts.index;
18162 - esp as the base always wants an index,
18163 - ebp as the base always wants a displacement. */
18165 /* Register Indirect. */
18166 if (base && !index && !disp)
18168 /* esp (for its index) and ebp (for its displacement) need
18169 the two-byte modrm form. */
18170 if (addr == stack_pointer_rtx
18171 || addr == arg_pointer_rtx
18172 || addr == frame_pointer_rtx
18173 || addr == hard_frame_pointer_rtx)
18177 /* Direct Addressing. */
18178 else if (disp && !base && !index)
18183 /* Find the length of the displacement constant. */
/* satisfies_constraint_K is an 8-bit signed immediate: one disp byte
   instead of four.  */
18186 if (base && satisfies_constraint_K (disp))
18191 /* ebp always wants a displacement. */
18192 else if (base == hard_frame_pointer_rtx)
18195 /* An index requires the two-byte modrm form.... */
18197 /* ...like esp, which always wants an index. */
18198 || base == stack_pointer_rtx
18199 || base == arg_pointer_rtx
18200 || base == frame_pointer_rtx)
18207 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
18208 is set, expect that insn have 8bit immediate alternative.  */
/* Scans INSN's operands for a constant; returns its encoded byte count
   based on the insn's mode attribute (returns elided in this chunk).  */
18210 ix86_attr_length_immediate_default (rtx insn, int shortform)
18214 extract_insn_cached (insn);
18215 for (i = recog_data.n_operands - 1; i >= 0; --i)
18216 if (CONSTANT_P (recog_data.operand[i]))
/* With a short-form alternative, an 8-bit signed immediate (constraint
   K) takes a single byte.  */
18219 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
18223 switch (get_attr_mode (insn))
18234 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
18239 fatal_insn ("unknown insn mode", insn);
18245 /* Compute default value for "length_address" attribute.  */
/* For an LEA the address is the SET_SRC of the pattern; otherwise
   measure the first MEM operand found.  Non-memory insns contribute
   zero address bytes (final return elided in this chunk).  */
18247 ix86_attr_length_address_default (rtx insn)
18251 if (get_attr_type (insn) == TYPE_LEA)
18253 rtx set = PATTERN (insn);
/* Some LEA patterns are PARALLELs (e.g. with a clobber); the SET is
   the first element.  */
18255 if (GET_CODE (set) == PARALLEL)
18256 set = XVECEXP (set, 0, 0);
18258 gcc_assert (GET_CODE (set) == SET);
18260 return memory_address_length (SET_SRC (set));
18263 extract_insn_cached (insn);
18264 for (i = recog_data.n_operands - 1; i >= 0; --i)
18265 if (MEM_P (recog_data.operand[i]))
18267 return memory_address_length (XEXP (recog_data.operand[i], 0));
18273 /* Compute default value for "length_vex" attribute.  It includes
18274 2 or 3 byte VEX prefix and 1 opcode byte.  */
/* Returns 3 or 4: the VEX prefix (2 bytes when the insn needs none of
   REX.W/REX.X/REX.B and uses a 0f-escaped opcode, else 3 bytes) plus
   one opcode byte.  The literal return lines are elided in this chunk.  */
18277 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
18282 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
18283 byte VEX prefix. */
18284 if (!has_0f_opcode || has_vex_w)
18287 /* We can always use 2 byte VEX prefix in 32bit. */
18291 extract_insn_cached (insn);
18293 for (i = recog_data.n_operands - 1; i >= 0; --i)
18294 if (REG_P (recog_data.operand[i]))
18296 /* REX.W bit uses 3 byte VEX prefix. */
18297 if (GET_MODE (recog_data.operand[i]) == DImode)
18302 /* REX.X or REX.B bits use 3 byte VEX prefix. */
18303 if (MEM_P (recog_data.operand[i])
18304 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
18311 /* Return the maximum number of instructions a cpu can issue.  */
/* TARGET_SCHED_ISSUE_RATE hook: per-CPU issue width for the scheduler.
   The switch is on ix86_tune; the numeric `return` lines between the
   case labels are elided in this chunk.  */
18314 ix86_issue_rate (void)
18318 case PROCESSOR_PENTIUM:
18322 case PROCESSOR_PENTIUMPRO:
18323 case PROCESSOR_PENTIUM4:
18324 case PROCESSOR_ATHLON:
18326 case PROCESSOR_AMDFAM10:
18327 case PROCESSOR_NOCONA:
18328 case PROCESSOR_GENERIC32:
18329 case PROCESSOR_GENERIC64:
18332 case PROCESSOR_CORE2:
18340 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
18341 by DEP_INSN and nothing set by DEP_INSN. */
18344 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
18348 /* Simplify the test for uninteresting insns. */
18349 if (insn_type != TYPE_SETCC
18350 && insn_type != TYPE_ICMOV
18351 && insn_type != TYPE_FCMOV
18352 && insn_type != TYPE_IBR)
18355 if ((set = single_set (dep_insn)) != 0)
18357 set = SET_DEST (set);
18360 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
18361 && XVECLEN (PATTERN (dep_insn), 0) == 2
18362 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
18363 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
18365 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
18366 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
18371 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
18374 /* This test is true if the dependent insn reads the flags but
18375 not any other potentially set register. */
18376 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
18379 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
18385 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
18386 address with operands set by DEP_INSN.  */
/* Extracts the address INSN uses -- the SET_SRC for an LEA, otherwise
   the address of its first MEM operand -- and asks whether DEP_INSN
   modifies anything in it (an address-generation interlock).  The
   "no memory operand" early return is elided in this chunk.  */
18389 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
18393 if (insn_type == TYPE_LEA
18396 addr = PATTERN (insn);
/* LEA patterns may be wrapped in a PARALLEL with a clobber.  */
18398 if (GET_CODE (addr) == PARALLEL)
18399 addr = XVECEXP (addr, 0, 0);
18401 gcc_assert (GET_CODE (addr) == SET);
18403 addr = SET_SRC (addr);
18408 extract_insn_cached (insn);
18409 for (i = recog_data.n_operands - 1; i >= 0; --i)
18410 if (MEM_P (recog_data.operand[i]))
18412 addr = XEXP (recog_data.operand[i], 0);
18419 return modified_in_p (addr, dep_insn);
/* TARGET_SCHED_ADJUST_COST hook: tweak the scheduler's latency COST for
   the dependence LINK between DEP_INSN (producer) and INSN (consumer),
   per-CPU.  Many `return cost;` / cost-adjustment lines are elided in
   this chunk; comments describe only the visible conditions.  */
18423 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
18425 enum attr_type insn_type, dep_insn_type;
18426 enum attr_memory memory;
18428 int dep_insn_code_number;
18430 /* Anti and output dependencies have zero cost on all CPUs. */
18431 if (REG_NOTE_KIND (link) != 0)
18434 dep_insn_code_number = recog_memoized (dep_insn);
18436 /* If we can't recognize the insns, we can't really do anything. */
18437 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
18440 insn_type = get_attr_type (insn);
18441 dep_insn_type = get_attr_type (dep_insn);
18445 case PROCESSOR_PENTIUM:
18446 /* Address Generation Interlock adds a cycle of latency. */
18447 if (ix86_agi_dependent (insn, dep_insn, insn_type))
18450 /* ??? Compares pair with jump/setcc. */
18451 if (ix86_flags_dependent (insn, dep_insn, insn_type))
18454 /* Floating point stores require value to be ready one cycle earlier. */
18455 if (insn_type == TYPE_FMOV
18456 && get_attr_memory (insn) == MEMORY_STORE
18457 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18461 case PROCESSOR_PENTIUMPRO:
18462 memory = get_attr_memory (insn);
18464 /* INT->FP conversion is expensive. */
18465 if (get_attr_fp_int_src (dep_insn))
18468 /* There is one cycle extra latency between an FP op and a store. */
/* Matches FP-op -> store-of-that-result: DEP_INSN's destination feeds
   INSN's source and INSN stores to memory.  */
18469 if (insn_type == TYPE_FMOV
18470 && (set = single_set (dep_insn)) != NULL_RTX
18471 && (set2 = single_set (insn)) != NULL_RTX
18472 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
18473 && MEM_P (SET_DEST (set2)))
18476 /* Show ability of reorder buffer to hide latency of load by executing
18477 in parallel with previous instruction in case
18478 previous instruction is not needed to compute the address. */
18479 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18480 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18482 /* Claim moves to take one cycle, as core can issue one load
18483 at time and the next load can start cycle later. */
18484 if (dep_insn_type == TYPE_IMOV
18485 || dep_insn_type == TYPE_FMOV)
/* Case label elided here -- presumably K6-class CPUs; the same
   push/pop, INT->FP and load-hiding heuristics follow.  */
18493 memory = get_attr_memory (insn);
18495 /* The esp dependency is resolved before the instruction is really
18497 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
18498 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
18501 /* INT->FP conversion is expensive. */
18502 if (get_attr_fp_int_src (dep_insn))
18505 /* Show ability of reorder buffer to hide latency of load by executing
18506 in parallel with previous instruction in case
18507 previous instruction is not needed to compute the address. */
18508 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18509 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18511 /* Claim moves to take one cycle, as core can issue one load
18512 at time and the next load can start cycle later. */
18513 if (dep_insn_type == TYPE_IMOV
18514 || dep_insn_type == TYPE_FMOV)
18523 case PROCESSOR_ATHLON:
18525 case PROCESSOR_AMDFAM10:
18526 case PROCESSOR_GENERIC32:
18527 case PROCESSOR_GENERIC64:
18528 memory = get_attr_memory (insn);
18530 /* Show ability of reorder buffer to hide latency of load by executing
18531 in parallel with previous instruction in case
18532 previous instruction is not needed to compute the address. */
18533 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18534 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18536 enum attr_unit unit = get_attr_unit (insn);
18539 /* Because of the difference between the length of integer and
18540 floating unit pipeline preparation stages, the memory operands
18541 for floating point are cheaper.
18543 ??? For Athlon it the difference is most probably 2. */
18544 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
18547 loadcost = TARGET_ATHLON ? 2 : 0;
/* Subtract the hidden load latency, clamping at zero.  */
18549 if (cost >= loadcost)
18562 /* How many alternative schedules to try.  This should be as wide as the
18563 scheduling freedom in the DFA, but no wider.  Making this value too
18564 large results extra work for the scheduler.  */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD hook; switches on
   ix86_tune, with the numeric returns elided in this chunk.  */
18567 ia32_multipass_dfa_lookahead (void)
18571 case PROCESSOR_PENTIUM:
18574 case PROCESSOR_PENTIUMPRO:
18584 /* Compute the alignment given to a constant that is being placed in memory.
18585 EXP is the constant and ALIGN is the alignment that the object would
18587 The value of this function is used instead of that alignment to align
/* CONSTANT_ALIGNMENT target macro body: raise alignment for doubles
   (64), 128-bit-aligned modes (128), and longish string constants
   (word alignment) when not optimizing for size.  The numeric returns
   for the first two cases are elided in this chunk.  */
18591 ix86_constant_alignment (tree exp, int align)
18593 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
18594 || TREE_CODE (exp) == INTEGER_CST)
18596 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
18598 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
18601 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
18602 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
18603 return BITS_PER_WORD;
18608 /* Compute the alignment for a static variable.
18609 TYPE is the data type, and ALIGN is the alignment that
18610 the object would ordinarily have.  The value of this function is used
18611 instead of that alignment to align the object.  */
18614 ix86_data_alignment (tree type, int align)
/* Cap the boost at the word size when optimizing for size, else at 256
   bits (bounded by what the object format supports).  */
18616 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* Large aggregates get max_align; the size test uses both halves of
   the INTEGER_CST so huge sizes (high word nonzero) also qualify.  */
18618 if (AGGREGATE_TYPE_P (type)
18619 && TYPE_SIZE (type)
18620 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18621 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
18622 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
18623 && align < max_align)
18626 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18627 to 16byte boundary. */
18630 if (AGGREGATE_TYPE_P (type)
18631 && TYPE_SIZE (type)
18632 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18633 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
18634 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-class boosts; the numeric returns between conditions are
   elided in this chunk.  */
18638 if (TREE_CODE (type) == ARRAY_TYPE)
18640 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18642 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18645 else if (TREE_CODE (type) == COMPLEX_TYPE)
18648 if (TYPE_MODE (type) == DCmode && align < 64)
18650 if ((TYPE_MODE (type) == XCmode
18651 || TYPE_MODE (type) == TCmode) && align < 128)
18654 else if ((TREE_CODE (type) == RECORD_TYPE
18655 || TREE_CODE (type) == UNION_TYPE
18656 || TREE_CODE (type) == QUAL_UNION_TYPE)
18657 && TYPE_FIELDS (type))
18659 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18661 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18664 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
18665 || TREE_CODE (type) == INTEGER_TYPE)
18667 if (TYPE_MODE (type) == DFmode && align < 64)
18669 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18676 /* Compute the alignment for a local variable or a stack slot.  TYPE is
18677 the data type, MODE is the widest mode available and ALIGN is the
18678 alignment that the object would ordinarily have.  The value of this
18679 macro is used instead of that alignment to align the object.  */
18682 ix86_local_alignment (tree type, enum machine_mode mode,
18683 unsigned int align)
18685 /* If TYPE is NULL, we are allocating a stack slot for caller-save
18686 register in MODE.  We will return the largest alignment of XF
/* Untyped caller-save slot: give XFmode at least DFmode alignment so a
   double saved there is naturally aligned.  */
18690 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
18691 align = GET_MODE_ALIGNMENT (DFmode);
18695 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18696 to 16byte boundary. */
18699 if (AGGREGATE_TYPE_P (type)
18700 && TYPE_SIZE (type)
18701 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18702 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
18703 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-class boosts, mirroring ix86_data_alignment; the numeric
   returns between conditions are elided in this chunk.  */
18706 if (TREE_CODE (type) == ARRAY_TYPE)
18708 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18710 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18713 else if (TREE_CODE (type) == COMPLEX_TYPE)
18715 if (TYPE_MODE (type) == DCmode && align < 64)
18717 if ((TYPE_MODE (type) == XCmode
18718 || TYPE_MODE (type) == TCmode) && align < 128)
18721 else if ((TREE_CODE (type) == RECORD_TYPE
18722 || TREE_CODE (type) == UNION_TYPE
18723 || TREE_CODE (type) == QUAL_UNION_TYPE)
18724 && TYPE_FIELDS (type))
18726 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18728 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18731 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
18732 || TREE_CODE (type) == INTEGER_TYPE)
18735 if (TYPE_MODE (type) == DFmode && align < 64)
18737 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18743 /* Emit RTL insns to initialize the variable parts of a trampoline.
18744 FNADDR is an RTX for the address of the function's pure code.
18745 CXT is an RTX for the static chain value for the function.  */
/* 32-bit layout: "movl $cxt, %ecx; jmp fnaddr" (opcodes 0xb9, 0xe9 with
   a rel32 computed from the end of the jmp).  64-bit layout: load fnaddr
   into r11 and cxt into r10 with mov/movabs, then "jmp *%r11"
   (0x41 0xbb / 0x49 0xbb / 0x49 0xba / 0x49 0xff 0xe3 -- stored
   little-endian as HImode words below).  The TARGET_64BIT branch point
   and `offset += ...` updates are elided in this chunk.  */
18747 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
18751 /* Compute offset from the end of the jmp to the target function. */
18752 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
18753 plus_constant (tramp, 10),
18754 NULL_RTX, 1, OPTAB_DIRECT);
18755 emit_move_insn (gen_rtx_MEM (QImode, tramp),
18756 gen_int_mode (0xb9, QImode));
18757 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
18758 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
18759 gen_int_mode (0xe9, QImode));
18760 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
18765 /* Try to load address using shorter movl instead of movabs.
18766 We may want to support movq for kernel mode, but kernel does not use
18767 trampolines at the moment. */
18768 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
18770 fnaddr = copy_to_mode_reg (DImode, fnaddr);
18771 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18772 gen_int_mode (0xbb41, HImode));
18773 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
18774 gen_lowpart (SImode, fnaddr));
/* Else branch (elided): movabs $fnaddr, %r11 -- full 64-bit form.  */
18779 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18780 gen_int_mode (0xbb49, HImode));
18781 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
18785 /* Load static chain using movabs to r10. */
18786 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18787 gen_int_mode (0xba49, HImode));
18788 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
18791 /* Jump to the r11 */
18792 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18793 gen_int_mode (0xff49, HImode));
18794 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
18795 gen_int_mode (0xe3, QImode));
18797 gcc_assert (offset <= TRAMPOLINE_SIZE);
/* Some targets need the stack made executable after writing code into
   the trampoline.  */
18800 #ifdef ENABLE_EXECUTE_STACK
18801 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
18802 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
18806 /* Codes for all the SSE/MMX builtins. */
18809 IX86_BUILTIN_ADDPS,
18810 IX86_BUILTIN_ADDSS,
18811 IX86_BUILTIN_DIVPS,
18812 IX86_BUILTIN_DIVSS,
18813 IX86_BUILTIN_MULPS,
18814 IX86_BUILTIN_MULSS,
18815 IX86_BUILTIN_SUBPS,
18816 IX86_BUILTIN_SUBSS,
18818 IX86_BUILTIN_CMPEQPS,
18819 IX86_BUILTIN_CMPLTPS,
18820 IX86_BUILTIN_CMPLEPS,
18821 IX86_BUILTIN_CMPGTPS,
18822 IX86_BUILTIN_CMPGEPS,
18823 IX86_BUILTIN_CMPNEQPS,
18824 IX86_BUILTIN_CMPNLTPS,
18825 IX86_BUILTIN_CMPNLEPS,
18826 IX86_BUILTIN_CMPNGTPS,
18827 IX86_BUILTIN_CMPNGEPS,
18828 IX86_BUILTIN_CMPORDPS,
18829 IX86_BUILTIN_CMPUNORDPS,
18830 IX86_BUILTIN_CMPEQSS,
18831 IX86_BUILTIN_CMPLTSS,
18832 IX86_BUILTIN_CMPLESS,
18833 IX86_BUILTIN_CMPNEQSS,
18834 IX86_BUILTIN_CMPNLTSS,
18835 IX86_BUILTIN_CMPNLESS,
18836 IX86_BUILTIN_CMPNGTSS,
18837 IX86_BUILTIN_CMPNGESS,
18838 IX86_BUILTIN_CMPORDSS,
18839 IX86_BUILTIN_CMPUNORDSS,
18841 IX86_BUILTIN_COMIEQSS,
18842 IX86_BUILTIN_COMILTSS,
18843 IX86_BUILTIN_COMILESS,
18844 IX86_BUILTIN_COMIGTSS,
18845 IX86_BUILTIN_COMIGESS,
18846 IX86_BUILTIN_COMINEQSS,
18847 IX86_BUILTIN_UCOMIEQSS,
18848 IX86_BUILTIN_UCOMILTSS,
18849 IX86_BUILTIN_UCOMILESS,
18850 IX86_BUILTIN_UCOMIGTSS,
18851 IX86_BUILTIN_UCOMIGESS,
18852 IX86_BUILTIN_UCOMINEQSS,
18854 IX86_BUILTIN_CVTPI2PS,
18855 IX86_BUILTIN_CVTPS2PI,
18856 IX86_BUILTIN_CVTSI2SS,
18857 IX86_BUILTIN_CVTSI642SS,
18858 IX86_BUILTIN_CVTSS2SI,
18859 IX86_BUILTIN_CVTSS2SI64,
18860 IX86_BUILTIN_CVTTPS2PI,
18861 IX86_BUILTIN_CVTTSS2SI,
18862 IX86_BUILTIN_CVTTSS2SI64,
18864 IX86_BUILTIN_MAXPS,
18865 IX86_BUILTIN_MAXSS,
18866 IX86_BUILTIN_MINPS,
18867 IX86_BUILTIN_MINSS,
18869 IX86_BUILTIN_LOADUPS,
18870 IX86_BUILTIN_STOREUPS,
18871 IX86_BUILTIN_MOVSS,
18873 IX86_BUILTIN_MOVHLPS,
18874 IX86_BUILTIN_MOVLHPS,
18875 IX86_BUILTIN_LOADHPS,
18876 IX86_BUILTIN_LOADLPS,
18877 IX86_BUILTIN_STOREHPS,
18878 IX86_BUILTIN_STORELPS,
18880 IX86_BUILTIN_MASKMOVQ,
18881 IX86_BUILTIN_MOVMSKPS,
18882 IX86_BUILTIN_PMOVMSKB,
18884 IX86_BUILTIN_MOVNTPS,
18885 IX86_BUILTIN_MOVNTQ,
18887 IX86_BUILTIN_LOADDQU,
18888 IX86_BUILTIN_STOREDQU,
18890 IX86_BUILTIN_PACKSSWB,
18891 IX86_BUILTIN_PACKSSDW,
18892 IX86_BUILTIN_PACKUSWB,
18894 IX86_BUILTIN_PADDB,
18895 IX86_BUILTIN_PADDW,
18896 IX86_BUILTIN_PADDD,
18897 IX86_BUILTIN_PADDQ,
18898 IX86_BUILTIN_PADDSB,
18899 IX86_BUILTIN_PADDSW,
18900 IX86_BUILTIN_PADDUSB,
18901 IX86_BUILTIN_PADDUSW,
18902 IX86_BUILTIN_PSUBB,
18903 IX86_BUILTIN_PSUBW,
18904 IX86_BUILTIN_PSUBD,
18905 IX86_BUILTIN_PSUBQ,
18906 IX86_BUILTIN_PSUBSB,
18907 IX86_BUILTIN_PSUBSW,
18908 IX86_BUILTIN_PSUBUSB,
18909 IX86_BUILTIN_PSUBUSW,
18912 IX86_BUILTIN_PANDN,
18916 IX86_BUILTIN_PAVGB,
18917 IX86_BUILTIN_PAVGW,
18919 IX86_BUILTIN_PCMPEQB,
18920 IX86_BUILTIN_PCMPEQW,
18921 IX86_BUILTIN_PCMPEQD,
18922 IX86_BUILTIN_PCMPGTB,
18923 IX86_BUILTIN_PCMPGTW,
18924 IX86_BUILTIN_PCMPGTD,
18926 IX86_BUILTIN_PMADDWD,
18928 IX86_BUILTIN_PMAXSW,
18929 IX86_BUILTIN_PMAXUB,
18930 IX86_BUILTIN_PMINSW,
18931 IX86_BUILTIN_PMINUB,
18933 IX86_BUILTIN_PMULHUW,
18934 IX86_BUILTIN_PMULHW,
18935 IX86_BUILTIN_PMULLW,
18937 IX86_BUILTIN_PSADBW,
18938 IX86_BUILTIN_PSHUFW,
18940 IX86_BUILTIN_PSLLW,
18941 IX86_BUILTIN_PSLLD,
18942 IX86_BUILTIN_PSLLQ,
18943 IX86_BUILTIN_PSRAW,
18944 IX86_BUILTIN_PSRAD,
18945 IX86_BUILTIN_PSRLW,
18946 IX86_BUILTIN_PSRLD,
18947 IX86_BUILTIN_PSRLQ,
18948 IX86_BUILTIN_PSLLWI,
18949 IX86_BUILTIN_PSLLDI,
18950 IX86_BUILTIN_PSLLQI,
18951 IX86_BUILTIN_PSRAWI,
18952 IX86_BUILTIN_PSRADI,
18953 IX86_BUILTIN_PSRLWI,
18954 IX86_BUILTIN_PSRLDI,
18955 IX86_BUILTIN_PSRLQI,
18957 IX86_BUILTIN_PUNPCKHBW,
18958 IX86_BUILTIN_PUNPCKHWD,
18959 IX86_BUILTIN_PUNPCKHDQ,
18960 IX86_BUILTIN_PUNPCKLBW,
18961 IX86_BUILTIN_PUNPCKLWD,
18962 IX86_BUILTIN_PUNPCKLDQ,
18964 IX86_BUILTIN_SHUFPS,
18966 IX86_BUILTIN_RCPPS,
18967 IX86_BUILTIN_RCPSS,
18968 IX86_BUILTIN_RSQRTPS,
18969 IX86_BUILTIN_RSQRTPS_NR,
18970 IX86_BUILTIN_RSQRTSS,
18971 IX86_BUILTIN_RSQRTF,
18972 IX86_BUILTIN_SQRTPS,
18973 IX86_BUILTIN_SQRTPS_NR,
18974 IX86_BUILTIN_SQRTSS,
18976 IX86_BUILTIN_UNPCKHPS,
18977 IX86_BUILTIN_UNPCKLPS,
18979 IX86_BUILTIN_ANDPS,
18980 IX86_BUILTIN_ANDNPS,
18982 IX86_BUILTIN_XORPS,
18985 IX86_BUILTIN_LDMXCSR,
18986 IX86_BUILTIN_STMXCSR,
18987 IX86_BUILTIN_SFENCE,
18989 /* 3DNow! Original */
18990 IX86_BUILTIN_FEMMS,
18991 IX86_BUILTIN_PAVGUSB,
18992 IX86_BUILTIN_PF2ID,
18993 IX86_BUILTIN_PFACC,
18994 IX86_BUILTIN_PFADD,
18995 IX86_BUILTIN_PFCMPEQ,
18996 IX86_BUILTIN_PFCMPGE,
18997 IX86_BUILTIN_PFCMPGT,
18998 IX86_BUILTIN_PFMAX,
18999 IX86_BUILTIN_PFMIN,
19000 IX86_BUILTIN_PFMUL,
19001 IX86_BUILTIN_PFRCP,
19002 IX86_BUILTIN_PFRCPIT1,
19003 IX86_BUILTIN_PFRCPIT2,
19004 IX86_BUILTIN_PFRSQIT1,
19005 IX86_BUILTIN_PFRSQRT,
19006 IX86_BUILTIN_PFSUB,
19007 IX86_BUILTIN_PFSUBR,
19008 IX86_BUILTIN_PI2FD,
19009 IX86_BUILTIN_PMULHRW,
19011 /* 3DNow! Athlon Extensions */
19012 IX86_BUILTIN_PF2IW,
19013 IX86_BUILTIN_PFNACC,
19014 IX86_BUILTIN_PFPNACC,
19015 IX86_BUILTIN_PI2FW,
19016 IX86_BUILTIN_PSWAPDSI,
19017 IX86_BUILTIN_PSWAPDSF,
19020 IX86_BUILTIN_ADDPD,
19021 IX86_BUILTIN_ADDSD,
19022 IX86_BUILTIN_DIVPD,
19023 IX86_BUILTIN_DIVSD,
19024 IX86_BUILTIN_MULPD,
19025 IX86_BUILTIN_MULSD,
19026 IX86_BUILTIN_SUBPD,
19027 IX86_BUILTIN_SUBSD,
19029 IX86_BUILTIN_CMPEQPD,
19030 IX86_BUILTIN_CMPLTPD,
19031 IX86_BUILTIN_CMPLEPD,
19032 IX86_BUILTIN_CMPGTPD,
19033 IX86_BUILTIN_CMPGEPD,
19034 IX86_BUILTIN_CMPNEQPD,
19035 IX86_BUILTIN_CMPNLTPD,
19036 IX86_BUILTIN_CMPNLEPD,
19037 IX86_BUILTIN_CMPNGTPD,
19038 IX86_BUILTIN_CMPNGEPD,
19039 IX86_BUILTIN_CMPORDPD,
19040 IX86_BUILTIN_CMPUNORDPD,
19041 IX86_BUILTIN_CMPEQSD,
19042 IX86_BUILTIN_CMPLTSD,
19043 IX86_BUILTIN_CMPLESD,
19044 IX86_BUILTIN_CMPNEQSD,
19045 IX86_BUILTIN_CMPNLTSD,
19046 IX86_BUILTIN_CMPNLESD,
19047 IX86_BUILTIN_CMPORDSD,
19048 IX86_BUILTIN_CMPUNORDSD,
19050 IX86_BUILTIN_COMIEQSD,
19051 IX86_BUILTIN_COMILTSD,
19052 IX86_BUILTIN_COMILESD,
19053 IX86_BUILTIN_COMIGTSD,
19054 IX86_BUILTIN_COMIGESD,
19055 IX86_BUILTIN_COMINEQSD,
19056 IX86_BUILTIN_UCOMIEQSD,
19057 IX86_BUILTIN_UCOMILTSD,
19058 IX86_BUILTIN_UCOMILESD,
19059 IX86_BUILTIN_UCOMIGTSD,
19060 IX86_BUILTIN_UCOMIGESD,
19061 IX86_BUILTIN_UCOMINEQSD,
19063 IX86_BUILTIN_MAXPD,
19064 IX86_BUILTIN_MAXSD,
19065 IX86_BUILTIN_MINPD,
19066 IX86_BUILTIN_MINSD,
19068 IX86_BUILTIN_ANDPD,
19069 IX86_BUILTIN_ANDNPD,
19071 IX86_BUILTIN_XORPD,
19073 IX86_BUILTIN_SQRTPD,
19074 IX86_BUILTIN_SQRTSD,
19076 IX86_BUILTIN_UNPCKHPD,
19077 IX86_BUILTIN_UNPCKLPD,
19079 IX86_BUILTIN_SHUFPD,
19081 IX86_BUILTIN_LOADUPD,
19082 IX86_BUILTIN_STOREUPD,
19083 IX86_BUILTIN_MOVSD,
19085 IX86_BUILTIN_LOADHPD,
19086 IX86_BUILTIN_LOADLPD,
19088 IX86_BUILTIN_CVTDQ2PD,
19089 IX86_BUILTIN_CVTDQ2PS,
19091 IX86_BUILTIN_CVTPD2DQ,
19092 IX86_BUILTIN_CVTPD2PI,
19093 IX86_BUILTIN_CVTPD2PS,
19094 IX86_BUILTIN_CVTTPD2DQ,
19095 IX86_BUILTIN_CVTTPD2PI,
19097 IX86_BUILTIN_CVTPI2PD,
19098 IX86_BUILTIN_CVTSI2SD,
19099 IX86_BUILTIN_CVTSI642SD,
19101 IX86_BUILTIN_CVTSD2SI,
19102 IX86_BUILTIN_CVTSD2SI64,
19103 IX86_BUILTIN_CVTSD2SS,
19104 IX86_BUILTIN_CVTSS2SD,
19105 IX86_BUILTIN_CVTTSD2SI,
19106 IX86_BUILTIN_CVTTSD2SI64,
19108 IX86_BUILTIN_CVTPS2DQ,
19109 IX86_BUILTIN_CVTPS2PD,
19110 IX86_BUILTIN_CVTTPS2DQ,
19112 IX86_BUILTIN_MOVNTI,
19113 IX86_BUILTIN_MOVNTPD,
19114 IX86_BUILTIN_MOVNTDQ,
19116 IX86_BUILTIN_MOVQ128,
19119 IX86_BUILTIN_MASKMOVDQU,
19120 IX86_BUILTIN_MOVMSKPD,
19121 IX86_BUILTIN_PMOVMSKB128,
19123 IX86_BUILTIN_PACKSSWB128,
19124 IX86_BUILTIN_PACKSSDW128,
19125 IX86_BUILTIN_PACKUSWB128,
19127 IX86_BUILTIN_PADDB128,
19128 IX86_BUILTIN_PADDW128,
19129 IX86_BUILTIN_PADDD128,
19130 IX86_BUILTIN_PADDQ128,
19131 IX86_BUILTIN_PADDSB128,
19132 IX86_BUILTIN_PADDSW128,
19133 IX86_BUILTIN_PADDUSB128,
19134 IX86_BUILTIN_PADDUSW128,
19135 IX86_BUILTIN_PSUBB128,
19136 IX86_BUILTIN_PSUBW128,
19137 IX86_BUILTIN_PSUBD128,
19138 IX86_BUILTIN_PSUBQ128,
19139 IX86_BUILTIN_PSUBSB128,
19140 IX86_BUILTIN_PSUBSW128,
19141 IX86_BUILTIN_PSUBUSB128,
19142 IX86_BUILTIN_PSUBUSW128,
19144 IX86_BUILTIN_PAND128,
19145 IX86_BUILTIN_PANDN128,
19146 IX86_BUILTIN_POR128,
19147 IX86_BUILTIN_PXOR128,
19149 IX86_BUILTIN_PAVGB128,
19150 IX86_BUILTIN_PAVGW128,
19152 IX86_BUILTIN_PCMPEQB128,
19153 IX86_BUILTIN_PCMPEQW128,
19154 IX86_BUILTIN_PCMPEQD128,
19155 IX86_BUILTIN_PCMPGTB128,
19156 IX86_BUILTIN_PCMPGTW128,
19157 IX86_BUILTIN_PCMPGTD128,
19159 IX86_BUILTIN_PMADDWD128,
19161 IX86_BUILTIN_PMAXSW128,
19162 IX86_BUILTIN_PMAXUB128,
19163 IX86_BUILTIN_PMINSW128,
19164 IX86_BUILTIN_PMINUB128,
19166 IX86_BUILTIN_PMULUDQ,
19167 IX86_BUILTIN_PMULUDQ128,
19168 IX86_BUILTIN_PMULHUW128,
19169 IX86_BUILTIN_PMULHW128,
19170 IX86_BUILTIN_PMULLW128,
19172 IX86_BUILTIN_PSADBW128,
19173 IX86_BUILTIN_PSHUFHW,
19174 IX86_BUILTIN_PSHUFLW,
19175 IX86_BUILTIN_PSHUFD,
19177 IX86_BUILTIN_PSLLDQI128,
19178 IX86_BUILTIN_PSLLWI128,
19179 IX86_BUILTIN_PSLLDI128,
19180 IX86_BUILTIN_PSLLQI128,
19181 IX86_BUILTIN_PSRAWI128,
19182 IX86_BUILTIN_PSRADI128,
19183 IX86_BUILTIN_PSRLDQI128,
19184 IX86_BUILTIN_PSRLWI128,
19185 IX86_BUILTIN_PSRLDI128,
19186 IX86_BUILTIN_PSRLQI128,
19188 IX86_BUILTIN_PSLLDQ128,
19189 IX86_BUILTIN_PSLLW128,
19190 IX86_BUILTIN_PSLLD128,
19191 IX86_BUILTIN_PSLLQ128,
19192 IX86_BUILTIN_PSRAW128,
19193 IX86_BUILTIN_PSRAD128,
19194 IX86_BUILTIN_PSRLW128,
19195 IX86_BUILTIN_PSRLD128,
19196 IX86_BUILTIN_PSRLQ128,
19198 IX86_BUILTIN_PUNPCKHBW128,
19199 IX86_BUILTIN_PUNPCKHWD128,
19200 IX86_BUILTIN_PUNPCKHDQ128,
19201 IX86_BUILTIN_PUNPCKHQDQ128,
19202 IX86_BUILTIN_PUNPCKLBW128,
19203 IX86_BUILTIN_PUNPCKLWD128,
19204 IX86_BUILTIN_PUNPCKLDQ128,
19205 IX86_BUILTIN_PUNPCKLQDQ128,
19207 IX86_BUILTIN_CLFLUSH,
19208 IX86_BUILTIN_MFENCE,
19209 IX86_BUILTIN_LFENCE,
19212 IX86_BUILTIN_ADDSUBPS,
19213 IX86_BUILTIN_HADDPS,
19214 IX86_BUILTIN_HSUBPS,
19215 IX86_BUILTIN_MOVSHDUP,
19216 IX86_BUILTIN_MOVSLDUP,
19217 IX86_BUILTIN_ADDSUBPD,
19218 IX86_BUILTIN_HADDPD,
19219 IX86_BUILTIN_HSUBPD,
19220 IX86_BUILTIN_LDDQU,
19222 IX86_BUILTIN_MONITOR,
19223 IX86_BUILTIN_MWAIT,
19226 IX86_BUILTIN_PHADDW,
19227 IX86_BUILTIN_PHADDD,
19228 IX86_BUILTIN_PHADDSW,
19229 IX86_BUILTIN_PHSUBW,
19230 IX86_BUILTIN_PHSUBD,
19231 IX86_BUILTIN_PHSUBSW,
19232 IX86_BUILTIN_PMADDUBSW,
19233 IX86_BUILTIN_PMULHRSW,
19234 IX86_BUILTIN_PSHUFB,
19235 IX86_BUILTIN_PSIGNB,
19236 IX86_BUILTIN_PSIGNW,
19237 IX86_BUILTIN_PSIGND,
19238 IX86_BUILTIN_PALIGNR,
19239 IX86_BUILTIN_PABSB,
19240 IX86_BUILTIN_PABSW,
19241 IX86_BUILTIN_PABSD,
19243 IX86_BUILTIN_PHADDW128,
19244 IX86_BUILTIN_PHADDD128,
19245 IX86_BUILTIN_PHADDSW128,
19246 IX86_BUILTIN_PHSUBW128,
19247 IX86_BUILTIN_PHSUBD128,
19248 IX86_BUILTIN_PHSUBSW128,
19249 IX86_BUILTIN_PMADDUBSW128,
19250 IX86_BUILTIN_PMULHRSW128,
19251 IX86_BUILTIN_PSHUFB128,
19252 IX86_BUILTIN_PSIGNB128,
19253 IX86_BUILTIN_PSIGNW128,
19254 IX86_BUILTIN_PSIGND128,
19255 IX86_BUILTIN_PALIGNR128,
19256 IX86_BUILTIN_PABSB128,
19257 IX86_BUILTIN_PABSW128,
19258 IX86_BUILTIN_PABSD128,
19260 /* AMDFAM10 - SSE4A New Instructions. */
19261 IX86_BUILTIN_MOVNTSD,
19262 IX86_BUILTIN_MOVNTSS,
19263 IX86_BUILTIN_EXTRQI,
19264 IX86_BUILTIN_EXTRQ,
19265 IX86_BUILTIN_INSERTQI,
19266 IX86_BUILTIN_INSERTQ,
19269 IX86_BUILTIN_BLENDPD,
19270 IX86_BUILTIN_BLENDPS,
19271 IX86_BUILTIN_BLENDVPD,
19272 IX86_BUILTIN_BLENDVPS,
19273 IX86_BUILTIN_PBLENDVB128,
19274 IX86_BUILTIN_PBLENDW128,
19279 IX86_BUILTIN_INSERTPS128,
19281 IX86_BUILTIN_MOVNTDQA,
19282 IX86_BUILTIN_MPSADBW128,
19283 IX86_BUILTIN_PACKUSDW128,
19284 IX86_BUILTIN_PCMPEQQ,
19285 IX86_BUILTIN_PHMINPOSUW128,
19287 IX86_BUILTIN_PMAXSB128,
19288 IX86_BUILTIN_PMAXSD128,
19289 IX86_BUILTIN_PMAXUD128,
19290 IX86_BUILTIN_PMAXUW128,
19292 IX86_BUILTIN_PMINSB128,
19293 IX86_BUILTIN_PMINSD128,
19294 IX86_BUILTIN_PMINUD128,
19295 IX86_BUILTIN_PMINUW128,
19297 IX86_BUILTIN_PMOVSXBW128,
19298 IX86_BUILTIN_PMOVSXBD128,
19299 IX86_BUILTIN_PMOVSXBQ128,
19300 IX86_BUILTIN_PMOVSXWD128,
19301 IX86_BUILTIN_PMOVSXWQ128,
19302 IX86_BUILTIN_PMOVSXDQ128,
19304 IX86_BUILTIN_PMOVZXBW128,
19305 IX86_BUILTIN_PMOVZXBD128,
19306 IX86_BUILTIN_PMOVZXBQ128,
19307 IX86_BUILTIN_PMOVZXWD128,
19308 IX86_BUILTIN_PMOVZXWQ128,
19309 IX86_BUILTIN_PMOVZXDQ128,
19311 IX86_BUILTIN_PMULDQ128,
19312 IX86_BUILTIN_PMULLD128,
19314 IX86_BUILTIN_ROUNDPD,
19315 IX86_BUILTIN_ROUNDPS,
19316 IX86_BUILTIN_ROUNDSD,
19317 IX86_BUILTIN_ROUNDSS,
19319 IX86_BUILTIN_PTESTZ,
19320 IX86_BUILTIN_PTESTC,
19321 IX86_BUILTIN_PTESTNZC,
19323 IX86_BUILTIN_VEC_INIT_V2SI,
19324 IX86_BUILTIN_VEC_INIT_V4HI,
19325 IX86_BUILTIN_VEC_INIT_V8QI,
19326 IX86_BUILTIN_VEC_EXT_V2DF,
19327 IX86_BUILTIN_VEC_EXT_V2DI,
19328 IX86_BUILTIN_VEC_EXT_V4SF,
19329 IX86_BUILTIN_VEC_EXT_V4SI,
19330 IX86_BUILTIN_VEC_EXT_V8HI,
19331 IX86_BUILTIN_VEC_EXT_V2SI,
19332 IX86_BUILTIN_VEC_EXT_V4HI,
19333 IX86_BUILTIN_VEC_EXT_V16QI,
19334 IX86_BUILTIN_VEC_SET_V2DI,
19335 IX86_BUILTIN_VEC_SET_V4SF,
19336 IX86_BUILTIN_VEC_SET_V4SI,
19337 IX86_BUILTIN_VEC_SET_V8HI,
19338 IX86_BUILTIN_VEC_SET_V4HI,
19339 IX86_BUILTIN_VEC_SET_V16QI,
19341 IX86_BUILTIN_VEC_PACK_SFIX,
19344 IX86_BUILTIN_CRC32QI,
19345 IX86_BUILTIN_CRC32HI,
19346 IX86_BUILTIN_CRC32SI,
19347 IX86_BUILTIN_CRC32DI,
19349 IX86_BUILTIN_PCMPESTRI128,
19350 IX86_BUILTIN_PCMPESTRM128,
19351 IX86_BUILTIN_PCMPESTRA128,
19352 IX86_BUILTIN_PCMPESTRC128,
19353 IX86_BUILTIN_PCMPESTRO128,
19354 IX86_BUILTIN_PCMPESTRS128,
19355 IX86_BUILTIN_PCMPESTRZ128,
19356 IX86_BUILTIN_PCMPISTRI128,
19357 IX86_BUILTIN_PCMPISTRM128,
19358 IX86_BUILTIN_PCMPISTRA128,
19359 IX86_BUILTIN_PCMPISTRC128,
19360 IX86_BUILTIN_PCMPISTRO128,
19361 IX86_BUILTIN_PCMPISTRS128,
19362 IX86_BUILTIN_PCMPISTRZ128,
19364 IX86_BUILTIN_PCMPGTQ,
19366 /* AES instructions */
19367 IX86_BUILTIN_AESENC128,
19368 IX86_BUILTIN_AESENCLAST128,
19369 IX86_BUILTIN_AESDEC128,
19370 IX86_BUILTIN_AESDECLAST128,
19371 IX86_BUILTIN_AESIMC128,
19372 IX86_BUILTIN_AESKEYGENASSIST128,
19374 /* PCLMUL instruction */
19375 IX86_BUILTIN_PCLMULQDQ128,
19378 IX86_BUILTIN_ADDPD256,
19379 IX86_BUILTIN_ADDPS256,
19380 IX86_BUILTIN_ADDSUBPD256,
19381 IX86_BUILTIN_ADDSUBPS256,
19382 IX86_BUILTIN_ANDPD256,
19383 IX86_BUILTIN_ANDPS256,
19384 IX86_BUILTIN_ANDNPD256,
19385 IX86_BUILTIN_ANDNPS256,
19386 IX86_BUILTIN_BLENDPD256,
19387 IX86_BUILTIN_BLENDPS256,
19388 IX86_BUILTIN_BLENDVPD256,
19389 IX86_BUILTIN_BLENDVPS256,
19390 IX86_BUILTIN_DIVPD256,
19391 IX86_BUILTIN_DIVPS256,
19392 IX86_BUILTIN_DPPS256,
19393 IX86_BUILTIN_HADDPD256,
19394 IX86_BUILTIN_HADDPS256,
19395 IX86_BUILTIN_HSUBPD256,
19396 IX86_BUILTIN_HSUBPS256,
19397 IX86_BUILTIN_MAXPD256,
19398 IX86_BUILTIN_MAXPS256,
19399 IX86_BUILTIN_MINPD256,
19400 IX86_BUILTIN_MINPS256,
19401 IX86_BUILTIN_MULPD256,
19402 IX86_BUILTIN_MULPS256,
19403 IX86_BUILTIN_ORPD256,
19404 IX86_BUILTIN_ORPS256,
19405 IX86_BUILTIN_SHUFPD256,
19406 IX86_BUILTIN_SHUFPS256,
19407 IX86_BUILTIN_SUBPD256,
19408 IX86_BUILTIN_SUBPS256,
19409 IX86_BUILTIN_XORPD256,
19410 IX86_BUILTIN_XORPS256,
19411 IX86_BUILTIN_CMPSD,
19412 IX86_BUILTIN_CMPSS,
19413 IX86_BUILTIN_CMPPD,
19414 IX86_BUILTIN_CMPPS,
19415 IX86_BUILTIN_CMPPD256,
19416 IX86_BUILTIN_CMPPS256,
19417 IX86_BUILTIN_CVTDQ2PD256,
19418 IX86_BUILTIN_CVTDQ2PS256,
19419 IX86_BUILTIN_CVTPD2PS256,
19420 IX86_BUILTIN_CVTPS2DQ256,
19421 IX86_BUILTIN_CVTPS2PD256,
19422 IX86_BUILTIN_CVTTPD2DQ256,
19423 IX86_BUILTIN_CVTPD2DQ256,
19424 IX86_BUILTIN_CVTTPS2DQ256,
19425 IX86_BUILTIN_EXTRACTF128PD256,
19426 IX86_BUILTIN_EXTRACTF128PS256,
19427 IX86_BUILTIN_EXTRACTF128SI256,
19428 IX86_BUILTIN_VZEROALL,
19429 IX86_BUILTIN_VZEROUPPER,
19430 IX86_BUILTIN_VZEROUPPER_REX64,
19431 IX86_BUILTIN_VPERMILVARPD,
19432 IX86_BUILTIN_VPERMILVARPS,
19433 IX86_BUILTIN_VPERMILVARPD256,
19434 IX86_BUILTIN_VPERMILVARPS256,
19435 IX86_BUILTIN_VPERMILPD,
19436 IX86_BUILTIN_VPERMILPS,
19437 IX86_BUILTIN_VPERMILPD256,
19438 IX86_BUILTIN_VPERMILPS256,
19439 IX86_BUILTIN_VPERMIL2PD,
19440 IX86_BUILTIN_VPERMIL2PS,
19441 IX86_BUILTIN_VPERMIL2PD256,
19442 IX86_BUILTIN_VPERMIL2PS256,
19443 IX86_BUILTIN_VPERM2F128PD256,
19444 IX86_BUILTIN_VPERM2F128PS256,
19445 IX86_BUILTIN_VPERM2F128SI256,
19446 IX86_BUILTIN_VBROADCASTSS,
19447 IX86_BUILTIN_VBROADCASTSD256,
19448 IX86_BUILTIN_VBROADCASTSS256,
19449 IX86_BUILTIN_VBROADCASTPD256,
19450 IX86_BUILTIN_VBROADCASTPS256,
19451 IX86_BUILTIN_VINSERTF128PD256,
19452 IX86_BUILTIN_VINSERTF128PS256,
19453 IX86_BUILTIN_VINSERTF128SI256,
19454 IX86_BUILTIN_LOADUPD256,
19455 IX86_BUILTIN_LOADUPS256,
19456 IX86_BUILTIN_STOREUPD256,
19457 IX86_BUILTIN_STOREUPS256,
19458 IX86_BUILTIN_LDDQU256,
19459 IX86_BUILTIN_LOADDQU256,
19460 IX86_BUILTIN_STOREDQU256,
19461 IX86_BUILTIN_MASKLOADPD,
19462 IX86_BUILTIN_MASKLOADPS,
19463 IX86_BUILTIN_MASKSTOREPD,
19464 IX86_BUILTIN_MASKSTOREPS,
19465 IX86_BUILTIN_MASKLOADPD256,
19466 IX86_BUILTIN_MASKLOADPS256,
19467 IX86_BUILTIN_MASKSTOREPD256,
19468 IX86_BUILTIN_MASKSTOREPS256,
19469 IX86_BUILTIN_MOVSHDUP256,
19470 IX86_BUILTIN_MOVSLDUP256,
19471 IX86_BUILTIN_MOVDDUP256,
19473 IX86_BUILTIN_SQRTPD256,
19474 IX86_BUILTIN_SQRTPS256,
19475 IX86_BUILTIN_SQRTPS_NR256,
19476 IX86_BUILTIN_RSQRTPS256,
19477 IX86_BUILTIN_RSQRTPS_NR256,
19479 IX86_BUILTIN_RCPPS256,
19481 IX86_BUILTIN_ROUNDPD256,
19482 IX86_BUILTIN_ROUNDPS256,
19484 IX86_BUILTIN_UNPCKHPD256,
19485 IX86_BUILTIN_UNPCKLPD256,
19486 IX86_BUILTIN_UNPCKHPS256,
19487 IX86_BUILTIN_UNPCKLPS256,
19489 IX86_BUILTIN_SI256_SI,
19490 IX86_BUILTIN_PS256_PS,
19491 IX86_BUILTIN_PD256_PD,
19492 IX86_BUILTIN_SI_SI256,
19493 IX86_BUILTIN_PS_PS256,
19494 IX86_BUILTIN_PD_PD256,
19496 IX86_BUILTIN_VTESTZPD,
19497 IX86_BUILTIN_VTESTCPD,
19498 IX86_BUILTIN_VTESTNZCPD,
19499 IX86_BUILTIN_VTESTZPS,
19500 IX86_BUILTIN_VTESTCPS,
19501 IX86_BUILTIN_VTESTNZCPS,
19502 IX86_BUILTIN_VTESTZPD256,
19503 IX86_BUILTIN_VTESTCPD256,
19504 IX86_BUILTIN_VTESTNZCPD256,
19505 IX86_BUILTIN_VTESTZPS256,
19506 IX86_BUILTIN_VTESTCPS256,
19507 IX86_BUILTIN_VTESTNZCPS256,
19508 IX86_BUILTIN_PTESTZ256,
19509 IX86_BUILTIN_PTESTC256,
19510 IX86_BUILTIN_PTESTNZC256,
19512 IX86_BUILTIN_MOVMSKPD256,
19513 IX86_BUILTIN_MOVMSKPS256,
19515 /* TFmode support builtins. */
19517 IX86_BUILTIN_FABSQ,
19518 IX86_BUILTIN_COPYSIGNQ,
19520 /* SSE5 instructions */
19521 IX86_BUILTIN_FMADDSS,
19522 IX86_BUILTIN_FMADDSD,
19523 IX86_BUILTIN_FMADDPS,
19524 IX86_BUILTIN_FMADDPD,
19525 IX86_BUILTIN_FMSUBSS,
19526 IX86_BUILTIN_FMSUBSD,
19527 IX86_BUILTIN_FMSUBPS,
19528 IX86_BUILTIN_FMSUBPD,
19529 IX86_BUILTIN_FNMADDSS,
19530 IX86_BUILTIN_FNMADDSD,
19531 IX86_BUILTIN_FNMADDPS,
19532 IX86_BUILTIN_FNMADDPD,
19533 IX86_BUILTIN_FNMSUBSS,
19534 IX86_BUILTIN_FNMSUBSD,
19535 IX86_BUILTIN_FNMSUBPS,
19536 IX86_BUILTIN_FNMSUBPD,
19537 IX86_BUILTIN_PCMOV_V2DI,
19538 IX86_BUILTIN_PCMOV_V4SI,
19539 IX86_BUILTIN_PCMOV_V8HI,
19540 IX86_BUILTIN_PCMOV_V16QI,
19541 IX86_BUILTIN_PCMOV_V4SF,
19542 IX86_BUILTIN_PCMOV_V2DF,
19543 IX86_BUILTIN_PPERM,
19544 IX86_BUILTIN_PERMPS,
19545 IX86_BUILTIN_PERMPD,
19546 IX86_BUILTIN_PMACSSWW,
19547 IX86_BUILTIN_PMACSWW,
19548 IX86_BUILTIN_PMACSSWD,
19549 IX86_BUILTIN_PMACSWD,
19550 IX86_BUILTIN_PMACSSDD,
19551 IX86_BUILTIN_PMACSDD,
19552 IX86_BUILTIN_PMACSSDQL,
19553 IX86_BUILTIN_PMACSSDQH,
19554 IX86_BUILTIN_PMACSDQL,
19555 IX86_BUILTIN_PMACSDQH,
19556 IX86_BUILTIN_PMADCSSWD,
19557 IX86_BUILTIN_PMADCSWD,
19558 IX86_BUILTIN_PHADDBW,
19559 IX86_BUILTIN_PHADDBD,
19560 IX86_BUILTIN_PHADDBQ,
19561 IX86_BUILTIN_PHADDWD,
19562 IX86_BUILTIN_PHADDWQ,
19563 IX86_BUILTIN_PHADDDQ,
19564 IX86_BUILTIN_PHADDUBW,
19565 IX86_BUILTIN_PHADDUBD,
19566 IX86_BUILTIN_PHADDUBQ,
19567 IX86_BUILTIN_PHADDUWD,
19568 IX86_BUILTIN_PHADDUWQ,
19569 IX86_BUILTIN_PHADDUDQ,
19570 IX86_BUILTIN_PHSUBBW,
19571 IX86_BUILTIN_PHSUBWD,
19572 IX86_BUILTIN_PHSUBDQ,
19573 IX86_BUILTIN_PROTB,
19574 IX86_BUILTIN_PROTW,
19575 IX86_BUILTIN_PROTD,
19576 IX86_BUILTIN_PROTQ,
19577 IX86_BUILTIN_PROTB_IMM,
19578 IX86_BUILTIN_PROTW_IMM,
19579 IX86_BUILTIN_PROTD_IMM,
19580 IX86_BUILTIN_PROTQ_IMM,
19581 IX86_BUILTIN_PSHLB,
19582 IX86_BUILTIN_PSHLW,
19583 IX86_BUILTIN_PSHLD,
19584 IX86_BUILTIN_PSHLQ,
19585 IX86_BUILTIN_PSHAB,
19586 IX86_BUILTIN_PSHAW,
19587 IX86_BUILTIN_PSHAD,
19588 IX86_BUILTIN_PSHAQ,
19589 IX86_BUILTIN_FRCZSS,
19590 IX86_BUILTIN_FRCZSD,
19591 IX86_BUILTIN_FRCZPS,
19592 IX86_BUILTIN_FRCZPD,
19593 IX86_BUILTIN_CVTPH2PS,
19594 IX86_BUILTIN_CVTPS2PH,
19596 IX86_BUILTIN_COMEQSS,
19597 IX86_BUILTIN_COMNESS,
19598 IX86_BUILTIN_COMLTSS,
19599 IX86_BUILTIN_COMLESS,
19600 IX86_BUILTIN_COMGTSS,
19601 IX86_BUILTIN_COMGESS,
19602 IX86_BUILTIN_COMUEQSS,
19603 IX86_BUILTIN_COMUNESS,
19604 IX86_BUILTIN_COMULTSS,
19605 IX86_BUILTIN_COMULESS,
19606 IX86_BUILTIN_COMUGTSS,
19607 IX86_BUILTIN_COMUGESS,
19608 IX86_BUILTIN_COMORDSS,
19609 IX86_BUILTIN_COMUNORDSS,
19610 IX86_BUILTIN_COMFALSESS,
19611 IX86_BUILTIN_COMTRUESS,
19613 IX86_BUILTIN_COMEQSD,
19614 IX86_BUILTIN_COMNESD,
19615 IX86_BUILTIN_COMLTSD,
19616 IX86_BUILTIN_COMLESD,
19617 IX86_BUILTIN_COMGTSD,
19618 IX86_BUILTIN_COMGESD,
19619 IX86_BUILTIN_COMUEQSD,
19620 IX86_BUILTIN_COMUNESD,
19621 IX86_BUILTIN_COMULTSD,
19622 IX86_BUILTIN_COMULESD,
19623 IX86_BUILTIN_COMUGTSD,
19624 IX86_BUILTIN_COMUGESD,
19625 IX86_BUILTIN_COMORDSD,
19626 IX86_BUILTIN_COMUNORDSD,
19627 IX86_BUILTIN_COMFALSESD,
19628 IX86_BUILTIN_COMTRUESD,
19630 IX86_BUILTIN_COMEQPS,
19631 IX86_BUILTIN_COMNEPS,
19632 IX86_BUILTIN_COMLTPS,
19633 IX86_BUILTIN_COMLEPS,
19634 IX86_BUILTIN_COMGTPS,
19635 IX86_BUILTIN_COMGEPS,
19636 IX86_BUILTIN_COMUEQPS,
19637 IX86_BUILTIN_COMUNEPS,
19638 IX86_BUILTIN_COMULTPS,
19639 IX86_BUILTIN_COMULEPS,
19640 IX86_BUILTIN_COMUGTPS,
19641 IX86_BUILTIN_COMUGEPS,
19642 IX86_BUILTIN_COMORDPS,
19643 IX86_BUILTIN_COMUNORDPS,
19644 IX86_BUILTIN_COMFALSEPS,
19645 IX86_BUILTIN_COMTRUEPS,
19647 IX86_BUILTIN_COMEQPD,
19648 IX86_BUILTIN_COMNEPD,
19649 IX86_BUILTIN_COMLTPD,
19650 IX86_BUILTIN_COMLEPD,
19651 IX86_BUILTIN_COMGTPD,
19652 IX86_BUILTIN_COMGEPD,
19653 IX86_BUILTIN_COMUEQPD,
19654 IX86_BUILTIN_COMUNEPD,
19655 IX86_BUILTIN_COMULTPD,
19656 IX86_BUILTIN_COMULEPD,
19657 IX86_BUILTIN_COMUGTPD,
19658 IX86_BUILTIN_COMUGEPD,
19659 IX86_BUILTIN_COMORDPD,
19660 IX86_BUILTIN_COMUNORDPD,
19661 IX86_BUILTIN_COMFALSEPD,
19662 IX86_BUILTIN_COMTRUEPD,
19664 IX86_BUILTIN_PCOMEQUB,
19665 IX86_BUILTIN_PCOMNEUB,
19666 IX86_BUILTIN_PCOMLTUB,
19667 IX86_BUILTIN_PCOMLEUB,
19668 IX86_BUILTIN_PCOMGTUB,
19669 IX86_BUILTIN_PCOMGEUB,
19670 IX86_BUILTIN_PCOMFALSEUB,
19671 IX86_BUILTIN_PCOMTRUEUB,
19672 IX86_BUILTIN_PCOMEQUW,
19673 IX86_BUILTIN_PCOMNEUW,
19674 IX86_BUILTIN_PCOMLTUW,
19675 IX86_BUILTIN_PCOMLEUW,
19676 IX86_BUILTIN_PCOMGTUW,
19677 IX86_BUILTIN_PCOMGEUW,
19678 IX86_BUILTIN_PCOMFALSEUW,
19679 IX86_BUILTIN_PCOMTRUEUW,
19680 IX86_BUILTIN_PCOMEQUD,
19681 IX86_BUILTIN_PCOMNEUD,
19682 IX86_BUILTIN_PCOMLTUD,
19683 IX86_BUILTIN_PCOMLEUD,
19684 IX86_BUILTIN_PCOMGTUD,
19685 IX86_BUILTIN_PCOMGEUD,
19686 IX86_BUILTIN_PCOMFALSEUD,
19687 IX86_BUILTIN_PCOMTRUEUD,
19688 IX86_BUILTIN_PCOMEQUQ,
19689 IX86_BUILTIN_PCOMNEUQ,
19690 IX86_BUILTIN_PCOMLTUQ,
19691 IX86_BUILTIN_PCOMLEUQ,
19692 IX86_BUILTIN_PCOMGTUQ,
19693 IX86_BUILTIN_PCOMGEUQ,
19694 IX86_BUILTIN_PCOMFALSEUQ,
19695 IX86_BUILTIN_PCOMTRUEUQ,
19697 IX86_BUILTIN_PCOMEQB,
19698 IX86_BUILTIN_PCOMNEB,
19699 IX86_BUILTIN_PCOMLTB,
19700 IX86_BUILTIN_PCOMLEB,
19701 IX86_BUILTIN_PCOMGTB,
19702 IX86_BUILTIN_PCOMGEB,
19703 IX86_BUILTIN_PCOMFALSEB,
19704 IX86_BUILTIN_PCOMTRUEB,
19705 IX86_BUILTIN_PCOMEQW,
19706 IX86_BUILTIN_PCOMNEW,
19707 IX86_BUILTIN_PCOMLTW,
19708 IX86_BUILTIN_PCOMLEW,
19709 IX86_BUILTIN_PCOMGTW,
19710 IX86_BUILTIN_PCOMGEW,
19711 IX86_BUILTIN_PCOMFALSEW,
19712 IX86_BUILTIN_PCOMTRUEW,
19713 IX86_BUILTIN_PCOMEQD,
19714 IX86_BUILTIN_PCOMNED,
19715 IX86_BUILTIN_PCOMLTD,
19716 IX86_BUILTIN_PCOMLED,
19717 IX86_BUILTIN_PCOMGTD,
19718 IX86_BUILTIN_PCOMGED,
19719 IX86_BUILTIN_PCOMFALSED,
19720 IX86_BUILTIN_PCOMTRUED,
19721 IX86_BUILTIN_PCOMEQQ,
19722 IX86_BUILTIN_PCOMNEQ,
19723 IX86_BUILTIN_PCOMLTQ,
19724 IX86_BUILTIN_PCOMLEQ,
19725 IX86_BUILTIN_PCOMGTQ,
19726 IX86_BUILTIN_PCOMGEQ,
19727 IX86_BUILTIN_PCOMFALSEQ,
19728 IX86_BUILTIN_PCOMTRUEQ,
/* Table for the ix86 builtin decls, indexed by IX86_BUILTIN_* code.
   Entries stay NULL_TREE for builtins that were never registered
   (see def_builtin below).  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table to record which ISA options (OPTION_MASK_ISA_* mask) each
   builtin needs; consulted when the builtin is expanded.  */
static int ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
19739 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
19740 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
19741 * function decl in the ix86_builtins array. Returns the function decl or
19742 * NULL_TREE, if the builtin was not added.
19744 * Record all builtins, even if it isn't an instruction set in the current ISA
19745 * in case the user uses function specific options for a different ISA. When
19746 * the builtin is expanded, check at that time whether it is valid. */
19749 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
19751 tree decl = NULL_TREE;
19753 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
19755 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
19757 ix86_builtins[(int) code] = decl;
19758 ix86_builtins_isa[(int) code] = mask;
19764 /* Like def_builtin, but also marks the function decl "const". */
19767 def_builtin_const (int mask, const char *name, tree type,
19768 enum ix86_builtins code)
19770 tree decl = def_builtin (mask, name, type, code);
19772 TREE_READONLY (decl) = 1;
19776 /* Bits for builtin_description.flag. */
19778 /* Set when we don't support the comparison natively, and should
19779 swap_comparison in order to support it. */
19780 #define BUILTIN_DESC_SWAP_OPERANDS 1
19782 struct builtin_description
19784 const unsigned int mask;
19785 const enum insn_code icode;
19786 const char *const name;
19787 const enum ix86_builtins code;
19788 const enum rtx_code comparison;
19792 static const struct builtin_description bdesc_comi[] =
19794 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
19795 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
19796 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
19797 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
19798 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
19799 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
19800 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
19801 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
19802 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
19803 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
19804 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
19805 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
19806 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
19807 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
19808 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
19809 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
19810 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
19811 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
19812 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
19813 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
19814 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
19815 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
19816 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
19817 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
19820 static const struct builtin_description bdesc_pcmpestr[] =
19823 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
19824 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
19825 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
19826 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
19827 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
19828 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
19829 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
19832 static const struct builtin_description bdesc_pcmpistr[] =
19835 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
19836 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
19837 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
19838 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
19839 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
19840 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
19841 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
/* Special builtin types: function signatures for builtins whose operands
   are pointers or void (loads, stores, fences), encoded as
   RESULT_FTYPE_ARG1[_ARG2...].  "PC" = pointer-to-const, "P" = pointer.  */
enum ix86_special_builtin_type
{
  SPECIAL_FTYPE_UNKNOWN,
  /* Referenced by bdesc_special_args (emms, femms, fences).  */
  VOID_FTYPE_VOID,
  V32QI_FTYPE_PCCHAR,
  V16QI_FTYPE_PCCHAR,
  V8SF_FTYPE_PCFLOAT,
  V4DF_FTYPE_PCDOUBLE,
  V4SF_FTYPE_PCFLOAT,
  V2DF_FTYPE_PCDOUBLE,
  V8SF_FTYPE_PCV8SF_V8SF,
  V4DF_FTYPE_PCV4DF_V4DF,
  V4SF_FTYPE_V4SF_PCV2SF,
  V4SF_FTYPE_PCV4SF_V4SF,
  V2DF_FTYPE_V2DF_PCDOUBLE,
  V2DF_FTYPE_PCV2DF_V2DF,
  VOID_FTYPE_PV2SF_V4SF,
  VOID_FTYPE_PV2DI_V2DI,
  VOID_FTYPE_PCHAR_V32QI,
  VOID_FTYPE_PCHAR_V16QI,
  VOID_FTYPE_PFLOAT_V8SF,
  VOID_FTYPE_PFLOAT_V4SF,
  VOID_FTYPE_PDOUBLE_V4DF,
  VOID_FTYPE_PDOUBLE_V2DF,
  /* Referenced by bdesc_special_args (movntq).  */
  VOID_FTYPE_PDI_DI,
  VOID_FTYPE_PINT_INT,
  VOID_FTYPE_PV8SF_V8SF_V8SF,
  VOID_FTYPE_PV4DF_V4DF_V4DF,
  VOID_FTYPE_PV4SF_V4SF_V4SF,
  VOID_FTYPE_PV2DF_V2DF_V2DF
};
/* Builtin types: function signatures for value-operand builtins, encoded as
   RESULT_FTYPE_ARG1[_ARG2...], with suffixes such as _COUNT (shift count
   operand), _SWAP (operands swapped), _PTEST (ptest-style flag result) and
   _VEC_MERGE (scalar op merged into vector).
   NOTE(review): gaps in the recovered source suggest additional enumerators
   (e.g. one-operand FTYPEs) were lost between INT_FTYPE_V2DF_V2DF_PTEST and
   V4SF_FTYPE_V4SF_VEC_MERGE — verify against the original file.  */
enum ix86_builtin_type
{
  FLOAT128_FTYPE_FLOAT128,
  FLOAT128_FTYPE_FLOAT128_FLOAT128,
  INT_FTYPE_V8SF_V8SF_PTEST,
  INT_FTYPE_V4DI_V4DI_PTEST,
  INT_FTYPE_V4DF_V4DF_PTEST,
  INT_FTYPE_V4SF_V4SF_PTEST,
  INT_FTYPE_V2DI_V2DI_PTEST,
  INT_FTYPE_V2DF_V2DF_PTEST,
  V4SF_FTYPE_V4SF_VEC_MERGE,
  V2DF_FTYPE_V2DF_VEC_MERGE,
  V16QI_FTYPE_V16QI_V16QI,
  V16QI_FTYPE_V8HI_V8HI,
  V8QI_FTYPE_V8QI_V8QI,
  V8QI_FTYPE_V4HI_V4HI,
  V8HI_FTYPE_V8HI_V8HI,
  V8HI_FTYPE_V8HI_V8HI_COUNT,
  V8HI_FTYPE_V16QI_V16QI,
  V8HI_FTYPE_V4SI_V4SI,
  V8HI_FTYPE_V8HI_SI_COUNT,
  V8SF_FTYPE_V8SF_V8SF,
  V8SF_FTYPE_V8SF_V8SI,
  V4SI_FTYPE_V4SI_V4SI,
  V4SI_FTYPE_V4SI_V4SI_COUNT,
  V4SI_FTYPE_V8HI_V8HI,
  V4SI_FTYPE_V4SF_V4SF,
  V4SI_FTYPE_V2DF_V2DF,
  V4SI_FTYPE_V4SI_SI_COUNT,
  V4HI_FTYPE_V4HI_V4HI,
  V4HI_FTYPE_V4HI_V4HI_COUNT,
  V4HI_FTYPE_V8QI_V8QI,
  V4HI_FTYPE_V2SI_V2SI,
  V4HI_FTYPE_V4HI_SI_COUNT,
  V4DF_FTYPE_V4DF_V4DF,
  V4DF_FTYPE_V4DF_V4DI,
  V4SF_FTYPE_V4SF_V4SF,
  V4SF_FTYPE_V4SF_V4SF_SWAP,
  V4SF_FTYPE_V4SF_V4SI,
  V4SF_FTYPE_V4SF_V2SI,
  V4SF_FTYPE_V4SF_V2DF,
  V4SF_FTYPE_V4SF_DI,
  V4SF_FTYPE_V4SF_SI,
  V2DI_FTYPE_V2DI_V2DI,
  V2DI_FTYPE_V2DI_V2DI_COUNT,
  V2DI_FTYPE_V16QI_V16QI,
  V2DI_FTYPE_V4SI_V4SI,
  V2DI_FTYPE_V2DI_V16QI,
  V2DI_FTYPE_V2DF_V2DF,
  V2DI_FTYPE_V2DI_SI_COUNT,
  V2SI_FTYPE_V2SI_V2SI,
  V2SI_FTYPE_V2SI_V2SI_COUNT,
  V2SI_FTYPE_V4HI_V4HI,
  V2SI_FTYPE_V2SF_V2SF,
  V2SI_FTYPE_V2SI_SI_COUNT,
  V2DF_FTYPE_V2DF_V2DF,
  V2DF_FTYPE_V2DF_V2DF_SWAP,
  V2DF_FTYPE_V2DF_V4SF,
  V2DF_FTYPE_V2DF_V2DI,
  V2DF_FTYPE_V2DF_DI,
  V2DF_FTYPE_V2DF_SI,
  V2SF_FTYPE_V2SF_V2SF,
  V1DI_FTYPE_V1DI_V1DI,
  V1DI_FTYPE_V1DI_V1DI_COUNT,
  V1DI_FTYPE_V8QI_V8QI,
  V1DI_FTYPE_V2SI_V2SI,
  V1DI_FTYPE_V1DI_SI_COUNT,
  UINT64_FTYPE_UINT64_UINT64,
  UINT_FTYPE_UINT_UINT,
  UINT_FTYPE_UINT_USHORT,
  UINT_FTYPE_UINT_UCHAR,
  V8HI_FTYPE_V8HI_INT,
  V4SI_FTYPE_V4SI_INT,
  V4HI_FTYPE_V4HI_INT,
  V8SF_FTYPE_V8SF_INT,
  V4SI_FTYPE_V8SI_INT,
  V4SF_FTYPE_V8SF_INT,
  V2DF_FTYPE_V4DF_INT,
  V4DF_FTYPE_V4DF_INT,
  V4SF_FTYPE_V4SF_INT,
  V2DI_FTYPE_V2DI_INT,
  V2DI2TI_FTYPE_V2DI_INT,
  V2DF_FTYPE_V2DF_INT,
  V16QI_FTYPE_V16QI_V16QI_V16QI,
  V8SF_FTYPE_V8SF_V8SF_V8SF,
  V4DF_FTYPE_V4DF_V4DF_V4DF,
  V4SF_FTYPE_V4SF_V4SF_V4SF,
  V2DF_FTYPE_V2DF_V2DF_V2DF,
  V16QI_FTYPE_V16QI_V16QI_INT,
  V8SI_FTYPE_V8SI_V8SI_INT,
  V8SI_FTYPE_V8SI_V4SI_INT,
  V8HI_FTYPE_V8HI_V8HI_INT,
  V8SF_FTYPE_V8SF_V8SF_INT,
  V8SF_FTYPE_V8SF_V4SF_INT,
  V4SI_FTYPE_V4SI_V4SI_INT,
  V4DF_FTYPE_V4DF_V4DF_INT,
  V4DF_FTYPE_V4DF_V2DF_INT,
  V4SF_FTYPE_V4SF_V4SF_INT,
  V2DI_FTYPE_V2DI_V2DI_INT,
  V2DI2TI_FTYPE_V2DI_V2DI_INT,
  V1DI2DI_FTYPE_V1DI_V1DI_INT,
  V2DF_FTYPE_V2DF_V2DF_INT,
  V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
  V4DF_FTYPE_V4DF_V4DF_V4DI_INT,
  V4SF_FTYPE_V4SF_V4SF_V4SI_INT,
  V2DF_FTYPE_V2DF_V2DF_V2DI_INT,
  V2DI_FTYPE_V2DI_UINT_UINT,
  V2DI_FTYPE_V2DI_V2DI_UINT_UINT
};
20042 /* Special builtins with variable number of arguments. */
20043 static const struct builtin_description bdesc_special_args[] =
20046 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
20049 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
20052 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20053 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20054 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20056 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20057 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20058 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20059 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20061 /* SSE or 3DNow!A */
20062 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20063 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
20066 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20067 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20068 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20069 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
20070 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20071 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
20072 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
20073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
20074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
20076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
20077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
20080 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
20083 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
20086 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20087 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20090 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
20091 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
20092 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
20094 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20095 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20096 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20097 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
20098 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
20100 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20101 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20102 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
20103 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
20104 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20105 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
20106 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20108 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
20109 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
20110 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
20111 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
20112 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
20113 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
20114 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
20115 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
20118 /* Builtins with variable number of arguments. */
20119 static const struct builtin_description bdesc_args[] =
20122 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20123 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20126 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20127 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20129 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20130 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20131 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20132 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20133 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20134 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20139 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
20158 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
20160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20161 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
20162 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
20166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20170 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20171 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20175 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20176 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20177 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20178 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20180 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20181 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20182 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20183 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20186 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20187 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20188 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20189 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20191 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20192 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20193 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20194 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20195 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20196 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20197 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20198 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20199 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20200 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20201 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20202 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20203 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20204 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20205 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20208 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20209 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20210 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
20211 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20212 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20213 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20216 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
20217 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20218 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20219 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20220 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20221 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20222 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
20223 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
20224 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
20225 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
20226 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
20227 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
20229 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20231 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20232 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20233 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20235 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20236 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20237 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20238 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20240 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
20241 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
20242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
20243 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20244 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20245 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20246 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
20247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
20248 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
20249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20250 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
20251 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20252 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
20253 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
20254 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
20255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
20257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
20258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
20259 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20260 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20261 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20263 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20264 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20265 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20266 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20268 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20269 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20270 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20271 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20275 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20276 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20277 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
20280 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
20281 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
20283 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
20285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20289 /* SSE MMX or 3DNow!A */
20290 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20291 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20292 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20294 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20295 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20296 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20297 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20299 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
20300 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
20302 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
20305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
20308 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
20309 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
20310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
20311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
20313 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
20314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
20315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
20316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
20317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
20319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
20321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
20322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
20323 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
20324 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
20326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
20327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
20328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
20330 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20331 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20332 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20333 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
20340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
20341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
20342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20343 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
20344 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
20346 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
20347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
20348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
20352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
20353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
20354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
20356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
20357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
20358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20361 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20363 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20365 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20366 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20367 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20368 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20370 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20372 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20374 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
20376 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20377 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20378 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20379 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20380 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20381 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20382 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20383 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20394 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20395 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
20397 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20399 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20400 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20412 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20413 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20414 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
20427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
20428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
20430 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
20433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
20434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
20436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
20438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
20439 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
20440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
20441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
20443 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
20444 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
20445 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
20446 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
20447 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
20448 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
20449 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
20451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
20452 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
20453 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
20454 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
20455 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
20456 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
20457 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
20459 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
20460 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
20461 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
20462 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
20464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
20465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
20466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
20468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
/* __float128 fabs/copysign (TFmode).  The name field is 0 here, so these
   rows carry no user-visible builtin name; NOTE(review): the names are
   presumably registered separately elsewhere in this file — confirm.  */
20470 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
20471 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
/* MOVQ (xmm, xmm) is an SSE2 instruction and the expander used here is
   CODE_FOR_sse2_movq128, so the builtin must be gated on SSE2, not SSE:
   with the SSE-only mask, -msse without -msse2 would accept the builtin
   but have no insn pattern available to expand it.  */
20473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
/* 64-bit MMX-register forms of paddq/psubq (V1DI operands, mmx_* insn
   patterns); gated on SSE2 per the ISA mask even though they operate on
   MMX vectors.  */
20476 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
20477 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
20480 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
20481 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20483 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20484 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20485 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20486 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20487 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20488 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20491 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
20492 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
20493 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
20494 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
20495 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
20496 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
20498 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20499 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20500 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20501 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20502 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20503 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20504 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20505 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20506 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20507 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20508 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20509 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20510 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
20511 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
20512 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20516 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20517 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20518 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20519 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20520 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20521 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20524 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
20525 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
20528 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20529 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20530 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
20531 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
20532 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20533 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20534 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20535 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
20536 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
20537 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
20539 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
20540 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
20541 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
20542 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
20543 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
20544 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
20545 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
20546 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
20547 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
20548 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
20549 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
20550 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
20551 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
20553 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
20554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20555 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20556 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20557 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20558 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20559 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20560 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20561 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20562 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20563 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
20564 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20566 /* SSE4.1 and SSE5 */
20567 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
20568 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
20569 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20570 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20572 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
20573 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
20574 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
20577 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20578 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
20579 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
20580 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
20581 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
20584 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
20585 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
20586 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
20587 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* AES.  Name fields are 0, so these rows carry no user-visible builtin
   name; NOTE(review): the names are presumably registered separately
   (combined with the AES ISA flag) elsewhere in this file — confirm.  */
20590 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
20591 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
20593 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20594 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20595 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20596 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* PCLMUL.  Name field 0, as with the AES rows above.  */
20599 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
20602 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20603 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20604 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20605 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20606 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20607 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20608 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_nandv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20609 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_nandv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20610 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20611 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20612 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20613 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20614 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20615 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20616 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20617 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20618 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20619 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20620 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20621 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20622 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20623 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20624 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20625 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20626 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20627 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
20630 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
20631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
20632 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
20634 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20635 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20636 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
20637 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
20638 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20639 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20640 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20641 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20642 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20643 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20644 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20645 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20646 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
20648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
20649 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
20650 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
20651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
20652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
20653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
20654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
20655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
20656 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
20657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
20658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
20661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
20662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
20663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
20664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
20665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT },
20666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT },
20667 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT },
20668 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT },
20669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
20670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
20671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
20673 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20675 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
20677 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
20678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20679 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20681 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
20686 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
20688 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
20694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
20695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
20696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
20697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
20698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
20700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
20701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
20702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
20703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
20704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
20705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
20706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
20707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
20708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
20709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
20710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
20711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
20712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
20713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
20714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
20716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
20717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
20721 enum multi_arg_type {
20731 MULTI_ARG_3_PERMPS,
20732 MULTI_ARG_3_PERMPD,
20739 MULTI_ARG_2_DI_IMM,
20740 MULTI_ARG_2_SI_IMM,
20741 MULTI_ARG_2_HI_IMM,
20742 MULTI_ARG_2_QI_IMM,
20743 MULTI_ARG_2_SF_CMP,
20744 MULTI_ARG_2_DF_CMP,
20745 MULTI_ARG_2_DI_CMP,
20746 MULTI_ARG_2_SI_CMP,
20747 MULTI_ARG_2_HI_CMP,
20748 MULTI_ARG_2_QI_CMP,
20771 static const struct builtin_description bdesc_multi_arg[] =
20773 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
20774 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
20775 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
20776 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
20777 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
20778 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
20779 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
20780 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
20781 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
20782 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
20783 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
20784 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
20785 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
20786 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
20787 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
20788 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
20789 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
20790 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
20791 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
20792 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
20793 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
20794 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
20795 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
20796 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
20797 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
20798 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
20799 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
20800 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
20801 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
20802 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
20803 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
20804 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
20805 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
20806 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
20807 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
20808 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
20809 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
20810 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
20811 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
20812 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
20813 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
20814 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
20815 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
20816 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
20817 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
20818 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
20819 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
20820 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
20821 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
20822 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
20823 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
20824 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
20825 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
20826 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
20827 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
20828 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
20829 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
20830 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
20831 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
20832 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
20833 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
20834 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
20835 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
20836 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
20837 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
20838 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
20839 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
20840 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
20841 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
20842 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
20843 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
20844 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
20845 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
20846 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
20847 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
20849 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
20850 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
20851 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
20852 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
20853 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
20854 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
20855 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
20856 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
20857 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
20858 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
20859 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
20860 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
20861 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
20862 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
20863 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
20864 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
20866 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
20867 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
20868 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
20869 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
20870 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
20871 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
20872 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
20873 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
20874 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
20875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
20876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
20877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
20878 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
20879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
20880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
20881 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
20883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
20884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
20885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
20886 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
20887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
20888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
20889 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
20890 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
20891 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
20892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
20893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
20894 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
20895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
20896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
20897 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
20898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
20900 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
20901 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
20902 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
20903 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
20904 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
20905 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
20906 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
20907 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
20908 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
20909 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
20910 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
20911 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
20912 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
20913 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
20914 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
20915 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
20917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
20918 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
20919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
20920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
20921 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
20922 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
20923 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
20925 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
20926 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
20927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
20928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
20929 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
20930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
20931 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
20933 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
20934 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
20935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
20936 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
20937 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
20938 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
20939 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
20941 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
20942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
20943 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
20944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
20945 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
20946 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
20947 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
20949 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
20950 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
20951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
20952 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
20953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
20954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
20955 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
20957 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
20958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
20959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
20960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
20961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
20962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
20963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
20965 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
20966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
20967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
20968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
20969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
20970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
20971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
20973 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
20974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
20975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
20976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
20977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
20978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
20979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
20981 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
20982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
20983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
20984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
20985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
20986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
20987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
20988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
20990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
20991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
20992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
20993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
20994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
20995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
20996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
20997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
20999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21009 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
21010 in the current target ISA to allow the user to compile particular modules
21011 with different target specific options that differ from the command line
21014 ix86_init_mmx_sse_builtins (void)
21016 const struct builtin_description * d;
21019 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
21020 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
21021 tree V1DI_type_node
21022 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
21023 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
21024 tree V2DI_type_node
21025 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
21026 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
21027 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
21028 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
21029 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
21030 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
21031 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
21033 tree pchar_type_node = build_pointer_type (char_type_node);
21034 tree pcchar_type_node
21035 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
21036 tree pfloat_type_node = build_pointer_type (float_type_node);
21037 tree pcfloat_type_node
21038 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
21039 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
21040 tree pcv2sf_type_node
21041 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
21042 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
21043 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
21046 tree int_ftype_v4sf_v4sf
21047 = build_function_type_list (integer_type_node,
21048 V4SF_type_node, V4SF_type_node, NULL_TREE);
21049 tree v4si_ftype_v4sf_v4sf
21050 = build_function_type_list (V4SI_type_node,
21051 V4SF_type_node, V4SF_type_node, NULL_TREE);
21052 /* MMX/SSE/integer conversions. */
21053 tree int_ftype_v4sf
21054 = build_function_type_list (integer_type_node,
21055 V4SF_type_node, NULL_TREE);
21056 tree int64_ftype_v4sf
21057 = build_function_type_list (long_long_integer_type_node,
21058 V4SF_type_node, NULL_TREE);
21059 tree int_ftype_v8qi
21060 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
21061 tree v4sf_ftype_v4sf_int
21062 = build_function_type_list (V4SF_type_node,
21063 V4SF_type_node, integer_type_node, NULL_TREE);
21064 tree v4sf_ftype_v4sf_int64
21065 = build_function_type_list (V4SF_type_node,
21066 V4SF_type_node, long_long_integer_type_node,
21068 tree v4sf_ftype_v4sf_v2si
21069 = build_function_type_list (V4SF_type_node,
21070 V4SF_type_node, V2SI_type_node, NULL_TREE);
21072 /* Miscellaneous. */
21073 tree v8qi_ftype_v4hi_v4hi
21074 = build_function_type_list (V8QI_type_node,
21075 V4HI_type_node, V4HI_type_node, NULL_TREE);
21076 tree v4hi_ftype_v2si_v2si
21077 = build_function_type_list (V4HI_type_node,
21078 V2SI_type_node, V2SI_type_node, NULL_TREE);
21079 tree v4sf_ftype_v4sf_v4sf_int
21080 = build_function_type_list (V4SF_type_node,
21081 V4SF_type_node, V4SF_type_node,
21082 integer_type_node, NULL_TREE);
21083 tree v2si_ftype_v4hi_v4hi
21084 = build_function_type_list (V2SI_type_node,
21085 V4HI_type_node, V4HI_type_node, NULL_TREE);
21086 tree v4hi_ftype_v4hi_int
21087 = build_function_type_list (V4HI_type_node,
21088 V4HI_type_node, integer_type_node, NULL_TREE);
21089 tree v2si_ftype_v2si_int
21090 = build_function_type_list (V2SI_type_node,
21091 V2SI_type_node, integer_type_node, NULL_TREE);
21092 tree v1di_ftype_v1di_int
21093 = build_function_type_list (V1DI_type_node,
21094 V1DI_type_node, integer_type_node, NULL_TREE);
21096 tree void_ftype_void
21097 = build_function_type (void_type_node, void_list_node);
21098 tree void_ftype_unsigned
21099 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
21100 tree void_ftype_unsigned_unsigned
21101 = build_function_type_list (void_type_node, unsigned_type_node,
21102 unsigned_type_node, NULL_TREE);
21103 tree void_ftype_pcvoid_unsigned_unsigned
21104 = build_function_type_list (void_type_node, const_ptr_type_node,
21105 unsigned_type_node, unsigned_type_node,
21107 tree unsigned_ftype_void
21108 = build_function_type (unsigned_type_node, void_list_node);
21109 tree v2si_ftype_v4sf
21110 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
21111 /* Loads/stores. */
21112 tree void_ftype_v8qi_v8qi_pchar
21113 = build_function_type_list (void_type_node,
21114 V8QI_type_node, V8QI_type_node,
21115 pchar_type_node, NULL_TREE);
21116 tree v4sf_ftype_pcfloat
21117 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
21118 tree v4sf_ftype_v4sf_pcv2sf
21119 = build_function_type_list (V4SF_type_node,
21120 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
21121 tree void_ftype_pv2sf_v4sf
21122 = build_function_type_list (void_type_node,
21123 pv2sf_type_node, V4SF_type_node, NULL_TREE);
21124 tree void_ftype_pfloat_v4sf
21125 = build_function_type_list (void_type_node,
21126 pfloat_type_node, V4SF_type_node, NULL_TREE);
21127 tree void_ftype_pdi_di
21128 = build_function_type_list (void_type_node,
21129 pdi_type_node, long_long_unsigned_type_node,
21131 tree void_ftype_pv2di_v2di
21132 = build_function_type_list (void_type_node,
21133 pv2di_type_node, V2DI_type_node, NULL_TREE);
21134 /* Normal vector unops. */
21135 tree v4sf_ftype_v4sf
21136 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
21137 tree v16qi_ftype_v16qi
21138 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
21139 tree v8hi_ftype_v8hi
21140 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
21141 tree v4si_ftype_v4si
21142 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
21143 tree v8qi_ftype_v8qi
21144 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
21145 tree v4hi_ftype_v4hi
21146 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
21148 /* Normal vector binops. */
21149 tree v4sf_ftype_v4sf_v4sf
21150 = build_function_type_list (V4SF_type_node,
21151 V4SF_type_node, V4SF_type_node, NULL_TREE);
21152 tree v8qi_ftype_v8qi_v8qi
21153 = build_function_type_list (V8QI_type_node,
21154 V8QI_type_node, V8QI_type_node, NULL_TREE);
21155 tree v4hi_ftype_v4hi_v4hi
21156 = build_function_type_list (V4HI_type_node,
21157 V4HI_type_node, V4HI_type_node, NULL_TREE);
21158 tree v2si_ftype_v2si_v2si
21159 = build_function_type_list (V2SI_type_node,
21160 V2SI_type_node, V2SI_type_node, NULL_TREE);
21161 tree v1di_ftype_v1di_v1di
21162 = build_function_type_list (V1DI_type_node,
21163 V1DI_type_node, V1DI_type_node, NULL_TREE);
21164 tree v1di_ftype_v1di_v1di_int
21165 = build_function_type_list (V1DI_type_node,
21166 V1DI_type_node, V1DI_type_node,
21167 integer_type_node, NULL_TREE);
21168 tree v2si_ftype_v2sf
21169 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
21170 tree v2sf_ftype_v2si
21171 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
21172 tree v2si_ftype_v2si
21173 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
21174 tree v2sf_ftype_v2sf
21175 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
21176 tree v2sf_ftype_v2sf_v2sf
21177 = build_function_type_list (V2SF_type_node,
21178 V2SF_type_node, V2SF_type_node, NULL_TREE);
21179 tree v2si_ftype_v2sf_v2sf
21180 = build_function_type_list (V2SI_type_node,
21181 V2SF_type_node, V2SF_type_node, NULL_TREE);
21182 tree pint_type_node = build_pointer_type (integer_type_node);
21183 tree pdouble_type_node = build_pointer_type (double_type_node);
21184 tree pcdouble_type_node = build_pointer_type (
21185 build_type_variant (double_type_node, 1, 0));
21186 tree int_ftype_v2df_v2df
21187 = build_function_type_list (integer_type_node,
21188 V2DF_type_node, V2DF_type_node, NULL_TREE);
21190 tree void_ftype_pcvoid
21191 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
21192 tree v4sf_ftype_v4si
21193 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
21194 tree v4si_ftype_v4sf
21195 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
21196 tree v2df_ftype_v4si
21197 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
21198 tree v4si_ftype_v2df
21199 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
21200 tree v4si_ftype_v2df_v2df
21201 = build_function_type_list (V4SI_type_node,
21202 V2DF_type_node, V2DF_type_node, NULL_TREE);
21203 tree v2si_ftype_v2df
21204 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
21205 tree v4sf_ftype_v2df
21206 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
21207 tree v2df_ftype_v2si
21208 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
21209 tree v2df_ftype_v4sf
21210 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
21211 tree int_ftype_v2df
21212 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
21213 tree int64_ftype_v2df
21214 = build_function_type_list (long_long_integer_type_node,
21215 V2DF_type_node, NULL_TREE);
21216 tree v2df_ftype_v2df_int
21217 = build_function_type_list (V2DF_type_node,
21218 V2DF_type_node, integer_type_node, NULL_TREE);
21219 tree v2df_ftype_v2df_int64
21220 = build_function_type_list (V2DF_type_node,
21221 V2DF_type_node, long_long_integer_type_node,
21223 tree v4sf_ftype_v4sf_v2df
21224 = build_function_type_list (V4SF_type_node,
21225 V4SF_type_node, V2DF_type_node, NULL_TREE);
21226 tree v2df_ftype_v2df_v4sf
21227 = build_function_type_list (V2DF_type_node,
21228 V2DF_type_node, V4SF_type_node, NULL_TREE);
21229 tree v2df_ftype_v2df_v2df_int
21230 = build_function_type_list (V2DF_type_node,
21231 V2DF_type_node, V2DF_type_node,
21234 tree v2df_ftype_v2df_pcdouble
21235 = build_function_type_list (V2DF_type_node,
21236 V2DF_type_node, pcdouble_type_node, NULL_TREE);
21237 tree void_ftype_pdouble_v2df
21238 = build_function_type_list (void_type_node,
21239 pdouble_type_node, V2DF_type_node, NULL_TREE);
21240 tree void_ftype_pint_int
21241 = build_function_type_list (void_type_node,
21242 pint_type_node, integer_type_node, NULL_TREE);
21243 tree void_ftype_v16qi_v16qi_pchar
21244 = build_function_type_list (void_type_node,
21245 V16QI_type_node, V16QI_type_node,
21246 pchar_type_node, NULL_TREE);
21247 tree v2df_ftype_pcdouble
21248 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
21249 tree v2df_ftype_v2df_v2df
21250 = build_function_type_list (V2DF_type_node,
21251 V2DF_type_node, V2DF_type_node, NULL_TREE);
21252 tree v16qi_ftype_v16qi_v16qi
21253 = build_function_type_list (V16QI_type_node,
21254 V16QI_type_node, V16QI_type_node, NULL_TREE);
21255 tree v8hi_ftype_v8hi_v8hi
21256 = build_function_type_list (V8HI_type_node,
21257 V8HI_type_node, V8HI_type_node, NULL_TREE);
21258 tree v4si_ftype_v4si_v4si
21259 = build_function_type_list (V4SI_type_node,
21260 V4SI_type_node, V4SI_type_node, NULL_TREE);
21261 tree v2di_ftype_v2di_v2di
21262 = build_function_type_list (V2DI_type_node,
21263 V2DI_type_node, V2DI_type_node, NULL_TREE);
21264 tree v2di_ftype_v2df_v2df
21265 = build_function_type_list (V2DI_type_node,
21266 V2DF_type_node, V2DF_type_node, NULL_TREE);
21267 tree v2df_ftype_v2df
21268 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
21269 tree v2di_ftype_v2di_int
21270 = build_function_type_list (V2DI_type_node,
21271 V2DI_type_node, integer_type_node, NULL_TREE);
21272 tree v2di_ftype_v2di_v2di_int
21273 = build_function_type_list (V2DI_type_node, V2DI_type_node,
21274 V2DI_type_node, integer_type_node, NULL_TREE);
21275 tree v4si_ftype_v4si_int
21276 = build_function_type_list (V4SI_type_node,
21277 V4SI_type_node, integer_type_node, NULL_TREE);
21278 tree v8hi_ftype_v8hi_int
21279 = build_function_type_list (V8HI_type_node,
21280 V8HI_type_node, integer_type_node, NULL_TREE);
21281 tree v4si_ftype_v8hi_v8hi
21282 = build_function_type_list (V4SI_type_node,
21283 V8HI_type_node, V8HI_type_node, NULL_TREE);
21284 tree v1di_ftype_v8qi_v8qi
21285 = build_function_type_list (V1DI_type_node,
21286 V8QI_type_node, V8QI_type_node, NULL_TREE);
21287 tree v1di_ftype_v2si_v2si
21288 = build_function_type_list (V1DI_type_node,
21289 V2SI_type_node, V2SI_type_node, NULL_TREE);
21290 tree v2di_ftype_v16qi_v16qi
21291 = build_function_type_list (V2DI_type_node,
21292 V16QI_type_node, V16QI_type_node, NULL_TREE);
21293 tree v2di_ftype_v4si_v4si
21294 = build_function_type_list (V2DI_type_node,
21295 V4SI_type_node, V4SI_type_node, NULL_TREE);
21296 tree int_ftype_v16qi
21297 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
21298 tree v16qi_ftype_pcchar
21299 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
21300 tree void_ftype_pchar_v16qi
21301 = build_function_type_list (void_type_node,
21302 pchar_type_node, V16QI_type_node, NULL_TREE);
21304 tree v2di_ftype_v2di_unsigned_unsigned
21305 = build_function_type_list (V2DI_type_node, V2DI_type_node,
21306 unsigned_type_node, unsigned_type_node,
21308 tree v2di_ftype_v2di_v2di_unsigned_unsigned
21309 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
21310 unsigned_type_node, unsigned_type_node,
21312 tree v2di_ftype_v2di_v16qi
21313 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
21315 tree v2df_ftype_v2df_v2df_v2df
21316 = build_function_type_list (V2DF_type_node,
21317 V2DF_type_node, V2DF_type_node,
21318 V2DF_type_node, NULL_TREE);
21319 tree v4sf_ftype_v4sf_v4sf_v4sf
21320 = build_function_type_list (V4SF_type_node,
21321 V4SF_type_node, V4SF_type_node,
21322 V4SF_type_node, NULL_TREE);
21323 tree v8hi_ftype_v16qi
21324 = build_function_type_list (V8HI_type_node, V16QI_type_node,
21326 tree v4si_ftype_v16qi
21327 = build_function_type_list (V4SI_type_node, V16QI_type_node,
21329 tree v2di_ftype_v16qi
21330 = build_function_type_list (V2DI_type_node, V16QI_type_node,
21332 tree v4si_ftype_v8hi
21333 = build_function_type_list (V4SI_type_node, V8HI_type_node,
21335 tree v2di_ftype_v8hi
21336 = build_function_type_list (V2DI_type_node, V8HI_type_node,
21338 tree v2di_ftype_v4si
21339 = build_function_type_list (V2DI_type_node, V4SI_type_node,
21341 tree v2di_ftype_pv2di
21342 = build_function_type_list (V2DI_type_node, pv2di_type_node,
21344 tree v16qi_ftype_v16qi_v16qi_int
21345 = build_function_type_list (V16QI_type_node, V16QI_type_node,
21346 V16QI_type_node, integer_type_node,
21348 tree v16qi_ftype_v16qi_v16qi_v16qi
21349 = build_function_type_list (V16QI_type_node, V16QI_type_node,
21350 V16QI_type_node, V16QI_type_node,
21352 tree v8hi_ftype_v8hi_v8hi_int
21353 = build_function_type_list (V8HI_type_node, V8HI_type_node,
21354 V8HI_type_node, integer_type_node,
21356 tree v4si_ftype_v4si_v4si_int
21357 = build_function_type_list (V4SI_type_node, V4SI_type_node,
21358 V4SI_type_node, integer_type_node,
21360 tree int_ftype_v2di_v2di
21361 = build_function_type_list (integer_type_node,
21362 V2DI_type_node, V2DI_type_node,
21364 tree int_ftype_v16qi_int_v16qi_int_int
21365 = build_function_type_list (integer_type_node,
21372 tree v16qi_ftype_v16qi_int_v16qi_int_int
21373 = build_function_type_list (V16QI_type_node,
21380 tree int_ftype_v16qi_v16qi_int
21381 = build_function_type_list (integer_type_node,
21387 /* SSE5 instructions */
21388 tree v2di_ftype_v2di_v2di_v2di
21389 = build_function_type_list (V2DI_type_node,
21395 tree v4si_ftype_v4si_v4si_v4si
21396 = build_function_type_list (V4SI_type_node,
21402 tree v4si_ftype_v4si_v4si_v2di
21403 = build_function_type_list (V4SI_type_node,
21409 tree v8hi_ftype_v8hi_v8hi_v8hi
21410 = build_function_type_list (V8HI_type_node,
21416 tree v8hi_ftype_v8hi_v8hi_v4si
21417 = build_function_type_list (V8HI_type_node,
21423 tree v2df_ftype_v2df_v2df_v16qi
21424 = build_function_type_list (V2DF_type_node,
21430 tree v4sf_ftype_v4sf_v4sf_v16qi
21431 = build_function_type_list (V4SF_type_node,
21437 tree v2di_ftype_v2di_si
21438 = build_function_type_list (V2DI_type_node,
21443 tree v4si_ftype_v4si_si
21444 = build_function_type_list (V4SI_type_node,
21449 tree v8hi_ftype_v8hi_si
21450 = build_function_type_list (V8HI_type_node,
21455 tree v16qi_ftype_v16qi_si
21456 = build_function_type_list (V16QI_type_node,
21460 tree v4sf_ftype_v4hi
21461 = build_function_type_list (V4SF_type_node,
21465 tree v4hi_ftype_v4sf
21466 = build_function_type_list (V4HI_type_node,
21470 tree v2di_ftype_v2di
21471 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
21473 tree v16qi_ftype_v8hi_v8hi
21474 = build_function_type_list (V16QI_type_node,
21475 V8HI_type_node, V8HI_type_node,
21477 tree v8hi_ftype_v4si_v4si
21478 = build_function_type_list (V8HI_type_node,
21479 V4SI_type_node, V4SI_type_node,
21481 tree v8hi_ftype_v16qi_v16qi
21482 = build_function_type_list (V8HI_type_node,
21483 V16QI_type_node, V16QI_type_node,
21485 tree v4hi_ftype_v8qi_v8qi
21486 = build_function_type_list (V4HI_type_node,
21487 V8QI_type_node, V8QI_type_node,
21489 tree unsigned_ftype_unsigned_uchar
21490 = build_function_type_list (unsigned_type_node,
21491 unsigned_type_node,
21492 unsigned_char_type_node,
21494 tree unsigned_ftype_unsigned_ushort
21495 = build_function_type_list (unsigned_type_node,
21496 unsigned_type_node,
21497 short_unsigned_type_node,
21499 tree unsigned_ftype_unsigned_unsigned
21500 = build_function_type_list (unsigned_type_node,
21501 unsigned_type_node,
21502 unsigned_type_node,
21504 tree uint64_ftype_uint64_uint64
21505 = build_function_type_list (long_long_unsigned_type_node,
21506 long_long_unsigned_type_node,
21507 long_long_unsigned_type_node,
21509 tree float_ftype_float
21510 = build_function_type_list (float_type_node,
21515 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
21517 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
21519 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
21521 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
21523 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
21525 tree v8sf_ftype_v8sf
21526 = build_function_type_list (V8SF_type_node,
21529 tree v8si_ftype_v8sf
21530 = build_function_type_list (V8SI_type_node,
21533 tree v8sf_ftype_v8si
21534 = build_function_type_list (V8SF_type_node,
21537 tree v4si_ftype_v4df
21538 = build_function_type_list (V4SI_type_node,
21541 tree v4df_ftype_v4df
21542 = build_function_type_list (V4DF_type_node,
21545 tree v4df_ftype_v4si
21546 = build_function_type_list (V4DF_type_node,
21549 tree v4df_ftype_v4sf
21550 = build_function_type_list (V4DF_type_node,
21553 tree v4sf_ftype_v4df
21554 = build_function_type_list (V4SF_type_node,
21557 tree v8sf_ftype_v8sf_v8sf
21558 = build_function_type_list (V8SF_type_node,
21559 V8SF_type_node, V8SF_type_node,
21561 tree v4df_ftype_v4df_v4df
21562 = build_function_type_list (V4DF_type_node,
21563 V4DF_type_node, V4DF_type_node,
21565 tree v8sf_ftype_v8sf_int
21566 = build_function_type_list (V8SF_type_node,
21567 V8SF_type_node, integer_type_node,
21569 tree v4si_ftype_v8si_int
21570 = build_function_type_list (V4SI_type_node,
21571 V8SI_type_node, integer_type_node,
21573 tree v4df_ftype_v4df_int
21574 = build_function_type_list (V4DF_type_node,
21575 V4DF_type_node, integer_type_node,
21577 tree v4sf_ftype_v8sf_int
21578 = build_function_type_list (V4SF_type_node,
21579 V8SF_type_node, integer_type_node,
21581 tree v2df_ftype_v4df_int
21582 = build_function_type_list (V2DF_type_node,
21583 V4DF_type_node, integer_type_node,
21585 tree v8sf_ftype_v8sf_v8sf_int
21586 = build_function_type_list (V8SF_type_node,
21587 V8SF_type_node, V8SF_type_node,
21590 tree v8sf_ftype_v8sf_v8sf_v8sf
21591 = build_function_type_list (V8SF_type_node,
21592 V8SF_type_node, V8SF_type_node,
21595 tree v4df_ftype_v4df_v4df_v4df
21596 = build_function_type_list (V4DF_type_node,
21597 V4DF_type_node, V4DF_type_node,
21600 tree v8si_ftype_v8si_v8si_int
21601 = build_function_type_list (V8SI_type_node,
21602 V8SI_type_node, V8SI_type_node,
21605 tree v4df_ftype_v4df_v4df_int
21606 = build_function_type_list (V4DF_type_node,
21607 V4DF_type_node, V4DF_type_node,
21610 tree v8sf_ftype_v8sf_v8sf_v8si_int
21611 = build_function_type_list (V8SF_type_node,
21612 V8SF_type_node, V8SF_type_node,
21613 V8SI_type_node, integer_type_node,
21615 tree v4df_ftype_v4df_v4df_v4di_int
21616 = build_function_type_list (V4DF_type_node,
21617 V4DF_type_node, V4DF_type_node,
21618 V4DI_type_node, integer_type_node,
21620 tree v4sf_ftype_v4sf_v4sf_v4si_int
21621 = build_function_type_list (V4SF_type_node,
21622 V4SF_type_node, V4SF_type_node,
21623 V4SI_type_node, integer_type_node,
21625 tree v2df_ftype_v2df_v2df_v2di_int
21626 = build_function_type_list (V2DF_type_node,
21627 V2DF_type_node, V2DF_type_node,
21628 V2DI_type_node, integer_type_node,
21630 tree v8sf_ftype_pcfloat
21631 = build_function_type_list (V8SF_type_node,
21634 tree v4df_ftype_pcdouble
21635 = build_function_type_list (V4DF_type_node,
21636 pcdouble_type_node,
21638 tree pcv4sf_type_node
21639 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
21640 tree pcv2df_type_node
21641 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
21642 tree v8sf_ftype_pcv4sf
21643 = build_function_type_list (V8SF_type_node,
21646 tree v4df_ftype_pcv2df
21647 = build_function_type_list (V4DF_type_node,
21650 tree v32qi_ftype_pcchar
21651 = build_function_type_list (V32QI_type_node,
21654 tree void_ftype_pchar_v32qi
21655 = build_function_type_list (void_type_node,
21656 pchar_type_node, V32QI_type_node,
21658 tree v8si_ftype_v8si_v4si_int
21659 = build_function_type_list (V8SI_type_node,
21660 V8SI_type_node, V4SI_type_node,
21663 tree v8sf_ftype_v8sf_v4sf_int
21664 = build_function_type_list (V8SF_type_node,
21665 V8SF_type_node, V4SF_type_node,
21668 tree v4df_ftype_v4df_v2df_int
21669 = build_function_type_list (V4DF_type_node,
21670 V4DF_type_node, V2DF_type_node,
21673 tree void_ftype_pfloat_v8sf
21674 = build_function_type_list (void_type_node,
21675 pfloat_type_node, V8SF_type_node,
21677 tree void_ftype_pdouble_v4df
21678 = build_function_type_list (void_type_node,
21679 pdouble_type_node, V4DF_type_node,
21681 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
21682 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
21683 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
21684 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
21685 tree pcv8sf_type_node
21686 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
21687 tree pcv4df_type_node
21688 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
21689 tree v8sf_ftype_pcv8sf_v8sf
21690 = build_function_type_list (V8SF_type_node,
21691 pcv8sf_type_node, V8SF_type_node,
21693 tree v4df_ftype_pcv4df_v4df
21694 = build_function_type_list (V4DF_type_node,
21695 pcv4df_type_node, V4DF_type_node,
21697 tree v4sf_ftype_pcv4sf_v4sf
21698 = build_function_type_list (V4SF_type_node,
21699 pcv4sf_type_node, V4SF_type_node,
21701 tree v2df_ftype_pcv2df_v2df
21702 = build_function_type_list (V2DF_type_node,
21703 pcv2df_type_node, V2DF_type_node,
21705 tree void_ftype_pv8sf_v8sf_v8sf
21706 = build_function_type_list (void_type_node,
21707 pv8sf_type_node, V8SF_type_node,
21710 tree void_ftype_pv4df_v4df_v4df
21711 = build_function_type_list (void_type_node,
21712 pv4df_type_node, V4DF_type_node,
21715 tree void_ftype_pv4sf_v4sf_v4sf
21716 = build_function_type_list (void_type_node,
21717 pv4sf_type_node, V4SF_type_node,
21720 tree void_ftype_pv2df_v2df_v2df
21721 = build_function_type_list (void_type_node,
21722 pv2df_type_node, V2DF_type_node,
21725 tree v4df_ftype_v2df
21726 = build_function_type_list (V4DF_type_node,
21729 tree v8sf_ftype_v4sf
21730 = build_function_type_list (V8SF_type_node,
21733 tree v8si_ftype_v4si
21734 = build_function_type_list (V8SI_type_node,
21737 tree v2df_ftype_v4df
21738 = build_function_type_list (V2DF_type_node,
21741 tree v4sf_ftype_v8sf
21742 = build_function_type_list (V4SF_type_node,
21745 tree v4si_ftype_v8si
21746 = build_function_type_list (V4SI_type_node,
21749 tree int_ftype_v4df
21750 = build_function_type_list (integer_type_node,
21753 tree int_ftype_v8sf
21754 = build_function_type_list (integer_type_node,
21757 tree int_ftype_v8sf_v8sf
21758 = build_function_type_list (integer_type_node,
21759 V8SF_type_node, V8SF_type_node,
21761 tree int_ftype_v4di_v4di
21762 = build_function_type_list (integer_type_node,
21763 V4DI_type_node, V4DI_type_node,
21765 tree int_ftype_v4df_v4df
21766 = build_function_type_list (integer_type_node,
21767 V4DF_type_node, V4DF_type_node,
21769 tree v8sf_ftype_v8sf_v8si
21770 = build_function_type_list (V8SF_type_node,
21771 V8SF_type_node, V8SI_type_node,
21773 tree v4df_ftype_v4df_v4di
21774 = build_function_type_list (V4DF_type_node,
21775 V4DF_type_node, V4DI_type_node,
21777 tree v4sf_ftype_v4sf_v4si
21778 = build_function_type_list (V4SF_type_node,
21779 V4SF_type_node, V4SI_type_node, NULL_TREE);
21780 tree v2df_ftype_v2df_v2di
21781 = build_function_type_list (V2DF_type_node,
21782 V2DF_type_node, V2DI_type_node, NULL_TREE);
21786 /* Add all special builtins with variable number of operands. */
21787 for (i = 0, d = bdesc_special_args;
21788 i < ARRAY_SIZE (bdesc_special_args);
21796 switch ((enum ix86_special_builtin_type) d->flag)
21798 case VOID_FTYPE_VOID:
21799 type = void_ftype_void;
21801 case V32QI_FTYPE_PCCHAR:
21802 type = v32qi_ftype_pcchar;
21804 case V16QI_FTYPE_PCCHAR:
21805 type = v16qi_ftype_pcchar;
21807 case V8SF_FTYPE_PCV4SF:
21808 type = v8sf_ftype_pcv4sf;
21810 case V8SF_FTYPE_PCFLOAT:
21811 type = v8sf_ftype_pcfloat;
21813 case V4DF_FTYPE_PCV2DF:
21814 type = v4df_ftype_pcv2df;
21816 case V4DF_FTYPE_PCDOUBLE:
21817 type = v4df_ftype_pcdouble;
21819 case V4SF_FTYPE_PCFLOAT:
21820 type = v4sf_ftype_pcfloat;
21822 case V2DI_FTYPE_PV2DI:
21823 type = v2di_ftype_pv2di;
21825 case V2DF_FTYPE_PCDOUBLE:
21826 type = v2df_ftype_pcdouble;
21828 case V8SF_FTYPE_PCV8SF_V8SF:
21829 type = v8sf_ftype_pcv8sf_v8sf;
21831 case V4DF_FTYPE_PCV4DF_V4DF:
21832 type = v4df_ftype_pcv4df_v4df;
21834 case V4SF_FTYPE_V4SF_PCV2SF:
21835 type = v4sf_ftype_v4sf_pcv2sf;
21837 case V4SF_FTYPE_PCV4SF_V4SF:
21838 type = v4sf_ftype_pcv4sf_v4sf;
21840 case V2DF_FTYPE_V2DF_PCDOUBLE:
21841 type = v2df_ftype_v2df_pcdouble;
21843 case V2DF_FTYPE_PCV2DF_V2DF:
21844 type = v2df_ftype_pcv2df_v2df;
21846 case VOID_FTYPE_PV2SF_V4SF:
21847 type = void_ftype_pv2sf_v4sf;
21849 case VOID_FTYPE_PV2DI_V2DI:
21850 type = void_ftype_pv2di_v2di;
21852 case VOID_FTYPE_PCHAR_V32QI:
21853 type = void_ftype_pchar_v32qi;
21855 case VOID_FTYPE_PCHAR_V16QI:
21856 type = void_ftype_pchar_v16qi;
21858 case VOID_FTYPE_PFLOAT_V8SF:
21859 type = void_ftype_pfloat_v8sf;
21861 case VOID_FTYPE_PFLOAT_V4SF:
21862 type = void_ftype_pfloat_v4sf;
21864 case VOID_FTYPE_PDOUBLE_V4DF:
21865 type = void_ftype_pdouble_v4df;
21867 case VOID_FTYPE_PDOUBLE_V2DF:
21868 type = void_ftype_pdouble_v2df;
21870 case VOID_FTYPE_PDI_DI:
21871 type = void_ftype_pdi_di;
21873 case VOID_FTYPE_PINT_INT:
21874 type = void_ftype_pint_int;
21876 case VOID_FTYPE_PV8SF_V8SF_V8SF:
21877 type = void_ftype_pv8sf_v8sf_v8sf;
21879 case VOID_FTYPE_PV4DF_V4DF_V4DF:
21880 type = void_ftype_pv4df_v4df_v4df;
21882 case VOID_FTYPE_PV4SF_V4SF_V4SF:
21883 type = void_ftype_pv4sf_v4sf_v4sf;
21885 case VOID_FTYPE_PV2DF_V2DF_V2DF:
21886 type = void_ftype_pv2df_v2df_v2df;
21889 gcc_unreachable ();
21892 def_builtin (d->mask, d->name, type, d->code);
21895 /* Add all builtins with variable number of operands. */
21896 for (i = 0, d = bdesc_args;
21897 i < ARRAY_SIZE (bdesc_args);
21905 switch ((enum ix86_builtin_type) d->flag)
21907 case FLOAT_FTYPE_FLOAT:
21908 type = float_ftype_float;
21910 case INT_FTYPE_V8SF_V8SF_PTEST:
21911 type = int_ftype_v8sf_v8sf;
21913 case INT_FTYPE_V4DI_V4DI_PTEST:
21914 type = int_ftype_v4di_v4di;
21916 case INT_FTYPE_V4DF_V4DF_PTEST:
21917 type = int_ftype_v4df_v4df;
21919 case INT_FTYPE_V4SF_V4SF_PTEST:
21920 type = int_ftype_v4sf_v4sf;
21922 case INT_FTYPE_V2DI_V2DI_PTEST:
21923 type = int_ftype_v2di_v2di;
21925 case INT_FTYPE_V2DF_V2DF_PTEST:
21926 type = int_ftype_v2df_v2df;
21928 case INT64_FTYPE_V4SF:
21929 type = int64_ftype_v4sf;
21931 case INT64_FTYPE_V2DF:
21932 type = int64_ftype_v2df;
21934 case INT_FTYPE_V16QI:
21935 type = int_ftype_v16qi;
21937 case INT_FTYPE_V8QI:
21938 type = int_ftype_v8qi;
21940 case INT_FTYPE_V8SF:
21941 type = int_ftype_v8sf;
21943 case INT_FTYPE_V4DF:
21944 type = int_ftype_v4df;
21946 case INT_FTYPE_V4SF:
21947 type = int_ftype_v4sf;
21949 case INT_FTYPE_V2DF:
21950 type = int_ftype_v2df;
21952 case V16QI_FTYPE_V16QI:
21953 type = v16qi_ftype_v16qi;
21955 case V8SI_FTYPE_V8SF:
21956 type = v8si_ftype_v8sf;
21958 case V8SI_FTYPE_V4SI:
21959 type = v8si_ftype_v4si;
21961 case V8HI_FTYPE_V8HI:
21962 type = v8hi_ftype_v8hi;
21964 case V8HI_FTYPE_V16QI:
21965 type = v8hi_ftype_v16qi;
21967 case V8QI_FTYPE_V8QI:
21968 type = v8qi_ftype_v8qi;
21970 case V8SF_FTYPE_V8SF:
21971 type = v8sf_ftype_v8sf;
21973 case V8SF_FTYPE_V8SI:
21974 type = v8sf_ftype_v8si;
21976 case V8SF_FTYPE_V4SF:
21977 type = v8sf_ftype_v4sf;
21979 case V4SI_FTYPE_V4DF:
21980 type = v4si_ftype_v4df;
21982 case V4SI_FTYPE_V4SI:
21983 type = v4si_ftype_v4si;
21985 case V4SI_FTYPE_V16QI:
21986 type = v4si_ftype_v16qi;
21988 case V4SI_FTYPE_V8SI:
21989 type = v4si_ftype_v8si;
21991 case V4SI_FTYPE_V8HI:
21992 type = v4si_ftype_v8hi;
21994 case V4SI_FTYPE_V4SF:
21995 type = v4si_ftype_v4sf;
21997 case V4SI_FTYPE_V2DF:
21998 type = v4si_ftype_v2df;
22000 case V4HI_FTYPE_V4HI:
22001 type = v4hi_ftype_v4hi;
22003 case V4DF_FTYPE_V4DF:
22004 type = v4df_ftype_v4df;
22006 case V4DF_FTYPE_V4SI:
22007 type = v4df_ftype_v4si;
22009 case V4DF_FTYPE_V4SF:
22010 type = v4df_ftype_v4sf;
22012 case V4DF_FTYPE_V2DF:
22013 type = v4df_ftype_v2df;
22015 case V4SF_FTYPE_V4SF:
22016 case V4SF_FTYPE_V4SF_VEC_MERGE:
22017 type = v4sf_ftype_v4sf;
22019 case V4SF_FTYPE_V8SF:
22020 type = v4sf_ftype_v8sf;
22022 case V4SF_FTYPE_V4SI:
22023 type = v4sf_ftype_v4si;
22025 case V4SF_FTYPE_V4DF:
22026 type = v4sf_ftype_v4df;
22028 case V4SF_FTYPE_V2DF:
22029 type = v4sf_ftype_v2df;
22031 case V2DI_FTYPE_V2DI:
22032 type = v2di_ftype_v2di;
22034 case V2DI_FTYPE_V16QI:
22035 type = v2di_ftype_v16qi;
22037 case V2DI_FTYPE_V8HI:
22038 type = v2di_ftype_v8hi;
22040 case V2DI_FTYPE_V4SI:
22041 type = v2di_ftype_v4si;
22043 case V2SI_FTYPE_V2SI:
22044 type = v2si_ftype_v2si;
22046 case V2SI_FTYPE_V4SF:
22047 type = v2si_ftype_v4sf;
22049 case V2SI_FTYPE_V2DF:
22050 type = v2si_ftype_v2df;
22052 case V2SI_FTYPE_V2SF:
22053 type = v2si_ftype_v2sf;
22055 case V2DF_FTYPE_V4DF:
22056 type = v2df_ftype_v4df;
22058 case V2DF_FTYPE_V4SF:
22059 type = v2df_ftype_v4sf;
22061 case V2DF_FTYPE_V2DF:
22062 case V2DF_FTYPE_V2DF_VEC_MERGE:
22063 type = v2df_ftype_v2df;
22065 case V2DF_FTYPE_V2SI:
22066 type = v2df_ftype_v2si;
22068 case V2DF_FTYPE_V4SI:
22069 type = v2df_ftype_v4si;
22071 case V2SF_FTYPE_V2SF:
22072 type = v2sf_ftype_v2sf;
22074 case V2SF_FTYPE_V2SI:
22075 type = v2sf_ftype_v2si;
22077 case V16QI_FTYPE_V16QI_V16QI:
22078 type = v16qi_ftype_v16qi_v16qi;
22080 case V16QI_FTYPE_V8HI_V8HI:
22081 type = v16qi_ftype_v8hi_v8hi;
22083 case V8QI_FTYPE_V8QI_V8QI:
22084 type = v8qi_ftype_v8qi_v8qi;
22086 case V8QI_FTYPE_V4HI_V4HI:
22087 type = v8qi_ftype_v4hi_v4hi;
22089 case V8HI_FTYPE_V8HI_V8HI:
22090 case V8HI_FTYPE_V8HI_V8HI_COUNT:
22091 type = v8hi_ftype_v8hi_v8hi;
22093 case V8HI_FTYPE_V16QI_V16QI:
22094 type = v8hi_ftype_v16qi_v16qi;
22096 case V8HI_FTYPE_V4SI_V4SI:
22097 type = v8hi_ftype_v4si_v4si;
22099 case V8HI_FTYPE_V8HI_SI_COUNT:
22100 type = v8hi_ftype_v8hi_int;
22102 case V8SF_FTYPE_V8SF_V8SF:
22103 type = v8sf_ftype_v8sf_v8sf;
22105 case V8SF_FTYPE_V8SF_V8SI:
22106 type = v8sf_ftype_v8sf_v8si;
22108 case V4SI_FTYPE_V4SI_V4SI:
22109 case V4SI_FTYPE_V4SI_V4SI_COUNT:
22110 type = v4si_ftype_v4si_v4si;
22112 case V4SI_FTYPE_V8HI_V8HI:
22113 type = v4si_ftype_v8hi_v8hi;
22115 case V4SI_FTYPE_V4SF_V4SF:
22116 type = v4si_ftype_v4sf_v4sf;
22118 case V4SI_FTYPE_V2DF_V2DF:
22119 type = v4si_ftype_v2df_v2df;
22121 case V4SI_FTYPE_V4SI_SI_COUNT:
22122 type = v4si_ftype_v4si_int;
22124 case V4HI_FTYPE_V4HI_V4HI:
22125 case V4HI_FTYPE_V4HI_V4HI_COUNT:
22126 type = v4hi_ftype_v4hi_v4hi;
22128 case V4HI_FTYPE_V8QI_V8QI:
22129 type = v4hi_ftype_v8qi_v8qi;
22131 case V4HI_FTYPE_V2SI_V2SI:
22132 type = v4hi_ftype_v2si_v2si;
22134 case V4HI_FTYPE_V4HI_SI_COUNT:
22135 type = v4hi_ftype_v4hi_int;
22137 case V4DF_FTYPE_V4DF_V4DF:
22138 type = v4df_ftype_v4df_v4df;
22140 case V4DF_FTYPE_V4DF_V4DI:
22141 type = v4df_ftype_v4df_v4di;
22143 case V4SF_FTYPE_V4SF_V4SF:
22144 case V4SF_FTYPE_V4SF_V4SF_SWAP:
22145 type = v4sf_ftype_v4sf_v4sf;
22147 case V4SF_FTYPE_V4SF_V4SI:
22148 type = v4sf_ftype_v4sf_v4si;
22150 case V4SF_FTYPE_V4SF_V2SI:
22151 type = v4sf_ftype_v4sf_v2si;
22153 case V4SF_FTYPE_V4SF_V2DF:
22154 type = v4sf_ftype_v4sf_v2df;
22156 case V4SF_FTYPE_V4SF_DI:
22157 type = v4sf_ftype_v4sf_int64;
22159 case V4SF_FTYPE_V4SF_SI:
22160 type = v4sf_ftype_v4sf_int;
22162 case V2DI_FTYPE_V2DI_V2DI:
22163 case V2DI_FTYPE_V2DI_V2DI_COUNT:
22164 type = v2di_ftype_v2di_v2di;
22166 case V2DI_FTYPE_V16QI_V16QI:
22167 type = v2di_ftype_v16qi_v16qi;
22169 case V2DI_FTYPE_V4SI_V4SI:
22170 type = v2di_ftype_v4si_v4si;
22172 case V2DI_FTYPE_V2DI_V16QI:
22173 type = v2di_ftype_v2di_v16qi;
22175 case V2DI_FTYPE_V2DF_V2DF:
22176 type = v2di_ftype_v2df_v2df;
22178 case V2DI_FTYPE_V2DI_SI_COUNT:
22179 type = v2di_ftype_v2di_int;
22181 case V2SI_FTYPE_V2SI_V2SI:
22182 case V2SI_FTYPE_V2SI_V2SI_COUNT:
22183 type = v2si_ftype_v2si_v2si;
22185 case V2SI_FTYPE_V4HI_V4HI:
22186 type = v2si_ftype_v4hi_v4hi;
22188 case V2SI_FTYPE_V2SF_V2SF:
22189 type = v2si_ftype_v2sf_v2sf;
22191 case V2SI_FTYPE_V2SI_SI_COUNT:
22192 type = v2si_ftype_v2si_int;
22194 case V2DF_FTYPE_V2DF_V2DF:
22195 case V2DF_FTYPE_V2DF_V2DF_SWAP:
22196 type = v2df_ftype_v2df_v2df;
22198 case V2DF_FTYPE_V2DF_V4SF:
22199 type = v2df_ftype_v2df_v4sf;
22201 case V2DF_FTYPE_V2DF_V2DI:
22202 type = v2df_ftype_v2df_v2di;
22204 case V2DF_FTYPE_V2DF_DI:
22205 type = v2df_ftype_v2df_int64;
22207 case V2DF_FTYPE_V2DF_SI:
22208 type = v2df_ftype_v2df_int;
22210 case V2SF_FTYPE_V2SF_V2SF:
22211 type = v2sf_ftype_v2sf_v2sf;
22213 case V1DI_FTYPE_V1DI_V1DI:
22214 case V1DI_FTYPE_V1DI_V1DI_COUNT:
22215 type = v1di_ftype_v1di_v1di;
22217 case V1DI_FTYPE_V8QI_V8QI:
22218 type = v1di_ftype_v8qi_v8qi;
22220 case V1DI_FTYPE_V2SI_V2SI:
22221 type = v1di_ftype_v2si_v2si;
22223 case V1DI_FTYPE_V1DI_SI_COUNT:
22224 type = v1di_ftype_v1di_int;
22226 case UINT64_FTYPE_UINT64_UINT64:
22227 type = uint64_ftype_uint64_uint64;
22229 case UINT_FTYPE_UINT_UINT:
22230 type = unsigned_ftype_unsigned_unsigned;
22232 case UINT_FTYPE_UINT_USHORT:
22233 type = unsigned_ftype_unsigned_ushort;
22235 case UINT_FTYPE_UINT_UCHAR:
22236 type = unsigned_ftype_unsigned_uchar;
22238 case V8HI_FTYPE_V8HI_INT:
22239 type = v8hi_ftype_v8hi_int;
22241 case V8SF_FTYPE_V8SF_INT:
22242 type = v8sf_ftype_v8sf_int;
22244 case V4SI_FTYPE_V4SI_INT:
22245 type = v4si_ftype_v4si_int;
22247 case V4SI_FTYPE_V8SI_INT:
22248 type = v4si_ftype_v8si_int;
22250 case V4HI_FTYPE_V4HI_INT:
22251 type = v4hi_ftype_v4hi_int;
22253 case V4DF_FTYPE_V4DF_INT:
22254 type = v4df_ftype_v4df_int;
22256 case V4SF_FTYPE_V4SF_INT:
22257 type = v4sf_ftype_v4sf_int;
22259 case V4SF_FTYPE_V8SF_INT:
22260 type = v4sf_ftype_v8sf_int;
22262 case V2DI_FTYPE_V2DI_INT:
22263 case V2DI2TI_FTYPE_V2DI_INT:
22264 type = v2di_ftype_v2di_int;
22266 case V2DF_FTYPE_V2DF_INT:
22267 type = v2df_ftype_v2df_int;
22269 case V2DF_FTYPE_V4DF_INT:
22270 type = v2df_ftype_v4df_int;
22272 case V16QI_FTYPE_V16QI_V16QI_V16QI:
22273 type = v16qi_ftype_v16qi_v16qi_v16qi;
22275 case V8SF_FTYPE_V8SF_V8SF_V8SF:
22276 type = v8sf_ftype_v8sf_v8sf_v8sf;
22278 case V4DF_FTYPE_V4DF_V4DF_V4DF:
22279 type = v4df_ftype_v4df_v4df_v4df;
22281 case V4SF_FTYPE_V4SF_V4SF_V4SF:
22282 type = v4sf_ftype_v4sf_v4sf_v4sf;
22284 case V2DF_FTYPE_V2DF_V2DF_V2DF:
22285 type = v2df_ftype_v2df_v2df_v2df;
22287 case V16QI_FTYPE_V16QI_V16QI_INT:
22288 type = v16qi_ftype_v16qi_v16qi_int;
22290 case V8SI_FTYPE_V8SI_V8SI_INT:
22291 type = v8si_ftype_v8si_v8si_int;
22293 case V8SI_FTYPE_V8SI_V4SI_INT:
22294 type = v8si_ftype_v8si_v4si_int;
22296 case V8HI_FTYPE_V8HI_V8HI_INT:
22297 type = v8hi_ftype_v8hi_v8hi_int;
22299 case V8SF_FTYPE_V8SF_V8SF_INT:
22300 type = v8sf_ftype_v8sf_v8sf_int;
22302 case V8SF_FTYPE_V8SF_V4SF_INT:
22303 type = v8sf_ftype_v8sf_v4sf_int;
22305 case V4SI_FTYPE_V4SI_V4SI_INT:
22306 type = v4si_ftype_v4si_v4si_int;
22308 case V4DF_FTYPE_V4DF_V4DF_INT:
22309 type = v4df_ftype_v4df_v4df_int;
22311 case V4DF_FTYPE_V4DF_V2DF_INT:
22312 type = v4df_ftype_v4df_v2df_int;
22314 case V4SF_FTYPE_V4SF_V4SF_INT:
22315 type = v4sf_ftype_v4sf_v4sf_int;
22317 case V2DI_FTYPE_V2DI_V2DI_INT:
22318 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
22319 type = v2di_ftype_v2di_v2di_int;
22321 case V2DF_FTYPE_V2DF_V2DF_INT:
22322 type = v2df_ftype_v2df_v2df_int;
22324 case V2DI_FTYPE_V2DI_UINT_UINT:
22325 type = v2di_ftype_v2di_unsigned_unsigned;
22327 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
22328 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
22330 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
22331 type = v1di_ftype_v1di_v1di_int;
22333 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
22334 type = v8sf_ftype_v8sf_v8sf_v8si_int;
22336 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
22337 type = v4df_ftype_v4df_v4df_v4di_int;
22339 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
22340 type = v4sf_ftype_v4sf_v4sf_v4si_int;
22342 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
22343 type = v2df_ftype_v2df_v2df_v2di_int;
22346 gcc_unreachable ();
22349 def_builtin_const (d->mask, d->name, type, d->code);
22352 /* pcmpestr[im] insns. */
22353 for (i = 0, d = bdesc_pcmpestr;
22354 i < ARRAY_SIZE (bdesc_pcmpestr);
22357 if (d->code == IX86_BUILTIN_PCMPESTRM128)
22358 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
22360 ftype = int_ftype_v16qi_int_v16qi_int_int;
22361 def_builtin_const (d->mask, d->name, ftype, d->code);
22364 /* pcmpistr[im] insns. */
22365 for (i = 0, d = bdesc_pcmpistr;
22366 i < ARRAY_SIZE (bdesc_pcmpistr);
22369 if (d->code == IX86_BUILTIN_PCMPISTRM128)
22370 ftype = v16qi_ftype_v16qi_v16qi_int;
22372 ftype = int_ftype_v16qi_v16qi_int;
22373 def_builtin_const (d->mask, d->name, ftype, d->code);
22376 /* comi/ucomi insns. */
22377 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
22378 if (d->mask == OPTION_MASK_ISA_SSE2)
22379 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
22381 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
22384 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
22385 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
22387 /* SSE or 3DNow!A */
22388 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
22391 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
22393 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
22394 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
22397 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
22398 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
22401 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
22402 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
22403 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
22404 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
22405 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
22406 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
22409 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
22412 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
22413 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
22415 /* Access to the vec_init patterns. */
22416 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
22417 integer_type_node, NULL_TREE);
22418 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
22420 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
22421 short_integer_type_node,
22422 short_integer_type_node,
22423 short_integer_type_node, NULL_TREE);
22424 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
22426 ftype = build_function_type_list (V8QI_type_node, char_type_node,
22427 char_type_node, char_type_node,
22428 char_type_node, char_type_node,
22429 char_type_node, char_type_node,
22430 char_type_node, NULL_TREE);
22431 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
22433 /* Access to the vec_extract patterns. */
22434 ftype = build_function_type_list (double_type_node, V2DF_type_node,
22435 integer_type_node, NULL_TREE);
22436 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
22438 ftype = build_function_type_list (long_long_integer_type_node,
22439 V2DI_type_node, integer_type_node,
22441 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
22443 ftype = build_function_type_list (float_type_node, V4SF_type_node,
22444 integer_type_node, NULL_TREE);
22445 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
22447 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
22448 integer_type_node, NULL_TREE);
22449 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
22451 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
22452 integer_type_node, NULL_TREE);
22453 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
22455 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
22456 integer_type_node, NULL_TREE);
22457 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
22459 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
22460 integer_type_node, NULL_TREE);
22461 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
22463 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
22464 integer_type_node, NULL_TREE);
22465 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
22467 /* Access to the vec_set patterns. */
22468 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
22470 integer_type_node, NULL_TREE);
22471 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
22473 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
22475 integer_type_node, NULL_TREE);
22476 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
22478 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
22480 integer_type_node, NULL_TREE);
22481 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
22483 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
22485 integer_type_node, NULL_TREE);
22486 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
22488 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
22490 integer_type_node, NULL_TREE);
22491 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
22493 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
22495 integer_type_node, NULL_TREE);
22496 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
22498 /* Add SSE5 multi-arg argument instructions */
22499 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
22501 tree mtype = NULL_TREE;
22506 switch ((enum multi_arg_type)d->flag)
22508 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
22509 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
22510 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
22511 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
22512 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
22513 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
22514 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
22515 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
22516 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
22517 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
22518 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
22519 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
22520 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
22521 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
22522 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
22523 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
22524 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
22525 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
22526 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
22527 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
22528 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
22529 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
22530 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
22531 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
22532 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
22533 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
22534 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
22535 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
22536 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
22537 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
22538 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
22539 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
22540 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
22541 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
22542 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
22543 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
22544 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
22545 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
22546 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
22547 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
22548 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
22549 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
22550 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
22551 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
22552 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
22553 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
22554 case MULTI_ARG_UNKNOWN:
22556 gcc_unreachable ();
22560 def_builtin_const (d->mask, d->name, mtype, d->code);
22564 /* Internal method for ix86_init_builtins. */
/* Register the ABI-specific va_start/va_end/va_copy builtins:
   __builtin_ms_va_{start,end,copy} carrying the "ms_abi" attribute and
   __builtin_sysv_va_{start,end,copy} carrying the "sysv_abi" attribute,
   so each builtin is expanded under the calling convention it belongs to.
   NOTE(review): this extract is missing several lines (e.g. the
   assignment to sysv_va_ref at the line before the
   build_pointer_type call, NULL_TREE list terminators, and the
   enclosing braces) — comments below only describe what the visible
   lines establish; confirm details against the full file.  */
22567 ix86_init_builtins_va_builtins_abi (void)
22569   tree ms_va_ref, sysv_va_ref;
22570   tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
22571   tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
22572   tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
22573   tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
      /* Attribute lists attached to each builtin's FUNCTION_DECL below.  */
22577   fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
22578   fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
      /* va_list arguments are passed by reference (pointer for SysV,
         whose va_list is an array type — see the TREE_TYPE deref).  */
22579   ms_va_ref = build_reference_type (ms_va_list_type_node);
22581     build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
22584     build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
22585   fnvoid_va_start_ms =
      /* va_start is variadic by nature, hence the varargs type list.  */
22586     build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
22587   fnvoid_va_end_sysv =
22588     build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
22589   fnvoid_va_start_sysv =
22590     build_varargs_function_type_list (void_type_node, sysv_va_ref,
22592   fnvoid_va_copy_ms =
22593     build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
22595   fnvoid_va_copy_sysv =
22596     build_function_type_list (void_type_node, sysv_va_ref,
22597 				       sysv_va_ref, NULL_TREE);
      /* Register each builtin with the generic BUILT_IN_VA_* codes so the
         middle end recognizes them as va_start/va_end/va_copy, while the
         attribute list selects the ABI.  */
22599   add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
22600   			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
22601   add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
22602   			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
22603   add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
22604 			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
22605   add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
22606 			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
22607   add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
22608 			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
22609   add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
22610 			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level target hook body: register the ia32-specific builtin types
   (__float80, __float128) and the TFmode helper builtins (__builtin_infq,
   __builtin_fabsq, __builtin_copysignq), then initialize the MMX/SSE
   builtins and the per-ABI va_* builtins.
   NOTE(review): this extract is missing lines (the `static void` header,
   a `tree ftype, decl;` style declaration, NULL_TREE terminators, string
   arguments to register_builtin_type, and braces) — comments describe
   only what the visible lines establish.  */
22614 ix86_init_builtins (void)
22616   tree float128_type_node = make_node (REAL_TYPE);
      /* The __float80 type: when long double is already XFmode, reuse it
         for __float80 rather than building a distinct type.  */
22619   /* The __float80 type.  */
22620   if (TYPE_MODE (long_double_type_node) == XFmode)
22621     (*lang_hooks.types.register_builtin_type) (long_double_type_node,
      /* Otherwise build a fresh 80-bit REAL_TYPE and register it.  */
22625       /* The __float80 type.  */
22626       tree float80_type_node = make_node (REAL_TYPE);
22628       TYPE_PRECISION (float80_type_node) = 80;
22629       layout_type (float80_type_node);
22630       (*lang_hooks.types.register_builtin_type) (float80_type_node,
      /* Build and register the 128-bit __float128 type.  */
22634   /* The __float128 type.  */
22635   TYPE_PRECISION (float128_type_node) = 128;
22636   layout_type (float128_type_node);
22637   (*lang_hooks.types.register_builtin_type) (float128_type_node,
      /* __builtin_infq: no arguments, returns a __float128 infinity.  */
22640   /* TFmode support builtins.  */
22641   ftype = build_function_type (float128_type_node, void_list_node);
22642   decl = add_builtin_function ("__builtin_infq", ftype,
22643 			       IX86_BUILTIN_INFQ, BUILT_IN_MD,
22645   ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
      /* fabsq/copysignq get library fallback names (__fabstf2,
         __copysigntf3) so they can be expanded as normal calls when SSE2
         is unavailable; both are pure, hence TREE_READONLY.  */
22647   /* We will expand them to normal call if SSE2 isn't available since
22648      they are used by libgcc. */
22649   ftype = build_function_type_list (float128_type_node,
22650 				    float128_type_node,
22652   decl = add_builtin_function ("__builtin_fabsq", ftype,
22653 			       IX86_BUILTIN_FABSQ, BUILT_IN_MD,
22654 			       "__fabstf2", NULL_TREE);
22655   ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
22656   TREE_READONLY (decl) = 1;
22658   ftype = build_function_type_list (float128_type_node,
22659 				    float128_type_node,
22660 				    float128_type_node,
22662   decl = add_builtin_function ("__builtin_copysignq", ftype,
22663 			       IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
22664 			       "__copysigntf3", NULL_TREE);
22665   ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
22666   TREE_READONLY (decl) = 1;
      /* Delegate the bulk of the builtin registration.  */
22668   ix86_init_mmx_sse_builtins ();
      /* Register __builtin_ms_va_* / __builtin_sysv_va_*.  */
22670     ix86_init_builtins_va_builtins_abi ();
22673 /* Errors in the source file can cause expand_expr to return const0_rtx
22674 where we expect a vector. To avoid crashing, use one of the vector
22675 clear instructions. */
22677 safe_vector_operand (rtx x, enum machine_mode mode)
/* Replace a scalar zero with the canonical zero vector of MODE.  */
22679 if (x == const0_rtx)
22680 x = CONST0_RTX (mode);
22684 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin call EXP using insn pattern ICODE,
   placing the result in TARGET (or a fresh pseudo when TARGET is
   unsuitable).  Returns via the elided tail (not visible in this dump).  */
22687 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
22690 tree arg0 = CALL_EXPR_ARG (exp, 0);
22691 tree arg1 = CALL_EXPR_ARG (exp, 1);
22692 rtx op0 = expand_normal (arg0);
22693 rtx op1 = expand_normal (arg1);
22694 enum machine_mode tmode = insn_data[icode].operand[0].mode;
22695 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
22696 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against error-recovery scalars where vectors are expected.  */
22698 if (VECTOR_MODE_P (mode0))
22699 op0 = safe_vector_operand (op0, mode0);
22700 if (VECTOR_MODE_P (mode1))
22701 op1 = safe_vector_operand (op1, mode1);
22703 if (optimize || !target
22704 || GET_MODE (target) != tmode
22705 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22706 target = gen_reg_rtx (tmode);
/* Widen an SImode count to TImode by loading it into a V4SI register
   and taking the TImode lowpart (sse2_loadd zero-extends).  */
22708 if (GET_MODE (op1) == SImode && mode1 == TImode)
22710 rtx x = gen_reg_rtx (V4SImode);
22711 emit_insn (gen_sse2_loadd (x, op1));
22712 op1 = gen_lowpart (TImode, x);
/* Force operands into registers when the insn predicates reject them.  */
22715 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
22716 op0 = copy_to_mode_reg (mode0, op0);
22717 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
22718 op1 = copy_to_mode_reg (mode1, op1);
22720 pat = GEN_FCN (icode) (target, op0, op1);
22729 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* M_TYPE classifies the builtin's arity and operand kinds; SUB_CODE is
   the comparison / sub-operation code for *_CMP and *_TF forms.  */
22732 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
22733 enum multi_arg_type m_type,
22734 enum insn_code sub_code)
22739 bool comparison_p = false;
22741 bool last_arg_constant = false;
22742 int num_memory = 0;
22745 enum machine_mode mode;
22748 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Decode M_TYPE: each group of cases sets nargs (elided lines) and the
   comparison / immediate-argument flags.  */
22752 case MULTI_ARG_3_SF:
22753 case MULTI_ARG_3_DF:
22754 case MULTI_ARG_3_DI:
22755 case MULTI_ARG_3_SI:
22756 case MULTI_ARG_3_SI_DI:
22757 case MULTI_ARG_3_HI:
22758 case MULTI_ARG_3_HI_SI:
22759 case MULTI_ARG_3_QI:
22760 case MULTI_ARG_3_PERMPS:
22761 case MULTI_ARG_3_PERMPD:
22765 case MULTI_ARG_2_SF:
22766 case MULTI_ARG_2_DF:
22767 case MULTI_ARG_2_DI:
22768 case MULTI_ARG_2_SI:
22769 case MULTI_ARG_2_HI:
22770 case MULTI_ARG_2_QI:
/* Two-operand forms whose last operand must be an immediate.  */
22774 case MULTI_ARG_2_DI_IMM:
22775 case MULTI_ARG_2_SI_IMM:
22776 case MULTI_ARG_2_HI_IMM:
22777 case MULTI_ARG_2_QI_IMM:
22779 last_arg_constant = true;
22782 case MULTI_ARG_1_SF:
22783 case MULTI_ARG_1_DF:
22784 case MULTI_ARG_1_DI:
22785 case MULTI_ARG_1_SI:
22786 case MULTI_ARG_1_HI:
22787 case MULTI_ARG_1_QI:
22788 case MULTI_ARG_1_SI_DI:
22789 case MULTI_ARG_1_HI_DI:
22790 case MULTI_ARG_1_HI_SI:
22791 case MULTI_ARG_1_QI_DI:
22792 case MULTI_ARG_1_QI_SI:
22793 case MULTI_ARG_1_QI_HI:
22794 case MULTI_ARG_1_PH2PS:
22795 case MULTI_ARG_1_PS2PH:
/* Comparison forms: an extra rtx comparison operand is built below.  */
22799 case MULTI_ARG_2_SF_CMP:
22800 case MULTI_ARG_2_DF_CMP:
22801 case MULTI_ARG_2_DI_CMP:
22802 case MULTI_ARG_2_SI_CMP:
22803 case MULTI_ARG_2_HI_CMP:
22804 case MULTI_ARG_2_QI_CMP:
22806 comparison_p = true;
22809 case MULTI_ARG_2_SF_TF:
22810 case MULTI_ARG_2_DF_TF:
22811 case MULTI_ARG_2_DI_TF:
22812 case MULTI_ARG_2_SI_TF:
22813 case MULTI_ARG_2_HI_TF:
22814 case MULTI_ARG_2_QI_TF:
22819 case MULTI_ARG_UNKNOWN:
22821 gcc_unreachable ();
/* Make sure TARGET is a fresh register of the right mode.  */
22824 if (optimize || !target
22825 || GET_MODE (target) != tmode
22826 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22827 target = gen_reg_rtx (tmode);
22829 gcc_assert (nargs <= 4);
/* Expand and legitimize each call argument.  For comparisons the insn's
   operand 1 is the comparison rtx, so argument I maps to insn operand
   I + adjust + 1.  */
22831 for (i = 0; i < nargs; i++)
22833 tree arg = CALL_EXPR_ARG (exp, i);
22834 rtx op = expand_normal (arg);
22835 int adjust = (comparison_p) ? 1 : 0;
22836 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
22838 if (last_arg_constant && i == nargs-1)
22840 if (GET_CODE (op) != CONST_INT)
22842 error ("last argument must be an immediate");
/* Error recovery: return a dummy register of the result mode.  */
22843 return gen_reg_rtx (tmode);
22848 if (VECTOR_MODE_P (mode))
22849 op = safe_vector_operand (op, mode);
22851 /* If we aren't optimizing, only allow one memory operand to be
22853 if (memory_operand (op, mode))
22856 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
22859 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
22861 op = force_reg (mode, op);
22865 args[i].mode = mode;
/* Emit the pattern; the shape of the call depends on arity and on
   whether a comparison/sub-code operand is required.  */
22871 pat = GEN_FCN (icode) (target, args[0].op);
22876 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
22877 GEN_INT ((int)sub_code));
22878 else if (! comparison_p)
22879 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
22882 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
22886 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
22891 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
22895 gcc_unreachable ();
22905 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
22906 insns with vec_merge. */
/* The insn's operand 2 is the pass-through vector merged with the
   scalar result; here it is the same value as operand 1 (OP0).  */
22909 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
22913 tree arg0 = CALL_EXPR_ARG (exp, 0);
22914 rtx op1, op0 = expand_normal (arg0);
22915 enum machine_mode tmode = insn_data[icode].operand[0].mode;
22916 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
22918 if (optimize || !target
22919 || GET_MODE (target) != tmode
22920 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22921 target = gen_reg_rtx (tmode);
22923 if (VECTOR_MODE_P (mode0))
22924 op0 = safe_vector_operand (op0, mode0);
22926 if ((optimize && !register_operand (op0, mode0))
22927 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22928 op0 = copy_to_mode_reg (mode0, op0);
/* op1 is assigned from op0 on an elided line before this check.  */
22931 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
22932 op1 = copy_to_mode_reg (mode0, op1)_
22934 pat = GEN_FCN (icode) (target, op0, op1);
22941 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* D describes the builtin (icode + rtx comparison code); SWAP requests
   exchanging the operands for comparisons the hardware only provides
   in one direction.  */
22944 ix86_expand_sse_compare (const struct builtin_description *d,
22945 tree exp, rtx target, bool swap)
22948 tree arg0 = CALL_EXPR_ARG (exp, 0);
22949 tree arg1 = CALL_EXPR_ARG (exp, 1);
22950 rtx op0 = expand_normal (arg0);
22951 rtx op1 = expand_normal (arg1);
22953 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
22954 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
22955 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
22956 enum rtx_code comparison = d->comparison;
22958 if (VECTOR_MODE_P (mode0))
22959 op0 = safe_vector_operand (op0, mode0);
22960 if (VECTOR_MODE_P (mode1))
22961 op1 = safe_vector_operand (op1, mode1);
22963 /* Swap operands if we have a comparison that isn't available in
/* Copy op1 through a scratch register while exchanging (swap tail is
   on elided lines).  */
22967 rtx tmp = gen_reg_rtx (mode1);
22968 emit_move_insn (tmp, op1);
22973 if (optimize || !target
22974 || GET_MODE (target) != tmode
22975 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
22976 target = gen_reg_rtx (tmode);
22978 if ((optimize && !register_operand (op0, mode0))
22979 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
22980 op0 = copy_to_mode_reg (mode0, op0);
22981 if ((optimize && !register_operand (op1, mode1))
22982 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
22983 op1 = copy_to_mode_reg (mode1, op1);
/* Operand 3 of the pattern is the comparison rtx itself.  */
22985 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
22986 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
22993 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Emit a comis/ucomis compare and materialize the flag result as an
   int: set a QImode lowpart from the comparison of the flags register,
   returning the enclosing SImode pseudo.  */
22996 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23000 tree arg0 = CALL_EXPR_ARG (exp, 0);
23001 tree arg1 = CALL_EXPR_ARG (exp, 1);
23002 rtx op0 = expand_normal (arg0);
23003 rtx op1 = expand_normal (arg1);
23004 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23005 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23006 enum rtx_code comparison = d->comparison;
23008 if (VECTOR_MODE_P (mode0))
23009 op0 = safe_vector_operand (op0, mode0);
23010 if (VECTOR_MODE_P (mode1))
23011 op1 = safe_vector_operand (op1, mode1);
23013 /* Swap operands if we have a comparison that isn't available in
23015 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Zero the full SImode result first so the STRICT_LOW_PART store of
   the QImode flag leaves the upper bits well-defined.  */
23022 target = gen_reg_rtx (SImode);
23023 emit_move_insn (target, const0_rtx);
23024 target = gen_rtx_SUBREG (QImode, target, 0);
23026 if ((optimize && !register_operand (op0, mode0))
23027 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23028 op0 = copy_to_mode_reg (mode0, op0);
23029 if ((optimize && !register_operand (op1, mode1))
23030 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23031 op1 = copy_to_mode_reg (mode1, op1);
23033 pat = GEN_FCN (d->icode) (op0, op1);
23037 emit_insn (gen_rtx_SET (VOIDmode,
23038 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23039 gen_rtx_fmt_ee (comparison, QImode,
/* Return the SImode pseudo underneath the QImode subreg.  */
23043 return SUBREG_REG (target);
23046 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Same flag-materialization scheme as ix86_expand_sse_comi: run the
   ptest pattern, then set a zero-extended QImode result from the
   requested flags comparison.  */
23049 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
23053 tree arg0 = CALL_EXPR_ARG (exp, 0);
23054 tree arg1 = CALL_EXPR_ARG (exp, 1);
23055 rtx op0 = expand_normal (arg0);
23056 rtx op1 = expand_normal (arg1);
23057 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23058 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23059 enum rtx_code comparison = d->comparison;
23061 if (VECTOR_MODE_P (mode0))
23062 op0 = safe_vector_operand (op0, mode0);
23063 if (VECTOR_MODE_P (mode1))
23064 op1 = safe_vector_operand (op1, mode1);
/* Pre-zero the SImode container for the QImode flag store.  */
23066 target = gen_reg_rtx (SImode);
23067 emit_move_insn (target, const0_rtx);
23068 target = gen_rtx_SUBREG (QImode, target, 0);
23070 if ((optimize && !register_operand (op0, mode0))
23071 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23072 op0 = copy_to_mode_reg (mode0, op0);
23073 if ((optimize && !register_operand (op1, mode1))
23074 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23075 op1 = copy_to_mode_reg (mode1, op1);
23077 pat = GEN_FCN (d->icode) (op0, op1);
23081 emit_insn (gen_rtx_SET (VOIDmode,
23082 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23083 gen_rtx_fmt_ee (comparison, QImode,
23087 return SUBREG_REG (target);
23090 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Five arguments: two vectors with explicit lengths plus an 8-bit
   control immediate.  The pattern has two outputs (index and mask);
   depending on D->code one is the real result and the other is a
   scratch, or (flag variants) both are scratches and a flags-register
   bit is returned as an int.  */
23093 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
23094 tree exp, rtx target)
23097 tree arg0 = CALL_EXPR_ARG (exp, 0);
23098 tree arg1 = CALL_EXPR_ARG (exp, 1);
23099 tree arg2 = CALL_EXPR_ARG (exp, 2);
23100 tree arg3 = CALL_EXPR_ARG (exp, 3);
23101 tree arg4 = CALL_EXPR_ARG (exp, 4);
23102 rtx scratch0, scratch1;
23103 rtx op0 = expand_normal (arg0);
23104 rtx op1 = expand_normal (arg1);
23105 rtx op2 = expand_normal (arg2);
23106 rtx op3 = expand_normal (arg3);
23107 rtx op4 = expand_normal (arg4);
23108 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
23110 tmode0 = insn_data[d->icode].operand[0].mode;
23111 tmode1 = insn_data[d->icode].operand[1].mode;
23112 modev2 = insn_data[d->icode].operand[2].mode;
23113 modei3 = insn_data[d->icode].operand[3].mode;
23114 modev4 = insn_data[d->icode].operand[4].mode;
23115 modei5 = insn_data[d->icode].operand[5].mode;
23116 modeimm = insn_data[d->icode].operand[6].mode;
23118 if (VECTOR_MODE_P (modev2))
23119 op0 = safe_vector_operand (op0, modev2);
23120 if (VECTOR_MODE_P (modev4))
23121 op2 = safe_vector_operand (op2, modev4);
/* Legitimize each input against its insn predicate.  */
23123 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23124 op0 = copy_to_mode_reg (modev2, op0);
23125 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
23126 op1 = copy_to_mode_reg (modei3, op1);
23127 if ((optimize && !register_operand (op2, modev4))
23128 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
23129 op2 = copy_to_mode_reg (modev4, op2);
23130 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
23131 op3 = copy_to_mode_reg (modei5, op3);
23133 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
23135 error ("the fifth argument must be a 8-bit immediate")_
/* Select which pattern output is the user-visible result.  */
23139 if (d->code == IX86_BUILTIN_PCMPESTRI128)
23141 if (optimize || !target
23142 || GET_MODE (target) != tmode0
23143 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23144 target = gen_reg_rtx (tmode0);
23146 scratch1 = gen_reg_rtx (tmode1);
23148 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
23150 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
23152 if (optimize || !target
23153 || GET_MODE (target) != tmode1
23154 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23155 target = gen_reg_rtx (tmode1);
23157 scratch0 = gen_reg_rtx (tmode0);
23159 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag variants: both outputs are dead; the result is a flags bit.  */
23163 gcc_assert (d->flag);
23165 scratch0 = gen_reg_rtx (tmode0);
23166 scratch1 = gen_reg_rtx (tmode1);
23168 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Materialize the requested flags-register bit as a zero-extended
   QImode value (d->flag holds the CC mode).  */
23178 target = gen_reg_rtx (SImode);
23179 emit_move_insn (target, const0_rtx);
23180 target = gen_rtx_SUBREG (QImode, target, 0);
23183 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23184 gen_rtx_fmt_ee (EQ, QImode,
23185 gen_rtx_REG ((enum machine_mode) d->flag,
23188 return SUBREG_REG (target);
23195 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Implicit-length variant of ix86_expand_sse_pcmpestr: two vectors and
   one 8-bit control immediate, no explicit length operands.  */
23198 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
23199 tree exp, rtx target)
23202 tree arg0 = CALL_EXPR_ARG (exp, 0);
23203 tree arg1 = CALL_EXPR_ARG (exp, 1);
23204 tree arg2 = CALL_EXPR_ARG (exp, 2);
23205 rtx scratch0, scratch1;
23206 rtx op0 = expand_normal (arg0);
23207 rtx op1 = expand_normal (arg1);
23208 rtx op2 = expand_normal (arg2);
23209 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
23211 tmode0 = insn_data[d->icode].operand[0].mode;
23212 tmode1 = insn_data[d->icode].operand[1].mode;
23213 modev2 = insn_data[d->icode].operand[2].mode;
23214 modev3 = insn_data[d->icode].operand[3].mode;
23215 modeimm = insn_data[d->icode].operand[4].mode;
23217 if (VECTOR_MODE_P (modev2))
23218 op0 = safe_vector_operand (op0, modev2);
23219 if (VECTOR_MODE_P (modev3))
23220 op1 = safe_vector_operand (op1, modev3);
23222 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23223 op0 = copy_to_mode_reg (modev2, op0);
23224 if ((optimize && !register_operand (op1, modev3))
23225 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
23226 op1 = copy_to_mode_reg (modev3, op1);
23228 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
23230 error ("the third argument must be a 8-bit immediate")_
/* Select which pattern output (index or mask) is the real result.  */
23234 if (d->code == IX86_BUILTIN_PCMPISTRI128)
23236 if (optimize || !target
23237 || GET_MODE (target) != tmode0
23238 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23239 target = gen_reg_rtx (tmode0);
23241 scratch1 = gen_reg_rtx (tmode1);
23243 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
23245 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
23247 if (optimize || !target
23248 || GET_MODE (target) != tmode1
23249 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23250 target = gen_reg_rtx (tmode1);
23252 scratch0 = gen_reg_rtx (tmode0);
23254 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag variants return a bit of the flags register instead.  */
23258 gcc_assert (d->flag);
23260 scratch0 = gen_reg_rtx (tmode0);
23261 scratch1 = gen_reg_rtx (tmode1);
23263 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
23273 target = gen_reg_rtx (SImode);
23274 emit_move_insn (target, const0_rtx);
23275 target = gen_rtx_SUBREG (QImode, target, 0);
23278 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23279 gen_rtx_fmt_ee (EQ, QImode,
23280 gen_rtx_REG ((enum machine_mode) d->flag,
23283 return SUBREG_REG (target);
23289 /* Subroutine of ix86_expand_builtin to take care of insns with
23290 variable number of operands. */
/* Central dispatcher: D->flag encodes the builtin's function type,
   which determines argument count, result mode, and whether the call
   is delegated to one of the specialized expanders above.  */
23293 ix86_expand_args_builtin (const struct builtin_description *d,
23294 tree exp, rtx target)
23296 rtx pat, real_target;
23297 unsigned int i, nargs;
23298 unsigned int nargs_constant = 0;
23299 int num_memory = 0;
23303 enum machine_mode mode;
23305 bool last_arg_count = false;
23306 enum insn_code icode = d->icode;
23307 const struct insn_data *insn_p = &insn_data[icode];
23308 enum machine_mode tmode = insn_p->operand[0].mode;
/* rmode != VOIDmode means the result register has a different mode
   than the pattern's operand 0 (e.g. the *2TI types below).  */
23309 enum machine_mode rmode = VOIDmode;
23311 enum rtx_code comparison = d->comparison;
23313 switch ((enum ix86_builtin_type) d->flag)
/* PTEST forms delegate entirely to ix86_expand_sse_ptest.  */
23315 case INT_FTYPE_V8SF_V8SF_PTEST:
23316 case INT_FTYPE_V4DI_V4DI_PTEST:
23317 case INT_FTYPE_V4DF_V4DF_PTEST:
23318 case INT_FTYPE_V4SF_V4SF_PTEST:
23319 case INT_FTYPE_V2DI_V2DI_PTEST:
23320 case INT_FTYPE_V2DF_V2DF_PTEST:
23321 return ix86_expand_sse_ptest (d, exp, target);
/* Unary forms (one input operand).  */
23322 case FLOAT128_FTYPE_FLOAT128:
23323 case FLOAT_FTYPE_FLOAT:
23324 case INT64_FTYPE_V4SF:
23325 case INT64_FTYPE_V2DF:
23326 case INT_FTYPE_V16QI:
23327 case INT_FTYPE_V8QI:
23328 case INT_FTYPE_V8SF:
23329 case INT_FTYPE_V4DF:
23330 case INT_FTYPE_V4SF:
23331 case INT_FTYPE_V2DF:
23332 case V16QI_FTYPE_V16QI:
23333 case V8SI_FTYPE_V8SF:
23334 case V8SI_FTYPE_V4SI:
23335 case V8HI_FTYPE_V8HI:
23336 case V8HI_FTYPE_V16QI:
23337 case V8QI_FTYPE_V8QI:
23338 case V8SF_FTYPE_V8SF:
23339 case V8SF_FTYPE_V8SI:
23340 case V8SF_FTYPE_V4SF:
23341 case V4SI_FTYPE_V4SI:
23342 case V4SI_FTYPE_V16QI:
23343 case V4SI_FTYPE_V4SF:
23344 case V4SI_FTYPE_V8SI:
23345 case V4SI_FTYPE_V8HI:
23346 case V4SI_FTYPE_V4DF:
23347 case V4SI_FTYPE_V2DF:
23348 case V4HI_FTYPE_V4HI:
23349 case V4DF_FTYPE_V4DF:
23350 case V4DF_FTYPE_V4SI:
23351 case V4DF_FTYPE_V4SF:
23352 case V4DF_FTYPE_V2DF:
23353 case V4SF_FTYPE_V4SF:
23354 case V4SF_FTYPE_V4SI:
23355 case V4SF_FTYPE_V8SF:
23356 case V4SF_FTYPE_V4DF:
23357 case V4SF_FTYPE_V2DF:
23358 case V2DI_FTYPE_V2DI:
23359 case V2DI_FTYPE_V16QI:
23360 case V2DI_FTYPE_V8HI:
23361 case V2DI_FTYPE_V4SI:
23362 case V2DF_FTYPE_V2DF:
23363 case V2DF_FTYPE_V4SI:
23364 case V2DF_FTYPE_V4DF:
23365 case V2DF_FTYPE_V4SF:
23366 case V2DF_FTYPE_V2SI:
23367 case V2SI_FTYPE_V2SI:
23368 case V2SI_FTYPE_V4SF:
23369 case V2SI_FTYPE_V2SF:
23370 case V2SI_FTYPE_V2DF:
23371 case V2SF_FTYPE_V2SF:
23372 case V2SF_FTYPE_V2SI:
/* Scalar unops with vec_merge use a dedicated expander.  */
23375 case V4SF_FTYPE_V4SF_VEC_MERGE:
23376 case V2DF_FTYPE_V2DF_VEC_MERGE:
23377 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Binary forms.  */
23378 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
23379 case V16QI_FTYPE_V16QI_V16QI:
23380 case V16QI_FTYPE_V8HI_V8HI:
23381 case V8QI_FTYPE_V8QI_V8QI:
23382 case V8QI_FTYPE_V4HI_V4HI:
23383 case V8HI_FTYPE_V8HI_V8HI:
23384 case V8HI_FTYPE_V16QI_V16QI:
23385 case V8HI_FTYPE_V4SI_V4SI:
23386 case V8SF_FTYPE_V8SF_V8SF:
23387 case V8SF_FTYPE_V8SF_V8SI:
23388 case V4SI_FTYPE_V4SI_V4SI:
23389 case V4SI_FTYPE_V8HI_V8HI:
23390 case V4SI_FTYPE_V4SF_V4SF:
23391 case V4SI_FTYPE_V2DF_V2DF:
23392 case V4HI_FTYPE_V4HI_V4HI:
23393 case V4HI_FTYPE_V8QI_V8QI:
23394 case V4HI_FTYPE_V2SI_V2SI:
23395 case V4DF_FTYPE_V4DF_V4DF:
23396 case V4DF_FTYPE_V4DF_V4DI:
23397 case V4SF_FTYPE_V4SF_V4SF:
23398 case V4SF_FTYPE_V4SF_V4SI:
23399 case V4SF_FTYPE_V4SF_V2SI:
23400 case V4SF_FTYPE_V4SF_V2DF:
23401 case V4SF_FTYPE_V4SF_DI:
23402 case V4SF_FTYPE_V4SF_SI:
23403 case V2DI_FTYPE_V2DI_V2DI:
23404 case V2DI_FTYPE_V16QI_V16QI:
23405 case V2DI_FTYPE_V4SI_V4SI:
23406 case V2DI_FTYPE_V2DI_V16QI:
23407 case V2DI_FTYPE_V2DF_V2DF:
23408 case V2SI_FTYPE_V2SI_V2SI:
23409 case V2SI_FTYPE_V4HI_V4HI:
23410 case V2SI_FTYPE_V2SF_V2SF:
23411 case V2DF_FTYPE_V2DF_V2DF:
23412 case V2DF_FTYPE_V2DF_V4SF:
23413 case V2DF_FTYPE_V2DF_V2DI:
23414 case V2DF_FTYPE_V2DF_DI:
23415 case V2DF_FTYPE_V2DF_SI:
23416 case V2SF_FTYPE_V2SF_V2SF:
23417 case V1DI_FTYPE_V1DI_V1DI:
23418 case V1DI_FTYPE_V8QI_V8QI:
23419 case V1DI_FTYPE_V2SI_V2SI:
/* Plain binops go through ix86_expand_binop_builtin; binops carrying
   a comparison code fall through to the compare path below.  */
23420 if (comparison == UNKNOWN)
23421 return ix86_expand_binop_builtin (icode, exp, target);
23424 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23425 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23426 gcc_assert (comparison != UNKNOWN);
/* Shift forms whose last argument is a count (register or 8-bit
   immediate).  */
23430 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23431 case V8HI_FTYPE_V8HI_SI_COUNT:
23432 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23433 case V4SI_FTYPE_V4SI_SI_COUNT:
23434 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23435 case V4HI_FTYPE_V4HI_SI_COUNT:
23436 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23437 case V2DI_FTYPE_V2DI_SI_COUNT:
23438 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23439 case V2SI_FTYPE_V2SI_SI_COUNT:
23440 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23441 case V1DI_FTYPE_V1DI_SI_COUNT:
23443 last_arg_count = true;
23445 case UINT64_FTYPE_UINT64_UINT64:
23446 case UINT_FTYPE_UINT_UINT:
23447 case UINT_FTYPE_UINT_USHORT:
23448 case UINT_FTYPE_UINT_UCHAR:
/* Result mode differs from pattern mode (TI result via V2DI).  */
23451 case V2DI2TI_FTYPE_V2DI_INT:
23454 nargs_constant = 1;
/* Two-operand forms with a trailing immediate.  */
23456 case V8HI_FTYPE_V8HI_INT:
23457 case V8SF_FTYPE_V8SF_INT:
23458 case V4SI_FTYPE_V4SI_INT:
23459 case V4SI_FTYPE_V8SI_INT:
23460 case V4HI_FTYPE_V4HI_INT:
23461 case V4DF_FTYPE_V4DF_INT:
23462 case V4SF_FTYPE_V4SF_INT:
23463 case V4SF_FTYPE_V8SF_INT:
23464 case V2DI_FTYPE_V2DI_INT:
23465 case V2DF_FTYPE_V2DF_INT:
23466 case V2DF_FTYPE_V4DF_INT:
23468 nargs_constant = 1;
/* Three-operand forms.  */
23470 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23471 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23472 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23473 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23474 case V2DF_FTYPE_V2DF_V2DF_V2DF:
/* Three-operand forms with trailing immediate.  */
23477 case V16QI_FTYPE_V16QI_V16QI_INT:
23478 case V8HI_FTYPE_V8HI_V8HI_INT:
23479 case V8SI_FTYPE_V8SI_V8SI_INT:
23480 case V8SI_FTYPE_V8SI_V4SI_INT:
23481 case V8SF_FTYPE_V8SF_V8SF_INT:
23482 case V8SF_FTYPE_V8SF_V4SF_INT:
23483 case V4SI_FTYPE_V4SI_V4SI_INT:
23484 case V4DF_FTYPE_V4DF_V4DF_INT:
23485 case V4DF_FTYPE_V4DF_V2DF_INT:
23486 case V4SF_FTYPE_V4SF_V4SF_INT:
23487 case V2DI_FTYPE_V2DI_V2DI_INT:
23488 case V2DF_FTYPE_V2DF_V2DF_INT:
23490 nargs_constant = 1;
23492 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23495 nargs_constant = 1;
23497 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23500 nargs_constant = 1;
23502 case V2DI_FTYPE_V2DI_UINT_UINT:
23504 nargs_constant = 2;
/* Blend-variable forms: vector selector plus trailing immediate.  */
23506 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
23507 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
23508 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
23509 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
23511 nargs_constant = 1;
23513 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23515 nargs_constant = 2;
23518 gcc_unreachable ();
23521 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Two-argument comparisons delegate to ix86_expand_sse_compare.  */
23523 if (comparison != UNKNOWN)
23525 gcc_assert (nargs == 2);
23526 return ix86_expand_sse_compare (d, exp, target, swap);
/* Allocate the result register; when rmode differs from tmode the
   pattern writes through a paradoxical/mode-punned subreg.  */
23529 if (rmode == VOIDmode || rmode == tmode)
23533 || GET_MODE (target) != tmode
23534 || ! (*insn_p->operand[0].predicate) (target, tmode))
23535 target = gen_reg_rtx (tmode);
23536 real_target = target;
23540 target = gen_reg_rtx (rmode);
23541 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
23544 for (i = 0; i < nargs; i++)
23546 tree arg = CALL_EXPR_ARG (exp, i);
23547 rtx op = expand_normal (arg);
23548 enum machine_mode mode = insn_p->operand[i + 1].mode;
23549 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
23551 if (last_arg_count && (i + 1) == nargs)
23553 /* SIMD shift insns take either an 8-bit immediate or
23554 register as count. But builtin functions take int as
23555 count. If count doesn't match, we put it in register. */
23558 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
23559 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
23560 op = copy_to_reg (op);
23563 else if ((nargs - i) <= nargs_constant)
/* Constant arguments: report the immediate-width requirement of the
   specific pattern when the predicate rejects the operand.  */
23568 case CODE_FOR_sse4_1_roundpd:
23569 case CODE_FOR_sse4_1_roundps:
23570 case CODE_FOR_sse4_1_roundsd:
23571 case CODE_FOR_sse4_1_roundss:
23572 case CODE_FOR_sse4_1_blendps:
23573 case CODE_FOR_avx_blendpd256:
23574 case CODE_FOR_avx_vpermilv4df:
23575 case CODE_FOR_avx_roundpd256:
23576 case CODE_FOR_avx_roundps256:
23577 error ("the last argument must be a 4-bit immediate");
23580 case CODE_FOR_sse4_1_blendpd:
23581 case CODE_FOR_avx_vpermilv2df:
23582 case CODE_FOR_avx_vpermil2v2df3:
23583 case CODE_FOR_avx_vpermil2v4sf3:
23584 case CODE_FOR_avx_vpermil2v4df3:
23585 case CODE_FOR_avx_vpermil2v8sf3:
23586 error ("the last argument must be a 2-bit immediate");
23589 case CODE_FOR_avx_vextractf128v4df:
23590 case CODE_FOR_avx_vextractf128v8sf:
23591 case CODE_FOR_avx_vextractf128v8si:
23592 case CODE_FOR_avx_vinsertf128v4df:
23593 case CODE_FOR_avx_vinsertf128v8sf:
23594 case CODE_FOR_avx_vinsertf128v8si:
23595 error ("the last argument must be a 1-bit immediate");
23598 case CODE_FOR_avx_cmpsdv2df3:
23599 case CODE_FOR_avx_cmpssv4sf3:
23600 case CODE_FOR_avx_cmppdv2df3:
23601 case CODE_FOR_avx_cmppsv4sf3:
23602 case CODE_FOR_avx_cmppdv4df3:
23603 case CODE_FOR_avx_cmppsv8sf3:
23604 error ("the last argument must be a 5-bit immediate");
23608 switch (nargs_constant)
23611 if ((nargs - i) == nargs_constant)
23613 error ("the next to last argument must be an 8-bit immediate");
23617 error ("the last argument must be an 8-bit immediate");
23620 gcc_unreachable ();
23627 if (VECTOR_MODE_P (mode))
23628 op = safe_vector_operand (op, mode);
23630 /* If we aren't optimizing, only allow one memory operand to
23632 if (memory_operand (op, mode))
23635 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
23637 if (optimize || !match || num_memory > 1)
23638 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy to a register and view it in the insn's mode.  */
23642 op = copy_to_reg (op);
23643 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
23648 args[i].mode = mode;
/* Emit the pattern for the recorded arity.  */
23654 pat = GEN_FCN (icode) (real_target, args[0].op);
23657 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
23660 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
23664 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
23665 args[2].op, args[3].op);
23668 gcc_unreachable ();
23678 /* Subroutine of ix86_expand_builtin to take care of special insns
23679 with variable number of operands. */
/* "Special" builtins are loads and stores: D->flag selects load vs.
   store shape, which argument (if any) is the memory operand, and how
   many register operands follow.  Stores return 0; loads return the
   loaded value.  */
23682 ix86_expand_special_args_builtin (const struct builtin_description *d,
23683 tree exp, rtx target)
23687 unsigned int i, nargs, arg_adjust, memory;
23691 enum machine_mode mode;
23693 enum insn_code icode = d->icode;
23694 bool last_arg_constant = false;
23695 const struct insn_data *insn_p = &insn_data[icode];
23696 enum machine_mode tmode = insn_p->operand[0].mode;
23697 enum { load, store } klass;
23699 switch ((enum ix86_special_builtin_type) d->flag)
23701 case VOID_FTYPE_VOID:
/* No operands at all: emit the pattern and return.  */
23702 emit_insn (GEN_FCN (icode) (target));
/* Loads from a pointer argument.  */
23704 case V2DI_FTYPE_PV2DI:
23705 case V32QI_FTYPE_PCCHAR:
23706 case V16QI_FTYPE_PCCHAR:
23707 case V8SF_FTYPE_PCV4SF:
23708 case V8SF_FTYPE_PCFLOAT:
23709 case V4SF_FTYPE_PCFLOAT:
23710 case V4DF_FTYPE_PCV2DF:
23711 case V4DF_FTYPE_PCDOUBLE:
23712 case V2DF_FTYPE_PCDOUBLE:
/* Stores: first argument is the destination pointer.  */
23717 case VOID_FTYPE_PV2SF_V4SF:
23718 case VOID_FTYPE_PV2DI_V2DI:
23719 case VOID_FTYPE_PCHAR_V32QI:
23720 case VOID_FTYPE_PCHAR_V16QI:
23721 case VOID_FTYPE_PFLOAT_V8SF:
23722 case VOID_FTYPE_PFLOAT_V4SF:
23723 case VOID_FTYPE_PDOUBLE_V4DF:
23724 case VOID_FTYPE_PDOUBLE_V2DF:
23725 case VOID_FTYPE_PDI_DI:
23726 case VOID_FTYPE_PINT_INT:
23729 /* Reserve memory operand for target. */
23730 memory = ARRAY_SIZE (args);
/* Loads combining a register operand with a memory operand.  */
23732 case V4SF_FTYPE_V4SF_PCV2SF:
23733 case V2DF_FTYPE_V2DF_PCDOUBLE:
23738 case V8SF_FTYPE_PCV8SF_V8SF:
23739 case V4DF_FTYPE_PCV4DF_V4DF:
23740 case V4SF_FTYPE_PCV4SF_V4SF:
23741 case V2DF_FTYPE_PCV2DF_V2DF:
/* Masked stores: pointer target plus two vector operands.  */
23746 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23747 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23748 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23749 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23752 /* Reserve memory operand for target. */
23753 memory = ARRAY_SIZE (args);
23756 gcc_unreachable ();
23759 gcc_assert (nargs <= ARRAY_SIZE (args));
23761 if (klass == store)
/* For stores the "target" is a MEM built from the pointer argument.  */
23763 arg = CALL_EXPR_ARG (exp, 0);
23764 op = expand_normal (arg);
23765 gcc_assert (target == 0);
23766 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
23774 || GET_MODE (target) != tmode
23775 || ! (*insn_p->operand[0].predicate) (target, tmode))
23776 target = gen_reg_rtx (tmode);
23779 for (i = 0; i < nargs; i++)
23781 enum machine_mode mode = insn_p->operand[i + 1].mode;
/* arg_adjust skips the pointer already consumed by a store target.  */
23784 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
23785 op = expand_normal (arg);
23786 match = (*insn_p->operand[i + 1].predicate) (op, mode);
23788 if (last_arg_constant && (i + 1) == nargs)
23794 error ("the last argument must be an 8-bit immediate");
23802 /* This must be the memory operand. */
23803 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
23804 gcc_assert (GET_MODE (op) == mode
23805 || GET_MODE (op) == VOIDmode);
23809 /* This must be register. */
23810 if (VECTOR_MODE_P (mode))
23811 op = safe_vector_operand (op, mode);
23813 gcc_assert (GET_MODE (op) == mode
23814 || GET_MODE (op) == VOIDmode);
23815 op = copy_to_mode_reg (mode, op);
23820 args[i].mode = mode;
23826 pat = GEN_FCN (icode) (target, args[0].op);
23829 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23832 gcc_unreachable ();
/* Stores yield no value; loads return the result register.  */
23838 return klass == store ? 0 : target;
23841 /* Return the integer constant in ARG. Constrain it to be in the range
23842 of the subparts of VEC_TYPE; issue an error if not. */
23845 get_element_number (tree vec_type, tree arg)
23847 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject non-constant selectors and constants outside [0, max].  */
23849 if (!host_integerp (arg, 1)
23850 || (elt = tree_low_cst (arg, 1), elt > max))
23852 error ("selector must be an integer constant in the range 0..%wi", max);
23859 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
23860 ix86_expand_vector_init. We DO have language-level syntax for this, in
23861 the form of (type){ init-list }. Except that since we can't place emms
23862 instructions from inside the compiler, we can't allow the use of MMX
23863 registers unless the user explicitly asks for it. So we do *not* define
23864 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
23865 we have builtins invoked by mmintrin.h that gives us license to emit
23866 these sorts of instructions. */
23869 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
23871 enum machine_mode tmode = TYPE_MODE (type);
23872 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
23873 int i, n_elt = GET_MODE_NUNITS (tmode);
23874 rtvec v = rtvec_alloc (n_elt);
/* One call argument per vector element, by construction.  */
23876 gcc_assert (VECTOR_MODE_P (tmode));
23877 gcc_assert (call_expr_nargs (exp) == n_elt);
23879 for (i = 0; i < n_elt; ++i)
23881 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
23882 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
23885 if (!target || !register_operand (target, tmode))
23886 target = gen_reg_rtx (tmode);
23888 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
23892 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
23893 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
23894 had a language-level syntax for referencing vector elements. */
/* NOTE(review): declaration lines for arg0/arg1/op0/elt and some braces
   are elided from this chunk.  */
23897 ix86_expand_vec_ext_builtin (tree exp, rtx target)
23899 enum machine_mode tmode, mode0;
23904 arg0 = CALL_EXPR_ARG (exp, 0);
23905 arg1 = CALL_EXPR_ARG (exp, 1);
23907 op0 = expand_normal (arg0);
/* ARG1 must be a constant lane selector; get_element_number diagnoses
   out-of-range values.  */
23908 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element mode, MODE0 the whole-vector mode.  */
23910 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
23911 mode0 = TYPE_MODE (TREE_TYPE (arg0));
23912 gcc_assert (VECTOR_MODE_P (mode0));
23914 op0 = force_reg (mode0, op0);
23916 if (optimize || !target || !register_operand (target, tmode))
23917 target = gen_reg_rtx (tmode);
23919 ix86_expand_vector_extract (true, target, op0, elt);
23924 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
23925 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
23926 a language-level syntax for referencing vector elements. */
/* NOTE(review): the declaration of ELT and the function's braces/return
   are elided from this chunk.  */
23929 ix86_expand_vec_set_builtin (tree exp)
23931 enum machine_mode tmode, mode1;
23932 tree arg0, arg1, arg2;
23934 rtx op0, op1, target;
23936 arg0 = CALL_EXPR_ARG (exp, 0);
23937 arg1 = CALL_EXPR_ARG (exp, 1);
23938 arg2 = CALL_EXPR_ARG (exp, 2);
/* TMODE is the vector mode, MODE1 the element mode being inserted.  */
23940 tmode = TYPE_MODE (TREE_TYPE (arg0));
23941 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
23942 gcc_assert (VECTOR_MODE_P (tmode));
23944 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
23945 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* ARG2 is the constant lane selector.  */
23946 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Convert the scalar to the element mode if expansion chose another
   (non-VOID) mode for it.  */
23948 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
23949 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
23951 op0 = force_reg (tmode, op0);
23952 op1 = force_reg (mode1, op1);
23954 /* OP0 is the source of these builtin functions and shouldn't be
23955 modified. Create a copy, use it and return it as target. */
23956 target = gen_reg_rtx (tmode);
23957 emit_move_insn (target, op0);
23958 ix86_expand_vector_set (true, target, op1, elt);
23963 /* Expand an expression EXP that calls a built-in function,
23964 with result going to TARGET if that's convenient
23965 (and in mode MODE if that's convenient).
23966 SUBTARGET may be used as the target for computing one of EXP's operands.
23967 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): this chunk elides many physical lines of this function —
   the `switch (fcode)` header, `break`s, closing braces and several
   declarations.  Structure below must be read against the full source.  */
23970 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
23971 enum machine_mode mode ATTRIBUTE_UNUSED,
23972 int ignore ATTRIBUTE_UNUSED)
23974 const struct builtin_description *d;
23976 enum insn_code icode;
23977 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
23978 tree arg0, arg1, arg2;
23979 rtx op0, op1, op2, pat;
23980 enum machine_mode mode0, mode1, mode2;
23981 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
23983 /* Determine whether the builtin function is available under the current ISA.
23984 Originally the builtin was not created if it wasn't applicable to the
23985 current ISA based on the command line switches. With function specific
23986 options, we need to check in the context of the function making the call
23987 whether it is supported. */
23988 if (ix86_builtins_isa[fcode]
23989 && !(ix86_builtins_isa[fcode] & ix86_isa_flags))
/* Reconstruct the option string the user would need to enable this
   builtin, for the diagnostic below.  */
23991 char *opts = ix86_target_string (ix86_builtins_isa[fcode], 0, NULL,
23992 NULL, NULL, false);
23995 error ("%qE needs unknown isa option", fndecl);
23998 gcc_assert (opts != NULL);
23999 error ("%qE needs isa option %s", fndecl, opts);
/* Special-cased builtins that don't fit the table-driven expanders.  */
24007 case IX86_BUILTIN_MASKMOVQ:
24008 case IX86_BUILTIN_MASKMOVDQU:
24009 icode = (fcode == IX86_BUILTIN_MASKMOVQ
24010 ? CODE_FOR_mmx_maskmovq
24011 : CODE_FOR_sse2_maskmovdqu);
24012 /* Note the arg order is different from the operand order. */
24013 arg1 = CALL_EXPR_ARG (exp, 0);
24014 arg2 = CALL_EXPR_ARG (exp, 1);
24015 arg0 = CALL_EXPR_ARG (exp, 2);
24016 op0 = expand_normal (arg0);
24017 op1 = expand_normal (arg1);
24018 op2 = expand_normal (arg2);
24019 mode0 = insn_data[icode].operand[0].mode;
24020 mode1 = insn_data[icode].operand[1].mode;
24021 mode2 = insn_data[icode].operand[2].mode;
/* The destination address becomes a MEM in the insn's operand mode.  */
24023 op0 = force_reg (Pmode, op0);
24024 op0 = gen_rtx_MEM (mode1, op0);
/* Legitimize every operand against its insn predicate.  */
24026 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
24027 op0 = copy_to_mode_reg (mode0, op0);
24028 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
24029 op1 = copy_to_mode_reg (mode1, op1);
24030 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
24031 op2 = copy_to_mode_reg (mode2, op2);
24032 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR/STMXCSR go through a scratch stack slot.  */
24038 case IX86_BUILTIN_LDMXCSR:
24039 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
24040 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24041 emit_move_insn (target, op0);
24042 emit_insn (gen_sse_ldmxcsr (target));
24045 case IX86_BUILTIN_STMXCSR:
24046 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24047 emit_insn (gen_sse_stmxcsr (target));
24048 return copy_to_mode_reg (SImode, target);
24050 case IX86_BUILTIN_CLFLUSH:
24051 arg0 = CALL_EXPR_ARG (exp, 0);
24052 op0 = expand_normal (arg0);
24053 icode = CODE_FOR_sse2_clflush;
24054 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
24055 op0 = copy_to_mode_reg (Pmode, op0);
24057 emit_insn (gen_sse2_clflush (op0));
24060 case IX86_BUILTIN_MONITOR:
24061 arg0 = CALL_EXPR_ARG (exp, 0);
24062 arg1 = CALL_EXPR_ARG (exp, 1);
24063 arg2 = CALL_EXPR_ARG (exp, 2);
24064 op0 = expand_normal (arg0);
24065 op1 = expand_normal (arg1);
24066 op2 = expand_normal (arg2);
/* MONITOR takes an address (Pmode) plus two SImode hint operands.  */
24068 op0 = copy_to_mode_reg (Pmode, op0);
24070 op1 = copy_to_mode_reg (SImode, op1);
24072 op2 = copy_to_mode_reg (SImode, op2);
24073 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
24076 case IX86_BUILTIN_MWAIT:
24077 arg0 = CALL_EXPR_ARG (exp, 0);
24078 arg1 = CALL_EXPR_ARG (exp, 1);
24079 op0 = expand_normal (arg0);
24080 op1 = expand_normal (arg1);
24082 op0 = copy_to_mode_reg (SImode, op0);
24084 op1 = copy_to_mode_reg (SImode, op1);
24085 emit_insn (gen_sse3_mwait (op0, op1));
/* Vector init/extract/set builtins delegate to the helpers above.  */
24088 case IX86_BUILTIN_VEC_INIT_V2SI:
24089 case IX86_BUILTIN_VEC_INIT_V4HI:
24090 case IX86_BUILTIN_VEC_INIT_V8QI:
24091 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
24093 case IX86_BUILTIN_VEC_EXT_V2DF:
24094 case IX86_BUILTIN_VEC_EXT_V2DI:
24095 case IX86_BUILTIN_VEC_EXT_V4SF:
24096 case IX86_BUILTIN_VEC_EXT_V4SI:
24097 case IX86_BUILTIN_VEC_EXT_V8HI:
24098 case IX86_BUILTIN_VEC_EXT_V2SI:
24099 case IX86_BUILTIN_VEC_EXT_V4HI:
24100 case IX86_BUILTIN_VEC_EXT_V16QI:
24101 return ix86_expand_vec_ext_builtin (exp, target);
24103 case IX86_BUILTIN_VEC_SET_V2DI:
24104 case IX86_BUILTIN_VEC_SET_V4SF:
24105 case IX86_BUILTIN_VEC_SET_V4SI:
24106 case IX86_BUILTIN_VEC_SET_V8HI:
24107 case IX86_BUILTIN_VEC_SET_V4HI:
24108 case IX86_BUILTIN_VEC_SET_V16QI:
24109 return ix86_expand_vec_set_builtin (exp);
/* __builtin_infq: materialize TFmode infinity from the constant pool.  */
24111 case IX86_BUILTIN_INFQ:
24113 REAL_VALUE_TYPE inf;
24117 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
24119 tmp = validize_mem (force_const_mem (mode, tmp));
24122 target = gen_reg_rtx (mode);
24124 emit_move_insn (target, tmp);
/* Not special-cased: search the table-driven builtin descriptors and
   dispatch to the matching generic expander.  */
24132 for (i = 0, d = bdesc_special_args;
24133 i < ARRAY_SIZE (bdesc_special_args);
24135 if (d->code == fcode)
24136 return ix86_expand_special_args_builtin (d, exp, target);
24138 for (i = 0, d = bdesc_args;
24139 i < ARRAY_SIZE (bdesc_args);
24141 if (d->code == fcode)
24144 case IX86_BUILTIN_FABSQ:
24145 case IX86_BUILTIN_COPYSIGNQ:
24147 /* Emit a normal call if SSE2 isn't available. */
24148 return expand_call (exp, target, ignore);
24150 return ix86_expand_args_builtin (d, exp, target);
24153 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24154 if (d->code == fcode)
24155 return ix86_expand_sse_comi (d, exp, target);
24157 for (i = 0, d = bdesc_pcmpestr;
24158 i < ARRAY_SIZE (bdesc_pcmpestr);
24160 if (d->code == fcode)
24161 return ix86_expand_sse_pcmpestr (d, exp, target);
24163 for (i = 0, d = bdesc_pcmpistr;
24164 i < ARRAY_SIZE (bdesc_pcmpistr);
24166 if (d->code == fcode)
24167 return ix86_expand_sse_pcmpistr (d, exp, target);
24169 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24170 if (d->code == fcode)
24171 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
24172 (enum multi_arg_type)d->flag,
/* Every valid fcode must have been handled above.  */
24175 gcc_unreachable ();
24178 /* Returns a function decl for a vectorized version of the builtin function
24179 with builtin function code FN and the result vector type TYPE, or NULL_TREE
24180 if it is not available. */
/* NOTE(review): the `switch (fn)` header, `break`s and NULL_TREE returns
   are elided from this chunk.  */
24183 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
24186 enum machine_mode in_mode, out_mode;
/* Only vector-to-vector mappings are supported.  */
24189 if (TREE_CODE (type_out) != VECTOR_TYPE
24190 || TREE_CODE (type_in) != VECTOR_TYPE)
24193 out_mode = TYPE_MODE (TREE_TYPE (type_out));
24194 out_n = TYPE_VECTOR_SUBPARTS (type_out);
24195 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24196 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Each case checks element mode and lane count before returning the
   matching target builtin decl.  */
24200 case BUILT_IN_SQRT:
24201 if (out_mode == DFmode && out_n == 2
24202 && in_mode == DFmode && in_n == 2)
24203 return ix86_builtins[IX86_BUILTIN_SQRTPD];
24206 case BUILT_IN_SQRTF:
24207 if (out_mode == SFmode && out_n == 4
24208 && in_mode == SFmode && in_n == 4)
24209 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
24212 case BUILT_IN_LRINT:
24213 if (out_mode == SImode && out_n == 4
24214 && in_mode == DFmode && in_n == 2)
24215 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
24218 case BUILT_IN_LRINTF:
24219 if (out_mode == SImode && out_n == 4
24220 && in_mode == SFmode && in_n == 4)
24221 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
24228 /* Dispatch to a handler for a vectorization library. */
24229 if (ix86_veclib_handler)
24230 return (*ix86_veclib_handler)(fn, type_out, type_in);
24235 /* Handler for an SVML-style interface to
24236 a library with vectorized intrinsics. */
/* NOTE(review): the `name` buffer declaration, `switch (fn)` header,
   `break`s, NULL_TREE returns and the uppercase-conversion loop body are
   elided from this chunk.  */
24239 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
24242 tree fntype, new_fndecl, args;
24245 enum machine_mode el_mode, in_mode;
24248 /* The SVML is suitable for unsafe math only. */
24249 if (!flag_unsafe_math_optimizations)
24252 el_mode = TYPE_MODE (TREE_TYPE (type_out));
24253 n = TYPE_VECTOR_SUBPARTS (type_out);
24254 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24255 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode / lane count must agree.  */
24256 if (el_mode != in_mode
/* Double-precision entry points: require a V2DF shape.  */
24264 case BUILT_IN_LOG10:
24266 case BUILT_IN_TANH:
24268 case BUILT_IN_ATAN:
24269 case BUILT_IN_ATAN2:
24270 case BUILT_IN_ATANH:
24271 case BUILT_IN_CBRT:
24272 case BUILT_IN_SINH:
24274 case BUILT_IN_ASINH:
24275 case BUILT_IN_ASIN:
24276 case BUILT_IN_COSH:
24278 case BUILT_IN_ACOSH:
24279 case BUILT_IN_ACOS:
24280 if (el_mode != DFmode || n != 2)
/* Single-precision entry points: require a V4SF shape.  */
24284 case BUILT_IN_EXPF:
24285 case BUILT_IN_LOGF:
24286 case BUILT_IN_LOG10F:
24287 case BUILT_IN_POWF:
24288 case BUILT_IN_TANHF:
24289 case BUILT_IN_TANF:
24290 case BUILT_IN_ATANF:
24291 case BUILT_IN_ATAN2F:
24292 case BUILT_IN_ATANHF:
24293 case BUILT_IN_CBRTF:
24294 case BUILT_IN_SINHF:
24295 case BUILT_IN_SINF:
24296 case BUILT_IN_ASINHF:
24297 case BUILT_IN_ASINF:
24298 case BUILT_IN_COSHF:
24299 case BUILT_IN_COSF:
24300 case BUILT_IN_ACOSHF:
24301 case BUILT_IN_ACOSF:
24302 if (el_mode != SFmode || n != 4)
/* Derive the SVML entry-point name from the scalar builtin's name
   ("__builtin_" prefix is 10 chars, hence bname+10).  */
24310 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24312 if (fn == BUILT_IN_LOGF)
24313 strcpy (name, "vmlsLn4");
24314 else if (fn == BUILT_IN_LOG)
24315 strcpy (name, "vmldLn2");
24318 sprintf (name, "vmls%s", bname+10);
24319 name[strlen (name)-1] = '4';
24322 sprintf (name, "vmld%s2", bname+10);
24324 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a 1- or 2-arg fntype.  */
24328 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24329 args = TREE_CHAIN (args))
24333 fntype = build_function_type_list (type_out, type_in, NULL);
24335 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24337 /* Build a function declaration for the vectorized function. */
24338 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
24339 TREE_PUBLIC (new_fndecl) = 1;
24340 DECL_EXTERNAL (new_fndecl) = 1;
24341 DECL_IS_NOVOPS (new_fndecl) = 1;
24342 TREE_READONLY (new_fndecl) = 1;
24347 /* Handler for an ACML-style interface to
24348 a library with vectorized intrinsics. */
/* NOTE(review): the `switch (fn)` header, `break`s, NULL_TREE returns and
   the TARGET_64BIT half of the guard condition are elided from this
   chunk.  */
24351 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Name template "__vr.._": the ".." is overwritten with the scalar
   builtin's suffix at offset 7 below.  */
24353 char name[20] = "__vr.._";
24354 tree fntype, new_fndecl, args;
24357 enum machine_mode el_mode, in_mode;
24360 /* The ACML is 64bits only and suitable for unsafe math only as
24361 it does not correctly support parts of IEEE with the required
24362 precision such as denormals. */
24364 || !flag_unsafe_math_optimizations)
24367 el_mode = TYPE_MODE (TREE_TYPE (type_out));
24368 n = TYPE_VECTOR_SUBPARTS (type_out);
24369 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24370 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode / lane count must agree.  */
24371 if (el_mode != in_mode
/* Double-precision group: DFmode elements required.  */
24381 case BUILT_IN_LOG2:
24382 case BUILT_IN_LOG10:
24385 if (el_mode != DFmode
/* Single-precision group: SFmode elements required.  */
24390 case BUILT_IN_SINF:
24391 case BUILT_IN_COSF:
24392 case BUILT_IN_EXPF:
24393 case BUILT_IN_POWF:
24394 case BUILT_IN_LOGF:
24395 case BUILT_IN_LOG2F:
24396 case BUILT_IN_LOG10F:
24399 if (el_mode != SFmode
/* Splice the scalar builtin's name (past the 10-char "__builtin_"
   prefix) into the "__vr.._" template.  */
24408 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24409 sprintf (name + 7, "%s", bname+10);
/* Count scalar arguments to pick a 1- or 2-arg fntype.  */
24412 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24413 args = TREE_CHAIN (args))
24417 fntype = build_function_type_list (type_out, type_in, NULL);
24419 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24421 /* Build a function declaration for the vectorized function. */
24422 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
24423 TREE_PUBLIC (new_fndecl) = 1;
24424 DECL_EXTERNAL (new_fndecl) = 1;
24425 DECL_IS_NOVOPS (new_fndecl) = 1;
24426 TREE_READONLY (new_fndecl) = 1;
24432 /* Returns a decl of a function that implements conversion of an integer vector
24433 into a floating-point vector, or vice-versa. TYPE is the type of the integer
24434 side of the conversion.
24435 Return NULL_TREE if it is not available. */
/* NOTE(review): the outer `switch (code)` header and the fallback
   NULL_TREE returns are elided from this chunk.  */
24438 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
24440 if (TREE_CODE (type) != VECTOR_TYPE)
/* int -> float direction (FLOAT_EXPR branch, per the visible case
   below): V4SI maps to cvtdq2ps.  */
24446 switch (TYPE_MODE (type))
24449 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int truncation: V4SF maps to cvttps2dq.  */
24454 case FIX_TRUNC_EXPR:
24455 switch (TYPE_MODE (type))
24458 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
24468 /* Returns a code for a target-specific builtin that implements
24469 reciprocal of the function, or NULL_TREE if not available. */
/* NOTE(review): the `switch (fn)` headers and default NULL_TREE returns
   are elided from this chunk.  */
24472 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
24473 bool sqrt ATTRIBUTE_UNUSED)
/* rsqrt approximations are only valid under fast/unsafe math with no
   traps and finite-only semantics, when optimizing for speed.  */
24475 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
24476 && flag_finite_math_only && !flag_trapping_math
24477 && flag_unsafe_math_optimizations))
24481 /* Machine dependent builtins. */
24484 /* Vectorized version of sqrt to rsqrt conversion. */
24485 case IX86_BUILTIN_SQRTPS_NR:
24486 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
24492 /* Normal builtins. */
24495 /* Sqrt to rsqrt conversion. */
24496 case BUILT_IN_SQRTF:
24497 return ix86_builtins[IX86_BUILTIN_RSQRTF];
24504 /* Store OPERAND to the memory after reload is completed. This means
24505 that we can't easily use assign_stack_local. */
/* NOTE(review): the return type, mode `switch` headers, emit_insn
   wrappers and final return are elided from this chunk.  */
24507 ix86_force_to_memory (enum machine_mode mode, rtx operand)
24511 gcc_assert (reload_completed);
/* With a red zone (and not the MS ABI), store below the stack pointer
   instead of pushing.  */
24512 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
24514 result = gen_rtx_MEM (mode,
24515 gen_rtx_PLUS (Pmode,
24517 GEN_INT (-RED_ZONE_SIZE)));
24518 emit_move_insn (result, operand);
/* Otherwise on 64-bit: push the value (widened to DImode) with a
   pre-decrement store.  */
24520 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
24526 operand = gen_lowpart (DImode, operand);
24530 gen_rtx_SET (VOIDmode,
24531 gen_rtx_MEM (DImode,
24532 gen_rtx_PRE_DEC (DImode,
24533 stack_pointer_rtx)),
24537 gcc_unreachable ();
24539 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode path: split into two SImode pushes (high word first,
   judging by the two consecutive PRE_DEC stores).  */
24548 split_di (&operand, 1, operands, operands + 1);
24550 gen_rtx_SET (VOIDmode,
24551 gen_rtx_MEM (SImode,
24552 gen_rtx_PRE_DEC (Pmode,
24553 stack_pointer_rtx)),
24556 gen_rtx_SET (VOIDmode,
24557 gen_rtx_MEM (SImode,
24558 gen_rtx_PRE_DEC (Pmode,
24559 stack_pointer_rtx)),
24564 /* Store HImodes as SImodes. */
24565 operand = gen_lowpart (SImode, operand);
24569 gen_rtx_SET (VOIDmode,
24570 gen_rtx_MEM (GET_MODE (operand),
24571 gen_rtx_PRE_DEC (SImode,
24572 stack_pointer_rtx)),
24576 gcc_unreachable ();
24578 result = gen_rtx_MEM (mode, stack_pointer_rtx);
24583 /* Free operand from the memory. */
/* NOTE(review): return type, braces and the size computation feeding the
   PLUS below are elided from this chunk.  */
24585 ix86_free_from_memory (enum machine_mode mode)
/* Only needed when ix86_force_to_memory actually pushed (no red zone,
   or MS ABI).  */
24587 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
24591 if (mode == DImode || TARGET_64BIT)
24595 /* Use LEA to deallocate stack space. In peephole2 it will be converted
24596 to pop or add instruction if registers are available. */
24597 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24598 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
24603 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
24604 QImode must go into class Q_REGS.
24605 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
24606 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): return type, several `return` payloads and closing
   braces are elided from this chunk.  */
24608 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
24610 enum machine_mode mode = GET_MODE (x);
24612 /* We're only allowed to return a subclass of CLASS. Many of the
24613 following checks fail for NO_REGS, so eliminate that early. */
24614 if (regclass == NO_REGS)
24617 /* All classes can load zeros. */
24618 if (x == CONST0_RTX (mode))
24621 /* Force constants into memory if we are loading a (nonzero) constant into
24622 an MMX or SSE register. This is because there are no MMX/SSE instructions
24623 to load from a constant. */
24625 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
24628 /* Prefer SSE regs only, if we can use them for math. */
24629 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
24630 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
24632 /* Floating-point constants need more complex checks. */
24633 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
24635 /* General regs can load everything. */
24636 if (reg_class_subset_p (regclass, GENERAL_REGS))
24639 /* Floats can load 0 and 1 plus some others. Note that we eliminated
24640 zero above. We only want to wind up preferring 80387 registers if
24641 we plan on doing computation with them. */
24643 && standard_80387_constant_p (x))
24645 /* Limit class to non-sse. */
24646 if (regclass == FLOAT_SSE_REGS)
24648 if (regclass == FP_TOP_SSE_REGS)
24650 if (regclass == FP_SECOND_SSE_REGS)
24651 return FP_SECOND_REG;
24652 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
24659 /* Generally when we see PLUS here, it's the function invariant
24660 (plus soft-fp const_int). Which can only be computed into general
24662 if (GET_CODE (x) == PLUS)
24663 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
24665 /* QImode constants are easy to load, but non-constant QImode data
24666 must go into Q_REGS. */
24667 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
24669 if (reg_class_subset_p (regclass, Q_REGS))
24671 if (reg_class_subset_p (Q_REGS, regclass))
24679 /* Discourage putting floating-point values in SSE registers unless
24680 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): return type, braces and some `return` payloads are
   elided from this chunk.  */
24682 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
24684 enum machine_mode mode = GET_MODE (x);
24686 /* Restrict the output reload class to the register bank that we are doing
24687 math on. If we would like not to return a subset of CLASS, reject this
24688 alternative: if reload cannot do this, it will still use its choice. */
24689 mode = GET_MODE (x);
24690 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
24691 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* For x87 modes, strip the SSE part out of mixed classes.  */
24693 if (X87_FLOAT_MODE_P (mode))
24695 if (regclass == FP_TOP_SSE_REGS)
24697 else if (regclass == FP_SECOND_SSE_REGS)
24698 return FP_SECOND_REG;
24700 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Target hook: decide whether a reload of X into/out of RCLASS needs an
   intermediate register or class.  NOTE(review): the body between the QI
   check and the regno handling, plus the final return, is elided from
   this chunk.  */
24706 static enum reg_class
24707 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
24708 enum machine_mode mode,
24709 secondary_reload_info *sri ATTRIBUTE_UNUSED)
24711 /* QImode spills from non-QI registers require
24712 intermediate register on 32bit targets. */
24713 if (!in_p && mode == QImode && !TARGET_64BIT
24714 && (rclass == GENERAL_REGS
24715 || rclass == LEGACY_REGS
24716 || rclass == INDEX_REGS))
/* Resolve pseudos/SUBREGs to a hard register number before the memory
   check below.  */
24725 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
24726 regno = true_regnum (x);
24728 /* Return Q_REGS if the operand is in memory. */
24736 /* If we are copying between general and FP registers, we need a memory
24737 location. The same is true for SSE and MMX registers.
24739 To optimize register_move_cost performance, allow inline variant.
24741 The macro can't work reliably when one of the CLASSES is class containing
24742 registers from multiple units (SSE, MMX, integer). We avoid this by never
24743 combining those units in single alternative in the machine description.
24744 Ensure that this constraint holds to avoid unexpected surprises.
24746 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
24747 enforce these sanity checks. */
/* NOTE(review): some `return true/false` payloads and braces are elided
   from this chunk.  */
24750 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
24751 enum machine_mode mode, int strict)
/* Mixed-unit classes are only tolerated in the non-strict (cost
   estimation) mode; under strict they assert.  */
24753 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
24754 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
24755 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
24756 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
24757 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
24758 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
24760 gcc_assert (!strict);
24764 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
24767 /* ??? This is a lie. We do have moves between mmx/general, and for
24768 mmx/sse2. But by saying we need secondary memory we discourage the
24769 register allocator from using the mmx registers unless needed. */
24770 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
24773 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
24775 /* SSE1 doesn't have any direct moves from other classes. */
24779 /* If the target says that inter-unit moves are more expensive
24780 than moving through memory, then don't generate them. */
24781 if (!TARGET_INTER_UNIT_MOVES)
24784 /* Between SSE and general, we have moves no larger than word size. */
24785 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed, used by the
   SECONDARY_MEMORY_NEEDED macro.  */
24793 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
24794 enum machine_mode mode, int strict)
24796 return inline_secondary_memory_needed (class1, class2, mode, strict);
24799 /* Return true if the registers in CLASS cannot represent the change from
24800 modes FROM to TO. */
/* NOTE(review): the same-size early-out, `return true/false` payloads and
   closing braces are elided from this chunk.  */
24803 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
24804 enum reg_class regclass)
24809 /* x87 registers can't do subreg at all, as all values are reformatted
24810 to extended precision. */
24811 if (MAYBE_FLOAT_CLASS_P (regclass))
24814 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
24816 /* Vector registers do not support QI or HImode loads. If we don't
24817 disallow a change to these modes, reload will assume it's ok to
24818 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
24819 the vec_dupv4hi pattern. */
24820 if (GET_MODE_SIZE (from) < 4)
24823 /* Vector registers do not support subreg with nonzero offsets, which
24824 are otherwise valid for integer registers. Since we can't see
24825 whether we have a nonzero offset from here, prohibit all
24826 nonparadoxical subregs changing size. */
24827 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
24834 /* Return the cost of moving data of mode M between a
24835 register and memory. A value of 2 is the default; this cost is
24836 relative to those in `REGISTER_MOVE_COST'.
24838 This function is used extensively by register_move_cost that is used to
24839 build tables at startup. Make it inline in this case.
24840 When IN is 2, return maximum of in and out move cost.
24842 If moving between registers and memory is more expensive than
24843 between two registers, you should define this macro to express the
24846 Model also increased moving costs of QImode registers in non
24850 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* NOTE(review): the `index` computations, mode-size `switch` case labels
   and several braces are elided from this chunk — the visible lines are
   only the cost-table lookups.  */
/* x87 classes: cost comes from the fp_load/fp_store tables.  */
24854 if (FLOAT_CLASS_P (regclass))
24872 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
24873 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: indexed by GET_MODE_SIZE into sse_load/sse_store.  */
24875 if (SSE_CLASS_P (regclass))
24878 switch (GET_MODE_SIZE (mode))
24893 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
24894 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX classes: indexed by GET_MODE_SIZE into mmx_load/mmx_store.  */
24896 if (MMX_CLASS_P (regclass))
24899 switch (GET_MODE_SIZE (mode))
24911 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
24912 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes, by operand size.  QImode in non-Q registers models
   the extra cost of partial-register access.  */
24914 switch (GET_MODE_SIZE (mode))
24917 if (Q_CLASS_P (regclass) || TARGET_64BIT)
24920 return ix86_cost->int_store[0];
/* On partial-register-dependency targets, model a byte load as movzbl
   when optimizing for speed.  */
24921 if (TARGET_PARTIAL_REG_DEPENDENCY
24922 && optimize_function_for_speed_p (cfun))
24923 cost = ix86_cost->movzbl_load;
24925 cost = ix86_cost->int_load[0];
24927 return MAX (cost, ix86_cost->int_store[0]);
24933 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
24935 return ix86_cost->movzbl_load;
24937 return ix86_cost->int_store[0] + 4;
24942 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
24943 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
24945 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
24946 if (mode == TFmode)
24949 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
24951 cost = ix86_cost->int_load[2];
24953 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words the mode occupies.  */
24954 return (cost * (((int) GET_MODE_SIZE (mode)
24955 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper around inline_memory_move_cost, used by the
   MEMORY_MOVE_COST macro.  */
24960 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
24962 return inline_memory_move_cost (mode, regclass, in);
24966 /* Return the cost of moving data from a register in class CLASS1 to
24967 one in class CLASS2.
24969 It is not required that the cost always equal 2 when FROM is the same as TO;
24970 on some machines it is expensive to move between registers if they are not
24971 general registers. */
/* NOTE(review): the `cost` declaration/initialization, some return
   payloads and braces are elided from this chunk.  */
24974 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
24975 enum reg_class class2)
24977 /* In case we require secondary memory, compute cost of the store followed
24978 by load. In order to avoid bad register allocation choices, we need
24979 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
24981 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* `in == 2` asks inline_memory_move_cost for max(load, store).  */
24985 cost += inline_memory_move_cost (mode, class1, 2);
24986 cost += inline_memory_move_cost (mode, class2, 2);
24988 /* In case of copying from general_purpose_register we may emit multiple
24989 stores followed by single load causing memory size mismatch stall.
24990 Count this as arbitrarily high cost of 20. */
24991 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
24994 /* In the case of FP/MMX moves, the registers actually overlap, and we
24995 have to switch modes in order to treat them differently. */
24996 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
24997 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25003 /* Moves between SSE/MMX and integer unit are expensive. */
25004 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25005 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25007 /* ??? By keeping returned value relatively high, we limit the number
25008 of moves between integer and MMX/SSE registers for all targets.
25009 Additionally, high value prevents problem with x86_modes_tieable_p(),
25010 where integer modes in MMX/SSE registers are not tieable
25011 because of missing QImode and HImode moves to, from or between
25012 MMX/SSE registers. */
25013 return MAX (8, ix86_cost->mmxsse_to_integer);
/* Same-unit moves: per-unit base costs from the cost tables.  */
25015 if (MAYBE_FLOAT_CLASS_P (class1))
25016 return ix86_cost->fp_move;
25017 if (MAYBE_SSE_CLASS_P (class1))
25018 return ix86_cost->sse_move;
25019 if (MAYBE_MMX_CLASS_P (class1))
25020 return ix86_cost->mmx_move;
25024 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): braces and some `return 0/1` payloads are elided from
   this chunk.  */
25027 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
25029 /* Flags and only flags can only hold CCmode values. */
25030 if (CC_REGNO_P (regno))
25031 return GET_MODE_CLASS (mode) == MODE_CC;
25032 if (GET_MODE_CLASS (mode) == MODE_CC
25033 || GET_MODE_CLASS (mode) == MODE_RANDOM
25034 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
25036 if (FP_REGNO_P (regno))
25037 return VALID_FP_MODE_P (mode);
25038 if (SSE_REGNO_P (regno))
25040 /* We implement the move patterns for all vector modes into and
25041 out of SSE registers, even when no operation instructions
25042 are available. OImode move is available only when AVX is
25044 return ((TARGET_AVX && mode == OImode)
25045 || VALID_AVX256_REG_MODE (mode)
25046 || VALID_SSE_REG_MODE (mode)
25047 || VALID_SSE2_REG_MODE (mode)
25048 || VALID_MMX_REG_MODE (mode)
25049 || VALID_MMX_REG_MODE_3DNOW (mode));
25051 if (MMX_REGNO_P (regno))
25053 /* We implement the move patterns for 3DNOW modes even in MMX mode,
25054 so if the register is available at all, then we can move data of
25055 the given mode into or out of it. */
25056 return (VALID_MMX_REG_MODE (mode)
25057 || VALID_MMX_REG_MODE_3DNOW (mode));
/* General-purpose registers from here down.  */
25060 if (mode == QImode)
25062 /* Take care for QImode values - they can be in non-QI regs,
25063 but then they do cause partial register stalls. */
25064 if (regno < 4 || TARGET_64BIT)
25066 if (!TARGET_PARTIAL_REG_STALL)
/* During reload, allow QImode in high regs even with the stall.  */
25068 return reload_in_progress || reload_completed;
25070 /* We handle both integer and floats in the general purpose registers. */
25071 else if (VALID_INT_MODE_P (mode))
25073 else if (VALID_FP_MODE_P (mode))
25075 else if (VALID_DFP_MODE_P (mode))
25077 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
25078 on to use that value in smaller contexts, this can easily force a
25079 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
25080 supporting DImode, allow it. */
25081 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
25087 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
25088 tieable integer mode. */
/* NOTE(review): the `switch (mode)` header and case labels are elided
   from this chunk; only two return payloads are visible.  */
25091 ix86_tieable_integer_mode_p (enum machine_mode mode)
/* HI/QI tie freely unless partial-register stalls are modeled.  */
25100 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
/* DImode is only tieable on 64-bit targets.  */
25103 return TARGET_64BIT;
25110 /* Return true if MODE1 is accessible in a register that can hold MODE2
25111 without copying. That is, all register classes that can hold MODE2
25112 can also hold MODE1. */
/* NOTE(review): return type, braces and the final `return false` are
   elided from this chunk.  */
25115 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
25117 if (mode1 == mode2)
25120 if (ix86_tieable_integer_mode_p (mode1)
25121 && ix86_tieable_integer_mode_p (mode2))
25124 /* MODE2 being XFmode implies fp stack or general regs, which means we
25125 can tie any smaller floating point modes to it. Note that we do not
25126 tie this with TFmode. */
25127 if (mode2 == XFmode)
25128 return mode1 == SFmode || mode1 == DFmode;
25130 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
25131 that we can tie it with SFmode. */
25132 if (mode2 == DFmode)
25133 return mode1 == SFmode;
25135 /* If MODE2 is only appropriate for an SSE register, then tie with
25136 any other mode acceptable to SSE registers. */
25137 if (GET_MODE_SIZE (mode2) == 16
25138 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
25139 return (GET_MODE_SIZE (mode1) == 16
25140 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
25142 /* If MODE2 is appropriate for an MMX register, then tie
25143 with any other mode acceptable to MMX registers. */
25144 if (GET_MODE_SIZE (mode2) == 8
25145 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
25146 return (GET_MODE_SIZE (mode1) == 8
25147 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
25152 /* Compute a (partial) cost for rtx X. Return true if the complete
25153 cost has been computed, and false if subexpressions should be
25154 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): the `case` labels of the big switch (CONST_INT, MULT,
   PLUS, DIV, NEG, COMPARE, ABS, SQRT, UNSPEC, ...) and many braces are
   elided by this line-sampled extraction; the fragments below are the
   interiors of those cases.  */
25157 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
25159 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
25160 enum machine_mode mode = GET_MODE (x);
/* Constant/symbol costs: 64-bit immediates that need movabs are dearer. */
25168 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
25170 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
25172 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): `!GET_CODE (x) != LABEL_REF` applies `!` to the code
   value before comparing, so the subexpression is effectively constant;
   this looks like it was meant to be `GET_CODE (x) != LABEL_REF` —
   verify against upstream, which later fixed this expression.  */
25174 || (!GET_CODE (x) != LABEL_REF
25175 && (GET_CODE (x) != SYMBOL_REF
25176 || !SYMBOL_REF_LOCAL_P (x)))))
25183 if (mode == VOIDmode)
25186 switch (standard_80387_constant_p (x))
25191 default: /* Other constants */
25196 /* Start with (MEM (SYMBOL_REF)), since that's where
25197 it'll probably end up. Add a penalty for size. */
25198 *total = (COSTS_N_INSNS (1)
25199 + (flag_pic != 0 && !TARGET_64BIT)
25200 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
25206 /* The zero extensions is often completely free on x86_64, so make
25207 it as cheap as possible. */
25208 if (TARGET_64BIT && mode == DImode
25209 && GET_MODE (XEXP (x, 0)) == SImode)
25211 else if (TARGET_ZERO_EXTEND_WITH_AND)
25212 *total = ix86_cost->add;
25214 *total = ix86_cost->movzx;
25218 *total = ix86_cost->movsx;
/* Shift by constant: small left shifts may be done with lea instead. */
25222 if (CONST_INT_P (XEXP (x, 1))
25223 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
25225 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25228 *total = ix86_cost->add;
25231 if ((value == 2 || value == 3)
25232 && ix86_cost->lea <= ix86_cost->shift_const)
25234 *total = ix86_cost->lea;
/* DImode shifts on 32-bit targets decompose into multiple insns. */
25244 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
25246 if (CONST_INT_P (XEXP (x, 1)))
25248 if (INTVAL (XEXP (x, 1)) > 32)
25249 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
25251 *total = ix86_cost->shift_const * 2;
25255 if (GET_CODE (XEXP (x, 1)) == AND)
25256 *total = ix86_cost->shift_var * 2;
25258 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
25263 if (CONST_INT_P (XEXP (x, 1)))
25264 *total = ix86_cost->shift_const;
25266 *total = ix86_cost->shift_var;
25271 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25273 /* ??? SSE scalar cost should be used here. */
25274 *total = ix86_cost->fmul;
25277 else if (X87_FLOAT_MODE_P (mode))
25279 *total = ix86_cost->fmul;
25282 else if (FLOAT_MODE_P (mode))
25284 /* ??? SSE vector cost should be used here. */
25285 *total = ix86_cost->fmul;
/* Integer multiply: cost scales with popcount of a constant operand. */
25290 rtx op0 = XEXP (x, 0);
25291 rtx op1 = XEXP (x, 1);
25293 if (CONST_INT_P (XEXP (x, 1)))
25295 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25296 for (nbits = 0; value != 0; value &= value - 1)
25300 /* This is arbitrary. */
25303 /* Compute costs correctly for widening multiplication. */
25304 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
25305 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
25306 == GET_MODE_SIZE (mode))
25308 int is_mulwiden = 0;
25309 enum machine_mode inner_mode = GET_MODE (op0);
25311 if (GET_CODE (op0) == GET_CODE (op1))
25312 is_mulwiden = 1, op1 = XEXP (op1, 0);
25313 else if (CONST_INT_P (op1))
25315 if (GET_CODE (op0) == SIGN_EXTEND)
25316 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
25319 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
25323 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
25326 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
25327 + nbits * ix86_cost->mult_bit
25328 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
25337 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25338 /* ??? SSE cost should be used here. */
25339 *total = ix86_cost->fdiv;
25340 else if (X87_FLOAT_MODE_P (mode))
25341 *total = ix86_cost->fdiv;
25342 else if (FLOAT_MODE_P (mode))
25343 /* ??? SSE vector cost should be used here. */
25344 *total = ix86_cost->fdiv;
25346 *total = ix86_cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize lea-shaped (plus (mult reg 2/4/8) const) addresses. */
25350 if (GET_MODE_CLASS (mode) == MODE_INT
25351 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
25353 if (GET_CODE (XEXP (x, 0)) == PLUS
25354 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
25355 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
25356 && CONSTANT_P (XEXP (x, 1)))
25358 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
25359 if (val == 2 || val == 4 || val == 8)
25361 *total = ix86_cost->lea;
25362 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
25363 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
25365 *total += rtx_cost (XEXP (x, 1), outer_code);
25369 else if (GET_CODE (XEXP (x, 0)) == MULT
25370 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
25372 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
25373 if (val == 2 || val == 4 || val == 8)
25375 *total = ix86_cost->lea;
25376 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
25377 *total += rtx_cost (XEXP (x, 1), outer_code);
25381 else if (GET_CODE (XEXP (x, 0)) == PLUS)
25383 *total = ix86_cost->lea;
25384 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
25385 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
25386 *total += rtx_cost (XEXP (x, 1), outer_code);
25393 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25395 /* ??? SSE cost should be used here. */
25396 *total = ix86_cost->fadd;
25399 else if (X87_FLOAT_MODE_P (mode))
25401 *total = ix86_cost->fadd;
25404 else if (FLOAT_MODE_P (mode))
25406 /* ??? SSE vector cost should be used here. */
25407 *total = ix86_cost->fadd;
/* DImode logic ops on 32-bit need two word-sized operations. */
25415 if (!TARGET_64BIT && mode == DImode)
25417 *total = (ix86_cost->add * 2
25418 + (rtx_cost (XEXP (x, 0), outer_code)
25419 << (GET_MODE (XEXP (x, 0)) != DImode))
25420 + (rtx_cost (XEXP (x, 1), outer_code)
25421 << (GET_MODE (XEXP (x, 1)) != DImode)));
25427 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25429 /* ??? SSE cost should be used here. */
25430 *total = ix86_cost->fchs;
25433 else if (X87_FLOAT_MODE_P (mode))
25435 *total = ix86_cost->fchs;
25438 else if (FLOAT_MODE_P (mode))
25440 /* ??? SSE vector cost should be used here. */
25441 *total = ix86_cost->fchs;
25447 if (!TARGET_64BIT && mode == DImode)
25448 *total = ix86_cost->add * 2;
25450 *total = ix86_cost->add;
/* COMPARE of a single extracted bit against zero is just a test insn. */
25454 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
25455 && XEXP (XEXP (x, 0), 1) == const1_rtx
25456 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
25457 && XEXP (x, 1) == const0_rtx)
25459 /* This kind of construct is implemented using test[bwl].
25460 Treat it as if we had an AND. */
25461 *total = (ix86_cost->add
25462 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
25463 + rtx_cost (const1_rtx, outer_code))
25469 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
25474 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25475 /* ??? SSE cost should be used here. */
25476 *total = ix86_cost->fabs;
25477 else if (X87_FLOAT_MODE_P (mode))
25478 *total = ix86_cost->fabs;
25479 else if (FLOAT_MODE_P (mode))
25480 /* ??? SSE vector cost should be used here. */
25481 *total = ix86_cost->fabs;
25485 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25486 /* ??? SSE cost should be used here. */
25487 *total = ix86_cost->fsqrt;
25488 else if (X87_FLOAT_MODE_P (mode))
25489 *total = ix86_cost->fsqrt;
25490 else if (FLOAT_MODE_P (mode))
25491 /* ??? SSE vector cost should be used here. */
25492 *total = ix86_cost->fsqrt;
/* Thread-pointer reads (UNSPEC_TP) are presumably costed as free. */
25496 if (XINT (x, 1) == UNSPEC_TP)
/* Monotonically increasing counter used to generate unique local
   labels (LPC$N, L%d$lz) for Mach-O lazy-binding stubs.  */
25507 static int current_machopic_label_num;
25509 /* Given a symbol name and its associated stub, write out the
25510 definition of the stub. */
/* NOTE(review): several lines are elided here (gaps in the 25xxx
   numbering) — in particular the MACHOPIC_ATOMIC_PIC_BASE / PIC
   conditionals that select between the printed alternatives.  */
25513 machopic_output_stub (FILE *file, const char *symb, const char *stub)
25515 unsigned int length;
25516 char *binder_name, *symbol_name, lazy_ptr_name[32];
25517 int label = ++current_machopic_label_num;
25519 /* For 64-bit we shouldn't get here. */
25520 gcc_assert (!TARGET_64BIT);
25522 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
25523 symb = (*targetm.strip_name_encoding) (symb);
25525 length = strlen (stub);
25526 binder_name = XALLOCAVEC (char, length + 32);
25527 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
25529 length = strlen (symb);
25530 symbol_name = XALLOCAVEC (char, length + 32);
25531 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
25533 sprintf (lazy_ptr_name, "L%d$lz", label);
25536 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
25538 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
25540 fprintf (file, "%s:\n", stub);
25541 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC form: materialize the pic base in %eax, then jump through the
   lazy pointer.  */
25545 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
25546 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
25547 fprintf (file, "\tjmp\t*%%edx\n");
25550 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
25552 fprintf (file, "%s:\n", binder_name);
25556 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
25557 fprintf (file, "\tpushl\t%%eax\n");
25560 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
25562 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* The lazy pointer itself, initially pointing at the binder.  */
25564 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
25565 fprintf (file, "%s:\n", lazy_ptr_name);
25566 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
25567 fprintf (file, "\t.long %s\n", binder_name);
/* End-of-file hook for Darwin/x86; delegates to the generic Darwin
   handler.  (Lines elided by the extraction.)  */
25571 darwin_x86_file_end (void)
25573 darwin_file_end ();
25576 #endif /* TARGET_MACHO */
25578 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: caller-saved GPRs first, then callee-saved
   GPRs, then x87/SSE (order depends on TARGET_SSE_MATH), then MMX.
   NOTE(review): declaration of i/pos and some loop bodies are elided
   by this extraction.  */
25581 x86_order_regs_for_local_alloc (void)
25586 /* First allocate the local general purpose registers. */
25587 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
25588 if (GENERAL_REGNO_P (i) && call_used_regs[i])
25589 reg_alloc_order [pos++] = i;
25591 /* Global general purpose registers. */
25592 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
25593 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
25594 reg_alloc_order [pos++] = i;
25596 /* x87 registers come first in case we are doing FP math
25598 if (!TARGET_SSE_MATH)
25599 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
25600 reg_alloc_order [pos++] = i;
25602 /* SSE registers. */
25603 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
25604 reg_alloc_order [pos++] = i;
25605 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
25606 reg_alloc_order [pos++] = i;
25608 /* x87 registers. */
25609 if (TARGET_SSE_MATH)
25610 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
25611 reg_alloc_order [pos++] = i;
25613 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
25614 reg_alloc_order [pos++] = i;
25616 /* Initialize the rest of array as we do not allocate some registers
25618 while (pos < FIRST_PSEUDO_REGISTER)
25619 reg_alloc_order [pos++] = 0;
25622 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
25623 struct attribute_spec.handler. */
/* Rejects the attribute on non-function entities and (per the elided
   TARGET_64BIT check implied by the warning below) on 32-bit targets;
   ms_abi and sysv_abi are mutually exclusive.  Lines elided.  */
25625 ix86_handle_abi_attribute (tree *node, tree name,
25626 tree args ATTRIBUTE_UNUSED,
25627 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
25629 if (TREE_CODE (*node) != FUNCTION_TYPE
25630 && TREE_CODE (*node) != METHOD_TYPE
25631 && TREE_CODE (*node) != FIELD_DECL
25632 && TREE_CODE (*node) != TYPE_DECL)
25634 warning (OPT_Wattributes, "%qs attribute only applies to functions",
25635 IDENTIFIER_POINTER (name));
25636 *no_add_attrs = true;
25641 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
25642 IDENTIFIER_POINTER (name));
25643 *no_add_attrs = true;
25647 /* Can combine regparm with all attributes but fastcall. */
25648 if (is_attribute_p ("ms_abi", name))
25650 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
25652 error ("ms_abi and sysv_abi attributes are not compatible");
25657 else if (is_attribute_p ("sysv_abi", name))
25659 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
25661 error ("ms_abi and sysv_abi attributes are not compatible");
25670 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
25671 struct attribute_spec.handler. */
/* Accepts the attribute only on record/union types (or decls thereof);
   the two attributes are mutually exclusive.  Lines elided.  */
25673 ix86_handle_struct_attribute (tree *node, tree name,
25674 tree args ATTRIBUTE_UNUSED,
25675 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
25678 if (DECL_P (*node))
25680 if (TREE_CODE (*node) == TYPE_DECL)
25681 type = &TREE_TYPE (*node);
25686 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
25687 || TREE_CODE (*type) == UNION_TYPE)))
25689 warning (OPT_Wattributes, "%qs attribute ignored",
25690 IDENTIFIER_POINTER (name));
25691 *no_add_attrs = true;
25694 else if ((is_attribute_p ("ms_struct", name)
25695 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
25696 || ((is_attribute_p ("gcc_struct", name)
25697 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
25699 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
25700 IDENTIFIER_POINTER (name));
25701 *no_add_attrs = true;
/* Return true if RECORD_TYPE should use MS bitfield layout: either the
   target default says so and "gcc_struct" does not override it, or the
   type carries an explicit "ms_struct" attribute.  */
25708 ix86_ms_bitfield_layout_p (const_tree record_type)
25710 return (TARGET_MS_BITFIELD_LAYOUT &&
25711 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
25712 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
25715 /* Returns an expression indicating where the this parameter is
25716 located on entry to the FUNCTION. */
/* 64-bit: `this' is the first (or, for aggregate-return, second)
   integer parameter register of the active ABI.  32-bit: a register
   for regparm/fastcall functions, otherwise a stack slot.  Some
   branch lines are elided here.  */
25719 x86_this_parameter (tree function)
25721 tree type = TREE_TYPE (function);
25722 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
25727 const int *parm_regs;
25729 if (ix86_function_type_abi (type) == MS_ABI)
25730 parm_regs = x86_64_ms_abi_int_parameter_registers;
25732 parm_regs = x86_64_int_parameter_registers;
25733 return gen_rtx_REG (DImode, parm_regs[aggr]);
25736 nregs = ix86_function_regparm (type, function);
25738 if (nregs > 0 && !stdarg_p (type))
25742 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
25743 regno = aggr ? DX_REG : CX_REG;
25751 return gen_rtx_MEM (SImode,
25752 plus_constant (stack_pointer_rtx, 4));
25755 return gen_rtx_REG (SImode, regno);
25758 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
25761 /* Determine whether x86_output_mi_thunk can succeed. */
/* Return-statement lines are elided by the extraction; each visible
   comment corresponds to a condition that decides the result.  */
25764 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
25765 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
25766 HOST_WIDE_INT vcall_offset, const_tree function)
25768 /* 64-bit can handle anything. */
25772 /* For 32-bit, everything's fine if we have one free register. */
25773 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
25776 /* Need a free register for vcall_offset. */
25780 /* Need a free register for GOT references. */
25781 if (flag_pic && !(*targetm.binds_local_p) (function))
25784 /* Otherwise ok. */
25788 /* Output the assembler code for a thunk function. THUNK_DECL is the
25789 declaration for the thunk function itself, FUNCTION is the decl for
25790 the target function. DELTA is an immediate constant offset to be
25791 added to THIS. If VCALL_OFFSET is nonzero, the word at
25792 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): many TARGET_64BIT / delta / vcall_offset conditionals
   and xops[] declarations are elided by this extraction; the fragments
   below are the interiors of those branches.  */
25795 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
25796 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
25797 HOST_WIDE_INT vcall_offset, tree function)
25800 rtx this_param = x86_this_parameter (function);
25803 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
25804 pull it in now and let DELTA benefit. */
25805 if (REG_P (this_param))
25806 this_reg = this_param;
25807 else if (vcall_offset)
25809 /* Put the this parameter into %eax. */
25810 xops[0] = this_param;
25811 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
25812 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
25815 this_reg = NULL_RTX;
25817 /* Adjust the this parameter by a fixed constant. */
25820 xops[0] = GEN_INT (delta);
25821 xops[1] = this_reg ? this_reg : this_param;
25824 if (!x86_64_general_operand (xops[0], DImode))
/* Delta too large for an immediate: stage it through r10.  */
25826 tmp = gen_rtx_REG (DImode, R10_REG);
25828 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
25830 xops[1] = this_param;
25832 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
25835 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
25838 /* Adjust the this parameter by a value stored in the vtable. */
25842 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit: pick a scratch register that does not hold an argument
   (fastcall uses %ecx, so fall back to %eax).  */
25845 int tmp_regno = CX_REG;
25846 if (lookup_attribute ("fastcall",
25847 TYPE_ATTRIBUTES (TREE_TYPE (function))))
25848 tmp_regno = AX_REG;
25849 tmp = gen_rtx_REG (SImode, tmp_regno);
25852 xops[0] = gen_rtx_MEM (Pmode, this_reg);
25854 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
25856 /* Adjust the this parameter. */
25857 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
25858 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
25860 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
25861 xops[0] = GEN_INT (vcall_offset);
25863 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
25864 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
25866 xops[1] = this_reg;
25867 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
25870 /* If necessary, drop THIS back to its stack slot. */
25871 if (this_reg && this_reg != this_param)
25873 xops[0] = this_reg;
25874 xops[1] = this_param;
25875 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
25878 xops[0] = XEXP (DECL_RTL (function), 0);
25881 if (!flag_pic || (*targetm.binds_local_p) (function))
25882 output_asm_insn ("jmp\t%P0", xops);
25883 /* All thunks should be in the same object as their target,
25884 and thus binds_local_p should be true. */
25885 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
25886 gcc_unreachable ();
25889 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
25890 tmp = gen_rtx_CONST (Pmode, tmp);
25891 tmp = gen_rtx_MEM (QImode, tmp);
25893 output_asm_insn ("jmp\t%A0", xops);
25898 if (!flag_pic || (*targetm.binds_local_p) (function))
25899 output_asm_insn ("jmp\t%P0", xops);
25904 rtx sym_ref = XEXP (DECL_RTL (function), 0);
25905 tmp = (gen_rtx_SYMBOL_REF
25907 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
25908 tmp = gen_rtx_MEM (QImode, tmp);
25910 output_asm_insn ("jmp\t%0", xops);
25913 #endif /* TARGET_MACHO */
/* 32-bit PIC tail: load the GOT pointer into %ecx and jump via GOT.  */
25915 tmp = gen_rtx_REG (SImode, CX_REG);
25916 output_set_got (tmp, NULL_RTX);
25919 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
25920 output_asm_insn ("jmp\t{*}%1", xops);
/* Emit file-start boilerplate: generic prologue, Darwin prologue when
   applicable, optional .version / __fltused directives, and Intel
   syntax selection.  Some conditional lines are elided.  */
25926 x86_file_start (void)
25928 default_file_start ();
25930 darwin_file_start ();
25932 if (X86_FILE_START_VERSION_DIRECTIVE)
25933 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
25934 if (X86_FILE_START_FLTUSED)
25935 fputs ("\t.global\t__fltused\n", asm_out_file);
25936 if (ix86_asm_dialect == ASM_INTEL)
25937 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* Compute the alignment (in bits) for FIELD, given the alignment
   COMPUTED by generic code: on 32-bit non-ALIGN_DOUBLE targets, cap
   doubles / 64-bit ints at 32-bit alignment (the traditional i386
   ABI).  Return lines are partially elided.  */
25941 x86_field_alignment (tree field, int computed)
25943 enum machine_mode mode;
25944 tree type = TREE_TYPE (field);
25946 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
25948 mode = TYPE_MODE (strip_array_types (type));
25949 if (mode == DFmode || mode == DCmode
25950 || GET_MODE_CLASS (mode) == MODE_INT
25951 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
25952 return MIN (32, computed);
25956 /* Output assembler code to FILE to increment profiler label # LABELNO
25957 for profiling a function entry. */
/* Emits the mcount call sequence: 64-bit, 32-bit PIC, and 32-bit
   non-PIC variants.  The TARGET_64BIT / flag_pic branch lines are
   elided by the extraction.  */
25959 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
25963 #ifndef NO_PROFILE_COUNTERS
25964 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
25967 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
25968 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
25970 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
25974 #ifndef NO_PROFILE_COUNTERS
25975 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
25976 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
25978 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
25982 #ifndef NO_PROFILE_COUNTERS
25983 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
25984 PROFILE_COUNT_REGISTER);
25986 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
25990 /* We don't have exact information about the insn sizes, but we may assume
25991 quite safely that we are informed about all 1 byte insns and memory
25992 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound on INSN's encoded size in bytes;
   used by ix86_avoid_jump_misspredicts.  Several return lines are
   elided by the extraction.  */
25996 min_insn_size (rtx insn)
26000 if (!INSN_P (insn) || !active_insn_p (insn))
26003 /* Discard alignments we've emit and jump instructions. */
26004 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26005 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
26008 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
26009 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
26012 /* Important case - calls are always 5 bytes.
26013 It is common to have many calls in the row. */
26015 && symbolic_reference_mentioned_p (PATTERN (insn))
26016 && !SIBLING_CALL_P (insn))
26018 if (get_attr_length (insn) <= 1)
26021 /* For normal instructions we may rely on the sizes of addresses
26022 and the presence of symbol to require 4 bytes of encoding.
26023 This is not the case for jumps where references are PC relative. */
26024 if (!JUMP_P (insn))
26026 l = get_attr_length_address (insn);
26027 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
26036 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Sliding-window pass: pad so no 16-byte window contains four jumps.
   Loop-control and brace lines are elided by the extraction.  */
26040 ix86_avoid_jump_misspredicts (void)
26042 rtx insn, start = get_insns ();
26043 int nbytes = 0, njumps = 0;
26046 /* Look for all minimal intervals of instructions containing 4 jumps.
26047 The intervals are bounded by START and INSN. NBYTES is the total
26048 size of instructions in the interval including INSN and not including
26049 START. When the NBYTES is smaller than 16 bytes, it is possible
26050 that the end of START and INSN ends up in the same 16byte page.
26052 The smallest offset in the page INSN can start is the case where START
26053 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
26054 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
26056 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26059 nbytes += min_insn_size (insn);
26061 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
26062 INSN_UID (insn), min_insn_size (insn));
26064 && GET_CODE (PATTERN (insn)) != ADDR_VEC
26065 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Shrink the window from the front while it holds too many jumps.  */
26073 start = NEXT_INSN (start);
26074 if ((JUMP_P (start)
26075 && GET_CODE (PATTERN (start)) != ADDR_VEC
26076 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
26078 njumps--, isjump = 1;
26081 nbytes -= min_insn_size (start);
26083 gcc_assert (njumps >= 0);
26085 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
26086 INSN_UID (start), INSN_UID (insn), nbytes);
26088 if (njumps == 3 && isjump && nbytes < 16)
26090 int padsize = 15 - nbytes + min_insn_size (insn);
26093 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
26094 INSN_UID (insn), padsize);
26095 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
26100 /* AMD Athlon works faster
26101 when RET is not destination of conditional jump or directly preceded
26102 by other jump instruction. We avoid the penalty by inserting NOP just
26103 before the RET instructions in such cases. */
/* Walks predecessors of the exit block; `replace' decides between a
   NOP insertion and a long-form return (branch lines elided).  */
26105 ix86_pad_returns (void)
26110 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
26112 basic_block bb = e->src;
26113 rtx ret = BB_END (bb);
26115 bool replace = false;
26117 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
26118 || optimize_bb_for_size_p (bb))
26120 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
26121 if (active_insn_p (prev) || LABEL_P (prev))
26123 if (prev && LABEL_P (prev))
/* A labelled RET: penalized if any predecessor edge is a taken jump. */
26128 FOR_EACH_EDGE (e, ei, bb->preds)
26129 if (EDGE_FREQUENCY (e) && e->src->index >= 0
26130 && !(e->flags & EDGE_FALLTHRU))
26135 prev = prev_active_insn (ret);
26137 && ((JUMP_P (prev) && any_condjump_p (prev))
26140 /* Empty functions get branch mispredict even when the jump destination
26141 is not visible to us. */
26142 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
26147 emit_insn_before (gen_return_internal_long (), ret);
26153 /* Implement machine specific optimizations. We implement padding of returns
26154 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* Machine-dependent reorg entry point (the `ix86_reorg' definition line
   itself is elided by the extraction).  */
26158 if (TARGET_PAD_RETURNS && optimize
26159 && optimize_function_for_speed_p (cfun))
26160 ix86_pad_returns ();
26161 if (TARGET_FOUR_JUMP_LIMIT && optimize
26162 && optimize_function_for_speed_p (cfun))
26163 ix86_avoid_jump_misspredicts ();
26166 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans INSN's extracted operands for a QImode hard register numbered
   >= 4 (spl/bpl/sil/dil and above need REX).  Return lines and the
   QImode check are elided by the extraction.  */
26169 x86_extended_QIreg_mentioned_p (rtx insn)
26172 extract_insn_cached (insn);
26173 for (i = 0; i < recog_data.n_operands; i++)
26174 if (REG_P (recog_data.operand[i])
26175 && REGNO (recog_data.operand[i]) >= 4)
26180 /* Return nonzero when P points to register encoded via REX prefix.
26181 Called via for_each_rtx. */
/* (The REG_P guard before the REGNO read is elided by the extraction.) */
26183 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
26185 unsigned int regno;
26188 regno = REGNO (*p);
26189 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
26192 /* Return true when INSN mentions register that must be encoded using REX
/* Walks INSN's pattern (or INSN itself for a bare rtx) with
   extended_reg_mentioned_1 above.  */
26195 x86_extended_reg_mentioned_p (rtx insn)
26197 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
26198 extended_reg_mentioned_1, NULL);
26201 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
26202 optabs would emit if we didn't have TFmode patterns. */
/* Strategy: nonnegative input converts directly; a negative (i.e. high
   bit set) input is halved with the low bit folded in via OR, converted,
   then doubled — the classic unsigned->float idiom.  */
26205 x86_emit_floatuns (rtx operands[2])
26207 rtx neglab, donelab, i0, i1, f0, in, out;
26208 enum machine_mode mode, inmode;
26210 inmode = GET_MODE (operands[1]);
26211 gcc_assert (inmode == SImode || inmode == DImode);
26214 in = force_reg (inmode, operands[1]);
26215 mode = GET_MODE (out);
26216 neglab = gen_label_rtx ();
26217 donelab = gen_label_rtx ();
26218 f0 = gen_reg_rtx (mode);
26220 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
26222 expand_float (out, in, 0);
26224 emit_jump_insn (gen_jump (donelab));
26227 emit_label (neglab);
26229 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
26231 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
26233 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
26235 expand_float (f0, i0, 0);
26237 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
26239 emit_label (donelab);
26242 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26243 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): the per-mode switch labels and several early returns
   are elided by the extraction; visible fragments are the case bodies
   (direct VEC_DUPLICATE, pshuflw/pshufd sequences for V8HI/V16QI,
   widen-and-recurse, and concat-of-halves for 256-bit modes).  */
26246 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
26247 rtx target, rtx val)
26249 enum machine_mode hmode, smode, wsmode, wvmode;
26264 val = force_reg (GET_MODE_INNER (mode), val);
26265 x = gen_rtx_VEC_DUPLICATE (mode, val);
26266 emit_insn (gen_rtx_SET (VOIDmode, target, x));
26272 if (TARGET_SSE || TARGET_3DNOW_A)
26274 val = gen_lowpart (SImode, val);
26275 x = gen_rtx_TRUNCATE (HImode, val);
26276 x = gen_rtx_VEC_DUPLICATE (mode, x);
26277 emit_insn (gen_rtx_SET (VOIDmode, target, x));
26299 /* Extend HImode to SImode using a paradoxical SUBREG. */
26300 tmp1 = gen_reg_rtx (SImode);
26301 emit_move_insn (tmp1, gen_lowpart (SImode, val));
26302 /* Insert the SImode value as low element of V4SImode vector. */
26303 tmp2 = gen_reg_rtx (V4SImode);
26304 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
26305 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
26306 CONST0_RTX (V4SImode),
26308 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
26309 /* Cast the V4SImode vector back to a V8HImode vector. */
26310 tmp1 = gen_reg_rtx (V8HImode);
26311 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
26312 /* Duplicate the low short through the whole low SImode word. */
26313 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
26314 /* Cast the V8HImode vector back to a V4SImode vector. */
26315 tmp2 = gen_reg_rtx (V4SImode);
26316 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
26317 /* Replicate the low element of the V4SImode vector. */
26318 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
26319 /* Cast the V2SImode back to V8HImode, and store in target. */
26320 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
26331 /* Extend QImode to SImode using a paradoxical SUBREG. */
26332 tmp1 = gen_reg_rtx (SImode);
26333 emit_move_insn (tmp1, gen_lowpart (SImode, val));
26334 /* Insert the SImode value as low element of V4SImode vector. */
26335 tmp2 = gen_reg_rtx (V4SImode);
26336 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
26337 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
26338 CONST0_RTX (V4SImode),
26340 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
26341 /* Cast the V4SImode vector back to a V16QImode vector. */
26342 tmp1 = gen_reg_rtx (V16QImode);
26343 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
26344 /* Duplicate the low byte through the whole low SImode word. */
26345 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
26346 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
26347 /* Cast the V16QImode vector back to a V4SImode vector. */
26348 tmp2 = gen_reg_rtx (V4SImode);
26349 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
26350 /* Replicate the low element of the V4SImode vector. */
26351 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
26352 /* Cast the V2SImode back to V16QImode, and store in target. */
26353 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
26361 /* Replicate the value once into the next wider mode and recurse. */
26362 val = convert_modes (wsmode, smode, val, true);
26363 x = expand_simple_binop (wsmode, ASHIFT, val,
26364 GEN_INT (GET_MODE_BITSIZE (smode)),
26365 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26366 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
26368 x = gen_reg_rtx (wvmode);
26369 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
26370 gcc_unreachable ();
26371 emit_move_insn (target, gen_lowpart (mode, x));
26394 rtx tmp = gen_reg_rtx (hmode);
26395 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
26396 emit_insn (gen_rtx_SET (VOIDmode, target,
26397 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
26406 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26407 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* NOTE(review): the mode switch labels, some returns, and the one_var
   == 0 fast paths are elided by the extraction; visible fragments are
   the case interiors.  */
26411 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
26412 rtx target, rtx var, int one_var)
26414 enum machine_mode vsimode;
26417 bool use_vector_set = false;
26422 /* For SSE4.1, we normally use vector set. But if the second
26423 element is zero and inter-unit moves are OK, we use movq
26425 use_vector_set = (TARGET_64BIT
26427 && !(TARGET_INTER_UNIT_MOVES
26433 use_vector_set = TARGET_SSE4_1;
26436 use_vector_set = TARGET_SSE2;
26439 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
26447 use_vector_set = TARGET_AVX;
26453 if (use_vector_set)
/* Zero the vector, then insert VAR at position ONE_VAR.  */
26455 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
26456 var = force_reg (GET_MODE_INNER (mode), var);
26457 ix86_expand_vector_set (mmx_ok, target, var, one_var);
26473 var = force_reg (GET_MODE_INNER (mode), var);
26474 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
26475 emit_insn (gen_rtx_SET (VOIDmode, target, x));
26480 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
26481 new_target = gen_reg_rtx (mode);
26483 new_target = target;
26484 var = force_reg (GET_MODE_INNER (mode), var);
26485 x = gen_rtx_VEC_DUPLICATE (mode, var);
26486 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
26487 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
26490 /* We need to shuffle the value to the correct position, so
26491 create a new pseudo to store the intermediate result. */
26493 /* With SSE2, we can use the integer shuffle insns. */
26494 if (mode != V4SFmode && TARGET_SSE2)
26496 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
26498 GEN_INT (one_var == 1 ? 0 : 1),
26499 GEN_INT (one_var == 2 ? 0 : 1),
26500 GEN_INT (one_var == 3 ? 0 : 1)));
26501 if (target != new_target)
26502 emit_move_insn (target, new_target);
26506 /* Otherwise convert the intermediate result to V4SFmode and
26507 use the SSE1 shuffle instructions. */
26508 if (mode != V4SFmode)
26510 tmp = gen_reg_rtx (V4SFmode);
26511 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
26516 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
26518 GEN_INT (one_var == 1 ? 0 : 1),
26519 GEN_INT (one_var == 2 ? 0+4 : 1+4),
26520 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
26522 if (mode != V4SFmode)
26523 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
26524 else if (tmp != target)
26525 emit_move_insn (target, tmp);
26527 else if (target != new_target)
26528 emit_move_insn (target, new_target);
26533 vsimode = V4SImode;
26539 vsimode = V2SImode;
26545 /* Zero extend the variable element to SImode and recurse. */
26546 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
26548 x = gen_reg_rtx (vsimode);
26549 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
26551 gcc_unreachable ();
26553 emit_move_insn (target, gen_lowpart (mode, x));
26561 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26562 consisting of the values in VALS. It is known that all elements
26563 except ONE_VAR are constants. Return true if successful. */
26566 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
26567 rtx target, rtx vals, int one_var)
26569 rtx var = XVECEXP (vals, 0, one_var);
26570 enum machine_mode wmode;
/* Build CONST_VEC: a copy of VALS with the single variable element
   replaced by zero, so the constant part can be materialized as one
   CONST_VECTOR (typically loaded from the constant pool).  */
26573 const_vec = copy_rtx (vals);
26574 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
26575 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
26583 /* For the two element vectors, it's just as easy to use
26584 the general case. */
26608 /* There's no way to set one QImode entry easily. Combine
26609 the variable value with its adjacent constant value, and
26610 promote to an HImode set. */
26611 x = XVECEXP (vals, 0, one_var ^ 1)
/* ONE_VAR ^ 1 is the partner byte sharing the same HImode lane; the two
   branches below merge VAR and that constant byte in the right order
   depending on which half of the lane ONE_VAR occupies (NOTE(review):
   the selecting condition is elided in this extract).  */;
26614 var = convert_modes (HImode, QImode, var, true);
26615 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
26616 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26617 x = GEN_INT (INTVAL (x) & 0xff);
26621 var = convert_modes (HImode, QImode, var, true);
26622 x = gen_int_mode (INTVAL (x) << 8, HImode);
26624 if (x != const0_rtx)
26625 var = expand_simple_binop (HImode, IOR, var, x, var,
26626 1, OPTAB_LIB_WIDEN);
/* Load the constant vector in the widened (HImode-element) mode, then
   insert the combined HImode value at the corresponding wide slot.  */
26628 x = gen_reg_rtx (wmode);
26629 emit_move_insn (x, gen_lowpart (wmode, const_vec));
26630 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
26632 emit_move_insn (target, gen_lowpart (mode, x));
/* Default path: load the all-constant vector, then overwrite the one
   variable element in place.  */
26639 emit_move_insn (target, const_vec);
26640 ix86_expand_vector_set (mmx_ok, target, var, one_var);
26644 /* A subroutine of ix86_expand_vector_init_general. Use vector
26645 concatenate to handle the most general case: all values variable,
26646 and none identical. */
26649 ix86_expand_vector_init_concat (enum machine_mode mode,
26650 rtx target, rtx *ops, int n)
26652 enum machine_mode cmode, hmode = VOIDmode;
26653 rtx first[8], second[4];
26693 gcc_unreachable ();
/* N == 2: a single VEC_CONCAT of the two (register-forced) operands
   builds the whole result.  */
26696 if (!register_operand (ops[1], cmode))
26697 ops[1] = force_reg (cmode, ops[1]);
26698 if (!register_operand (ops[0], cmode))
26699 ops[0] = force_reg (cmode, ops[0]);
26700 emit_insn (gen_rtx_SET (VOIDmode, target,
26701 gen_rtx_VEC_CONCAT (mode, ops[0],
26721 gcc_unreachable ();
26737 gcc_unreachable ();
26742 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* Pair up adjacent scalars into CMODE half-vectors, then recurse to
   concatenate the halves (and, for larger N, the quarters via HMODE).  */
26745 for (; i > 0; i -= 2, j--)
26747 first[j] = gen_reg_rtx (cmode);
26748 v = gen_rtvec (2, ops[i - 1], ops[i]);
26749 ix86_expand_vector_init (false, first[j],
26750 gen_rtx_PARALLEL (cmode, v));
26756 gcc_assert (hmode != VOIDmode);
26757 for (i = j = 0; i < n; i += 2, j++)
26759 second[j] = gen_reg_rtx (hmode);
26760 ix86_expand_vector_init_concat (hmode, second [j],
26764 ix86_expand_vector_init_concat (mode, target, second, n);
26767 ix86_expand_vector_init_concat (mode, target, first, n);
26771 gcc_unreachable ();
26775 /* A subroutine of ix86_expand_vector_init_general. Use vector
26776 interleave to handle the most general case: all values variable,
26777 and none identical. */
26780 ix86_expand_vector_init_interleave (enum machine_mode mode,
26781 rtx target, rtx *ops, int n)
26783 enum machine_mode first_imode, second_imode, third_imode;
26786 rtx (*gen_load_even) (rtx, rtx, rtx);
26787 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
26788 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* Select per-mode insn generators: V8HI uses one interleave level
   (V4SI then V2DI); V16QI needs an extra level, hence THIRD_IMODE.  */
26793 gen_load_even = gen_vec_setv8hi;
26794 gen_interleave_first_low = gen_vec_interleave_lowv4si;
26795 gen_interleave_second_low = gen_vec_interleave_lowv2di;
26796 first_imode = V4SImode;
26797 second_imode = V2DImode;
26798 third_imode = VOIDmode;
26801 gen_load_even = gen_vec_setv16qi;
26802 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
26803 gen_interleave_second_low = gen_vec_interleave_lowv4si;
26804 first_imode = V8HImode;
26805 second_imode = V4SImode;
26806 third_imode = V2DImode;
26809 gcc_unreachable ();
/* Stage 1: pack each odd/even pair of scalars into one vector.  */
26812 for (i = 0; i < n; i++)
26814 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
26815 op0 = gen_reg_rtx (SImode);
26816 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
26818 /* Insert the SImode value as low element of V4SImode vector. */
26819 op1 = gen_reg_rtx (V4SImode);
26820 op0 = gen_rtx_VEC_MERGE (V4SImode,
26821 gen_rtx_VEC_DUPLICATE (V4SImode,
26823 CONST0_RTX (V4SImode),
26825 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
26827 /* Cast the V4SImode vector back to a vector in orignal mode. */
26828 op0 = gen_reg_rtx (mode);
26829 emit_move_insn (op0, gen_lowpart (mode, op1));
26831 /* Load even elements into the second positon. */
26832 emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
26835 /* Cast vector to FIRST_IMODE vector. */
26836 ops[i] = gen_reg_rtx (first_imode);
26837 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
26840 /* Interleave low FIRST_IMODE vectors. */
26841 for (i = j = 0; i < n; i += 2, j++)
26843 op0 = gen_reg_rtx (first_imode);
26844 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
26846 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
26847 ops[j] = gen_reg_rtx (second_imode);
26848 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
26851 /* Interleave low SECOND_IMODE vectors. */
26852 switch (second_imode)
/* V4SI case: one more interleave level, then fall through to the final
   V2DI merge (NOTE(review): the fallthrough structure is partly elided
   in this extract).  */
26855 for (i = j = 0; i < n / 2; i += 2, j++)
26857 op0 = gen_reg_rtx (second_imode);
26858 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
26861 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
26863 ops[j] = gen_reg_rtx (third_imode);
26864 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
26866 second_imode = V2DImode;
26867 gen_interleave_second_low = gen_vec_interleave_lowv2di;
/* Final merge of the two remaining halves into TARGET.  */
26871 op0 = gen_reg_rtx (second_imode);
26872 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
26875 /* Cast the SECOND_IMODE vector back to a vector on original
26877 emit_insn (gen_rtx_SET (VOIDmode, target,
26878 gen_lowpart (mode, op0)));
26882 gcc_unreachable ();
26886 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
26887 all values variable, and none identical. */
26890 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
26891 rtx target, rtx vals)
26893 rtx ops[32], op0, op1;
26894 enum machine_mode half_mode = VOIDmode;
26901 if (!mmx_ok && !TARGET_SSE)
/* Wide/float element modes: build by recursive concatenation.  */
26913 n = GET_MODE_NUNITS (mode);
26914 for (i = 0; i < n; i++)
26915 ops[i] = XVECEXP (vals, 0, i);
26916 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit QI/HI vectors: build each 128-bit half by interleaving,
   then concatenate the halves.  */
26920 half_mode = V16QImode;
26924 half_mode = V8HImode;
26928 n = GET_MODE_NUNITS (mode);
26929 for (i = 0; i < n; i++)
26930 ops[i] = XVECEXP (vals, 0, i);
26931 op0 = gen_reg_rtx (half_mode);
26932 op1 = gen_reg_rtx (half_mode);
26933 ix86_expand_vector_init_interleave (half_mode, op0, ops,
26935 ix86_expand_vector_init_interleave (half_mode, op1,
26936 &ops [n >> 1], n >> 2);
26937 emit_insn (gen_rtx_SET (VOIDmode, target,
26938 gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* 128-bit QI/HI vectors: interleave directly when SSE4.1 vec_set
   insns are available; otherwise fall through to the word-building
   path below.  */
26942 if (!TARGET_SSE4_1)
26950 n = GET_MODE_NUNITS (mode);
26951 for (i = 0; i < n; i++)
26952 ops[i] = XVECEXP (vals, 0, i);
26953 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
26961 gcc_unreachable ();
/* Fallback: assemble the vector in integer word registers by
   shift-and-or of the elements, then move the words into TARGET.  */
26965 int i, j, n_elts, n_words, n_elt_per_word;
26966 enum machine_mode inner_mode;
26967 rtx words[4], shift;
26969 inner_mode = GET_MODE_INNER (mode);
26970 n_elts = GET_MODE_NUNITS (mode);
26971 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
26972 n_elt_per_word = n_elts / n_words;
26973 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
26975 for (i = 0; i < n_words; ++i)
26977 rtx word = NULL_RTX;
26979 for (j = 0; j < n_elt_per_word; ++j)
26981 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
26982 elt = convert_modes (word_mode, inner_mode, elt, true);
26988 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
26989 word, 1, OPTAB_LIB_WIDEN);
26990 word = expand_simple_binop (word_mode, IOR, word, elt,
26991 word, 1, OPTAB_LIB_WIDEN);
26999 emit_move_insn (target, gen_lowpart (mode, words[0]));
27000 else if (n_words == 2)
27002 rtx tmp = gen_reg_rtx (mode);
/* Clobber TMP first so the two partial word stores are not seen as
   uses of an uninitialized register.  */
27003 emit_clobber (tmp);
27004 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27005 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27006 emit_move_insn (target, tmp);
27008 else if (n_words == 4)
27010 rtx tmp = gen_reg_rtx (V4SImode);
27011 gcc_assert (word_mode == SImode);
27012 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
27013 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
27014 emit_move_insn (target, gen_lowpart (mode, tmp));
27017 gcc_unreachable ();
27021 /* Initialize vector TARGET via VALS. Suppress the use of MMX
27022 instructions unless MMX_OK is true. */
27025 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
27027 enum machine_mode mode = GET_MODE (target);
27028 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27029 int n_elts = GET_MODE_NUNITS (mode);
27030 int n_var = 0, one_var = -1;
27031 bool all_same = true, all_const_zero = true;
/* Classify the elements: count variables (remembering the last one),
   and track whether all are equal / all are literal zero.  */
27035 for (i = 0; i < n_elts; ++i)
27037 x = XVECEXP (vals, 0, i);
27038 if (!(CONST_INT_P (x)
27039 || GET_CODE (x) == CONST_DOUBLE
27040 || GET_CODE (x) == CONST_FIXED))
27041 n_var++, one_var = i;
27042 else if (x != CONST0_RTX (inner_mode))
27043 all_const_zero = false;
27044 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
27048 /* Constants are best loaded from the constant pool. */
27051 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
27055 /* If all values are identical, broadcast the value. */
27057 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
27058 XVECEXP (vals, 0, 0)))
27061 /* Values where only one field is non-constant are best loaded from
27062 the pool and overwritten via move later. */
27066 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
27067 XVECEXP (vals, 0, one_var),
27071 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Last resort: fully general expansion.  */
27075 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET, choosing the
   cheapest insn sequence the enabled ISA allows; falls back to a
   stack-memory round trip when no in-register form applies.  */
27079 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
27081 enum machine_mode mode = GET_MODE (target);
27082 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27083 enum machine_mode half_mode;
27084 bool use_vec_merge = false;
/* AVX 256-bit modes: tables of lo/hi 128-bit extract and insert
   generators, indexed [mode][half].  */
27086 static rtx (*gen_extract[6][2]) (rtx, rtx)
27088 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
27089 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
27090 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
27091 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
27092 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
27093 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
27095 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
27097 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
27098 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
27099 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
27100 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
27101 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
27102 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element case: extract the element we keep, then VEC_CONCAT it
   with VAL in the order dictated by ELT.  */
27112 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
27113 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
27115 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
27117 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
27118 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27124 use_vec_merge = TARGET_SSE4_1;
27132 /* For the two element vectors, we implement a VEC_CONCAT with
27133 the extraction of the other element. */
27135 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
27136 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
27139 op0 = val, op1 = tmp;
27141 op0 = tmp, op1 = val;
27143 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
27144 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27149 use_vec_merge = TARGET_SSE4_1;
27156 use_vec_merge = true;
/* V4SF without SSE4.1: per-element shufps recipes.  The letter
   diagrams track the lanes; X marks the inserted value.  */
27160 /* tmp = target = A B C D */
27161 tmp = copy_to_reg (target);
27162 /* target = A A B B */
27163 emit_insn (gen_sse_unpcklps (target, target, target));
27164 /* target = X A B B */
27165 ix86_expand_vector_set (false, target, val, 0);
27166 /* target = A X C D */
27167 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27168 GEN_INT (1), GEN_INT (0),
27169 GEN_INT (2+4), GEN_INT (3+4)));
27173 /* tmp = target = A B C D */
27174 tmp = copy_to_reg (target);
27175 /* tmp = X B C D */
27176 ix86_expand_vector_set (false, tmp, val, 0);
27177 /* target = A B X D */
27178 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27179 GEN_INT (0), GEN_INT (1),
27180 GEN_INT (0+4), GEN_INT (3+4)));
27184 /* tmp = target = A B C D */
27185 tmp = copy_to_reg (target);
27186 /* tmp = X B C D */
27187 ix86_expand_vector_set (false, tmp, val, 0);
27188 /* target = A B X D */
27189 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27190 GEN_INT (0), GEN_INT (1),
27191 GEN_INT (2+4), GEN_INT (0+4)));
27195 gcc_unreachable ();
27200 use_vec_merge = TARGET_SSE4_1;
27204 /* Element 0 handled by vec_merge below. */
27207 use_vec_merge = true;
27213 /* With SSE2, use integer shuffles to swap element 0 and ELT,
27214 store into element 0, then shuffle them back. */
27218 order[0] = GEN_INT (elt);
27219 order[1] = const1_rtx;
27220 order[2] = const2_rtx;
27221 order[3] = GEN_INT (3);
27222 order[elt] = const0_rtx;
27224 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27225 order[1], order[2], order[3]));
27227 ix86_expand_vector_set (false, target, val, 0);
/* The swap permutation is its own inverse, so reusing ORDER here
   restores the original element order.  */
27229 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27230 order[1], order[2], order[3]));
27234 /* For SSE1, we have to reuse the V4SF code. */
27235 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
27236 gen_lowpart (SFmode, val), elt);
27241 use_vec_merge = TARGET_SSE2;
27244 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27248 use_vec_merge = TARGET_SSE4_1;
/* AVX 256-bit modes: operate on one 128-bit half at a time via the
   gen_extract/gen_insert tables above.  */
27255 half_mode = V16QImode;
27261 half_mode = V8HImode;
27267 half_mode = V4SImode;
27273 half_mode = V2DImode;
27279 half_mode = V4SFmode;
27285 half_mode = V2DFmode;
27291 /* Compute offset. */
27295 gcc_assert (i <= 1);
27297 /* Extract the half. */
27298 tmp = gen_reg_rtx (half_mode);
27299 emit_insn ((*gen_extract[j][i]) (tmp, target));
27301 /* Put val in tmp at elt. */
27302 ix86_expand_vector_set (false, tmp, val, elt);
/* Reinsert the modified half into TARGET.  */
27305 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
/* Generic vec_merge path: duplicate VAL and merge it into TARGET
   under a one-hot element mask.  */
27314 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
27315 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
27316 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill to a stack slot, store the element, reload.  */
27320 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27322 emit_move_insn (mem, target);
27324 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27325 emit_move_insn (tmp, val);
27327 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET, using a
   vec_select insn where the ISA supports it and a stack round trip
   otherwise.  MMX_OK permits MMX register forms.  */
27332 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
27334 enum machine_mode mode = GET_MODE (vec);
27335 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27336 bool use_vec_extr = false;
27349 use_vec_extr = true;
27353 use_vec_extr = TARGET_SSE4_1;
/* V4SF: move the wanted lane into position 0 with shufps/unpckhps,
   then extract lane 0 (NOTE(review): the per-ELT switch labels are
   elided in this extract).  */
27365 tmp = gen_reg_rtx (mode);
27366 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
27367 GEN_INT (elt), GEN_INT (elt),
27368 GEN_INT (elt+4), GEN_INT (elt+4)));
27372 tmp = gen_reg_rtx (mode);
27373 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
27377 gcc_unreachable ();
27380 use_vec_extr = true;
27385 use_vec_extr = TARGET_SSE4_1;
/* V4SI with SSE2: same lane-repositioning idea with pshufd /
   punpckhdq.  */
27399 tmp = gen_reg_rtx (mode);
27400 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
27401 GEN_INT (elt), GEN_INT (elt),
27402 GEN_INT (elt), GEN_INT (elt)));
27406 tmp = gen_reg_rtx (mode);
27407 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
27411 gcc_unreachable ();
27414 use_vec_extr = true;
27419 /* For SSE1, we have to reuse the V4SF code. */
27420 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
27421 gen_lowpart (V4SFmode, vec), elt);
27427 use_vec_extr = TARGET_SSE2;
27430 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27434 use_vec_extr = TARGET_SSE4_1;
27438 /* ??? Could extract the appropriate HImode element and shift. */
/* vec_select path.  */
27445 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
27446 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
27448 /* Let the rtl optimizers know about the zero extension performed. */
27449 if (inner_mode == QImode || inner_mode == HImode)
27451 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
27452 target = gen_lowpart (SImode, target);
27455 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill VEC to the stack and load the element back.  */
27459 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27461 emit_move_insn (mem, vec);
27463 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27464 emit_move_insn (target, tmp);
27468 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
27469 pattern to reduce; DEST is the destination; IN is the input vector. */
27472 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
27474 rtx tmp1, tmp2, tmp3;
27476 tmp1 = gen_reg_rtx (V4SFmode);
27477 tmp2 = gen_reg_rtx (V4SFmode);
27478 tmp3 = gen_reg_rtx (V4SFmode);
/* Two halving steps: combine high pair with low pair via movhlps,
   then combine the two surviving lanes via a shufps broadcast of
   lane 1; the scalar result ends up in lane 0 of DEST.  */
27480 emit_insn (gen_sse_movhlps (tmp1, in, in));
27481 emit_insn (fn (tmp2, tmp1, in));
27483 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
27484 GEN_INT (1), GEN_INT (1),
27485 GEN_INT (1+4), GEN_INT (1+4)));
27486 emit_insn (fn (dest, tmp2, tmp3));
27489 /* Target hook for scalar_mode_supported_p. */
/* Accepts decimal float modes and TFmode in addition to the default
   set (the TRUE returns for those cases are elided in this extract).  */
27491 ix86_scalar_mode_supported_p (enum machine_mode mode)
27493 if (DECIMAL_FLOAT_MODE_P (mode))
27495 else if (mode == TFmode)
27498 return default_scalar_mode_supported_p (mode);
27501 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when any enabled ISA level (SSE, SSE2,
   AVX, MMX, 3DNow!) can hold it in a register.  */
27503 ix86_vector_mode_supported_p (enum machine_mode mode)
27505 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
27507 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
27509 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
27511 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
27513 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
27518 /* Target hook for c_mode_for_suffix. */
/* Maps a numeric-literal suffix character to a machine mode
   (NOTE(review): the suffix cases themselves are elided in this
   extract).  */
27519 static enum machine_mode
27520 ix86_c_mode_for_suffix (char suffix)
27530 /* Worker function for TARGET_MD_ASM_CLOBBERS.
27532 We do this in the new i386 backend to maintain source compatibility
27533 with the old cc0-based compiler. */
/* Every asm statement implicitly clobbers the flags and the x87
   status word, matching the historical cc0 behavior.  */
27536 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
27537 tree inputs ATTRIBUTE_UNUSED,
27540 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
27542 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
27547 /* Implements target vector targetm.asm.encode_section_info. This
27548 is not used by netware. */
27550 static void ATTRIBUTE_UNUSED
27551 ix86_encode_section_info (tree decl, rtx rtl, int first)
27553 default_encode_section_info (decl, rtl, first);
/* Mark static/external variables placed in the large data section so
   references use far (full 64-bit) addressing.  */
27555 if (TREE_CODE (decl) == VAR_DECL
27556 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
27557 && ix86_in_large_data_p (decl))
27558 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
27561 /* Worker function for REVERSE_CONDITION. */
/* FP compare modes must use the unordered-aware reversal so NaN
   operands keep the correct branch sense.  */
27564 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
27566 return (mode != CCFPmode && mode != CCFPUmode
27567 ? reverse_condition (code)
27568 : reverse_condition_maybe_unordered (code));
27571 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template; chooses popping (fstp) forms when
   the source register dies in INSN so the x87 stack stays balanced.  */
27575 output_387_reg_move (rtx insn, rtx *operands)
27577 if (REG_P (operands[0]))
27579 if (REG_P (operands[1])
27580 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
27582 if (REGNO (operands[0]) == FIRST_STACK_REG)
27583 return output_387_ffreep (operands, 0);
27584 return "fstp\t%y0";
27586 if (STACK_TOP_P (operands[0]))
27587 return "fld%z1\t%y1";
27590 else if (MEM_P (operands[0]))
27592 gcc_assert (REG_P (operands[1]));
27593 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
27594 return "fstp%z0\t%y0";
27597 /* There is no non-popping store to memory for XFmode.
27598 So if we need one, follow the store with a load. */
27599 if (GET_MODE (operands[0]) == XFmode)
27600 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
27602 return "fst%z0\t%y0";
27609 /* Output code to perform a conditional jump to LABEL, if C2 flag in
27610 FP status register is set. */
27613 ix86_emit_fp_unordered_jump (rtx label)
27615 rtx reg = gen_reg_rtx (HImode)
/* Read the x87 status word, then test C2 either via sahf (copies it
   into EFLAGS) or via a direct test of bit 0x04.  */;
27618 emit_insn (gen_x86_fnstsw_1 (reg));
27620 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
27622 emit_insn (gen_x86_sahf_1 (reg));
27624 temp = gen_rtx_REG (CCmode, FLAGS_REG);
27625 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
27629 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
27631 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27632 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
27635 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
27636 gen_rtx_LABEL_REF (VOIDmode, label),
27638 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
27640 emit_jump_insn (temp);
/* Mark the unordered branch as unlikely (10%).  */
27641 predict_jump (REG_BR_PROB_BASE * 10 / 100);
27644 /* Output code to perform a log1p XFmode calculation. */
/* Uses fyl2xp1 directly when |op1| is small enough for its accuracy
   guarantee (|x| < 1 - sqrt(2)/2), otherwise computes log(1 + op1)
   via fyl2x on the explicit sum.  */
27646 void ix86_emit_i387_log1p (rtx op0, rtx op1)
27648 rtx label1 = gen_label_rtx ();
27649 rtx label2 = gen_label_rtx ();
27651 rtx tmp = gen_reg_rtx (XFmode);
27652 rtx tmp2 = gen_reg_rtx (XFmode);
27654 emit_insn (gen_absxf2 (tmp, op1));
27655 emit_insn (gen_cmpxf (tmp,
27656 CONST_DOUBLE_FROM_REAL_VALUE (
27657 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
27659 emit_jump_insn (gen_bge (label1));
27661 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
27662 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
27663 emit_jump (label2);
27665 emit_label (label1);
27666 emit_move_insn (tmp, CONST1_RTX (XFmode));
27667 emit_insn (gen_addxf3 (tmp, op1, tmp));
27668 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
27669 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
27671 emit_label (label2);
27674 /* Output code to perform a Newton-Rhapson approximation of a single precision
27675 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* Emits RES = A / B as A * rcp(B) refined by one Newton-Raphson step;
   works for scalar SFmode or an SF vector mode.  */
27677 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
27679 rtx x0, x1, e0, e1, two;
27681 x0 = gen_reg_rtx (mode);
27682 e0 = gen_reg_rtx (mode);
27683 e1 = gen_reg_rtx (mode);
27684 x1 = gen_reg_rtx (mode);
27686 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
27688 if (VECTOR_MODE_P (mode))
27689 two = ix86_build_const_vector (SFmode, true, two);
27691 two = force_reg (mode, two);
27693 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
27695 /* x0 = rcp(b) estimate */
27696 emit_insn (gen_rtx_SET (VOIDmode, x0,
27697 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0; e1 = 2 - e0; x1 = x0 * e1 is the refined reciprocal.  */
27700 emit_insn (gen_rtx_SET (VOIDmode, e0,
27701 gen_rtx_MULT (mode, x0, b)));
27703 emit_insn (gen_rtx_SET (VOIDmode, e1,
27704 gen_rtx_MINUS (mode, two, e0)));
27706 emit_insn (gen_rtx_SET (VOIDmode, x1,
27707 gen_rtx_MULT (mode, x0, e1)));
27709 emit_insn (gen_rtx_SET (VOIDmode, res,
27710 gen_rtx_MULT (mode, a, x1)));
27713 /* Output code to perform a Newton-Rhapson approximation of a
27714 single precision floating point [reciprocal] square root. */
/* Emits sqrt(A) or rsqrt(A) into RES from an rsqrtss estimate refined
   by one Newton-Raphson step (the sqrt/rsqrt selector parameter is
   elided in this extract).  */
27716 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
27719 rtx x0, e0, e1, e2, e3, mthree, mhalf;
27722 x0 = gen_reg_rtx (mode);
27723 e0 = gen_reg_rtx (mode);
27724 e1 = gen_reg_rtx (mode);
27725 e2 = gen_reg_rtx (mode);
27726 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5 used by the refinement formula.  */
27728 real_from_integer (&r, VOIDmode, -3, -1, 0);
27729 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
27731 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
27732 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
27734 if (VECTOR_MODE_P (mode))
27736 mthree = ix86_build_const_vector (SFmode, true, mthree);
27737 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
27740 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
27741 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
27743 /* x0 = rsqrt(a) estimate */
27744 emit_insn (gen_rtx_SET (VOIDmode, x0,
27745 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
27748 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
27753 zero = gen_reg_rtx (mode);
27754 mask = gen_reg_rtx (mode);
/* mask = (a != 0); x0 &= mask zeroes the estimate where a == 0 so the
   later multiply by A cannot produce Inf * 0 = NaN.  */
27756 zero = force_reg (mode, CONST0_RTX(mode));
27757 emit_insn (gen_rtx_SET (VOIDmode, mask,
27758 gen_rtx_NE (mode, zero, a)));
27760 emit_insn (gen_rtx_SET (VOIDmode, x0,
27761 gen_rtx_AND (mode, x0, mask)));
27765 emit_insn (gen_rtx_SET (VOIDmode, e0,
27766 gen_rtx_MULT (mode, x0, a)));
27768 emit_insn (gen_rtx_SET (VOIDmode, e1,
27769 gen_rtx_MULT (mode, e0, x0)));
/* e2 = a*x0*x0 - 3.0 (add of the negated constant).  */
27772 mthree = force_reg (mode, mthree);
27773 emit_insn (gen_rtx_SET (VOIDmode, e2,
27774 gen_rtx_PLUS (mode, e1, mthree)));
27776 mhalf = force_reg (mode, mhalf);
27778 /* e3 = -.5 * x0 */
27779 emit_insn (gen_rtx_SET (VOIDmode, e3,
27780 gen_rtx_MULT (mode, x0, mhalf)));
27782 /* e3 = -.5 * e0 */
27783 emit_insn (gen_rtx_SET (VOIDmode, e3,
27784 gen_rtx_MULT (mode, e0, mhalf)));
27785 /* ret = e2 * e3 */
27786 emit_insn (gen_rtx_SET (VOIDmode, res,
27787 gen_rtx_MULT (mode, e2, e3)));
27790 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
27792 static void ATTRIBUTE_UNUSED
27793 i386_solaris_elf_named_section (const char *name, unsigned int flags,
27796 /* With Binutils 2.15, the "@unwind" marker must be specified on
27797 every occurrence of the ".eh_frame" section, not just the first
/* Emit the section directive by hand for .eh_frame; everything else
   goes through the default ELF handler.  */
27800 && strcmp (name, ".eh_frame") == 0)
27802 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
27803 flags & SECTION_WRITE ? "aw" : "a");
27806 default_elf_asm_named_section (name, flags, decl);
27809 /* Return the mangling of TYPE if it is an extended fundamental type. */
27811 static const char *
27812 ix86_mangle_type (const_tree type)
27814 type = TYPE_MAIN_VARIANT (type);
/* Only the fundamental scalar type categories can have an extended
   mangling; anything else defers to the default (NULL return elided
   in this extract).  */
27816 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
27817 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
27820 switch (TYPE_MODE (type))
27823 /* __float128 is "g". */
27826 /* "long double" or __float80 is "e". */
27833 /* For 32-bit code we can save PIC register setup by using
27834 __stack_chk_fail_local hidden function instead of calling
27835 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
27836 register, so it is better to call __stack_chk_fail directly. */
27839 ix86_stack_protect_fail (void)
27841 return TARGET_64BIT
27842 ? default_external_stack_protect_fail ()
27843 : default_hidden_stack_protect_fail ();
27846 /* Select a format to encode pointers in exception handling data. CODE
27847 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
27848 true if the symbol may be affected by dynamic relocations.
27850 ??? All x86 object file formats are capable of representing this.
27851 After all, the relocation needed is the same as for the call insn.
27852 Whether or not a particular assembler allows us to enter such, I
27853 guess we'll have to see. */
27855 asm_preferred_eh_data_format (int code, int global)
/* PIC (branch elided here): pc-relative sdata, 4-byte for small code
   models, 8-byte otherwise; indirect when dynamic relocation is
   possible.  Non-PIC: absolute, udata4 when the code model permits.  */
27859 int type = DW_EH_PE_sdata8;
27861 || ix86_cmodel == CM_SMALL_PIC
27862 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
27863 type = DW_EH_PE_sdata4;
27864 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
27866 if (ix86_cmodel == CM_SMALL
27867 || (ix86_cmodel == CM_MEDIUM && code))
27868 return DW_EH_PE_udata4;
27869 return DW_EH_PE_absptr;
27872 /* Expand copysign from SIGN to the positive value ABS_VALUE
27873 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* RESULT = ABS_VALUE | (SIGN & signbit); MASK, when supplied, must be
   the inverted sign-bit mask (it is complemented below before the
   AND).  */
27876 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
27878 enum machine_mode mode = GET_MODE (sign);
27879 rtx sgn = gen_reg_rtx (mode);
27880 if (mask == NULL_RTX)
27882 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
27883 if (!VECTOR_MODE_P (mode))
27885 /* We need to generate a scalar mode mask in this case. */
27886 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
27887 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
27888 mask = gen_reg_rtx (mode);
27889 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
27893 mask = gen_rtx_NOT (mode, mask);
27894 emit_insn (gen_rtx_SET (VOIDmode, sgn,
27895 gen_rtx_AND (mode, mask, sign)));
27896 emit_insn (gen_rtx_SET (VOIDmode, result,
27897 gen_rtx_IOR (mode, abs_value, sgn)));
27900 /* Expand fabs (OP0) and return a new rtx that holds the result. The
27901 mask for masking out the sign-bit is stored in *SMASK, if that is
/* Computes |OP0| by ANDing with the inverted sign-bit mask; the mask
   is also handed back through SMASK for reuse by the caller.  */
27904 ix86_expand_sse_fabs (rtx op0, rtx *smask)
27906 enum machine_mode mode = GET_MODE (op0);
27909 xa = gen_reg_rtx (mode);
27910 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
27911 if (!VECTOR_MODE_P (mode))
27913 /* We need to generate a scalar mode mask in this case. */
27914 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
27915 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
27916 mask = gen_reg_rtx (mode);
27917 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
27919 emit_insn (gen_rtx_SET (VOIDmode, xa,
27920 gen_rtx_AND (mode, op0, mask)));
27928 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
27929 swapping the operands if SWAP_OPERANDS is true. The expanded
27930 code is a forward jump to a newly created label in case the
27931 comparison is true. The generated label rtx is returned. */
27933 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
27934 bool swap_operands)
/* Emits a CCFPUmode compare (unordered-aware) followed by a
   conditional branch whose JUMP_LABEL is wired to the new label.  */
27945 label = gen_label_rtx ();
27946 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
27947 emit_insn (gen_rtx_SET (VOIDmode, tmp,
27948 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
27949 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
27950 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
27951 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
27952 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
27953 JUMP_LABEL (tmp) = label;
27958 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
27959 using comparison code CODE. Operands are swapped for the comparison if
27960 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
27962 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
27963 bool swap_operands)
27965 enum machine_mode mode = GET_MODE (op0);
27966 rtx mask = gen_reg_rtx (mode);
/* cmpsd vs cmpss: pick the mask-compare pattern matching the scalar
   mode of the operands.  */
27975 if (mode == DFmode)
27976 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
27977 gen_rtx_fmt_ee (code, mode, op0, op1)));
27979 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
27980 gen_rtx_fmt_ee (code, mode, op0, op1)));
27985 /* Generate and return a rtx of mode MODE for 2**n where n is the number
27986 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
27988 ix86_gen_TWO52 (enum machine_mode mode)
27990 REAL_VALUE_TYPE TWO52r;
/* 2^52 for DFmode, 2^23 for SFmode: adding then subtracting this
   value rounds to integer, which the callers exploit.  */
27993 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
27994 TWO52 = const_double_from_real_value (TWO52r, mode);
27995 TWO52 = force_reg (mode, TWO52);
28000 /* Expand SSE sequence for computing lround from OP1 storing
28003 ix86_expand_lround (rtx op0, rtx op1)
28005 /* C code for the stuff we're doing below:
28006 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
28009 enum machine_mode mode = GET_MODE (op1);
28010 const struct real_format *fmt;
28011 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28014 /* load nextafter (0.5, 0.0) */
/* Using the largest value strictly below 0.5 avoids rounding
   halfway-down cases (e.g. 0.49999...) upward.  */
28015 fmt = REAL_MODE_FORMAT (mode);
28016 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28017 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28019 /* adj = copysign (0.5, op1) */
28020 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
28021 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
28023 /* adj = op1 + adj */
28024 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
28026 /* op0 = (imode)adj */
28027 expand_fix (op0, adj, 0);
28030 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
28033 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
28035 /* C code for the stuff we're doing below (for do_floor):
28037 xi -= (double)xi > op1 ? 1 : 0;
28040 enum machine_mode fmode = GET_MODE (op1);
28041 enum machine_mode imode = GET_MODE (op0);
28042 rtx ireg, freg, label, tmp;
28044 /* reg = (long)op1 */
/* Truncate toward zero first; the branchy compensation below turns
   truncation into floor (subtract 1) or ceil (add 1) when needed.  */
28045 ireg = gen_reg_rtx (imode);
28046 expand_fix (ireg, op1, 0);
28048 /* freg = (double)reg */
28049 freg = gen_reg_rtx (fmode);
28050 expand_float (freg, ireg, 0);
28052 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* The UNLE jump skips the compensation when truncation already produced
   the right answer; operands are swapped for the ceil case.  */
28053 label = ix86_expand_sse_compare_and_jump (UNLE,
28054 freg, op1, !do_floor);
28055 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
28056 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
28057 emit_move_insn (ireg, tmp);
28059 emit_label (label);
28060 LABEL_NUSES (label) = 1;
28062 emit_move_insn (op0, ireg);
28065 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
28066 result in OPERAND0. */
28068 ix86_expand_rint (rtx operand0, rtx operand1)
28070 /* C code for the stuff we're doing below:
28071 xa = fabs (operand1);
28072 if (!isless (xa, 2**52))
28074 xa = xa + 2**52 - 2**52;
28075 return copysign (xa, operand1);
28077 enum machine_mode mode = GET_MODE (operand0);
28078 rtx res, xa, label, TWO52, mask;
/* RES starts as a copy of the input so the not-taken path (value already
   integral, or NaN) falls through with the input unchanged.  */
28080 res = gen_reg_rtx (mode);
28081 emit_move_insn (res, operand1);
28083 /* xa = abs (operand1) */
28084 xa = ix86_expand_sse_fabs (res, &mask);
28086 /* if (!isless (xa, TWO52)) goto label; */
/* |x| >= 2**52 (2**23 for SFmode) is already an integer; skip rounding.  */
28087 TWO52 = ix86_gen_TWO52 (mode);
28088 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding then subtracting 2**52 rounds to nearest in the current
   rounding mode, which is exactly rint's contract.  */
28090 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28091 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Re-attach the original sign; this also keeps -0.0 -> -0.0 correct.  */
28093 ix86_sse_copysign_to_positive (res, xa, res, mask);
28095 emit_label (label);
28096 LABEL_NUSES (label) = 1;
28098 emit_move_insn (operand0, res);
28101 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
28104 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
28106 /* C code for the stuff we expand below.
28107 double xa = fabs (x), x2;
28108 if (!isless (xa, TWO52))
28110 xa = xa + TWO52 - TWO52;
28111 x2 = copysign (xa, x);
28120 enum machine_mode mode = GET_MODE (operand0);
28121 rtx xa, TWO52, tmp, label, one, res, mask;
28123 TWO52 = ix86_gen_TWO52 (mode);
28125 /* Temporary for holding the result, initialized to the input
28126 operand to ease control flow. */
28127 res = gen_reg_rtx (mode);
28128 emit_move_insn (res, operand1);
28130 /* xa = abs (operand1) */
28131 xa = ix86_expand_sse_fabs (res, &mask);
28133 /* if (!isless (xa, TWO52)) goto label; */
/* Values with |x| >= 2**52 are already integral; skip the whole sequence.
   This variant avoids DImode conversions, so it works on 32-bit targets.  */
28134 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28136 /* xa = xa + TWO52 - TWO52; */
/* Round-to-nearest via the 2**52 add/sub trick on the absolute value.  */
28137 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28138 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28140 /* xa = copysign (xa, operand1) */
28141 ix86_sse_copysign_to_positive (xa, xa, res, mask);
28143 /* generate 1.0 or -1.0 */
/* The sign of the correction constant encodes floor (-> subtract 1.0)
   vs. ceil (-> effectively add 1.0 via subtracting -1.0).  */
28144 one = force_reg (mode,
28145 const_double_from_real_value (do_floor
28146 ? dconst1 : dconstm1, mode));
28148 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
/* TMP is an all-ones mask where compensation is needed; AND-ing with
   ONE turns it into the +/-1.0 correction value.  */
28149 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28150 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28151 gen_rtx_AND (mode, one, tmp)));
28152 /* We always need to subtract here to preserve signed zero. */
28153 tmp = expand_simple_binop (mode, MINUS,
28154 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28155 emit_move_insn (res, tmp);
28157 emit_label (label);
28158 LABEL_NUSES (label) = 1;
28160 emit_move_insn (operand0, res);
28163 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
28166 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
28168 /* C code for the stuff we expand below.
28169 double xa = fabs (x), x2;
28170 if (!isless (xa, TWO52))
28172 x2 = (double)(long)x;
28179 if (HONOR_SIGNED_ZEROS (mode))
28180 return copysign (x2, x);
28183 enum machine_mode mode = GET_MODE (operand0);
28184 rtx xa, xi, TWO52, tmp, label, one, res, mask;
28186 TWO52 = ix86_gen_TWO52 (mode);
28188 /* Temporary for holding the result, initialized to the input
28189 operand to ease control flow. */
28190 res = gen_reg_rtx (mode);
28191 emit_move_insn (res, operand1);
28193 /* xa = abs (operand1) */
28194 xa = ix86_expand_sse_fabs (res, &mask);
28196 /* if (!isless (xa, TWO52)) goto label; */
28197 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28199 /* xa = (double)(long)x */
/* This variant rounds via integer truncation (cvttsd2siq on DFmode needs
   a 64-bit target), then compensates below to get floor/ceil semantics.  */
28200 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28201 expand_fix (xi, res, 0);
28202 expand_float (xa, xi, 0);
28205 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28207 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
/* All-ones mask AND 1.0 yields the 0.0-or-1.0 correction term; floor
   subtracts it, ceil adds it (comparison operands swapped for ceil).  */
28208 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28209 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28210 gen_rtx_AND (mode, one, tmp)));
28211 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
28212 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28213 emit_move_insn (res, tmp);
/* Only pay for the -0.0-preserving copysign when signed zeros matter.  */
28215 if (HONOR_SIGNED_ZEROS (mode))
28216 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28218 emit_label (label);
28219 LABEL_NUSES (label) = 1;
28221 emit_move_insn (operand0, res);
28224 /* Expand SSE sequence for computing round from OPERAND1 storing
28225 into OPERAND0. Sequence that works without relying on DImode truncation
28226 via cvttsd2siq that is only available on 64bit targets. */
28228 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
28230 /* C code for the stuff we expand below.
28231 double xa = fabs (x), xa2, x2;
28232 if (!isless (xa, TWO52))
28234 Using the absolute value and copying back sign makes
28235 -0.0 -> -0.0 correct.
28236 xa2 = xa + TWO52 - TWO52;
28241 else if (dxa > 0.5)
28243 x2 = copysign (xa2, x);
28246 enum machine_mode mode = GET_MODE (operand0);
28247 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
28249 TWO52 = ix86_gen_TWO52 (mode);
28251 /* Temporary for holding the result, initialized to the input
28252 operand to ease control flow. */
28253 res = gen_reg_rtx (mode);
28254 emit_move_insn (res, operand1);
28256 /* xa = abs (operand1) */
28257 xa = ix86_expand_sse_fabs (res, &mask);
28259 /* if (!isless (xa, TWO52)) goto label; */
/* |x| >= 2**52 is already integral (and catches NaN); skip rounding.  */
28260 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28262 /* xa2 = xa + TWO52 - TWO52; */
/* Round-to-nearest-even via the 2**52 add/sub trick ...  */
28263 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28264 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
28266 /* dxa = xa2 - xa; */
/* ... and DXA measures how that rounding differed from the input so the
   halfway cases can be adjusted to round-half-away-from-zero below.  */
28267 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
28269 /* generate 0.5, 1.0 and -0.5 */
28270 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
28271 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
28272 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
28277 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
/* The mask returned by ix86_expand_sse_compare_mask is a fresh register;
   the dead gen_reg_rtx scratch previously allocated here was removed.  */
28278 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
28279 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28280 gen_rtx_AND (mode, one, tmp)));
28281 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28282 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
28283 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
28284 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28285 gen_rtx_AND (mode, one, tmp)));
28286 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28288 /* res = copysign (xa2, operand1) */
/* Re-attach the input's sign, keeping -0.0 -> -0.0 correct.  */
28289 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
28291 emit_label (label);
28292 LABEL_NUSES (label) = 1;
28294 emit_move_insn (operand0, res);
28297 /* Expand SSE sequence for computing trunc from OPERAND1 storing
28300 ix86_expand_trunc (rtx operand0, rtx operand1)
28302 /* C code for SSE variant we expand below.
28303 double xa = fabs (x), x2;
28304 if (!isless (xa, TWO52))
28306 x2 = (double)(long)x;
28307 if (HONOR_SIGNED_ZEROS (mode))
28308 return copysign (x2, x);
28311 enum machine_mode mode = GET_MODE (operand0);
28312 rtx xa, xi, TWO52, label, res, mask;
28314 TWO52 = ix86_gen_TWO52 (mode);
28316 /* Temporary for holding the result, initialized to the input
28317 operand to ease control flow. */
28318 res = gen_reg_rtx (mode);
28319 emit_move_insn (res, operand1);
28321 /* xa = abs (operand1) */
28322 xa = ix86_expand_sse_fabs (res, &mask);
28324 /* if (!isless (xa, TWO52)) goto label; */
/* |x| >= 2**52 is already integral; also skips NaN via the unordered
   compare, leaving RES as the untouched input.  */
28325 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28327 /* x = (double)(long)x */
/* Integer truncation is exactly trunc() for in-range values; DImode is
   used for DFmode so the full mantissa fits.  */
28328 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28329 expand_fix (xi, res, 0);
28330 expand_float (res, xi, 0);
/* Round-trip through the integer loses -0.0; restore it if required.  */
28332 if (HONOR_SIGNED_ZEROS (mode))
28333 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28335 emit_label (label);
28336 LABEL_NUSES (label) = 1;
28338 emit_move_insn (operand0, res);
28341 /* Expand SSE sequence for computing trunc from OPERAND1 storing
28344 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
28346 enum machine_mode mode = GET_MODE (operand0);
28347 rtx xa, mask, TWO52, label, one, res, smask, tmp;
28349 /* C code for SSE variant we expand below.
28350 double xa = fabs (x), x2;
28351 if (!isless (xa, TWO52))
28353 xa2 = xa + TWO52 - TWO52;
28357 x2 = copysign (xa2, x);
28361 TWO52 = ix86_gen_TWO52 (mode);
28363 /* Temporary for holding the result, initialized to the input
28364 operand to ease control flow. */
28365 res = gen_reg_rtx (mode);
28366 emit_move_insn (res, operand1);
28368 /* xa = abs (operand1) */
28369 xa = ix86_expand_sse_fabs (res, &smask);
28371 /* if (!isless (xa, TWO52)) goto label; */
/* 32-bit-safe variant: no DImode conversion, so trunc is computed as
   floor of the absolute value with the sign copied back afterwards.  */
28372 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28374 /* res = xa + TWO52 - TWO52; */
/* Round |x| to nearest via the 2**52 add/sub trick.  */
28375 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28376 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
28377 emit_move_insn (res, tmp);
28380 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28382 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
/* If rounding went up, subtract 1.0 to get the floor of |x|, i.e. trunc.  */
28383 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
28384 emit_insn (gen_rtx_SET (VOIDmode, mask,
28385 gen_rtx_AND (mode, mask, one)));
28386 tmp = expand_simple_binop (mode, MINUS,
28387 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
28388 emit_move_insn (res, tmp);
28390 /* res = copysign (res, operand1) */
28391 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
28393 emit_label (label);
28394 LABEL_NUSES (label) = 1;
28396 emit_move_insn (operand0, res);
28399 /* Expand SSE sequence for computing round from OPERAND1 storing
28402 ix86_expand_round (rtx operand0, rtx operand1)
28404 /* C code for the stuff we're doing below:
28405 double xa = fabs (x);
28406 if (!isless (xa, TWO52))
28408 xa = (double)(long)(xa + nextafter (0.5, 0.0));
28409 return copysign (xa, x);
28411 enum machine_mode mode = GET_MODE (operand0);
28412 rtx res, TWO52, xa, label, xi, half, mask;
28413 const struct real_format *fmt;
28414 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28416 /* Temporary for holding the result, initialized to the input
28417 operand to ease control flow. */
28418 res = gen_reg_rtx (mode);
28419 emit_move_insn (res, operand1);
28421 TWO52 = ix86_gen_TWO52 (mode);
28422 xa = ix86_expand_sse_fabs (res, &mask);
/* |x| >= 2**52 is already integral (also catches NaN); skip rounding.  */
28423 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28425 /* load nextafter (0.5, 0.0) */
/* Bias by the predecessor of 0.5 rather than 0.5 itself so that values
   just below a half-integer are not incorrectly rounded up.  */
28426 fmt = REAL_MODE_FORMAT (mode);
28427 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28428 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28430 /* xa = xa + 0.5 */
28431 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
28432 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
28434 /* xa = (double)(int64_t)xa */
/* Truncating the biased absolute value implements round-half-away-from-
   zero; DImode conversion requires a 64-bit-capable target for DFmode.  */
28435 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28436 expand_fix (xi, xa, 0);
28437 expand_float (xa, xi, 0);
28439 /* res = copysign (xa, operand1) */
28440 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
28442 emit_label (label);
28443 LABEL_NUSES (label) = 1;
28445 emit_move_insn (operand0, res);
28449 /* Validate whether a SSE5 instruction is valid or not.
28450 OPERANDS is the array of operands.
28451 NUM is the number of operands.
28452 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
28453 NUM_MEMORY is the maximum number of memory operands to accept.
28454 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
28457 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
28458 bool uses_oc0, int num_memory, bool commutative)
28464 /* Count the number of memory arguments */
/* MEM_MASK gets bit i set when operands[i] is a memory operand.  */
28467 for (i = 0; i < num; i++)
28469 enum machine_mode mode = GET_MODE (operands[i]);
28470 if (register_operand (operands[i], mode))
28473 else if (memory_operand (operands[i], mode))
28475 mem_mask |= (1 << i);
28481 rtx pattern = PATTERN (insn);
28483 /* allow 0 for pcmov */
28484 if (GET_CODE (pattern) != SET
28485 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
28487 || operands[i] != CONST0_RTX (mode))
28492 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
28493 a memory operation. */
/* A negative NUM_MEMORY encodes "only the last operand may be memory";
   strip that operand's bit before the generic checks below.  */
28494 if (num_memory < 0)
28496 num_memory = -num_memory;
28497 if ((mem_mask & (1 << (num-1))) != 0)
28499 mem_mask &= ~(1 << (num-1));
28504 /* If there were no memory operations, allow the insn */
28508 /* Do not allow the destination register to be a memory operand. */
28509 else if (mem_mask & (1 << 0))
28512 /* If there are too many memory operations, disallow the instruction. While
28513 the hardware only allows 1 memory reference, before register allocation
28514 for some insns, we allow two memory operations sometimes in order to allow
28515 code like the following to be optimized:
28517 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
28519 or similar cases that are vectorized into using the fmaddss
28521 else if (mem_count > num_memory)
28524 /* Don't allow more than one memory operation if not optimizing. */
28525 else if (mem_count > 1 && !optimize)
28528 else if (num == 4 && mem_count == 1)
28530 /* formats (destination is the first argument), example fmaddss:
28531 xmm1, xmm1, xmm2, xmm3/mem
28532 xmm1, xmm1, xmm2/mem, xmm3
28533 xmm1, xmm2, xmm3/mem, xmm1
28534 xmm1, xmm2/mem, xmm3, xmm1 */
28536 return ((mem_mask == (1 << 1))
28537 || (mem_mask == (1 << 2))
28538 || (mem_mask == (1 << 3)));
28540 /* format, example pmacsdd:
28541 xmm1, xmm2, xmm3/mem, xmm1 */
28543 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
28545 return (mem_mask == (1 << 2));
28548 else if (num == 4 && num_memory == 2)
28550 /* If there are two memory operations, we can load one of the memory ops
28551 into the destination register. This is for optimizing the
28552 multiply/add ops, which the combiner has optimized both the multiply
28553 and the add insns to have a memory operation. We have to be careful
28554 that the destination doesn't overlap with the inputs. */
28555 rtx op0 = operands[0];
28557 if (reg_mentioned_p (op0, operands[1])
28558 || reg_mentioned_p (op0, operands[2])
28559 || reg_mentioned_p (op0, operands[3]))
28562 /* formats (destination is the first argument), example fmaddss:
28563 xmm1, xmm1, xmm2, xmm3/mem
28564 xmm1, xmm1, xmm2/mem, xmm3
28565 xmm1, xmm2, xmm3/mem, xmm1
28566 xmm1, xmm2/mem, xmm3, xmm1
28568 For the oc0 case, we will load either operands[1] or operands[3] into
28569 operands[0], so any combination of 2 memory operands is ok. */
28573 /* format, example pmacsdd:
28574 xmm1, xmm2, xmm3/mem, xmm1
28576 For the integer multiply/add instructions be more restrictive and
28577 require operands[2] and operands[3] to be the memory operands. */
/* Fixed: the second disjunct previously lacked the mem_mask comparison
   (it read "|| ((1 << 2) | (1 << 3))", a nonzero constant), making the
   condition unconditionally true.  Compare against the mask instead.  */
28579 return (mem_mask == ((1 << 1) | (1 << 3))
	|| mem_mask == ((1 << 2) | (1 << 3)));
28581 return (mem_mask == ((1 << 2) | (1 << 3)));
28584 else if (num == 3 && num_memory == 1)
28586 /* formats, example protb:
28587 xmm1, xmm2, xmm3/mem
28588 xmm1, xmm2/mem, xmm3 */
28590 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
28592 /* format, example comeq:
28593 xmm1, xmm2, xmm3/mem */
28595 return (mem_mask == (1 << 2));
/* All valid operand-count/memory-count combinations were handled above.  */
28599 gcc_unreachable ();
28605 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
28606 hardware will allow by using the destination register to load one of the
28607 memory operations. Presently this is used by the multiply/add routines to
28608 allow 2 memory references. */
28611 ix86_expand_sse5_multiple_memory (rtx operands[],
28613 enum machine_mode mode)
28615 rtx op0 = operands[0];
/* The transformation is only valid when OP0 is a register that does not
   overlap any input; anything else is a caller bug.  */
28617 || memory_operand (op0, mode)
28618 || reg_mentioned_p (op0, operands[1])
28619 || reg_mentioned_p (op0, operands[2])
28620 || reg_mentioned_p (op0, operands[3]))
28621 gcc_unreachable ();
28623 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
28624 the destination register. */
28625 if (memory_operand (operands[1], mode))
28627 emit_move_insn (op0, operands[1]);
28630 else if (memory_operand (operands[3], mode))
28632 emit_move_insn (op0, operands[3]);
/* ix86_sse5_valid_op_p guarantees one of the two cases above holds.  */
28636 gcc_unreachable ();
28642 /* Table of valid machine attributes. */
28643 static const struct attribute_spec ix86_attribute_table[] =
28645 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
28646 /* Stdcall attribute says callee is responsible for popping arguments
28647 if they are not variable. */
28648 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28649 /* Fastcall attribute says callee is responsible for popping arguments
28650 if they are not variable. */
28651 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28652 /* Cdecl attribute says the callee is a normal C declaration */
28653 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28654 /* Regparm attribute specifies how many integer arguments are to be
28655 passed in registers. */
28656 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
28657 /* Sseregparm attribute says we are using x86_64 calling conventions
28658 for FP arguments. */
28659 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28660 /* force_align_arg_pointer says this function realigns the stack at entry. */
/* The attribute name is spelled via a runtime string variable, hence the
   cast of its address rather than a string literal.  */
28661 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
28662 false, true, true, ix86_handle_cconv_attribute },
/* Windows-targets-only attributes.  */
28663 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28664 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
28665 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
28666 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* Struct layout attributes selecting MS vs. GCC bitfield rules.  */
28668 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
28669 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
28670 #ifdef SUBTARGET_ATTRIBUTE_TABLE
28671 SUBTARGET_ATTRIBUTE_TABLE,
28673 /* ms_abi and sysv_abi calling convention function attributes. */
28674 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
28675 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
/* Sentinel entry terminating the table.  */
28677 { NULL, 0, 0, false, false, false, NULL }
28680 /* Implement targetm.vectorize.builtin_vectorization_cost. */
28682 x86_builtin_vectorization_cost (bool runtime_test)
28684 /* If the branch of the runtime test is taken - i.e. - the vectorized
28685 version is skipped - this incurs a misprediction cost (because the
28686 vectorized version is expected to be the fall-through). So we subtract
28687 the latency of a mispredicted branch from the costs that are incured
28688 when the vectorized version is executed.
28690 TODO: The values in individual target tables have to be tuned or new
28691 fields may be needed. For eg. on K8, the default branch path is the
28692 not-taken path. If the taken path is predicted correctly, the minimum
28693 penalty of going down the taken-path is 1 cycle. If the taken-path is
28694 not predicted correctly, then the minimum penalty is 10 cycles. */
/* Negative cost: the mispredicted-branch latency is credited against the
   vectorized path.  NOTE(review): the RUNTIME_TEST guard and the
   non-runtime-test return are elided in this extract.  */
28698 return (-(ix86_cost->cond_taken_branch_cost));
28704 /* This function returns the calling abi specific va_list type node.
28705 It returns the FNDECL specific va_list type. */
28708 ix86_fn_abi_va_list (tree fndecl)
/* On targets without distinct ABIs there is only one va_list type.  */
28713 return va_list_type_node;
28714 gcc_assert (fndecl != NULL_TREE);
/* Select ms_va_list or sysv_va_list based on the function's own ABI,
   which may differ from the target default via ms_abi/sysv_abi.  */
28715 abi = ix86_function_abi ((const_tree) fndecl);
28718 return ms_va_list_type_node;
28720 return sysv_va_list_type_node;
28723 /* Returns the canonical va_list type specified by TYPE. If there
28724 is no valid TYPE provided, it return NULL_TREE. */
28727 ix86_canonical_va_list_type (tree type)
28731 /* Resolve references and pointers to va_list type. */
28732 if (INDIRECT_REF_P (type))
28733 type = TREE_TYPE (type);
28734 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
28735 type = TREE_TYPE (type);
/* Try each candidate va_list builtin type in turn: the generic one,
   then sysv, then ms.  NOTE(review): the three stanzas below are
   identical except for the candidate type and could be factored into a
   helper; left as-is because surrounding lines (htype setup, the
   64-bit guard) are elided in this extract.  */
28739 wtype = va_list_type_node;
28740 gcc_assert (wtype != NULL_TREE);
28742 if (TREE_CODE (wtype) == ARRAY_TYPE)
28744 /* If va_list is an array type, the argument may have decayed
28745 to a pointer type, e.g. by being passed to another function.
28746 In that case, unwrap both types so that we can compare the
28747 underlying records. */
28748 if (TREE_CODE (htype) == ARRAY_TYPE
28749 || POINTER_TYPE_P (htype))
28751 wtype = TREE_TYPE (wtype);
28752 htype = TREE_TYPE (htype);
28755 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
28756 return va_list_type_node;
28757 wtype = sysv_va_list_type_node;
28758 gcc_assert (wtype != NULL_TREE);
28760 if (TREE_CODE (wtype) == ARRAY_TYPE)
28762 /* If va_list is an array type, the argument may have decayed
28763 to a pointer type, e.g. by being passed to another function.
28764 In that case, unwrap both types so that we can compare the
28765 underlying records. */
28766 if (TREE_CODE (htype) == ARRAY_TYPE
28767 || POINTER_TYPE_P (htype))
28769 wtype = TREE_TYPE (wtype);
28770 htype = TREE_TYPE (htype);
28773 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
28774 return sysv_va_list_type_node;
28775 wtype = ms_va_list_type_node;
28776 gcc_assert (wtype != NULL_TREE);
28778 if (TREE_CODE (wtype) == ARRAY_TYPE)
28780 /* If va_list is an array type, the argument may have decayed
28781 to a pointer type, e.g. by being passed to another function.
28782 In that case, unwrap both types so that we can compare the
28783 underlying records. */
28784 if (TREE_CODE (htype) == ARRAY_TYPE
28785 || POINTER_TYPE_P (htype))
28787 wtype = TREE_TYPE (wtype);
28788 htype = TREE_TYPE (htype);
28791 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
28792 return ms_va_list_type_node;
/* No target-specific match: fall back to the generic handling.  */
28795 return std_canonical_va_list_type (type);
28798 /* Iterate through the target-specific builtin types for va_list.
28799 IDX denotes the iterator, *PTREE is set to the result type of
28800 the va_list builtin, and *PNAME to its internal type.
28801 Returns zero if there is no element for this index, otherwise
28802 IDX should be increased upon the next call.
28803 Note, do not iterate a base builtin's name like __builtin_va_list.
28804 Used from c_common_nodes_and_builtins. */
28807 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* Two entries are exposed: index 0 -> ms_va_list, index 1 -> sysv_va_list.
   NOTE(review): the switch/guard selecting between them is elided in
   this extract.  */
28813 *ptree = ms_va_list_type_node;
28814 *pname = "__builtin_ms_va_list";
28817 *ptree = sysv_va_list_type_node;
28818 *pname = "__builtin_sysv_va_list";
28826 /* Initialize the GCC target structure. */
28827 #undef TARGET_RETURN_IN_MEMORY
28828 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
28830 #undef TARGET_ATTRIBUTE_TABLE
28831 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
28832 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28833 # undef TARGET_MERGE_DECL_ATTRIBUTES
28834 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
28837 #undef TARGET_COMP_TYPE_ATTRIBUTES
28838 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
28840 #undef TARGET_INIT_BUILTINS
28841 #define TARGET_INIT_BUILTINS ix86_init_builtins
28842 #undef TARGET_EXPAND_BUILTIN
28843 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
28845 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
28846 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
28847 ix86_builtin_vectorized_function
28849 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
28850 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
28852 #undef TARGET_BUILTIN_RECIPROCAL
28853 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
28855 #undef TARGET_ASM_FUNCTION_EPILOGUE
28856 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
28858 #undef TARGET_ENCODE_SECTION_INFO
28859 #ifndef SUBTARGET_ENCODE_SECTION_INFO
28860 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
28862 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
28865 #undef TARGET_ASM_OPEN_PAREN
28866 #define TARGET_ASM_OPEN_PAREN ""
28867 #undef TARGET_ASM_CLOSE_PAREN
28868 #define TARGET_ASM_CLOSE_PAREN ""
28870 #undef TARGET_ASM_ALIGNED_HI_OP
28871 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
28872 #undef TARGET_ASM_ALIGNED_SI_OP
28873 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
28875 #undef TARGET_ASM_ALIGNED_DI_OP
28876 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
28879 #undef TARGET_ASM_UNALIGNED_HI_OP
28880 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
28881 #undef TARGET_ASM_UNALIGNED_SI_OP
28882 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
28883 #undef TARGET_ASM_UNALIGNED_DI_OP
28884 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
28886 #undef TARGET_SCHED_ADJUST_COST
28887 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
28888 #undef TARGET_SCHED_ISSUE_RATE
28889 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
28890 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
28891 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
28892 ia32_multipass_dfa_lookahead
28894 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
28895 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
28898 #undef TARGET_HAVE_TLS
28899 #define TARGET_HAVE_TLS true
28901 #undef TARGET_CANNOT_FORCE_CONST_MEM
28902 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
28903 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
28904 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
28906 #undef TARGET_DELEGITIMIZE_ADDRESS
28907 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
28909 #undef TARGET_MS_BITFIELD_LAYOUT_P
28910 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
28913 #undef TARGET_BINDS_LOCAL_P
28914 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
28916 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28917 #undef TARGET_BINDS_LOCAL_P
28918 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
28921 #undef TARGET_ASM_OUTPUT_MI_THUNK
28922 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
28923 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
28924 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
28926 #undef TARGET_ASM_FILE_START
28927 #define TARGET_ASM_FILE_START x86_file_start
28929 #undef TARGET_DEFAULT_TARGET_FLAGS
28930 #define TARGET_DEFAULT_TARGET_FLAGS \
28932 | TARGET_SUBTARGET_DEFAULT \
28933 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
28935 #undef TARGET_HANDLE_OPTION
28936 #define TARGET_HANDLE_OPTION ix86_handle_option
28938 #undef TARGET_RTX_COSTS
28939 #define TARGET_RTX_COSTS ix86_rtx_costs
28940 #undef TARGET_ADDRESS_COST
28941 #define TARGET_ADDRESS_COST ix86_address_cost
28943 #undef TARGET_FIXED_CONDITION_CODE_REGS
28944 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
28945 #undef TARGET_CC_MODES_COMPATIBLE
28946 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
28948 #undef TARGET_MACHINE_DEPENDENT_REORG
28949 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
28951 #undef TARGET_BUILD_BUILTIN_VA_LIST
28952 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
28954 #undef TARGET_FN_ABI_VA_LIST
28955 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
28957 #undef TARGET_CANONICAL_VA_LIST_TYPE
28958 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
28960 #undef TARGET_EXPAND_BUILTIN_VA_START
28961 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
28963 #undef TARGET_MD_ASM_CLOBBERS
28964 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
28966 #undef TARGET_PROMOTE_PROTOTYPES
28967 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
28968 #undef TARGET_STRUCT_VALUE_RTX
28969 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
28970 #undef TARGET_SETUP_INCOMING_VARARGS
28971 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
28972 #undef TARGET_MUST_PASS_IN_STACK
28973 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
28974 #undef TARGET_PASS_BY_REFERENCE
28975 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
28976 #undef TARGET_INTERNAL_ARG_POINTER
28977 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
28978 #undef TARGET_UPDATE_STACK_BOUNDARY
28979 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
28980 #undef TARGET_GET_DRAP_RTX
28981 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
28982 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
28983 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
28984 #undef TARGET_STRICT_ARGUMENT_NAMING
28985 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
28987 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
28988 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
28990 #undef TARGET_SCALAR_MODE_SUPPORTED_P
28991 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
28993 #undef TARGET_VECTOR_MODE_SUPPORTED_P
28994 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
28996 #undef TARGET_C_MODE_FOR_SUFFIX
28997 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
29000 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
29001 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
29004 #ifdef SUBTARGET_INSERT_ATTRIBUTES
29005 #undef TARGET_INSERT_ATTRIBUTES
29006 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
29009 #undef TARGET_MANGLE_TYPE
29010 #define TARGET_MANGLE_TYPE ix86_mangle_type
29012 #undef TARGET_STACK_PROTECT_FAIL
29013 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
29015 #undef TARGET_FUNCTION_VALUE
29016 #define TARGET_FUNCTION_VALUE ix86_function_value
29018 #undef TARGET_SECONDARY_RELOAD
29019 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
29021 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
29022 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
29024 #undef TARGET_SET_CURRENT_FUNCTION
29025 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
29027 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
29028 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_option_attribute_p
29030 #undef TARGET_OPTION_SAVE
29031 #define TARGET_OPTION_SAVE ix86_function_specific_save
29033 #undef TARGET_OPTION_RESTORE
29034 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
29036 #undef TARGET_OPTION_PRINT
29037 #define TARGET_OPTION_PRINT ix86_function_specific_print
29039 #undef TARGET_OPTION_CAN_INLINE_P
29040 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
29042 #undef TARGET_OPTION_COLD_ATTRIBUTE_SETS_OPTIMIZATION
29043 #define TARGET_OPTION_COLD_ATTRIBUTE_SETS_OPTIMIZATION true
29045 #undef TARGET_OPTION_HOT_ATTRIBUTE_SETS_OPTIMIZATION
29046 #define TARGET_OPTION_HOT_ATTRIBUTE_SETS_OPTIMIZATION true
29048 struct gcc_target targetm = TARGET_INITIALIZER;
29050 #include "gt-i386.h"