1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option) any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
53 #include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
60 #ifndef CHECK_STACK_LIMIT
61 #define CHECK_STACK_LIMIT (-1)
64 /* Return index of given mode in mult and division cost tables. */
65 #define MODE_INDEX(mode) \
66 ((mode) == QImode ? 0 \
67 : (mode) == HImode ? 1 \
68 : (mode) == SImode ? 2 \
69 : (mode) == DImode ? 3 \
72 /* Processor costs (relative to an add) */
73 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
/* Express a cost of N bytes of instruction encoding on the same numeric
   scale as COSTS_N_INSNS, so the size-tuning cost table below
   (ix86_size_cost) can charge operations by code size instead of by
   latency.  */
74 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop-strategy entry for cost tables that do not tune a
   given size variant separately: for every block size (the -1 sentinel,
   meaning "any remaining size") simply fall back to a library call.  */
76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
79 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.  */
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some time).  */
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
/* MOVD latencies between general and xmm registers (On K8 vs. AMDFAM10):
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
 */
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some time).  */
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
/* Cost model for the Pentium 4 (NetBurst) core, selected via ix86_cost.
   All instruction costs are relative to an add (COSTS_N_INSNS).
   NOTE(review): this extraction is missing interior lines of the
   initializer (e.g. the MOVE_RATIO and branch-cost fields and part of
   the memset table) -- compare against pristine i386.c before editing.  */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* memcpy / memset stringop strategy tables: {max_size, algorithm} pairs,
   first row for 32-bit, second (DUMMY) for 64-bit.  */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
/* Vectorizer cost model entries (consumed by
   x86_builtin_vectorization_cost).  */
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
/* Cost model for Nocona (64-bit Prescott/NetBurst), selected via ix86_cost.
   NOTE(review): interior initializer lines (MOVE_RATIO, branch cost) appear
   to be missing from this extraction -- verify against pristine i386.c.  */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* memcpy then memset stringop strategy tables: {max_size, algorithm}
   pairs, 32-bit row first, 64-bit row second.  */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model entries.  */
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
/* Cost model for Core 2, selected via ix86_cost.  The "+ 1" on lea makes
   it slightly more expensive than an add without reaching 2 insns.
   NOTE(review): MOVE_RATIO line appears missing from this extraction.  */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
995 in SFmode, DFmode and XFmode */
996 2, /* cost of moving MMX register */
997 {6, 6}, /* cost of loading MMX registers
998 in SImode and DImode */
999 {4, 4}, /* cost of storing MMX registers
1000 in SImode and DImode */
1001 2, /* cost of moving SSE register */
1002 {6, 6, 6}, /* cost of loading SSE registers
1003 in SImode, DImode and TImode */
1004 {4, 4, 4}, /* cost of storing SSE registers
1005 in SImode, DImode and TImode */
1006 2, /* MMX or SSE register to integer */
1007 32, /* size of l1 cache. */
1008 2048, /* size of l2 cache. */
1009 128, /* size of prefetch block */
1010 8, /* number of parallel prefetches */
1011 3, /* Branch cost */
1012 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1013 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1014 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1015 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1016 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1017 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* memcpy then memset stringop strategy tables: {max_size, algorithm}
   pairs, 32-bit row first, 64-bit row second.  */
1018 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1019 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 {{libcall, {{8, loop}, {15, unrolled_loop},
1022 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1023 {libcall, {{24, loop}, {32, unrolled_loop},
1024 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model entries.  */
1025 1, /* scalar_stmt_cost. */
1026 1, /* scalar load_cost. */
1027 1, /* scalar_store_cost. */
1028 1, /* vec_stmt_cost. */
1029 1, /* vec_to_scalar_cost. */
1030 1, /* scalar_to_vec_cost. */
1031 1, /* vec_align_load_cost. */
1032 2, /* vec_unalign_load_cost. */
1033 1, /* vec_store_cost. */
1034 3, /* cond_taken_branch_cost. */
1035 1, /* cond_not_taken_branch_cost. */
/* Cost model for Intel Atom (in-order Bonnell core), selected via
   ix86_cost.  Note the divide costs scale with operand width, unlike the
   NetBurst tables above.  */
1039 struct processor_costs atom_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1042 COSTS_N_INSNS (1), /* variable shift costs */
1043 COSTS_N_INSNS (1), /* constant shift costs */
1044 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1045 COSTS_N_INSNS (4), /* HI */
1046 COSTS_N_INSNS (3), /* SI */
1047 COSTS_N_INSNS (4), /* DI */
1048 COSTS_N_INSNS (2)}, /* other */
1049 0, /* cost of multiply per each bit set */
1050 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1051 COSTS_N_INSNS (26), /* HI */
1052 COSTS_N_INSNS (42), /* SI */
1053 COSTS_N_INSNS (74), /* DI */
1054 COSTS_N_INSNS (74)}, /* other */
1055 COSTS_N_INSNS (1), /* cost of movsx */
1056 COSTS_N_INSNS (1), /* cost of movzx */
1057 8, /* "large" insn */
1058 17, /* MOVE_RATIO */
1059 2, /* cost for loading QImode using movzbl */
1060 {4, 4, 4}, /* cost of loading integer registers
1061 in QImode, HImode and SImode.
1062 Relative to reg-reg move (2). */
1063 {4, 4, 4}, /* cost of storing integer registers */
1064 4, /* cost of reg,reg fld/fst */
1065 {12, 12, 12}, /* cost of loading fp registers
1066 in SFmode, DFmode and XFmode */
1067 {6, 6, 8}, /* cost of storing fp registers
1068 in SFmode, DFmode and XFmode */
1069 2, /* cost of moving MMX register */
1070 {8, 8}, /* cost of loading MMX registers
1071 in SImode and DImode */
1072 {8, 8}, /* cost of storing MMX registers
1073 in SImode and DImode */
1074 2, /* cost of moving SSE register */
1075 {8, 8, 8}, /* cost of loading SSE registers
1076 in SImode, DImode and TImode */
1077 {8, 8, 8}, /* cost of storing SSE registers
1078 in SImode, DImode and TImode */
1079 5, /* MMX or SSE register to integer */
1080 32, /* size of l1 cache. */
1081 256, /* size of l2 cache. */
1082 64, /* size of prefetch block */
1083 6, /* number of parallel prefetches */
1084 3, /* Branch cost */
1085 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1086 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1087 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1088 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1089 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1090 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy then memset stringop strategy tables: {max_size, algorithm}
   pairs, 32-bit row first, 64-bit row second.  */
1091 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1092 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1093 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1094 {{libcall, {{8, loop}, {15, unrolled_loop},
1095 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1096 {libcall, {{24, loop}, {32, unrolled_loop},
1097 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model entries.  */
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1111 /* Generic64 should produce code tuned for Nocona and K8. */
/* Cost model for -mtune=generic in 64-bit mode; per the comment above,
   tuned as a compromise for Nocona and K8.  Stringop tables only fill the
   64-bit rows; the 32-bit rows are DUMMY since this tuning is 64-bit.  */
1113 struct processor_costs generic64_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 /* On all chips taken into consideration lea is 2 cycles and more. With
1116 this cost however our current implementation of synth_mult results in
1117 use of unnecessary temporary registers causing regression on several
1118 SPECfp benchmarks. */
1119 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1120 COSTS_N_INSNS (1), /* variable shift costs */
1121 COSTS_N_INSNS (1), /* constant shift costs */
1122 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1123 COSTS_N_INSNS (4), /* HI */
1124 COSTS_N_INSNS (3), /* SI */
1125 COSTS_N_INSNS (4), /* DI */
1126 COSTS_N_INSNS (2)}, /* other */
1127 0, /* cost of multiply per each bit set */
1128 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1129 COSTS_N_INSNS (26), /* HI */
1130 COSTS_N_INSNS (42), /* SI */
1131 COSTS_N_INSNS (74), /* DI */
1132 COSTS_N_INSNS (74)}, /* other */
1133 COSTS_N_INSNS (1), /* cost of movsx */
1134 COSTS_N_INSNS (1), /* cost of movzx */
1135 8, /* "large" insn */
1136 17, /* MOVE_RATIO */
1137 4, /* cost for loading QImode using movzbl */
1138 {4, 4, 4}, /* cost of loading integer registers
1139 in QImode, HImode and SImode.
1140 Relative to reg-reg move (2). */
1141 {4, 4, 4}, /* cost of storing integer registers */
1142 4, /* cost of reg,reg fld/fst */
1143 {12, 12, 12}, /* cost of loading fp registers
1144 in SFmode, DFmode and XFmode */
1145 {6, 6, 8}, /* cost of storing fp registers
1146 in SFmode, DFmode and XFmode */
1147 2, /* cost of moving MMX register */
1148 {8, 8}, /* cost of loading MMX registers
1149 in SImode and DImode */
1150 {8, 8}, /* cost of storing MMX registers
1151 in SImode and DImode */
1152 2, /* cost of moving SSE register */
1153 {8, 8, 8}, /* cost of loading SSE registers
1154 in SImode, DImode and TImode */
1155 {8, 8, 8}, /* cost of storing SSE registers
1156 in SImode, DImode and TImode */
1157 5, /* MMX or SSE register to integer */
1158 32, /* size of l1 cache. */
1159 512, /* size of l2 cache. */
1160 64, /* size of prefetch block */
1161 6, /* number of parallel prefetches */
1162 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1163 is increased to perhaps more appropriate value of 5. */
1164 3, /* Branch cost */
1165 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1166 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1167 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1168 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1169 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1170 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1171 {DUMMY_STRINGOP_ALGS,
1172 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1173 {DUMMY_STRINGOP_ALGS,
1174 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model entries.  */
1175 1, /* scalar_stmt_cost. */
1176 1, /* scalar load_cost. */
1177 1, /* scalar_store_cost. */
1178 1, /* vec_stmt_cost. */
1179 1, /* vec_to_scalar_cost. */
1180 1, /* scalar_to_vec_cost. */
1181 1, /* vec_align_load_cost. */
1182 2, /* vec_unalign_load_cost. */
1183 1, /* vec_store_cost. */
1184 3, /* cond_taken_branch_cost. */
1185 1, /* cond_not_taken_branch_cost. */
1188 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* Cost model for -mtune=generic in 32-bit mode; per the comment above,
   a compromise for Athlon, PPro, Pentium4, Nocona and K8.  Stringop
   tables fill only the 32-bit rows; 64-bit rows are DUMMY.  */
1190 struct processor_costs generic32_cost = {
1191 COSTS_N_INSNS (1), /* cost of an add instruction */
1192 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1193 COSTS_N_INSNS (1), /* variable shift costs */
1194 COSTS_N_INSNS (1), /* constant shift costs */
1195 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1196 COSTS_N_INSNS (4), /* HI */
1197 COSTS_N_INSNS (3), /* SI */
1198 COSTS_N_INSNS (4), /* DI */
1199 COSTS_N_INSNS (2)}, /* other */
1200 0, /* cost of multiply per each bit set */
1201 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1202 COSTS_N_INSNS (26), /* HI */
1203 COSTS_N_INSNS (42), /* SI */
1204 COSTS_N_INSNS (74), /* DI */
1205 COSTS_N_INSNS (74)}, /* other */
1206 COSTS_N_INSNS (1), /* cost of movsx */
1207 COSTS_N_INSNS (1), /* cost of movzx */
1208 8, /* "large" insn */
1209 17, /* MOVE_RATIO */
1210 4, /* cost for loading QImode using movzbl */
1211 {4, 4, 4}, /* cost of loading integer registers
1212 in QImode, HImode and SImode.
1213 Relative to reg-reg move (2). */
1214 {4, 4, 4}, /* cost of storing integer registers */
1215 4, /* cost of reg,reg fld/fst */
1216 {12, 12, 12}, /* cost of loading fp registers
1217 in SFmode, DFmode and XFmode */
1218 {6, 6, 8}, /* cost of storing fp registers
1219 in SFmode, DFmode and XFmode */
1220 2, /* cost of moving MMX register */
1221 {8, 8}, /* cost of loading MMX registers
1222 in SImode and DImode */
1223 {8, 8}, /* cost of storing MMX registers
1224 in SImode and DImode */
1225 2, /* cost of moving SSE register */
1226 {8, 8, 8}, /* cost of loading SSE registers
1227 in SImode, DImode and TImode */
1228 {8, 8, 8}, /* cost of storing SSE registers
1229 in SImode, DImode and TImode */
1230 5, /* MMX or SSE register to integer */
1231 32, /* size of l1 cache. */
1232 256, /* size of l2 cache. */
1233 64, /* size of prefetch block */
1234 6, /* number of parallel prefetches */
1235 3, /* Branch cost */
1236 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1237 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1238 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1239 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1240 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1241 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1242 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1243 DUMMY_STRINGOP_ALGS},
1244 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1245 DUMMY_STRINGOP_ALGS},
/* Vectorizer cost model entries.  */
1246 1, /* scalar_stmt_cost. */
1247 1, /* scalar load_cost. */
1248 1, /* scalar_store_cost. */
1249 1, /* vec_stmt_cost. */
1250 1, /* vec_to_scalar_cost. */
1251 1, /* scalar_to_vec_cost. */
1252 1, /* vec_align_load_cost. */
1253 2, /* vec_unalign_load_cost. */
1254 1, /* vec_store_cost. */
1255 3, /* cond_taken_branch_cost. */
1256 1, /* cond_not_taken_branch_cost. */
1259 const struct processor_costs *ix86_cost = &pentium_cost;
1261 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator.  These masks are OR-ed together in
   the tuning/arch tables below to say "this feature applies to these
   CPUs".  */
1262 #define m_386 (1<<PROCESSOR_I386)
1263 #define m_486 (1<<PROCESSOR_I486)
1264 #define m_PENT (1<<PROCESSOR_PENTIUM)
1265 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1266 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1267 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1268 #define m_CORE2 (1<<PROCESSOR_CORE2)
1269 #define m_ATOM (1<<PROCESSOR_ATOM)
/* AMD and composite masks.  */
1271 #define m_GEODE (1<<PROCESSOR_GEODE)
1272 #define m_K6 (1<<PROCESSOR_K6)
1273 #define m_K6_GEODE (m_K6 | m_GEODE)
1274 #define m_K8 (1<<PROCESSOR_K8)
1275 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1276 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1277 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1278 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1280 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1281 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1283 /* Generic instruction choice should be common subset of supported CPUs
1284 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1285 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1287 /* Feature tests against the various tunings. */
1288 unsigned char ix86_tune_features[X86_TUNE_LAST];
1290 /* Feature tests against the various tunings used to create ix86_tune_features
1291 based on the processor mask. */
/* Entries are indexed by X86_TUNE_* and each value is an OR of m_* masks.
   NOTE(review): this extraction has dropped the value line of several
   entries (e.g. X86_TUNE_ZERO_EXTEND_WITH_AND and others whose comment
   appears without a following mask) -- compare with pristine i386.c
   before changing any entry, as positional initialization means a
   missing line shifts every later entry.  */
1292 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1293 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1294 negatively, so enabling for Generic64 seems like good code size
1295 tradeoff. We can't enable it for 32bit generic because it does not
1296 work well with PPro base chips. */
1297 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1299 /* X86_TUNE_PUSH_MEMORY */
1300 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1301 | m_NOCONA | m_CORE2 | m_GENERIC,
1303 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1306 /* X86_TUNE_UNROLL_STRLEN */
1307 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1308 | m_CORE2 | m_GENERIC,
1310 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1311 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1313 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1314 on simulation result. But after P4 was made, no performance benefit
1315 was observed with branch hints. It also increases the code size.
1316 As a result, icc never generates branch hints. */
1319 /* X86_TUNE_DOUBLE_WITH_ADD */
1322 /* X86_TUNE_USE_SAHF */
1323 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1324 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1327 partial dependencies. */
1328 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1329 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1331 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1332 register stalls on Generic32 compilation setting as well. However
1333 in current implementation the partial register stalls are not eliminated
1334 very well - they can be introduced via subregs synthesized by combine
1335 and can happen in caller/callee saving sequences. Because this option
1336 pays back little on PPro based chips and is in conflict with partial reg
1337 dependencies used by Athlon/P4 based chips, it is better to leave it off
1338 for generic32 for now. */
1341 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1342 m_CORE2 | m_GENERIC,
1344 /* X86_TUNE_USE_HIMODE_FIOP */
1345 m_386 | m_486 | m_K6_GEODE,
1347 /* X86_TUNE_USE_SIMODE_FIOP */
1348 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1350 /* X86_TUNE_USE_MOV0 */
1353 /* X86_TUNE_USE_CLTD */
1354 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1356 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1359 /* X86_TUNE_SPLIT_LONG_MOVES */
1362 /* X86_TUNE_READ_MODIFY_WRITE */
1365 /* X86_TUNE_READ_MODIFY */
1368 /* X86_TUNE_PROMOTE_QIMODE */
1369 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1370 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1372 /* X86_TUNE_FAST_PREFIX */
1373 ~(m_PENT | m_486 | m_386),
1375 /* X86_TUNE_SINGLE_STRINGOP */
1376 m_386 | m_PENT4 | m_NOCONA,
1378 /* X86_TUNE_QIMODE_MATH */
1381 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1382 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1383 might be considered for Generic32 if our scheme for avoiding partial
1384 stalls was more effective. */
1387 /* X86_TUNE_PROMOTE_QI_REGS */
1390 /* X86_TUNE_PROMOTE_HI_REGS */
1393 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1394 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1395 | m_CORE2 | m_GENERIC,
1397 /* X86_TUNE_ADD_ESP_8 */
1398 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1399 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_SUB_ESP_4 */
1402 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1405 /* X86_TUNE_SUB_ESP_8 */
1406 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1407 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1409 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1410 for DFmode copies */
1411 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1412 | m_GENERIC | m_GEODE),
1414 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1415 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1417 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1418 conflict here in between PPro/Pentium4 based chips that thread 128bit
1419 SSE registers as single units versus K8 based chips that divide SSE
1420 registers to two 64bit halves. This knob promotes all store destinations
1421 to be 128bit to allow register renaming on 128bit SSE units, but usually
1422 results in one extra microop on 64bit SSE units. Experimental results
1423 shows that disabling this option on P4 brings over 20% SPECfp regression,
1424 while enabling it on K8 brings roughly 2.4% regression that can be partly
1425 masked by careful scheduling of moves. */
1426 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1429 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1432 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1433 are resolved on SSE register parts instead of whole registers, so we may
1434 maintain just lower part of scalar values in proper format leaving the
1435 upper part undefined. */
1438 /* X86_TUNE_SSE_TYPELESS_STORES */
1441 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1442 m_PPRO | m_PENT4 | m_NOCONA,
1444 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1445 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1447 /* X86_TUNE_PROLOGUE_USING_MOVE */
1448 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1450 /* X86_TUNE_EPILOGUE_USING_MOVE */
1451 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1453 /* X86_TUNE_SHIFT1 */
1456 /* X86_TUNE_USE_FFREEP */
1459 /* X86_TUNE_INTER_UNIT_MOVES */
1460 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1462 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1465 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1466 than 4 branch instructions in the 16 byte window. */
1467 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1470 /* X86_TUNE_SCHEDULE */
1471 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1474 /* X86_TUNE_USE_BT */
1475 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1477 /* X86_TUNE_USE_INCDEC */
1478 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1480 /* X86_TUNE_PAD_RETURNS */
1481 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1483 /* X86_TUNE_EXT_80387_CONSTANTS */
1484 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_SHORTEN_X87_SSE */
1490 /* X86_TUNE_AVOID_VECTOR_DECODE */
1493 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1494 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1497 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1498 vector path on AMD machines. */
1499 m_K8 | m_GENERIC64 | m_AMDFAM10,
1501 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1503 m_K8 | m_GENERIC64 | m_AMDFAM10,
1505 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1509 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1510 but one byte longer. */
1513 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1514 operand that cannot be represented using a modRM byte. The XOR
1515 replacement is long decoded, so this split helps here as well. */
1518 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1520 m_AMDFAM10 | m_GENERIC,
1522 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1523 from integer to FP. */
1526 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1527 with a subsequent conditional jump instruction into a single
1528 compare-and-branch uop. */
1531 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1532 will impact LEA instruction selection. */
1536 /* Feature tests against the various architecture variations. */
1537 unsigned char ix86_arch_features[X86_ARCH_LAST];
1539 /* Feature tests against the various architecture variations, used to create
1540 ix86_arch_features based on the processor mask. */
/* Indexed by X86_ARCH_*; each entry is a mask of CPUs having the feature.
   NOTE(review): the value lines for the CMPXCHG/CMPXCHG8B/XADD/BSWAP
   entries are missing from this extraction -- verify against pristine
   i386.c; only the comment lines survived.  */
1541 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1542 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1543 ~(m_386 | m_486 | m_PENT | m_K6),
1545 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1548 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1551 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1554 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* CPU masks for two option defaults, plus the forced stringop algorithm.
   NOTE(review): the tail of the x86_accumulate_outgoing_args initializer
   appears truncated in this extraction.  */
1558 static const unsigned int x86_accumulate_outgoing_args
1559 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1562 static const unsigned int x86_arch_always_fancy_math_387
1563 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1564 | m_NOCONA | m_CORE2 | m_GENERIC;
/* no_stringop means "no algorithm forced"; presumably overridden by the
   -mstringop-strategy option handling elsewhere in this file.  */
1566 static enum stringop_alg stringop_alg = no_stringop;
1568 /* In case the average insn count for single function invocation is
1569 lower than this constant, emit fast (but longer) prologue and
1571 #define FAST_PROLOGUE_INSN_COUNT 20
1573 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* The *_REGISTER_NAMES initializer macros come from the target headers.  */
1574 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1575 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1576 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1578 /* Array of the smallest class containing reg number REGNO, indexed by
1579 REGNO. Used by REGNO_REG_CLASS in i386.h. */
/* Smallest register class containing each hard register, indexed by regno
   (see REGNO_REG_CLASS in i386.h, per the comment above).
   NOTE(review): the opening brace, some group-comment lines and the
   closing of this initializer are missing from this extraction.  */
1581 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1583 /* ax, dx, cx, bx */
1584 AREG, DREG, CREG, BREG,
1585 /* si, di, bp, sp */
1586 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* x87 stack registers.  */
1588 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1589 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1592 /* flags, fpsr, fpcr, frame */
1593 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1595 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1598 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
/* REX integer registers r8-r15 (64-bit only).  */
1601 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 /* SSE REX registers */
1604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1608 /* The "default" register map used in 32bit mode. */
/* Maps gcc hard-register numbers to debugger (DBX/stabs) register
   numbers for 32-bit mode; -1 marks registers with no debug number.  */
1610 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1612 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1613 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1614 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1615 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1616 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1617 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1621 /* The "default" register map used in 64bit mode. */
/* Same mapping for 64-bit mode; note the REX registers are valid here,
   unlike in dbx_register_map.  */
1623 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1625 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1626 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1627 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1628 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1629 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1630 8,9,10,11,12,13,14,15, /* extended integer registers */
1631 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1634 /* Define the register numbers to be used in Dwarf debugging information.
1635 The SVR4 reference port C compiler uses the following register numbers
1636 in its Dwarf output code:
1637 0 for %eax (gcc regno = 0)
1638 1 for %ecx (gcc regno = 2)
1639 2 for %edx (gcc regno = 1)
1640 3 for %ebx (gcc regno = 3)
1641 4 for %esp (gcc regno = 7)
1642 5 for %ebp (gcc regno = 6)
1643 6 for %esi (gcc regno = 4)
1644 7 for %edi (gcc regno = 5)
1645 The following three DWARF register numbers are never generated by
1646 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1647 believes these numbers have these meanings.
1648 8 for %eip (no gcc equivalent)
1649 9 for %eflags (gcc regno = 17)
1650 10 for %trapno (no gcc equivalent)
1651 It is not at all clear how we should number the FP stack registers
1652 for the x86 architecture. If the version of SDB on x86/svr4 were
1653 a bit less brain dead with respect to floating-point then we would
1654 have a precedent to follow with respect to DWARF register numbers
1655 for x86 FP registers, but the SDB on x86/svr4 is so completely
1656 broken with respect to FP registers that it is hardly worth thinking
1657 of it as something to strive for compatibility with.
1658 The version of x86/svr4 SDB I have at the moment does (partially)
1659 seem to believe that DWARF register number 11 is associated with
1660 the x86 register %st(0), but that's about all. Higher DWARF
1661 register numbers don't seem to be associated with anything in
1662 particular, and even for DWARF regno 11, SDB only seems to under-
1663 stand that it should say that a variable lives in %st(0) (when
1664 asked via an `=' command) if we said it was in DWARF regno 11,
1665 but SDB still prints garbage when asked for the value of the
1666 variable in question (via a `/' command).
1667 (Also note that the labels SDB prints for various FP stack regs
1668 when doing an `x' command are all wrong.)
1669 Note that these problems generally don't affect the native SVR4
1670 C compiler because it doesn't allow the use of -O with -g and
1671 because when it is *not* optimizing, it allocates a memory
1672 location for each floating-point variable, and the memory
1673 location is what gets described in the DWARF AT_location
1674 attribute for the variable in question.
1675 Regardless of the severe mental illness of the x86/svr4 SDB, we
1676 do something sensible here and we use the following DWARF
1677 register numbers. Note that these are all stack-top-relative
1679 11 for %st(0) (gcc regno = 8)
1680 12 for %st(1) (gcc regno = 9)
1681 13 for %st(2) (gcc regno = 10)
1682 14 for %st(3) (gcc regno = 11)
1683 15 for %st(4) (gcc regno = 12)
1684 16 for %st(5) (gcc regno = 13)
1685 17 for %st(6) (gcc regno = 14)
1686 18 for %st(7) (gcc regno = 15)
1688 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1690 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1691 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1692 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1693 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1694 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1695 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1699 /* Test and compare insns in i386.md store the information needed to
1700 generate branch and scc insns here. */
/* First operand of the pending comparison recorded by the i386.md
   compare/test expanders; NULL_RTX when no comparison is pending.  */
1702 rtx ix86_compare_op0 = NULL_RTX;
/* Second operand of the pending comparison (see ix86_compare_op0).  */
1703 rtx ix86_compare_op1 = NULL_RTX;
1705 /* Define parameter passing and return registers. */
1707 static int const x86_64_int_parameter_registers[6] =
1709 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1712 static int const x86_64_ms_abi_int_parameter_registers[4] =
1714 CX_REG, DX_REG, R8_REG, R9_REG
1717 static int const x86_64_int_return_registers[4] =
1719 AX_REG, DX_REG, DI_REG, SI_REG
1722 /* Define the structure for the machine field in struct function. */
1724 struct GTY(()) stack_local_entry {
1725 unsigned short mode;
1728 struct stack_local_entry *next;
1731 /* Structure describing stack frame layout.
1732 Stack grows downward:
1738 saved frame pointer if frame_pointer_needed
1739 <- HARD_FRAME_POINTER
1748 [va_arg registers] (
1749 > to_allocate <- FRAME_POINTER
1761 HOST_WIDE_INT frame;
1763 int outgoing_arguments_size;
1766 HOST_WIDE_INT to_allocate;
1767 /* The offsets relative to ARG_POINTER. */
1768 HOST_WIDE_INT frame_pointer_offset;
1769 HOST_WIDE_INT hard_frame_pointer_offset;
1770 HOST_WIDE_INT stack_pointer_offset;
1772 /* When save_regs_using_mov is set, emit prologue using
1773 move instead of push instructions. */
1774 bool save_regs_using_mov;
1777 /* Code model option. */
1778 enum cmodel ix86_cmodel;
/* Assembler dialect to emit; defaults to AT&T syntax.  */
1780 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS code-generation dialect; defaults to the GNU dialect.  */
1782 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1784 /* Which unit we are generating floating point math for. */
1785 enum fpmath_unit ix86_fpmath;
1787 /* Which cpu are we scheduling for. */
1788 enum attr_cpu ix86_schedule;
1790 /* Which cpu are we optimizing for. */
1791 enum processor_type ix86_tune;
1793 /* Which instruction set architecture to use. */
1794 enum processor_type ix86_arch;
1796 /* true if sse prefetch instruction is not NOOP. */
1797 int x86_prefetch_sse;
1799 /* ix86_regparm_string as a number */
1800 static int ix86_regparm;
1802 /* -mstackrealign option */
1803 extern int ix86_force_align_arg_pointer;
1804 static const char ix86_force_align_arg_pointer_string[]
1805 = "force_align_arg_pointer";
1807 static rtx (*ix86_gen_leave) (void);
1808 static rtx (*ix86_gen_pop1) (rtx);
1809 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1810 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1811 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1812 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1813 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1814 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1816 /* Preferred alignment for stack boundary in bits. */
1817 unsigned int ix86_preferred_stack_boundary;
1819 /* Alignment for incoming stack boundary in bits specified at
1821 static unsigned int ix86_user_incoming_stack_boundary;
1823 /* Default alignment for incoming stack boundary in bits. */
1824 static unsigned int ix86_default_incoming_stack_boundary;
1826 /* Alignment for incoming stack boundary in bits. */
1827 unsigned int ix86_incoming_stack_boundary;
1829 /* The abi used by target. */
1830 enum calling_abi ix86_abi;
1832 /* Values 1-5: see jump.c */
1833 int ix86_branch_cost;
1835 /* Calling abi specific va_list type nodes. */
1836 static GTY(()) tree sysv_va_list_type_node;
1837 static GTY(()) tree ms_va_list_type_node;
1839 /* Variables which are this size or smaller are put in the data/bss
1840 or ldata/lbss sections. */
1842 int ix86_section_threshold = 65536;
1844 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1845 char internal_label_prefix[16];
1846 int internal_label_prefix_len;
1848 /* Fence to use after loop using movnt. */
1851 /* Register class used for passing given 64bit part of the argument.
1852 These represent classes as documented by the PS ABI, with the exception
1853 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1854 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1856 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1857 whenever possible (upper half does contain padding). */
1858 enum x86_64_reg_class
1861 X86_64_INTEGER_CLASS,
1862 X86_64_INTEGERSI_CLASS,
1869 X86_64_COMPLEX_X87_CLASS,
1873 #define MAX_CLASSES 4
1875 /* Table of constants used by fldpi, fldln2, etc.... */
1876 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1877 static bool ext_80387_constants_init = 0;
1880 static struct machine_function * ix86_init_machine_status (void);
1881 static rtx ix86_function_value (const_tree, const_tree, bool);
1882 static int ix86_function_regparm (const_tree, const_tree);
1883 static void ix86_compute_frame_layout (struct ix86_frame *);
1884 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1886 static void ix86_add_new_builtins (int);
1888 enum ix86_function_specific_strings
1890 IX86_FUNCTION_SPECIFIC_ARCH,
1891 IX86_FUNCTION_SPECIFIC_TUNE,
1892 IX86_FUNCTION_SPECIFIC_FPMATH,
1893 IX86_FUNCTION_SPECIFIC_MAX
1896 static char *ix86_target_string (int, int, const char *, const char *,
1897 const char *, bool);
1898 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1899 static void ix86_function_specific_save (struct cl_target_option *);
1900 static void ix86_function_specific_restore (struct cl_target_option *);
1901 static void ix86_function_specific_print (FILE *, int,
1902 struct cl_target_option *);
1903 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1904 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1905 static bool ix86_can_inline_p (tree, tree);
1906 static void ix86_set_current_function (tree);
1908 static enum calling_abi ix86_function_abi (const_tree);
1911 /* The svr4 ABI for the i386 says that records and unions are returned
1913 #ifndef DEFAULT_PCC_STRUCT_RETURN
1914 #define DEFAULT_PCC_STRUCT_RETURN 1
1917 /* Whether -mtune= or -march= were specified */
1918 static int ix86_tune_defaulted;
1919 static int ix86_arch_specified;
1921 /* Bit flags that specify the ISA we are compiling for. */
1922 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1924 /* A mask of ix86_isa_flags that includes bit X if X
1925 was set or cleared on the command line. */
1926 static int ix86_isa_flags_explicit;
1928 /* Define a set of ISAs which are available when a given ISA is
1929 enabled. MMX and SSE ISAs are handled separately. */
1931 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1932 #define OPTION_MASK_ISA_3DNOW_SET \
1933 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1935 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1936 #define OPTION_MASK_ISA_SSE2_SET \
1937 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1938 #define OPTION_MASK_ISA_SSE3_SET \
1939 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1940 #define OPTION_MASK_ISA_SSSE3_SET \
1941 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1942 #define OPTION_MASK_ISA_SSE4_1_SET \
1943 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1944 #define OPTION_MASK_ISA_SSE4_2_SET \
1945 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1946 #define OPTION_MASK_ISA_AVX_SET \
1947 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1948 #define OPTION_MASK_ISA_FMA_SET \
1949 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1951 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1953 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1955 #define OPTION_MASK_ISA_SSE4A_SET \
1956 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1957 #define OPTION_MASK_ISA_SSE5_SET \
1958 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1960 /* AES and PCLMUL need SSE2 because they use xmm registers */
1961 #define OPTION_MASK_ISA_AES_SET \
1962 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1963 #define OPTION_MASK_ISA_PCLMUL_SET \
1964 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1966 #define OPTION_MASK_ISA_ABM_SET \
1967 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1969 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1970 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1971 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1972 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
1974 /* Define a set of ISAs which aren't available when a given ISA is
1975 disabled. MMX and SSE ISAs are handled separately. */
1977 #define OPTION_MASK_ISA_MMX_UNSET \
1978 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1979 #define OPTION_MASK_ISA_3DNOW_UNSET \
1980 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1981 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1983 #define OPTION_MASK_ISA_SSE_UNSET \
1984 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1985 #define OPTION_MASK_ISA_SSE2_UNSET \
1986 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1987 #define OPTION_MASK_ISA_SSE3_UNSET \
1988 (OPTION_MASK_ISA_SSE3 \
1989 | OPTION_MASK_ISA_SSSE3_UNSET \
1990 | OPTION_MASK_ISA_SSE4A_UNSET )
1991 #define OPTION_MASK_ISA_SSSE3_UNSET \
1992 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1993 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1994 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1995 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1996 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1997 #define OPTION_MASK_ISA_AVX_UNSET \
1998 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1999 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2001 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
2003 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2005 #define OPTION_MASK_ISA_SSE4A_UNSET \
2006 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2007 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2008 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2009 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2010 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2011 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2012 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2013 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2014 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2016 /* Vectorization library interface and handlers. */
2017 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2018 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2019 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2021 /* Processor target table, indexed by processor number */
2024 const struct processor_costs *cost; /* Processor costs */
2025 const int align_loop; /* Default alignments. */
2026 const int align_loop_max_skip;
2027 const int align_jump;
2028 const int align_jump_max_skip;
2029 const int align_func;
2032 static const struct ptt processor_target_table[PROCESSOR_max] =
2034 {&i386_cost, 4, 3, 4, 3, 4},
2035 {&i486_cost, 16, 15, 16, 15, 16},
2036 {&pentium_cost, 16, 7, 16, 7, 16},
2037 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2038 {&geode_cost, 0, 0, 0, 0, 0},
2039 {&k6_cost, 32, 7, 32, 7, 32},
2040 {&athlon_cost, 16, 7, 16, 7, 16},
2041 {&pentium4_cost, 0, 0, 0, 0, 0},
2042 {&k8_cost, 16, 7, 16, 7, 16},
2043 {&nocona_cost, 0, 0, 0, 0, 0},
2044 {&core2_cost, 16, 10, 16, 10, 16},
2045 {&generic32_cost, 16, 7, 16, 7, 16},
2046 {&generic64_cost, 16, 10, 16, 10, 16},
2047 {&amdfam10_cost, 32, 24, 32, 7, 32},
2048 {&atom_cost, 16, 7, 16, 7, 16}
2051 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2077 /* Implement TARGET_HANDLE_OPTION. */
2080 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2087 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2088 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2092 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2093 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2100 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2101 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2105 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2106 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2116 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2117 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2121 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2122 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2129 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2130 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2134 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2135 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2142 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2143 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2147 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2148 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2155 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2156 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2160 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2161 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2168 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2169 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2173 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2174 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2181 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2182 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2186 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2187 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2194 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2195 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2199 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2200 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2207 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2208 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2212 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2213 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2218 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2219 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2223 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2224 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2230 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2231 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2235 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2236 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2243 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2244 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2248 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2249 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2256 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2257 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2261 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2262 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2269 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2270 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2274 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2275 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2282 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2283 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2287 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2288 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2295 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2296 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2300 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2301 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2308 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2309 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2313 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2314 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2321 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2322 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2326 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2327 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2334 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2335 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2339 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2340 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2349 /* Return a string that documents the current -m options.  The caller is
2350 responsible for freeing the string. */
2353 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2354 const char *fpmath, bool add_nl_p)
2356 struct ix86_target_opts
2358 const char *option; /* option string */
2359 int mask; /* isa mask options */
2362 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2363 preceding options will match those first. */
2364 static struct ix86_target_opts isa_opts[] =
2366 { "-m64", OPTION_MASK_ISA_64BIT },
2367 { "-msse5", OPTION_MASK_ISA_SSE5 },
2368 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2369 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2370 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2371 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2372 { "-msse3", OPTION_MASK_ISA_SSE3 },
2373 { "-msse2", OPTION_MASK_ISA_SSE2 },
2374 { "-msse", OPTION_MASK_ISA_SSE },
2375 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2376 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2377 { "-mmmx", OPTION_MASK_ISA_MMX },
2378 { "-mabm", OPTION_MASK_ISA_ABM },
2379 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2380 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2381 { "-maes", OPTION_MASK_ISA_AES },
2382 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2386 static struct ix86_target_opts flag_opts[] =
2388 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2389 { "-m80387", MASK_80387 },
2390 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2391 { "-malign-double", MASK_ALIGN_DOUBLE },
2392 { "-mcld", MASK_CLD },
2393 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2394 { "-mieee-fp", MASK_IEEE_FP },
2395 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2396 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2397 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2398 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2399 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2400 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2401 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2402 { "-mno-red-zone", MASK_NO_RED_ZONE },
2403 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2404 { "-mrecip", MASK_RECIP },
2405 { "-mrtd", MASK_RTD },
2406 { "-msseregparm", MASK_SSEREGPARM },
2407 { "-mstack-arg-probe", MASK_STACK_PROBE },
2408 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2411 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2414 char target_other[40];
2423 memset (opts, '\0', sizeof (opts));
2425 /* Add -march= option. */
2428 opts[num][0] = "-march=";
2429 opts[num++][1] = arch;
2432 /* Add -mtune= option. */
2435 opts[num][0] = "-mtune=";
2436 opts[num++][1] = tune;
2439 /* Pick out the options in isa options. */
2440 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2442 if ((isa & isa_opts[i].mask) != 0)
2444 opts[num++][0] = isa_opts[i].option;
2445 isa &= ~ isa_opts[i].mask;
2449 if (isa && add_nl_p)
2451 opts[num++][0] = isa_other;
2452 sprintf (isa_other, "(other isa: 0x%x)", isa);
2455 /* Add flag options. */
2456 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2458 if ((flags & flag_opts[i].mask) != 0)
2460 opts[num++][0] = flag_opts[i].option;
2461 flags &= ~ flag_opts[i].mask;
2465 if (flags && add_nl_p)
2467 opts[num++][0] = target_other;
/* Report the leftover FLAG bits here; this previously printed the
   (already-consumed) ISA bits by mistake — a copy-paste of the
   "(other isa: ...)" case above.  */
2468 sprintf (target_other, "(other flags: 0x%x)", flags);
2471 /* Add -fpmath= option. */
2474 opts[num][0] = "-mfpmath=";
2475 opts[num++][1] = fpmath;
2482 gcc_assert (num < ARRAY_SIZE (opts));
2484 /* Size the string. */
2486 sep_len = (add_nl_p) ? 3 : 1;
2487 for (i = 0; i < num; i++)
2490 for (j = 0; j < 2; j++)
2492 len += strlen (opts[i][j]);
2495 /* Build the string. */
2496 ret = ptr = (char *) xmalloc (len);
2499 for (i = 0; i < num; i++)
2503 for (j = 0; j < 2; j++)
2504 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2511 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2519 for (j = 0; j < 2; j++)
2522 memcpy (ptr, opts[i][j], len2[j]);
2524 line_len += len2[j];
2529 gcc_assert (ret + len >= ptr);
2534 /* Function that is callable from the debugger to print the current
2537 ix86_debug_options (void)
2539 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2540 ix86_arch_string, ix86_tune_string,
2541 ix86_fpmath_string, true);
2545 fprintf (stderr, "%s\n\n", opts);
2549 fprintf (stderr, "<no options>\n\n");
2554 /* Sometimes certain combinations of command options do not make
2555 sense on a particular target machine. You can define a macro
2556 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2557 defined, is executed once just after all the command options have
2560 Don't use this macro to turn on various extra optimizations for
2561 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2564 override_options (bool main_args_p)
2567 unsigned int ix86_arch_mask, ix86_tune_mask;
2572 /* Comes from final.c -- no real reason to change it. */
2573 #define MAX_CODE_ALIGN 16
2581 PTA_PREFETCH_SSE = 1 << 4,
2583 PTA_3DNOW_A = 1 << 6,
2587 PTA_POPCNT = 1 << 10,
2589 PTA_SSE4A = 1 << 12,
2590 PTA_NO_SAHF = 1 << 13,
2591 PTA_SSE4_1 = 1 << 14,
2592 PTA_SSE4_2 = 1 << 15,
2595 PTA_PCLMUL = 1 << 18,
2603 const char *const name; /* processor name or nickname. */
2604 const enum processor_type processor;
2605 const enum attr_cpu schedule;
2606 const unsigned /*enum pta_flags*/ flags;
2608 const processor_alias_table[] =
2610 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2611 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2612 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2613 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2614 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2615 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2616 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2617 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2618 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2619 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2620 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2621 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2622 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2624 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2626 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2627 PTA_MMX | PTA_SSE | PTA_SSE2},
2628 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2629 PTA_MMX |PTA_SSE | PTA_SSE2},
2630 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2631 PTA_MMX | PTA_SSE | PTA_SSE2},
2632 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2633 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2634 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2635 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2636 | PTA_CX16 | PTA_NO_SAHF},
2637 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2638 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2639 | PTA_SSSE3 | PTA_CX16},
2640 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2641 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2642 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2643 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2644 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2645 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2646 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2647 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2648 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2649 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2650 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2651 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2652 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2653 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2654 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2655 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2656 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2657 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2658 {"x86-64", PROCESSOR_K8, CPU_K8,
2659 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2660 {"k8", PROCESSOR_K8, CPU_K8,
2661 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2662 | PTA_SSE2 | PTA_NO_SAHF},
2663 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2664 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2665 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2666 {"opteron", PROCESSOR_K8, CPU_K8,
2667 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2668 | PTA_SSE2 | PTA_NO_SAHF},
2669 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2670 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2671 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2672 {"athlon64", PROCESSOR_K8, CPU_K8,
2673 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2674 | PTA_SSE2 | PTA_NO_SAHF},
2675 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2676 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2677 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2678 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2679 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2680 | PTA_SSE2 | PTA_NO_SAHF},
2681 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2682 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2683 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2684 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2685 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2686 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2687 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2688 0 /* flags are only used for -march switch. */ },
2689 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2690 PTA_64BIT /* flags are only used for -march switch. */ },
2693 int const pta_size = ARRAY_SIZE (processor_alias_table);
2695 /* Set up prefix/suffix so the error messages refer to either the command
2696 line argument, or the attribute(target). */
2705 prefix = "option(\"";
2710 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2711 SUBTARGET_OVERRIDE_OPTIONS;
2714 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2715 SUBSUBTARGET_OVERRIDE_OPTIONS;
2718 /* -fPIC is the default for x86_64. */
2719 if (TARGET_MACHO && TARGET_64BIT)
2722 /* Set the default values for switches whose default depends on TARGET_64BIT
2723 in case they weren't overwritten by command line options. */
2726 /* Mach-O doesn't support omitting the frame pointer for now. */
2727 if (flag_omit_frame_pointer == 2)
2728 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2729 if (flag_asynchronous_unwind_tables == 2)
2730 flag_asynchronous_unwind_tables = 1;
2731 if (flag_pcc_struct_return == 2)
2732 flag_pcc_struct_return = 0;
2736 if (flag_omit_frame_pointer == 2)
2737 flag_omit_frame_pointer = 0;
2738 if (flag_asynchronous_unwind_tables == 2)
2739 flag_asynchronous_unwind_tables = 0;
2740 if (flag_pcc_struct_return == 2)
2741 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2744 /* Need to check -mtune=generic first. */
2745 if (ix86_tune_string)
2747 if (!strcmp (ix86_tune_string, "generic")
2748 || !strcmp (ix86_tune_string, "i686")
2749 /* As special support for cross compilers we read -mtune=native
2750 as -mtune=generic. With native compilers we won't see the
2751 -mtune=native, as it was changed by the driver. */
2752 || !strcmp (ix86_tune_string, "native"))
2755 ix86_tune_string = "generic64";
2757 ix86_tune_string = "generic32";
2759 /* If this call is for setting the option attribute, allow the
2760 generic32/generic64 that was previously set. */
2761 else if (!main_args_p
2762 && (!strcmp (ix86_tune_string, "generic32")
2763 || !strcmp (ix86_tune_string, "generic64")))
2765 else if (!strncmp (ix86_tune_string, "generic", 7))
2766 error ("bad value (%s) for %stune=%s %s",
2767 ix86_tune_string, prefix, suffix, sw);
2771 if (ix86_arch_string)
2772 ix86_tune_string = ix86_arch_string;
2773 if (!ix86_tune_string)
2775 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2776 ix86_tune_defaulted = 1;
2779 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2780 need to use a sensible tune option. */
2781 if (!strcmp (ix86_tune_string, "generic")
2782 || !strcmp (ix86_tune_string, "x86-64")
2783 || !strcmp (ix86_tune_string, "i686"))
2786 ix86_tune_string = "generic64";
2788 ix86_tune_string = "generic32";
2791 if (ix86_stringop_string)
2793 if (!strcmp (ix86_stringop_string, "rep_byte"))
2794 stringop_alg = rep_prefix_1_byte;
2795 else if (!strcmp (ix86_stringop_string, "libcall"))
2796 stringop_alg = libcall;
2797 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2798 stringop_alg = rep_prefix_4_byte;
2799 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2801 /* rep; movq isn't available in 32-bit code. */
2802 stringop_alg = rep_prefix_8_byte;
2803 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2804 stringop_alg = loop_1_byte;
2805 else if (!strcmp (ix86_stringop_string, "loop"))
2806 stringop_alg = loop;
2807 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2808 stringop_alg = unrolled_loop;
2810 error ("bad value (%s) for %sstringop-strategy=%s %s",
2811 ix86_stringop_string, prefix, suffix, sw);
2813 if (!strcmp (ix86_tune_string, "x86-64"))
2814 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2815 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2816 prefix, suffix, prefix, suffix, prefix, suffix);
2818 if (!ix86_arch_string)
2819 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2821 ix86_arch_specified = 1;
2823 if (!strcmp (ix86_arch_string, "generic"))
2824 error ("generic CPU can be used only for %stune=%s %s",
2825 prefix, suffix, sw);
2826 if (!strncmp (ix86_arch_string, "generic", 7))
2827 error ("bad value (%s) for %sarch=%s %s",
2828 ix86_arch_string, prefix, suffix, sw);
2830 /* Validate -mabi= value. */
2831 if (ix86_abi_string)
2833 if (strcmp (ix86_abi_string, "sysv") == 0)
2834 ix86_abi = SYSV_ABI;
2835 else if (strcmp (ix86_abi_string, "ms") == 0)
2838 error ("unknown ABI (%s) for %sabi=%s %s",
2839 ix86_abi_string, prefix, suffix, sw);
2842 ix86_abi = DEFAULT_ABI;
2844 if (ix86_cmodel_string != 0)
2846 if (!strcmp (ix86_cmodel_string, "small"))
2847 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2848 else if (!strcmp (ix86_cmodel_string, "medium"))
2849 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2850 else if (!strcmp (ix86_cmodel_string, "large"))
2851 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2853 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2854 else if (!strcmp (ix86_cmodel_string, "32"))
2855 ix86_cmodel = CM_32;
2856 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2857 ix86_cmodel = CM_KERNEL;
2859 error ("bad value (%s) for %scmodel=%s %s",
2860 ix86_cmodel_string, prefix, suffix, sw);
2864 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2865 use of rip-relative addressing. This eliminates fixups that
2866 would otherwise be needed if this object is to be placed in a
2867 DLL, and is essentially just as efficient as direct addressing. */
2868 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2869 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2870 else if (TARGET_64BIT)
2871 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2873 ix86_cmodel = CM_32;
2875 if (ix86_asm_string != 0)
2878 && !strcmp (ix86_asm_string, "intel"))
2879 ix86_asm_dialect = ASM_INTEL;
2880 else if (!strcmp (ix86_asm_string, "att"))
2881 ix86_asm_dialect = ASM_ATT;
2883 error ("bad value (%s) for %sasm=%s %s",
2884 ix86_asm_string, prefix, suffix, sw);
2886 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2887 error ("code model %qs not supported in the %s bit mode",
2888 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2889 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2890 sorry ("%i-bit mode not compiled in",
2891 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2893 for (i = 0; i < pta_size; i++)
2894 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2896 ix86_schedule = processor_alias_table[i].schedule;
2897 ix86_arch = processor_alias_table[i].processor;
2898 /* Default cpu tuning to the architecture. */
2899 ix86_tune = ix86_arch;
2901 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2902 error ("CPU you selected does not support x86-64 "
2905 if (processor_alias_table[i].flags & PTA_MMX
2906 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2907 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2908 if (processor_alias_table[i].flags & PTA_3DNOW
2909 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2910 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2911 if (processor_alias_table[i].flags & PTA_3DNOW_A
2912 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2913 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2914 if (processor_alias_table[i].flags & PTA_SSE
2915 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2916 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2917 if (processor_alias_table[i].flags & PTA_SSE2
2918 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2919 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2920 if (processor_alias_table[i].flags & PTA_SSE3
2921 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2922 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2923 if (processor_alias_table[i].flags & PTA_SSSE3
2924 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2925 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2926 if (processor_alias_table[i].flags & PTA_SSE4_1
2927 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2928 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2929 if (processor_alias_table[i].flags & PTA_SSE4_2
2930 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2931 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2932 if (processor_alias_table[i].flags & PTA_AVX
2933 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2934 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2935 if (processor_alias_table[i].flags & PTA_FMA
2936 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2937 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2938 if (processor_alias_table[i].flags & PTA_SSE4A
2939 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2940 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2941 if (processor_alias_table[i].flags & PTA_SSE5
2942 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2943 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2944 if (processor_alias_table[i].flags & PTA_ABM
2945 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2946 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2947 if (processor_alias_table[i].flags & PTA_CX16
2948 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2949 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2950 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2951 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2952 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2953 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2954 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2955 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2956 if (processor_alias_table[i].flags & PTA_MOVBE
2957 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
2958 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
2959 if (processor_alias_table[i].flags & PTA_AES
2960 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2961 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2962 if (processor_alias_table[i].flags & PTA_PCLMUL
2963 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2964 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2965 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2966 x86_prefetch_sse = true;
2972 error ("bad value (%s) for %sarch=%s %s",
2973 ix86_arch_string, prefix, suffix, sw);
2975 ix86_arch_mask = 1u << ix86_arch;
2976 for (i = 0; i < X86_ARCH_LAST; ++i)
2977 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2979 for (i = 0; i < pta_size; i++)
2980 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2982 ix86_schedule = processor_alias_table[i].schedule;
2983 ix86_tune = processor_alias_table[i].processor;
2984 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2986 if (ix86_tune_defaulted)
2988 ix86_tune_string = "x86-64";
2989 for (i = 0; i < pta_size; i++)
2990 if (! strcmp (ix86_tune_string,
2991 processor_alias_table[i].name))
2993 ix86_schedule = processor_alias_table[i].schedule;
2994 ix86_tune = processor_alias_table[i].processor;
2997 error ("CPU you selected does not support x86-64 "
3000 /* Intel CPUs have always interpreted SSE prefetch instructions as
3001 NOPs; so, we can enable SSE prefetch instructions even when
3002 -mtune (rather than -march) points us to a processor that has them.
3003 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3004 higher processors. */
3006 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3007 x86_prefetch_sse = true;
3011 error ("bad value (%s) for %stune=%s %s",
3012 ix86_tune_string, prefix, suffix, sw);
3014 ix86_tune_mask = 1u << ix86_tune;
3015 for (i = 0; i < X86_TUNE_LAST; ++i)
3016 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3019 ix86_cost = &ix86_size_cost;
3021 ix86_cost = processor_target_table[ix86_tune].cost;
3023 /* Arrange to set up i386_stack_locals for all functions. */
3024 init_machine_status = ix86_init_machine_status;
3026 /* Validate -mregparm= value. */
3027 if (ix86_regparm_string)
3030 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3031 i = atoi (ix86_regparm_string);
3032 if (i < 0 || i > REGPARM_MAX)
3033 error ("%sregparm=%d%s is not between 0 and %d",
3034 prefix, i, suffix, REGPARM_MAX);
3039 ix86_regparm = REGPARM_MAX;
3041 /* If the user has provided any of the -malign-* options,
3042 warn and use that value only if -falign-* is not set.
3043 Remove this code in GCC 3.2 or later. */
3044 if (ix86_align_loops_string)
3046 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3047 prefix, suffix, suffix);
3048 if (align_loops == 0)
3050 i = atoi (ix86_align_loops_string);
3051 if (i < 0 || i > MAX_CODE_ALIGN)
3052 error ("%salign-loops=%d%s is not between 0 and %d",
3053 prefix, i, suffix, MAX_CODE_ALIGN);
3055 align_loops = 1 << i;
3059 if (ix86_align_jumps_string)
3061 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3062 prefix, suffix, suffix);
3063 if (align_jumps == 0)
3065 i = atoi (ix86_align_jumps_string);
3066 if (i < 0 || i > MAX_CODE_ALIGN)
3067 error ("%salign-loops=%d%s is not between 0 and %d",
3068 prefix, i, suffix, MAX_CODE_ALIGN);
3070 align_jumps = 1 << i;
3074 if (ix86_align_funcs_string)
3076 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3077 prefix, suffix, suffix);
3078 if (align_functions == 0)
3080 i = atoi (ix86_align_funcs_string);
3081 if (i < 0 || i > MAX_CODE_ALIGN)
3082 error ("%salign-loops=%d%s is not between 0 and %d",
3083 prefix, i, suffix, MAX_CODE_ALIGN);
3085 align_functions = 1 << i;
3089 /* Default align_* from the processor table. */
3090 if (align_loops == 0)
3092 align_loops = processor_target_table[ix86_tune].align_loop;
3093 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3095 if (align_jumps == 0)
3097 align_jumps = processor_target_table[ix86_tune].align_jump;
3098 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3100 if (align_functions == 0)
3102 align_functions = processor_target_table[ix86_tune].align_func;
3105 /* Validate -mbranch-cost= value, or provide default. */
3106 ix86_branch_cost = ix86_cost->branch_cost;
3107 if (ix86_branch_cost_string)
3109 i = atoi (ix86_branch_cost_string);
3111 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3113 ix86_branch_cost = i;
3115 if (ix86_section_threshold_string)
3117 i = atoi (ix86_section_threshold_string);
3119 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3121 ix86_section_threshold = i;
3124 if (ix86_tls_dialect_string)
3126 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3127 ix86_tls_dialect = TLS_DIALECT_GNU;
3128 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3129 ix86_tls_dialect = TLS_DIALECT_GNU2;
3130 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3131 ix86_tls_dialect = TLS_DIALECT_SUN;
3133 error ("bad value (%s) for %stls-dialect=%s %s",
3134 ix86_tls_dialect_string, prefix, suffix, sw);
3137 if (ix87_precision_string)
3139 i = atoi (ix87_precision_string);
3140 if (i != 32 && i != 64 && i != 80)
3141 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3146 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3148 /* Enable by default the SSE and MMX builtins. Do allow the user to
3149 explicitly disable any of these. In particular, disabling SSE and
3150 MMX for kernel code is extremely useful. */
3151 if (!ix86_arch_specified)
3153 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3154 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3157 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3161 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3163 if (!ix86_arch_specified)
3165 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3167 /* i386 ABI does not specify red zone. It still makes sense to use it
3168 when programmer takes care to stack from being destroyed. */
3169 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3170 target_flags |= MASK_NO_RED_ZONE;
3173 /* Keep nonleaf frame pointers. */
3174 if (flag_omit_frame_pointer)
3175 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3176 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3177 flag_omit_frame_pointer = 1;
3179 /* If we're doing fast math, we don't care about comparison order
3180 wrt NaNs. This lets us use a shorter comparison sequence. */
3181 if (flag_finite_math_only)
3182 target_flags &= ~MASK_IEEE_FP;
3184 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3185 since the insns won't need emulation. */
3186 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3187 target_flags &= ~MASK_NO_FANCY_MATH_387;
3189 /* Likewise, if the target doesn't have a 387, or we've specified
3190 software floating point, don't use 387 inline intrinsics. */
3192 target_flags |= MASK_NO_FANCY_MATH_387;
3194 /* Turn on MMX builtins for -msse. */
3197 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3198 x86_prefetch_sse = true;
3201 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3202 if (TARGET_SSE4_2 || TARGET_ABM)
3203 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3205 /* Validate -mpreferred-stack-boundary= value or default it to
3206 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3207 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3208 if (ix86_preferred_stack_boundary_string)
3210 i = atoi (ix86_preferred_stack_boundary_string);
3211 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3212 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3213 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3215 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3218 /* Set the default value for -mstackrealign. */
3219 if (ix86_force_align_arg_pointer == -1)
3220 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3222 /* Validate -mincoming-stack-boundary= value or default it to
3223 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3224 if (ix86_force_align_arg_pointer)
3225 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3227 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3228 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3229 if (ix86_incoming_stack_boundary_string)
3231 i = atoi (ix86_incoming_stack_boundary_string);
3232 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3233 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3234 i, TARGET_64BIT ? 4 : 2);
3237 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3238 ix86_incoming_stack_boundary
3239 = ix86_user_incoming_stack_boundary;
3243 /* Accept -msseregparm only if at least SSE support is enabled. */
3244 if (TARGET_SSEREGPARM
3246 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3248 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3249 if (ix86_fpmath_string != 0)
3251 if (! strcmp (ix86_fpmath_string, "387"))
3252 ix86_fpmath = FPMATH_387;
3253 else if (! strcmp (ix86_fpmath_string, "sse"))
3257 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3258 ix86_fpmath = FPMATH_387;
3261 ix86_fpmath = FPMATH_SSE;
3263 else if (! strcmp (ix86_fpmath_string, "387,sse")
3264 || ! strcmp (ix86_fpmath_string, "387+sse")
3265 || ! strcmp (ix86_fpmath_string, "sse,387")
3266 || ! strcmp (ix86_fpmath_string, "sse+387")
3267 || ! strcmp (ix86_fpmath_string, "both"))
3271 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3272 ix86_fpmath = FPMATH_387;
3274 else if (!TARGET_80387)
3276 warning (0, "387 instruction set disabled, using SSE arithmetics");
3277 ix86_fpmath = FPMATH_SSE;
3280 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3283 error ("bad value (%s) for %sfpmath=%s %s",
3284 ix86_fpmath_string, prefix, suffix, sw);
3287 /* If the i387 is disabled, then do not return values in it. */
3289 target_flags &= ~MASK_FLOAT_RETURNS;
3291 /* Use external vectorized library in vectorizing intrinsics. */
3292 if (ix86_veclibabi_string)
3294 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3295 ix86_veclib_handler = ix86_veclibabi_svml;
3296 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3297 ix86_veclib_handler = ix86_veclibabi_acml;
3299 error ("unknown vectorization library ABI type (%s) for "
3300 "%sveclibabi=%s %s", ix86_veclibabi_string,
3301 prefix, suffix, sw);
3304 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3305 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3307 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3309 /* ??? Unwind info is not correct around the CFG unless either a frame
3310 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3311 unwind info generation to be aware of the CFG and propagating states
3313 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3314 || flag_exceptions || flag_non_call_exceptions)
3315 && flag_omit_frame_pointer
3316 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3318 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3319 warning (0, "unwind tables currently require either a frame pointer "
3320 "or %saccumulate-outgoing-args%s for correctness",
3322 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3325 /* If stack probes are required, the space used for large function
3326 arguments on the stack must also be probed, so enable
3327 -maccumulate-outgoing-args so this happens in the prologue. */
3328 if (TARGET_STACK_PROBE
3329 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3331 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3332 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3333 "for correctness", prefix, suffix);
3334 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3337 /* For sane SSE instruction set generation we need fcomi instruction.
3338 It is safe to enable all CMOVE instructions. */
3342 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3345 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3346 p = strchr (internal_label_prefix, 'X');
3347 internal_label_prefix_len = p - internal_label_prefix;
3351 /* When scheduling description is not available, disable scheduler pass
3352 so it won't slow down the compilation and make x87 code slower. */
3353 if (!TARGET_SCHEDULE)
3354 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3356 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3357 set_param_value ("simultaneous-prefetches",
3358 ix86_cost->simultaneous_prefetches);
3359 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3360 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3361 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3362 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3363 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3364 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3366 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3367 can be optimized to ap = __builtin_next_arg (0). */
3369 targetm.expand_builtin_va_start = NULL;
3373 ix86_gen_leave = gen_leave_rex64;
3374 ix86_gen_pop1 = gen_popdi1;
3375 ix86_gen_add3 = gen_adddi3;
3376 ix86_gen_sub3 = gen_subdi3;
3377 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3378 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3379 ix86_gen_monitor = gen_sse3_monitor64;
3380 ix86_gen_andsp = gen_anddi3;
3384 ix86_gen_leave = gen_leave;
3385 ix86_gen_pop1 = gen_popsi1;
3386 ix86_gen_add3 = gen_addsi3;
3387 ix86_gen_sub3 = gen_subsi3;
3388 ix86_gen_sub3_carry = gen_subsi3_carry;
3389 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3390 ix86_gen_monitor = gen_sse3_monitor;
3391 ix86_gen_andsp = gen_andsi3;
3395 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3397 target_flags |= MASK_CLD & ~target_flags_explicit;
3400 /* Save the initial options in case the user does function specific options */
3402 target_option_default_node = target_option_current_node
3403 = build_target_option_node ();
3406 /* Save the current options */
/* Snapshot the current x86 target-option globals into PTR (the
   per-function cl_target_option record) so they can later be reinstated
   by ix86_function_specific_restore — used by attribute((target(...)))
   and #pragma GCC target support.
   NOTE(review): the return type, braces and a few lines of this
   definition are elided in this view of the file.  */
3409 ix86_function_specific_save (struct cl_target_option *ptr)
/* Assert each value fits in 0..255 before the stores below — presumably
   because the corresponding cl_target_option fields are byte-sized;
   confirm against the struct declaration.  */
3411   gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3412   gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3413   gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3414   gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3415   gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3417   ptr->arch = ix86_arch;
3418   ptr->schedule = ix86_schedule;
3419   ptr->tune = ix86_tune;
3420   ptr->fpmath = ix86_fpmath;
3421   ptr->branch_cost = ix86_branch_cost;
3422   ptr->tune_defaulted = ix86_tune_defaulted;
3423   ptr->arch_specified = ix86_arch_specified;
/* Also record which ISA bits / target flags the user set explicitly, so
   a restore can distinguish explicit options from defaults.  */
3424   ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3425   ptr->target_flags_explicit = target_flags_explicit;
3428 /* Restore the current options */
/* Reinstate the x86 target-option globals from PTR (a snapshot made by
   ix86_function_specific_save), and rebuild the derived per-arch and
   per-tune feature tables when arch or tune actually changed.
   NOTE(review): braces and some lines are elided in this view.  */
3431 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Remember the outgoing arch/tune so the feature tables below are only
   recomputed when they really change.  */
3433   enum processor_type old_tune = ix86_tune;
3434   enum processor_type old_arch = ix86_arch;
3435   unsigned int ix86_arch_mask, ix86_tune_mask;
3438   ix86_arch = (enum processor_type) ptr->arch;
3439   ix86_schedule = (enum attr_cpu) ptr->schedule;
3440   ix86_tune = (enum processor_type) ptr->tune;
3441   ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3442   ix86_branch_cost = ptr->branch_cost;
3443   ix86_tune_defaulted = ptr->tune_defaulted;
3444   ix86_arch_specified = ptr->arch_specified;
3445   ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3446   target_flags_explicit = ptr->target_flags_explicit;
3448   /* Recreate the arch feature tests if the arch changed */
3449   if (old_arch != ix86_arch)
3451       ix86_arch_mask = 1u << ix86_arch;
3452       for (i = 0; i < X86_ARCH_LAST; ++i)
3453 	ix86_arch_features[i]
3454 	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3457   /* Recreate the tune optimization tests */
3458   if (old_tune != ix86_tune)
3460       ix86_tune_mask = 1u << ix86_tune;
3461       for (i = 0; i < X86_TUNE_LAST; ++i)
3462 	ix86_tune_features[i]
3463 	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3467 /* Print the current options */
/* Debug dump of one cl_target_option record PTR to FILE, each line
   prefixed with INDENT spaces: arch, tune, fpmath, branch cost, and the
   reconstructed option string.  NOTE(review): several fprintf argument
   lines are elided in this view.  */
3470 ix86_function_specific_print (FILE *file, int indent,
3471 			      struct cl_target_option *ptr)
/* Rebuild a human-readable "-m..." option string from the saved ISA and
   target flags; ix86_target_string allocates, freed below.  */
3474     = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3475 			  NULL, NULL, NULL, false);
3477   fprintf (file, "%*sarch = %d (%s)\n",
/* Guard the cpu_names lookup: values at/after TARGET_CPU_DEFAULT_max
   have no name entry.  */
3480 	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
3481 	    ? cpu_names[ptr->arch]
3484   fprintf (file, "%*stune = %d (%s)\n",
3487 	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
3488 	    ? cpu_names[ptr->tune]
/* fpmath is a bit-set: 387 and sse may both be enabled.  */
3491   fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3492 	   (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3493 	   (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3494   fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3498     fprintf (file, "%*s%s\n", indent, "", target_string);
3499     free (target_string);
3504 /* Inner function to process the attribute((target(...))), take an argument and
3505    set the current options from the argument. If we have a list, recursively go
/* Worker for attribute((target("...")))  processing: ARGS is either a
   TREE_LIST (handled recursively) or a STRING_CST of comma-separated
   options.  ISA options are routed through ix86_handle_option, boolean
   mask options toggle target_flags, and string options (arch=, tune=,
   fpmath=) are stashed in P_STRINGS for the caller.
   NOTE(review): braces, returns and several lines of this definition are
   elided in this view.  */
3509 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Table-entry constructors: string, its length, option kind, OPT_*
   enumerator, and (for yes/no kinds) the target_flags mask.  */
3514 #define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3515 #define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3516 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3517 #define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }
3532     enum ix86_opt_type type;
/* ISA options: each maps to the corresponding -m<isa> switch.  */
3537     IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
3538     IX86_ATTR_ISA ("abm",	OPT_mabm),
3539     IX86_ATTR_ISA ("aes",	OPT_maes),
3540     IX86_ATTR_ISA ("avx",	OPT_mavx),
3541     IX86_ATTR_ISA ("mmx",	OPT_mmmx),
3542     IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
3543     IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
3544     IX86_ATTR_ISA ("sse",	OPT_msse),
3545     IX86_ATTR_ISA ("sse2",	OPT_msse2),
3546     IX86_ATTR_ISA ("sse3",	OPT_msse3),
3547     IX86_ATTR_ISA ("sse4",	OPT_msse4),
3548     IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
3549     IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
3550     IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
3551     IX86_ATTR_ISA ("sse5",	OPT_msse5),
3552     IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
3554     /* string options */
3555     IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
3556     IX86_ATTR_STR ("fpmath=",	IX86_FUNCTION_SPECIFIC_FPMATH),
3557     IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),
/* Boolean flag options: "yes" entries set the mask, "no" entries name an
   inverted mask (note e.g. fancy-math-387 maps to MASK_NO_...).  */
3560     IX86_ATTR_YES ("cld",
3564     IX86_ATTR_NO ("fancy-math-387",
3565 		  OPT_mfancy_math_387,
3566 		  MASK_NO_FANCY_MATH_387),
3568     IX86_ATTR_NO ("fused-madd",
3570 		  MASK_NO_FUSED_MADD),
3572     IX86_ATTR_YES ("ieee-fp",
3576     IX86_ATTR_YES ("inline-all-stringops",
3577 		   OPT_minline_all_stringops,
3578 		   MASK_INLINE_ALL_STRINGOPS),
3580     IX86_ATTR_YES ("inline-stringops-dynamically",
3581 		   OPT_minline_stringops_dynamically,
3582 		   MASK_INLINE_STRINGOPS_DYNAMICALLY),
3584     IX86_ATTR_NO ("align-stringops",
3585 		  OPT_mno_align_stringops,
3586 		  MASK_NO_ALIGN_STRINGOPS),
3588     IX86_ATTR_YES ("recip",
3594   /* If this is a list, recurse to get the options. */
3595   if (TREE_CODE (args) == TREE_LIST)
3599       for (; args; args = TREE_CHAIN (args))
3600 	if (TREE_VALUE (args)
3601 	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
/* Non-list arguments must be string constants.  */
3607   else if (TREE_CODE (args) != STRING_CST)
3610   /* Handle multiple arguments separated by commas. */
3611   next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3613   while (next_optstr && *next_optstr != '\0')
3615       char *p = next_optstr;
3617       char *comma = strchr (next_optstr, ',');
3618       const char *opt_string;
3619       size_t len, opt_len;
3624       enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the current option at the comma (if any).  */
3630 	  len = comma - next_optstr;
3631 	  next_optstr = comma + 1;
3639       /* Recognize no-xxx. */
3640       if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3649       /* Find the option. */
3652       for (i = 0; i < ARRAY_SIZE (attrs); i++)
3654 	  type = attrs[i].type;
3655 	  opt_len = attrs[i].len;
/* First-character check is a fast reject; string options only require a
   prefix match (the remainder is the option's value).  */
3656 	  if (ch == attrs[i].string[0]
3657 	      && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3658 	      && memcmp (p, attrs[i].string, opt_len) == 0)
3661 	      mask = attrs[i].mask;
3662 	      opt_string = attrs[i].string;
3667       /* Process the option. */
3670 	  error ("attribute(target(\"%s\")) is unknown", orig_p);
3674       else if (type == ix86_opt_isa)
3675 	ix86_handle_option (opt, p, opt_set_p);
3677       else if (type == ix86_opt_yes || type == ix86_opt_no)
/* "no" options invert the sense of the (possibly "no-"-prefixed)
   request before applying the mask.  */
3679 	  if (type == ix86_opt_no)
3680 	    opt_set_p = !opt_set_p;
3683 	    target_flags |= mask;
3685 	    target_flags &= ~mask;
3688       else if (type == ix86_opt_str)
3692 	      error ("option(\"%s\") was already specified", opt_string);
/* Stash the value part (after e.g. "arch=") for the caller; the caller
   owns and frees this xstrdup'ed string.  */
3696 	    p_strings[opt] = xstrdup (p + opt_len);
3706 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Parse attribute((target(...))) arguments in ARGS, temporarily apply
   them on top of the defaults via override_options, capture the result
   as a target-option tree node, then restore the original global option
   strings.  NOTE(review): braces, returns and some lines are elided in
   this view.  */
3709 ix86_valid_target_attribute_tree (tree args)
/* Save the incoming global string options so they can be put back after
   the temporary override below.  */
3711   const char *orig_arch_string = ix86_arch_string;
3712   const char *orig_tune_string = ix86_tune_string;
3713   const char *orig_fpmath_string = ix86_fpmath_string;
3714   int orig_tune_defaulted = ix86_tune_defaulted;
3715   int orig_arch_specified = ix86_arch_specified;
/* One slot per IX86_FUNCTION_SPECIFIC_* string option (arch/tune/fpmath),
   filled in by ix86_valid_target_attribute_inner_p; freed below.  */
3716   char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3719   struct cl_target_option *def
3720     = TREE_TARGET_OPTION (target_option_default_node);
3722   /* Process each of the options on the chain. */
3723   if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3726   /* If the changed options are different from the default, rerun override_options,
3727      and then save the options away.  The string options are attribute options,
3728      and will be undone when we copy the save structure. */
3729   if (ix86_isa_flags != def->ix86_isa_flags
3730       || target_flags != def->target_flags
3731       || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3732       || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3733       || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3735       /* If we are using the default tune= or arch=, undo the string assigned,
3736 	 and use the default. */
3737       if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3738 	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3739       else if (!orig_arch_specified)
3740 	ix86_arch_string = NULL;
3742       if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3743 	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3744       else if (orig_tune_defaulted)
3745 	ix86_tune_string = NULL;
3747       /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3748       if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3749 	ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3750       else if (!TARGET_64BIT && TARGET_SSE)
3751 	ix86_fpmath_string = "sse,387";
3753       /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3754       override_options (false);
3756       /* Add any builtin functions with the new isa if any. */
3757       ix86_add_new_builtins (ix86_isa_flags);
3759       /* Save the current options unless we are validating options for
3761       t = build_target_option_node ();
/* Put the original global strings back; the attribute's effect lives on
   only in the saved tree node T.  */
3763       ix86_arch_string = orig_arch_string;
3764       ix86_tune_string = orig_tune_string;
3765       ix86_fpmath_string = orig_fpmath_string;
3767   /* Free up memory allocated to hold the strings */
3768   for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3769     if (option_strings[i])
3770       free (option_strings[i]);
3776 /* Hook to validate attribute((target("string"))). */
/* TARGET_OPTION_VALID_ATTRIBUTE_P hook: validate the attribute args for
   FNDECL, and attach the resulting target-option (and, if changed,
   optimization) nodes to FNDECL.  Global option state is saved up front
   and restored before returning, so this only has a lasting effect on
   FNDECL itself.  NOTE(review): some lines of this definition are elided
   in this view.  */
3779 ix86_valid_target_attribute_p (tree fndecl,
3780 			       tree ARG_UNUSED (name),
3782 			       int ARG_UNUSED (flags))
3784   struct cl_target_option cur_target;
3786   tree old_optimize = build_optimization_node ();
3787   tree new_target, new_optimize;
3788   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3790   /* If the function changed the optimization levels as well as setting target
3791      options, start with the optimizations specified. */
3792   if (func_optimize && func_optimize != old_optimize)
3793     cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3795   /* The target attributes may also change some optimization flags, so update
3796      the optimization options if necessary. */
3797   cl_target_option_save (&cur_target);
3798   new_target = ix86_valid_target_attribute_tree (args);
3799   new_optimize = build_optimization_node ();
3806       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
/* Only attach a new optimization node when the attribute actually
   changed optimization flags.  */
3808       if (old_optimize != new_optimize)
3809 	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary target/optimization state changes made above.  */
3812   cl_target_option_restore (&cur_target);
3814   if (old_optimize != new_optimize)
3815     cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3821 /* Hook to determine if one function can safely inline another. */
/* TARGET_CAN_INLINE_P hook: CALLEE may be inlined into CALLER only when
   CALLEE's target options are compatible — its ISA bits are a subset of
   the caller's and the remaining saved options match exactly.
   NOTE(review): the return statements and braces are elided in this
   view.  */
3824 ix86_can_inline_p (tree caller, tree callee)
3827   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3828   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3830   /* If callee has no option attributes, then it is ok to inline. */
3834   /* If caller has no option attributes, but callee does then it is not ok to
3836   else if (!caller_tree)
3841       struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3842       struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3844       /* Callee's isa options should be a subset of the caller's, i.e. a SSE5
3845 	 function can inline a SSE2 function but a SSE2 function can't inline
/* Subset test: every ISA bit required by the callee must be present in
   the caller.  */
3847       if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3848 	  != callee_opts->ix86_isa_flags)
3851       /* See if we have the same non-isa options. */
3852       else if (caller_opts->target_flags != callee_opts->target_flags)
3855       /* See if arch, tune, etc. are the same. */
3856       else if (caller_opts->arch != callee_opts->arch)
3859       else if (caller_opts->tune != callee_opts->tune)
3862       else if (caller_opts->fpmath != callee_opts->fpmath)
3865       else if (caller_opts->branch_cost != callee_opts->branch_cost)
3876 /* Remember the last target of ix86_set_current_function. */
/* Cached across calls (GC-rooted) so repeated invocations for the same
   function are cheap no-ops.  */
3877 static GTY(()) tree ix86_previous_fndecl;
3879 /* Establish appropriate back-end context for processing the function
3880    FNDECL.  The argument might be NULL to indicate processing at top
3881    level, outside of any function scope. */
/* TARGET_SET_CURRENT_FUNCTION hook.  Restores FNDECL's saved target
   options (or the current defaults when FNDECL has none).
   NOTE(review): braces and some lines are elided in this view.  */
3883 ix86_set_current_function (tree fndecl)
3885   /* Only change the context if the function changes.  This hook is called
3886      several times in the course of compiling a function, and we don't want to
3887      slow things down too much or call target_reinit when it isn't safe. */
3888   if (fndecl && fndecl != ix86_previous_fndecl)
3890       tree old_tree = (ix86_previous_fndecl
3891 		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3894       tree new_tree = (fndecl
3895 		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3898       ix86_previous_fndecl = fndecl;
/* Same target-option node as last time: nothing to restore.  */
3899       if (old_tree == new_tree)
3904 	  cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* No per-function options: fall back to the current global defaults.  */
3910 	  struct cl_target_option *def
3911 	    = TREE_TARGET_OPTION (target_option_current_node);
3913 	  cl_target_option_restore (def);
3920 /* Return true if this goes in large data/bss. */
/* Predicate for the x86-64 medium code model: should EXP be placed in
   the large-data sections (.ldata/.lbss)?  Only relevant when the code
   model is medium; large data is what exceeds ix86_section_threshold.
   NOTE(review): the return statements and braces are elided in this
   view.  */
3923 ix86_in_large_data_p (tree exp)
/* Only the medium code models split small vs. large data.  */
3925   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3928   /* Functions are never large data. */
3929   if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit section attribute naming .ldata/.lbss forces large-data
   treatment.  */
3932   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3934       const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3935       if (strcmp (section, ".ldata") == 0
3936 	  || strcmp (section, ".lbss") == 0)
3942       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3944       /* If this is an incomplete type with size 0, then we can't put it
3945 	 in data because it might be too big when completed. */
3946       if (!size || size > ix86_section_threshold)
3953 /* Switch to the appropriate section for output of DECL.
3954    DECL is either a `VAR_DECL' node or a constant of some sort.
3955    RELOC indicates whether forming the initial value of DECL requires
3956    link-time relocations. */
/* TARGET_ASM_SELECT_SECTION for x86-64 ELF.  Under the medium code
   model, large data is redirected into the .l*-prefixed section
   corresponding to its SECCAT_* category; everything else goes through
   the default ELF logic.  NOTE(review): case labels' break statements,
   braces and some lines are elided in this view.  */
3958 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3962 x86_64_elf_select_section (tree decl, int reloc,
3963 			   unsigned HOST_WIDE_INT align)
3965   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3966       && ix86_in_large_data_p (decl))
3968       const char *sname = NULL;
3969       unsigned int flags = SECTION_WRITE;
/* Map the decl's section category to the matching large-data section
   name and flags.  */
3970       switch (categorize_decl_for_section (decl, reloc))
3975 	case SECCAT_DATA_REL:
3976 	  sname = ".ldata.rel";
3978 	case SECCAT_DATA_REL_LOCAL:
3979 	  sname = ".ldata.rel.local";
3981 	case SECCAT_DATA_REL_RO:
3982 	  sname = ".ldata.rel.ro";
3984 	case SECCAT_DATA_REL_RO_LOCAL:
3985 	  sname = ".ldata.rel.ro.local";
3989 	  flags |= SECTION_BSS;
3992 	case SECCAT_RODATA_MERGE_STR:
3993 	case SECCAT_RODATA_MERGE_STR_INIT:
3994 	case SECCAT_RODATA_MERGE_CONST:
3998 	case SECCAT_SRODATA:
4005 	  /* We don't split these for medium model.  Place them into
4006 	     default sections and hope for best. */
4008 	case SECCAT_EMUTLS_VAR:
4009 	case SECCAT_EMUTLS_TMPL:
4014 	  /* We might get called with string constants, but get_named_section
4015 	     doesn't like them as they are not DECLs.  Also, we need to set
4016 	     flags in that case. */
4018 	    return get_section (sname, flags, NULL);
4019 	  return get_named_section (decl, sname, reloc);
/* Not large data (or not the medium model): default ELF selection.  */
4022   return default_elf_select_section (decl, reloc, align);
4025 /* Build up a unique section name, expressed as a
4026 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4027 RELOC indicates whether the initial value of EXP requires
4028 link-time relocations. */
4030 static void ATTRIBUTE_UNUSED
4031 x86_64_elf_unique_section (tree decl, int reloc)
4033 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4034 && ix86_in_large_data_p (decl))
4036 const char *prefix = NULL;
4037 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4038 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
/* Pick the ".l"-prefixed large-data section prefix matching the
   decl's category; one-only decls get the abbreviated linkonce
   form of the prefix.  */
4040 switch (categorize_decl_for_section (decl, reloc))
4043 case SECCAT_DATA_REL:
4044 case SECCAT_DATA_REL_LOCAL:
4045 case SECCAT_DATA_REL_RO:
4046 case SECCAT_DATA_REL_RO_LOCAL:
4047 prefix = one_only ? ".ld" : ".ldata";
4050 prefix = one_only ? ".lb" : ".lbss";
4053 case SECCAT_RODATA_MERGE_STR:
4054 case SECCAT_RODATA_MERGE_STR_INIT:
4055 case SECCAT_RODATA_MERGE_CONST:
4056 prefix = one_only ? ".lr" : ".lrodata";
4058 case SECCAT_SRODATA:
4065 /* We don't split these for medium model. Place them into
4066 default sections and hope for best. */
4068 case SECCAT_EMUTLS_VAR:
4069 prefix = targetm.emutls.var_section;
4071 case SECCAT_EMUTLS_TMPL:
4072 prefix = targetm.emutls.tmpl_section;
4077 const char *name, *linkonce;
4080 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4081 name = targetm.strip_name_encoding (name);
4083 /* If we're using one_only, then there needs to be a .gnu.linkonce
4084 prefix to the section name. */
4085 linkonce = one_only ? ".gnu.linkonce" : "";
4087 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4089 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
/* Not medium-model large data: use the generic unique-section logic.  */
4093 default_unique_section (decl, reloc);
4096 #ifdef COMMON_ASM_OP
4097 /* This says how to output assembler code to declare an
4098 uninitialized external linkage data object.
4100 For medium model x86-64 we need to use .largecomm opcode for
4103 x86_elf_aligned_common (FILE *file,
4104 const char *name, unsigned HOST_WIDE_INT size,
/* Objects above -mlarge-data-threshold under the medium code models
   are declared with .largecomm; everything else uses the ordinary
   COMMON_ASM_OP directive.  */
4107 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4108 && size > (unsigned int)ix86_section_threshold)
4109 fprintf (file, ".largecomm\t");
4111 fprintf (file, "%s", COMMON_ASM_OP);
4112 assemble_name (file, name);
4113 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4114 size, align / BITS_PER_UNIT);
4118 /* Utility function for targets to use in implementing
4119 ASM_OUTPUT_ALIGNED_BSS. */
4122 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4123 const char *name, unsigned HOST_WIDE_INT size,
/* Large BSS objects under the medium code models go into ".lbss";
   everything else into the normal bss section.  */
4126 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4127 && size > (unsigned int)ix86_section_threshold)
4128 switch_to_section (get_named_section (decl, ".lbss", 0));
4130 switch_to_section (bss_section);
4131 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4132 #ifdef ASM_DECLARE_OBJECT_NAME
4133 last_assemble_variable_decl = decl;
4134 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4136 /* Standard thing is just output label for the object. */
4137 ASM_OUTPUT_LABEL (file, name);
4138 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve the storage; emit at least one byte so the label stays
   distinct from whatever follows.  */
4139 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set i386-specific defaults for optimization switches.  LEVEL is the
   -O level; SIZE presumably indicates -Os (unused here) -- TODO confirm
   against the OPTIMIZATION_OPTIONS macro documentation.  */
4143 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4145 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4146 make the problem with not enough registers even worse. */
4147 #ifdef INSN_SCHEDULING
4149 flag_schedule_insns = 0;
4153 /* The Darwin libraries never set errno, so we might as well
4154 avoid calling them when that's the only reason we would. */
4155 flag_errno_math = 0;
4157 /* The default values of these switches depend on the TARGET_64BIT
4158 that is not known at this moment. Mark these values with 2 and
4159 let the user override these. In case there is no command line option
4160 specifying them, we will set the defaults in override_options. */
4162 flag_omit_frame_pointer = 2;
4163 flag_pcc_struct_return = 2;
4164 flag_asynchronous_unwind_tables = 2;
4165 flag_vect_cost_model = 1;
4166 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4167 SUBTARGET_OPTIMIZATION_OPTIONS;
4171 /* Decide whether we can make a sibling call to a function. DECL is the
4172 declaration of the function being targeted by the call and EXP is the
4173 CALL_EXPR representing the call. */
4176 ix86_function_ok_for_sibcall (tree decl, tree exp)
4181 /* If we are generating position-independent code, we cannot sibcall
4182 optimize any indirect call, or a direct call to a global function,
4183 as the PLT requires %ebx be live. */
4184 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Strip the pointer type off the callee expression to reach the
   function type itself.  */
4191 func = TREE_TYPE (CALL_EXPR_FN (exp));
4192 if (POINTER_TYPE_P (func))
4193 func = TREE_TYPE (func);
4196 /* Check that the return value locations are the same. Like
4197 if we are returning floats on the 80387 register stack, we cannot
4198 make a sibcall from a function that doesn't return a float to a
4199 function that does or, conversely, from a function that does return
4200 a float to a function that doesn't; the necessary stack adjustment
4201 would not be executed. This is also the place we notice
4202 differences in the return value ABI. Note that it is ok for one
4203 of the functions to have void return type as long as the return
4204 value of the other is passed in a register. */
/* A is the callee's return-value location, B the current (caller)
   function's.  */
4205 a = ix86_function_value (TREE_TYPE (exp), func, false);
4206 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4208 if (STACK_REG_P (a) || STACK_REG_P (b))
4210 if (!rtx_equal_p (a, b))
4213 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4215 else if (!rtx_equal_p (a, b))
4218 /* If this call is indirect, we'll need to be able to use a call-clobbered
4219 register for the address of the target function. Make sure that all
4220 such registers are not used for passing parameters. */
4221 if (!decl && !TARGET_64BIT)
4225 /* We're looking at the CALL_EXPR, we need the type of the function. */
4226 type = CALL_EXPR_FN (exp); /* pointer expression */
4227 type = TREE_TYPE (type); /* pointer type */
4228 type = TREE_TYPE (type); /* function type */
4230 if (ix86_function_regparm (type, NULL) >= 3)
4232 /* ??? Need to count the actual number of registers to be used,
4233 not the possible number of registers. Fix later. */
4238 /* Dllimport'd functions are also called indirectly. */
4239 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4241 && decl && DECL_DLLIMPORT_P (decl)
4242 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4245 /* If we need to align the outgoing stack, then sibcalling would
4246 unalign the stack, which may break the called function. */
4247 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4250 /* Otherwise okay. That also includes certain types of indirect calls. */
4254 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4255 calling convention attributes;
4256 arguments as in struct attribute_spec.handler. */
4259 ix86_handle_cconv_attribute (tree *node, tree name,
4261 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function types (or decls
   thereof); reject anything else with a warning.  */
4264 if (TREE_CODE (*node) != FUNCTION_TYPE
4265 && TREE_CODE (*node) != METHOD_TYPE
4266 && TREE_CODE (*node) != FIELD_DECL
4267 && TREE_CODE (*node) != TYPE_DECL)
4269 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4271 *no_add_attrs = true;
4275 /* Can combine regparm with all attributes but fastcall. */
4276 if (is_attribute_p ("regparm", name))
4280 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4282 error ("fastcall and regparm attributes are not compatible");
4285 cst = TREE_VALUE (args);
4286 if (TREE_CODE (cst) != INTEGER_CST)
4288 warning (OPT_Wattributes,
4289 "%qE attribute requires an integer constant argument",
4291 *no_add_attrs = true;
4293 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4295 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4297 *no_add_attrs = true;
/* NOTE(review): this branch presumably sits under a TARGET_64BIT
   check (elided in this excerpt): the 32-bit conventions are
   ignored in 64-bit mode -- confirm against full source.  */
4305 /* Do not warn when emulating the MS ABI. */
4306 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4307 warning (OPT_Wattributes, "%qE attribute ignored",
4309 *no_add_attrs = true;
4313 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4314 if (is_attribute_p ("fastcall", name))
4316 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4318 error ("fastcall and cdecl attributes are not compatible");
4320 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4322 error ("fastcall and stdcall attributes are not compatible");
4324 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4326 error ("fastcall and regparm attributes are not compatible");
4330 /* Can combine stdcall with fastcall (redundant), regparm and
4332 else if (is_attribute_p ("stdcall", name))
4334 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4336 error ("stdcall and cdecl attributes are not compatible");
4338 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4340 error ("stdcall and fastcall attributes are not compatible");
4344 /* Can combine cdecl with regparm and sseregparm. */
4345 else if (is_attribute_p ("cdecl", name))
4347 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4349 error ("stdcall and cdecl attributes are not compatible");
4351 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4353 error ("fastcall and cdecl attributes are not compatible");
4357 /* Can combine sseregparm with all attributes. */
4362 /* Return 0 if the attributes for two types are incompatible, 1 if they
4363 are compatible, and 2 if they are nearly compatible (which causes a
4364 warning to be generated). */
4367 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4369 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default convention is stdcall, so the non-default
   attribute to compare is "cdecl" -- and vice versa.  */
4370 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4372 if (TREE_CODE (type1) != FUNCTION_TYPE
4373 && TREE_CODE (type1) != METHOD_TYPE)
4376 /* Check for mismatched fastcall/regparm types. */
4377 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4378 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4379 || (ix86_function_regparm (type1, NULL)
4380 != ix86_function_regparm (type2, NULL)))
4383 /* Check for mismatched sseregparm types. */
4384 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4385 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4388 /* Check for mismatched return types (cdecl vs stdcall). */
4389 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4390 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4396 /* Return the regparm value for a function with the indicated TYPE and DECL.
4397 DECL may be NULL when calling function indirectly
4398 or considering a libcall. */
4401 ix86_function_regparm (const_tree type, const_tree decl)
4406 static bool error_issued;
/* 64-bit: the register-parameter count is fixed by the ABI in use.  */
4409 return (ix86_function_type_abi (type) == SYSV_ABI
4410 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
/* 32-bit: start from the -mregparm default, then let an explicit
   regparm attribute override it.  */
4412 regparm = ix86_regparm;
4413 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4417 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4419 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4421 /* We can't use regparm(3) for nested functions because
4422 these pass static chain pointer in %ecx register. */
4423 if (!error_issued && regparm == 3
4424 && decl_function_context (decl)
4425 && !DECL_NO_STATIC_CHAIN (decl))
4427 error ("nested functions are limited to 2 register parameters");
4428 error_issued = true;
4436 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4439 /* Use register calling convention for local functions when possible. */
4441 && TREE_CODE (decl) == FUNCTION_DECL
4445 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4446 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4449 int local_regparm, globals = 0, regno;
4452 /* Make sure no regparm register is taken by a
4453 fixed register variable. */
4454 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4455 if (fixed_regs[local_regparm])
4458 /* We can't use regparm(3) for nested functions as these use
4459 static chain pointer in third argument. */
4460 if (local_regparm == 3
4461 && decl_function_context (decl)
4462 && !DECL_NO_STATIC_CHAIN (decl))
4465 /* If the function realigns its stack pointer, the prologue will
4466 clobber %ecx. If we've already generated code for the callee,
4467 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4468 scanning the attributes for the self-realigning property. */
4469 f = DECL_STRUCT_FUNCTION (decl);
4470 /* Since the current internal arg pointer won't conflict with
4471 parameter passing regs, there is no need to change stack
4472 realignment and adjust regparm number.
4474 Each fixed register usage increases register pressure,
4475 so less registers should be used for argument passing.
4476 This functionality can be overridden by an explicit
4478 for (regno = 0; regno <= DI_REG; regno++)
4479 if (fixed_regs[regno])
4483 = globals < local_regparm ? local_regparm - globals : 0;
4485 if (local_regparm > regparm)
4486 regparm = local_regparm;
4493 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4494 DFmode (2) arguments in SSE registers for a function with the
4495 indicated TYPE and DECL. DECL may be NULL when calling function
4496 indirectly or considering a libcall. Otherwise return 0. */
4499 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4501 gcc_assert (!TARGET_64BIT);
4503 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4504 by the sseregparm attribute. */
4505 if (TARGET_SSEREGPARM
4506 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* NOTE(review): the guards using WARN and the SSE/SSE2 availability
   checks are elided in this excerpt; WARN presumably gates these
   diagnostics -- confirm against full source.  */
4513 error ("Calling %qD with attribute sseregparm without "
4514 "SSE/SSE2 enabled", decl);
4516 error ("Calling %qT with attribute sseregparm without "
4517 "SSE/SSE2 enabled", type);
4525 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4526 (and DFmode for SSE2) arguments in SSE registers. */
4527 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4529 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4530 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4532 return TARGET_SSE2 ? 2 : 1;
4538 /* Return true if EAX is live at the start of the function. Used by
4539 ix86_expand_prologue to determine if we need special help before
4540 calling allocate_stack_worker. */
4543 ix86_eax_live_at_start_p (void)
4545 /* Cheat. Don't bother working forward from ix86_function_regparm
4546 to the function type to whether an actual argument is located in
4547 eax. Instead just look at cfg info, which is still close enough
4548 to correct at this point. This gives false positives for broken
4549 functions that might use uninitialized data that happens to be
4550 allocated in eax, but who cares? */
/* Hard register 0 is %eax (AX_REG).  */
4551 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4554 /* Value is the number of bytes of arguments automatically
4555 popped when returning from a subroutine call.
4556 FUNDECL is the declaration node of the function (as a tree),
4557 FUNTYPE is the data type of the function (as a tree),
4558 or for a library call it is an identifier node for the subroutine name.
4559 SIZE is the number of bytes of arguments passed on the stack.
4561 On the 80386, the RTD insn may be used to pop them if the number
4562 of args is fixed, but if the number is variable then the caller
4563 must pop them all. RTD can't be used for library calls now
4564 because the library is compiled with the Unix compiler.
4565 Use of RTD is a selectable option, since it is incompatible with
4566 standard Unix calling sequences. If the option is not selected,
4567 the caller must always pop the args.
4569 The attribute stdcall is equivalent to RTD on a per module basis. */
4572 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4576 /* None of the 64-bit ABIs pop arguments. */
/* -mrtd applies only to real function decls, not to library-call
   identifier nodes.  */
4580 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4582 /* Cdecl functions override -mrtd, and never pop the stack. */
4583 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4585 /* Stdcall and fastcall functions will pop the stack if not
4587 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4588 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4591 if (rtd && ! stdarg_p (funtype))
4595 /* Lose any fake structure return argument if it is passed on the stack. */
4596 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4597 && !KEEP_AGGREGATE_RETURN_POINTER)
4599 int nregs = ix86_function_regparm (funtype, fundecl);
/* The callee pops the one-word hidden aggregate-return pointer.  */
4601 return GET_MODE_SIZE (Pmode);
4607 /* Argument support functions. */
4609 /* Return true when register may be used to pass function parameters. */
4611 ix86_function_arg_regno_p (int regno)
4614 const int *parm_regs;
4619 return (regno < REGPARM_MAX
4620 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4622 return (regno < REGPARM_MAX
4623 || (TARGET_MMX && MMX_REGNO_P (regno)
4624 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4625 || (TARGET_SSE && SSE_REGNO_P (regno)
4626 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4631 if (SSE_REGNO_P (regno) && TARGET_SSE)
4636 if (TARGET_SSE && SSE_REGNO_P (regno)
4637 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4641 /* TODO: The function should depend on current function ABI but
4642 builtins.c would need updating then. Therefore we use the
4645 /* RAX is used as hidden argument to va_arg functions. */
4646 if (ix86_abi == SYSV_ABI && regno == AX_REG)
/* Scan the integer parameter registers of the ABI in effect.  */
4649 if (ix86_abi == MS_ABI)
4650 parm_regs = x86_64_ms_abi_int_parameter_registers;
4652 parm_regs = x86_64_int_parameter_registers;
4653 for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX
4654 : X86_64_REGPARM_MAX); i++)
4655 if (regno == parm_regs[i])
4660 /* Return true if we do not know how to pass TYPE solely in registers. */
4663 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4665 if (must_pass_in_stack_var_size_or_pad (mode, type))
4668 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4669 The layout_type routine is crafty and tries to trick us into passing
4670 currently unsupported vector types on the stack by using TImode. */
4671 return (!TARGET_64BIT && mode == TImode
4672 && type && TREE_CODE (type) != VECTOR_TYPE);
4675 /* It returns the size, in bytes, of the area reserved for arguments passed
4676 in registers for the function represented by fndecl dependent to the used
4679 ix86_reg_parm_stack_space (const_tree fndecl)
4681 enum calling_abi call_abi = SYSV_ABI;
/* FNDECL may be either a decl or (for indirect calls) a function
   type; pick the matching ABI query.  */
4682 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4683 call_abi = ix86_function_abi (fndecl);
4685 call_abi = ix86_function_type_abi (fndecl);
/* The MS ABI reserves home space for register-passed arguments;
   SYSV reserves none.  */
4686 if (call_abi == MS_ABI)
4691 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
4694 ix86_function_type_abi (const_tree fntype)
4696 if (TARGET_64BIT && fntype != NULL)
4698 enum calling_abi abi = ix86_abi;
/* An explicit ms_abi/sysv_abi attribute on the type overrides the
   default ABI.  */
4699 if (abi == SYSV_ABI)
4701 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4704 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* Return the calling ABI for FNDECL by looking at its function type.
   NOTE(review): presumably returns the default ix86_abi for a NULL
   FNDECL -- that branch is elided in this excerpt.  */
4711 static enum calling_abi
4712 ix86_function_abi (const_tree fndecl)
4716 return ix86_function_type_abi (TREE_TYPE (fndecl));
4719 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4722 ix86_cfun_abi (void)
/* NOTE(review): with no function context or in 32-bit mode this
   presumably falls back to the default ix86_abi -- the branch body
   is elided in this excerpt.  */
4724 if (! cfun || ! TARGET_64BIT)
4726 return cfun->machine->call_abi;
/* init_regs is defined elsewhere; presumably re-initializes the
   global register tables after an ABI change -- see
   ix86_maybe_switch_abi.  */
4730 extern void init_regs (void);
4732 /* Implementation of call abi switching target hook. Specific to FNDECL
4733 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4734 for more details. */
4736 ix86_call_abi_override (const_tree fndecl)
4738 if (fndecl == NULL_TREE)
4739 cfun->machine->call_abi = ix86_abi;
4741 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4744 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4745 re-initialization of init_regs each time we switch function context since
4746 this is needed only during RTL expansion. */
4748 ix86_maybe_switch_abi (void)
/* SI_REG's call_used status differs between the two ABIs, so it
   serves as a cheap probe of which ABI the register tables
   currently reflect.  */
4751 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4755 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4756 for a call to a function whose data type is FNTYPE.
4757 For a library call, FNTYPE is 0. */
4760 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4761 tree fntype, /* tree ptr for function decl */
4762 rtx libname, /* SYMBOL_REF of library name or 0 */
4765 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4766 memset (cum, 0, sizeof (*cum));
/* Record the call ABI: from the decl when available, else from the
   function type.  */
4769 cum->call_abi = ix86_function_abi (fndecl);
4771 cum->call_abi = ix86_function_type_abi (fntype);
4772 /* Set up the number of registers to use for passing arguments. */
4774 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4775 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it")
4776 cum->nregs = ix86_regparm;
4779 if (cum->call_abi != ix86_abi)
4780 cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX
4785 cum->sse_nregs = SSE_REGPARM_MAX;
4788 if (cum->call_abi != ix86_abi)
4789 cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4790 : X64_SSE_REGPARM_MAX;
4794 cum->mmx_nregs = MMX_REGPARM_MAX;
4795 cum->warn_avx = true;
4796 cum->warn_sse = true;
4797 cum->warn_mmx = true;
4799 /* Because type might mismatch in between caller and callee, we need to
4800 use actual type of function for local calls.
4801 FIXME: cgraph_analyze can be told to actually record if function uses
4802 va_start so for local functions maybe_vaarg can be made aggressive
4804 FIXME: once type system is fixed, we won't need this code anymore. */
4806 fntype = TREE_TYPE (fndecl);
4807 cum->maybe_vaarg = (fntype
4808 ? (!prototype_p (fntype) || stdarg_p (fntype))
4813 /* If there are variable arguments, then we won't pass anything
4814 in registers in 32-bit mode. */
4815 if (stdarg_p (fntype))
4826 /* Use ecx and edx registers if function has fastcall attribute,
4827 else look for regparm information. */
4830 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4836 cum->nregs = ix86_function_regparm (fntype, fndecl);
4839 /* Set up the number of SSE registers used for passing SFmode
4840 and DFmode arguments. Warn for mismatching ABI. */
4841 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4845 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4846 But in the case of vector types, it is some vector mode.
4848 When we have only some of our vector isa extensions enabled, then there
4849 are some modes for which vector_mode_supported_p is false. For these
4850 modes, the generic vector support in gcc will choose some non-vector mode
4851 in order to implement the type. By computing the natural mode, we'll
4852 select the proper ABI location for the operand and not depend on whatever
4853 the middle-end decides to do with these vector types.
4855 The middle-end can't deal with the vector types > 16 bytes. In this
4856 case, we return the original mode and warn ABI change if CUM isn't
4859 static enum machine_mode
4860 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4862 enum machine_mode mode = TYPE_MODE (type);
4864 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4866 HOST_WIDE_INT size = int_size_in_bytes (type);
4867 if ((size == 8 || size == 16 || size == 32)
4868 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4869 && TYPE_VECTOR_SUBPARTS (type) > 1)
4871 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Start the search from the smallest vector mode of the right
   element class.  */
4873 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4874 mode = MIN_MODE_VECTOR_FLOAT;
4876 mode = MIN_MODE_VECTOR_INT;
4878 /* Get the mode which has this inner mode and number of units. */
4879 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4880 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4881 && GET_MODE_INNER (mode) == innermode)
4883 if (size == 32 && !TARGET_AVX)
4885 static bool warnedavx;
4892 warning (0, "AVX vector argument without AVX "
4893 "enabled changes the ABI");
4895 return TYPE_MODE (type);
4908 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4909 this may not agree with the mode that the type system has chosen for the
4910 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4911 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4914 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4919 if (orig_mode != BLKmode)
4920 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the natural-mode register in a one-element PARALLEL
   at byte offset 0.  */
4923 tmp = gen_rtx_REG (mode, regno);
4924 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4925 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4931 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4932 of this code is to classify each 8bytes of incoming argument by the register
4933 class and assign registers accordingly. */
4935 /* Return the union class of CLASS1 and CLASS2.
4936 See the x86-64 PS ABI for details. */
4938 static enum x86_64_reg_class
4939 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4941 /* Rule #1: If both classes are equal, this is the resulting class. */
4942 if (class1 == class2)
4945 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4947 if (class1 == X86_64_NO_CLASS)
4949 if (class2 == X86_64_NO_CLASS)
4952 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4953 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4954 return X86_64_MEMORY_CLASS;
4956 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* Special case: an SImode integer half merged with an SFmode SSE
   half still fits a 32-bit integer slot.  */
4957 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4958 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4959 return X86_64_INTEGERSI_CLASS;
4960 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4961 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4962 return X86_64_INTEGER_CLASS;
4964 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4966 if (class1 == X86_64_X87_CLASS
4967 || class1 == X86_64_X87UP_CLASS
4968 || class1 == X86_64_COMPLEX_X87_CLASS
4969 || class2 == X86_64_X87_CLASS
4970 || class2 == X86_64_X87UP_CLASS
4971 || class2 == X86_64_COMPLEX_X87_CLASS)
4972 return X86_64_MEMORY_CLASS;
4974 /* Rule #6: Otherwise class SSE is used. */
4975 return X86_64_SSE_CLASS;
4978 /* Classify the argument of type TYPE and mode MODE.
4979 CLASSES will be filled by the register class used to pass each word
4980 of the operand. The number of words is returned. In case the parameter
4981 should be passed in memory, 0 is returned. As a special case for zero
4982 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4984 BIT_OFFSET is used internally for handling records and specifies offset
4985 of the offset in bits modulo 256 to avoid overflow cases.
4987 See the x86-64 PS ABI for details.
4991 classify_argument (enum machine_mode mode, const_tree type,
4992 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4994 HOST_WIDE_INT bytes =
4995 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4996 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4998 /* Variable sized entities are always passed/returned in memory. */
5002 if (mode != VOIDmode
5003 && targetm.calls.must_pass_in_stack (mode, type))
5006 if (type && AGGREGATE_TYPE_P (type))
5010 enum x86_64_reg_class subclasses[MAX_CLASSES];
5012 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5016 for (i = 0; i < words; i++)
5017 classes[i] = X86_64_NO_CLASS;
5019 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5020 signalize memory class, so handle it as special case. */
5023 classes[0] = X86_64_NO_CLASS;
5027 /* Classify each field of record and merge classes. */
5028 switch (TREE_CODE (type))
5031 /* And now merge the fields of structure. */
5032 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5034 if (TREE_CODE (field) == FIELD_DECL)
5038 if (TREE_TYPE (field) == error_mark_node)
5041 /* Bitfields are always classified as integer. Handle them
5042 early, since later code would consider them to be
5043 misaligned integers. */
5044 if (DECL_BIT_FIELD (field))
5046 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5047 i < ((int_bit_position (field) + (bit_offset % 64))
5048 + tree_low_cst (DECL_SIZE (field), 0)
5051 merge_classes (X86_64_INTEGER_CLASS,
5058 type = TREE_TYPE (field);
5060 /* Flexible array member is ignored. */
5061 if (TYPE_MODE (type) == BLKmode
5062 && TREE_CODE (type) == ARRAY_TYPE
5063 && TYPE_SIZE (type) == NULL_TREE
5064 && TYPE_DOMAIN (type) != NULL_TREE
5065 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5070 if (!warned && warn_psabi)
5073 inform (input_location,
5074 "The ABI of passing struct with"
5075 " a flexible array member has"
5076 " changed in GCC 4.4");
5080 num = classify_argument (TYPE_MODE (type), type,
5082 (int_bit_position (field)
5083 + bit_offset) % 256);
5086 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5087 for (i = 0; i < num && (i + pos) < words; i++)
5089 merge_classes (subclasses[i], classes[i + pos]);
5096 /* Arrays are handled as small records. */
5099 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5100 TREE_TYPE (type), subclasses, bit_offset);
5104 /* The partial classes are now full classes. */
5105 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5106 subclasses[0] = X86_64_SSE_CLASS;
5107 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5108 && !((bit_offset % 64) == 0 && bytes == 4))
5109 subclasses[0] = X86_64_INTEGER_CLASS;
5111 for (i = 0; i < words; i++)
5112 classes[i] = subclasses[i % num];
5117 case QUAL_UNION_TYPE:
5118 /* Unions are similar to RECORD_TYPE but offset is always 0.
5120 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5122 if (TREE_CODE (field) == FIELD_DECL)
5126 if (TREE_TYPE (field) == error_mark_node)
5129 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5130 TREE_TYPE (field), subclasses,
5134 for (i = 0; i < num; i++)
5135 classes[i] = merge_classes (subclasses[i], classes[i]);
5146 /* When size > 16 bytes, if the first one isn't
5147 X86_64_SSE_CLASS or any other ones aren't
5148 X86_64_SSEUP_CLASS, everything should be passed in
5150 if (classes[0] != X86_64_SSE_CLASS)
5153 for (i = 1; i < words; i++)
5154 if (classes[i] != X86_64_SSEUP_CLASS)
5158 /* Final merger cleanup. */
5159 for (i = 0; i < words; i++)
5161 /* If one class is MEMORY, everything should be passed in
5163 if (classes[i] == X86_64_MEMORY_CLASS)
5166 /* The X86_64_SSEUP_CLASS should be always preceded by
5167 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5168 if (classes[i] == X86_64_SSEUP_CLASS
5169 && classes[i - 1] != X86_64_SSE_CLASS
5170 && classes[i - 1] != X86_64_SSEUP_CLASS)
5172 /* The first one should never be X86_64_SSEUP_CLASS. */
5173 gcc_assert (i != 0);
5174 classes[i] = X86_64_SSE_CLASS;
5177 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5178 everything should be passed in memory. */
5179 if (classes[i] == X86_64_X87UP_CLASS
5180 && (classes[i - 1] != X86_64_X87_CLASS))
5184 /* The first one should never be X86_64_X87UP_CLASS. */
5185 gcc_assert (i != 0);
5186 if (!warned && warn_psabi)
5189 inform (input_location,
5190 "The ABI of passing union with long double"
5191 " has changed in GCC 4.4");
5199 /* Compute alignment needed. We align all types to natural boundaries with
5200 exception of XFmode that is aligned to 64bits. */
5201 if (mode != VOIDmode && mode != BLKmode)
5203 int mode_alignment = GET_MODE_BITSIZE (mode);
5206 mode_alignment = 128;
5207 else if (mode == XCmode)
5208 mode_alignment = 256;
5209 if (COMPLEX_MODE_P (mode))
5210 mode_alignment /= 2;
5211 /* Misaligned fields are always returned in memory. */
5212 if (bit_offset % mode_alignment)
5216 /* for V1xx modes, just use the base mode */
5217 if (VECTOR_MODE_P (mode) && mode != V1DImode
5218 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5219 mode = GET_MODE_INNER (mode);
5221 /* Classification of atomic types. */
5226 classes[0] = X86_64_SSE_CLASS;
5229 classes[0] = X86_64_SSE_CLASS;
5230 classes[1] = X86_64_SSEUP_CLASS;
5240 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5244 classes[0] = X86_64_INTEGERSI_CLASS;
5247 else if (size <= 64)
5249 classes[0] = X86_64_INTEGER_CLASS;
5252 else if (size <= 64+32)
5254 classes[0] = X86_64_INTEGER_CLASS;
5255 classes[1] = X86_64_INTEGERSI_CLASS;
5258 else if (size <= 64+64)
5260 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5268 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5272 /* OImode shouldn't be used directly. */
5277 if (!(bit_offset % 64))
5278 classes[0] = X86_64_SSESF_CLASS;
5280 classes[0] = X86_64_SSE_CLASS;
5283 classes[0] = X86_64_SSEDF_CLASS;
5286 classes[0] = X86_64_X87_CLASS;
5287 classes[1] = X86_64_X87UP_CLASS;
5290 classes[0] = X86_64_SSE_CLASS;
5291 classes[1] = X86_64_SSEUP_CLASS;
5294 classes[0] = X86_64_SSE_CLASS;
5295 if (!(bit_offset % 64))
5301 if (!warned && warn_psabi)
5304 inform (input_location,
5305 "The ABI of passing structure with complex float"
5306 " member has changed in GCC 4.4");
5308 classes[1] = X86_64_SSESF_CLASS;
5312 classes[0] = X86_64_SSEDF_CLASS;
5313 classes[1] = X86_64_SSEDF_CLASS;
5316 classes[0] = X86_64_COMPLEX_X87_CLASS;
5319 /* This modes is larger than 16 bytes. */
5327 classes[0] = X86_64_SSE_CLASS;
5328 classes[1] = X86_64_SSEUP_CLASS;
5329 classes[2] = X86_64_SSEUP_CLASS;
5330 classes[3] = X86_64_SSEUP_CLASS;
5338 classes[0] = X86_64_SSE_CLASS;
5339 classes[1] = X86_64_SSEUP_CLASS;
5346 classes[0] = X86_64_SSE_CLASS;
5352 gcc_assert (VECTOR_MODE_P (mode));
5357 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5359 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5360 classes[0] = X86_64_INTEGERSI_CLASS;
5362 classes[0] = X86_64_INTEGER_CLASS;
5363 classes[1] = X86_64_INTEGER_CLASS;
5364 return 1 + (bytes > 8);
5368 /* Examine the argument and return set number of register required in each
5369 class. Return 0 iff parameter should be passed in memory. */
/* NOTE(review): elided listing -- embedded line numbers jump, so the
   register-counting statements inside each switch case (and the final
   return) sit on missing lines; confirm against the full source.  */
5371 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5372 int *int_nregs, int *sse_nregs)
5374 enum x86_64_reg_class regclass[MAX_CLASSES];
5375 int n = classify_argument (mode, type, regclass, 0);
/* Walk the computed classes; each case presumably bumps *int_nregs or
   *sse_nregs on the elided lines.  */
5381 for (n--; n >= 0; n--)
5382 switch (regclass[n])
5384 case X86_64_INTEGER_CLASS:
5385 case X86_64_INTEGERSI_CLASS:
5388 case X86_64_SSE_CLASS:
5389 case X86_64_SSESF_CLASS:
5390 case X86_64_SSEDF_CLASS:
5393 case X86_64_NO_CLASS:
5394 case X86_64_SSEUP_CLASS:
5396 case X86_64_X87_CLASS:
5397 case X86_64_X87UP_CLASS:
/* COMPLEX_X87 occupies both x87 slots and is only representable for
   return values, hence the in_return distinction.  */
5401 case X86_64_COMPLEX_X87_CLASS:
5402 return in_return ? 2 : 0;
5403 case X86_64_MEMORY_CLASS:
5409 /* Construct container for the argument used by GCC interface. See
5410 FUNCTION_ARG for the detailed description. */
/* NOTE(review): elided listing -- braces, several case bodies and the
   final return are on missing lines; comments below cover only what is
   visible.  Builds the rtx (REG or PARALLEL) describing where an
   argument/return value of MODE/TYPE lives, or signals memory passing.  */
5413 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5414 const_tree type, int in_return, int nintregs, int nsseregs,
5415 const int *intreg, int sse_regno)
5417 /* The following variables hold the static issued_error state. */
5418 static bool issued_sse_arg_error;
5419 static bool issued_sse_ret_error;
5420 static bool issued_x87_ret_error;
5422 enum machine_mode tmpmode;
5424 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5425 enum x86_64_reg_class regclass[MAX_CLASSES];
5429 int needed_sseregs, needed_intregs;
5430 rtx exp[MAX_CLASSES];
5433 n = classify_argument (mode, type, regclass, 0);
5436 if (!examine_argument (mode, type, in_return, &needed_intregs,
/* Fall back to memory when the remaining register budget is exceeded.  */
5439 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5442 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5443 some less clueful developer tries to use floating-point anyway. */
5444 if (needed_sseregs && !TARGET_SSE)
5448 if (!issued_sse_ret_error)
5450 error ("SSE register return with SSE disabled")
5451 issued_sse_ret_error = true;
5454 else if (!issued_sse_arg_error)
5456 error ("SSE register argument with SSE disabled");
5457 issued_sse_arg_error = true;
5462 /* Likewise, error if the ABI requires us to return values in the
5463 x87 registers and the user specified -mno-80387. */
5464 if (!TARGET_80387 && in_return)
5465 for (i = 0; i < n; i++)
5466 if (regclass[i] == X86_64_X87_CLASS
5467 || regclass[i] == X86_64_X87UP_CLASS
5468 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5470 if (!issued_x87_ret_error)
5472 error ("x87 register return with x87 disabled");
5473 issued_x87_ret_error = true;
5478 /* First construct simple cases. Avoid SCmode, since we want to use
5479 single register to pass this type. */
5480 if (n == 1 && mode != SCmode)
5481 switch (regclass[0])
5483 case X86_64_INTEGER_CLASS:
5484 case X86_64_INTEGERSI_CLASS:
5485 return gen_rtx_REG (mode, intreg[0]);
5486 case X86_64_SSE_CLASS:
5487 case X86_64_SSESF_CLASS:
5488 case X86_64_SSEDF_CLASS:
5489 if (mode != BLKmode)
5490 return gen_reg_or_parallel (mode, orig_mode,
5491 SSE_REGNO (sse_regno));
5493 case X86_64_X87_CLASS:
5494 case X86_64_COMPLEX_X87_CLASS:
5495 return gen_rtx_REG (mode, FIRST_STACK_REG);
5496 case X86_64_NO_CLASS:
5497 /* Zero sized array, struct or class. */
/* Two-word SSE+SSEUP pair lives in a single XMM register.  */
5502 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5503 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5504 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
/* Four-word SSE+3xSSEUP is a full 256-bit YMM value (condition head
   is on an elided line).  */
5506 && regclass[0] == X86_64_SSE_CLASS
5507 && regclass[1] == X86_64_SSEUP_CLASS
5508 && regclass[2] == X86_64_SSEUP_CLASS
5509 && regclass[3] == X86_64_SSEUP_CLASS
5511 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5514 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5515 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* Two consecutive integer registers can carry CDI/TI/TF directly.  */
5516 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5517 && regclass[1] == X86_64_INTEGER_CLASS
5518 && (mode == CDImode || mode == TImode || mode == TFmode)
5519 && intreg[0] + 1 == intreg[1])
5520 return gen_rtx_REG (mode, intreg[0]);
5522 /* Otherwise figure out the entries of the PARALLEL. */
5523 for (i = 0; i < n; i++)
5527 switch (regclass[i])
5529 case X86_64_NO_CLASS:
5531 case X86_64_INTEGER_CLASS:
5532 case X86_64_INTEGERSI_CLASS:
5533 /* Merge TImodes on aligned occasions here too. */
5534 if (i * 8 + 8 > bytes)
5535 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5536 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5540 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5541 if (tmpmode == BLKmode)
5543 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5544 gen_rtx_REG (tmpmode, *intreg),
5548 case X86_64_SSESF_CLASS:
5549 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5550 gen_rtx_REG (SFmode,
5551 SSE_REGNO (sse_regno)),
5555 case X86_64_SSEDF_CLASS:
5556 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5557 gen_rtx_REG (DFmode,
5558 SSE_REGNO (sse_regno)),
5562 case X86_64_SSE_CLASS:
5570 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5580 && regclass[1] == X86_64_SSEUP_CLASS
5581 && regclass[2] == X86_64_SSEUP_CLASS
5582 && regclass[3] == X86_64_SSEUP_CLASS);
5589 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5590 gen_rtx_REG (tmpmode,
5591 SSE_REGNO (sse_regno)),
5600 /* Empty aligned struct, union or class. */
/* Assemble the collected EXPR_LISTs into the final PARALLEL.  */
5604 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5605 for (i = 0; i < nexps; i++)
5606 XVECEXP (ret, 0, i) = exp [i];
5610 /* Update the data in CUM to advance over an argument of mode MODE
5611 and data type TYPE. (TYPE is null for libcalls where that information
5612 may not be available.) */
/* 32-bit worker: bumps the integer, SSE or MMX cursor in CUM depending
   on the argument's mode.  NOTE(review): elided listing -- the switch on
   MODE that selects between the three branches is on missing lines.  */
5615 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5616 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-register path: consume WORDS general registers.  */
5632 cum->words += words;
5633 cum->nregs -= words;
5634 cum->regno += words;
5636 if (cum->nregs <= 0)
5644 /* OImode shouldn't be used directly. */
5648 if (cum->float_in_sse < 2)
5651 if (cum->float_in_sse < 1)
/* SSE path: scalars/vectors that are not aggregates take one XMM reg.  */
5668 if (!type || !AGGREGATE_TYPE_P (type))
5670 cum->sse_words += words;
5671 cum->sse_nregs -= 1;
5672 cum->sse_regno += 1;
5673 if (cum->sse_nregs <= 0)
/* MMX path: same pattern for 8-byte vector modes.  */
5686 if (!type || !AGGREGATE_TYPE_P (type))
5688 cum->mmx_words += words;
5689 cum->mmx_nregs -= 1;
5690 cum->mmx_regno += 1;
5691 if (cum->mmx_nregs <= 0)
/* 64-bit SysV worker for function_arg_advance: charge the argument
   against CUM's integer/SSE register budgets, or against the stack word
   count when it does not fit (elided listing; braces on missing lines).  */
5702 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5703 tree type, HOST_WIDE_INT words, int named)
5705 int int_nregs, sse_nregs;
5707 /* Unnamed 256bit vector mode parameters are passed on stack. */
5708 if (!named && VALID_AVX256_REG_MODE (mode))
/* examine_argument returning 0 means "passed in memory".  */
5711 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5712 cum->words += words;
5713 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5715 cum->nregs -= int_nregs;
5716 cum->sse_nregs -= sse_nregs;
5717 cum->regno += int_nregs;
5718 cum->sse_regno += sse_nregs;
5721 cum->words += words;
/* MS x64 worker for function_arg_advance: every slot is one word;
   anything not sized 1/2/4/8 bytes must have been passed indirectly
   already (elided listing; register bookkeeping lines are missing).  */
5725 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5726 HOST_WIDE_INT words)
5728 /* Otherwise, this should be passed indirect. */
5729 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5731 cum->words += words;
/* FUNCTION_ARG_ADVANCE dispatcher: compute the argument's size in bytes
   and words, then delegate to the MS-x64, SysV-x64 or 32-bit worker
   according to TARGET_64BIT and the call ABI recorded in CUM.  */
5740 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5741 tree type, int named)
5743 HOST_WIDE_INT bytes, words;
5745 if (mode == BLKmode)
5746 bytes = int_size_in_bytes (type);
5748 bytes = GET_MODE_SIZE (mode);
5749 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Re-derive the natural mode for the type (guard is on an elided line).  */
5752 mode = type_natural_mode (type, NULL);
5754 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5755 function_arg_advance_ms_64 (cum, bytes, words);
5756 else if (TARGET_64BIT)
5757 function_arg_advance_64 (cum, mode, type, words, named);
5759 function_arg_advance_32 (cum, mode, type, bytes, words);
5762 /* Define where to put the arguments to a function.
5763 Value is zero to push the argument on the stack,
5764 or a hard register in which to store the argument.
5766 MODE is the argument's machine mode.
5767 TYPE is the data type of the argument (as a tree).
5768 This is null for libcalls where that information may
5770 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5771 the preceding args and about the function being called.
5772 NAMED is nonzero if this argument is a named parameter
5773 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): elided listing -- the switch on MODE that routes into
   the integer/SSE/MMX branches, and several warning strings, are on
   missing lines.  */
5776 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5777 enum machine_mode orig_mode, tree type,
5778 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* One-shot warning flags, shared across all calls.  */
5780 static bool warnedsse, warnedmmx;
5782 /* Avoid the AL settings for the Unix64 ABI. */
5783 if (mode == VOIDmode)
/* Integer registers: only if the whole argument fits in what is left.  */
5799 if (words <= cum->nregs)
5801 int regno = cum->regno;
5803 /* Fastcall allocates the first two DWORD (SImode) or
5804 smaller arguments to ECX and EDX if it isn't an
5810 || (type && AGGREGATE_TYPE_P (type)))
5813 /* ECX not EAX is the first allocated register. */
5814 if (regno == AX_REG)
5817 return gen_rtx_REG (mode, regno);
5822 if (cum->float_in_sse < 2)
5825 if (cum->float_in_sse < 1)
5829 /* In 32bit, we pass TImode in xmm registers. */
5836 if (!type || !AGGREGATE_TYPE_P (type))
5838 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5841 warning (0, "SSE vector argument without SSE enabled "
5845 return gen_reg_or_parallel (mode, orig_mode,
5846 cum->sse_regno + FIRST_SSE_REG)
5851 /* OImode shouldn't be used directly. */
5860 if (!type || !AGGREGATE_TYPE_P (type))
5863 return gen_reg_or_parallel (mode, orig_mode,
5864 cum->sse_regno + FIRST_SSE_REG);
/* MMX-register branch for 8-byte vector modes.  */
5873 if (!type || !AGGREGATE_TYPE_P (type))
5875 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5878 warning (0, "MMX vector argument without MMX enabled "
5882 return gen_reg_or_parallel (mode, orig_mode,
5883 cum->mmx_regno + FIRST_MMX_REG);
/* 64-bit SysV worker for FUNCTION_ARG.  VOIDmode is the end-of-args
   marker: return the hidden %al value (count of SSE regs used) for
   varargs calls.  Otherwise delegate to construct_container.
   NOTE(review): elided listing -- parts of the ternary and the
   construct_container argument list are on missing lines.  */
5892 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5893 enum machine_mode orig_mode, tree type, int named)
5895 /* Handle a hidden AL argument containing number of registers
5896 for varargs x86-64 functions. */
5897 if (mode == VOIDmode)
5898 return GEN_INT (cum->maybe_vaarg
5899 ? (cum->sse_nregs < 0
5900 ? (cum->call_abi == ix86_abi
5902 : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5903 : X64_SSE_REGPARM_MAX))
5918 /* Unnamed 256bit vector mode parameters are passed on stack. */
5924 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5926 &x86_64_int_parameter_registers [cum->regno],
/* MS x64 worker for FUNCTION_ARG: arguments occupy fixed slots, integer
   slots mirror SSE slots for unnamed floats.  NOTE(review): elided
   listing -- braces and some guard conditions are on missing lines.  */
5931 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5932 enum machine_mode orig_mode, int named,
5933 HOST_WIDE_INT bytes)
5937 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5938 We use value of -2 to specify that current function call is MSABI. */
5939 if (mode == VOIDmode)
5940 return GEN_INT (-2);
5942 /* If we've run out of registers, it goes on the stack. */
5943 if (cum->nregs == 0)
5946 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5948 /* Only floating point modes are passed in anything but integer regs. */
5949 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5952 regno = cum->regno + FIRST_SSE_REG;
5957 /* Unnamed floating parameters are passed in both the
5958 SSE and integer registers. */
5959 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5960 t2 = gen_rtx_REG (mode, regno);
5961 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5962 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5963 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5966 /* Handle aggregated types passed in register. */
5967 if (orig_mode == BLKmode)
5969 if (bytes > 0 && bytes <= 8)
5970 mode = (bytes > 4 ? DImode : SImode);
5971 if (mode == BLKmode)
5975 return gen_reg_or_parallel (mode, orig_mode, regno);
/* FUNCTION_ARG dispatcher: size the argument, normalise vector types to
   their natural mode, then route to the MS-x64 / SysV-x64 / 32-bit
   worker depending on TARGET_64BIT and the call ABI in CUM.  */
5979 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5980 tree type, int named)
5982 enum machine_mode mode = omode;
5983 HOST_WIDE_INT bytes, words;
5985 if (mode == BLKmode)
5986 bytes = int_size_in_bytes (type);
5988 bytes = GET_MODE_SIZE (mode);
5989 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5991 /* To simplify the code below, represent vector types with a vector mode
5992 even if MMX/SSE are not active. */
5993 if (type && TREE_CODE (type) == VECTOR_TYPE)
5994 mode = type_natural_mode (type, cum);
5996 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5997 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5998 else if (TARGET_64BIT)
5999 return function_arg_64 (cum, mode, omode, type, named);
6001 return function_arg_32 (cum, mode, omode, type, bytes, words);
6004 /* A C expression that indicates when an argument must be passed by
6005 reference. If nonzero for an argument, a copy of that argument is
6006 made in memory and a pointer to the argument is passed instead of
6007 the argument itself. The pointer is passed in whatever way is
6008 appropriate for passing a pointer to that type. */
/* NOTE(review): elided listing -- the switch head on msize and the
   return statements are on missing lines.  */
6011 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6012 enum machine_mode mode ATTRIBUTE_UNUSED,
6013 const_tree type, bool named ATTRIBUTE_UNUSED)
6015 /* See Windows x64 Software Convention. */
6016 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6018 int msize = (int) GET_MODE_SIZE (mode);
6021 /* Arrays are passed by reference. */
6022 if (TREE_CODE (type) == ARRAY_TYPE)
6025 if (AGGREGATE_TYPE_P (type))
6027 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6028 are passed by reference. */
6029 msize = int_size_in_bytes (type);
6033 /* __m128 is passed by reference. */
6035 case 1: case 2: case 4: case 8:
/* SysV path: variable-sized types (int_size_in_bytes == -1) go by
   reference.  */
6041 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6047 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* Recursive predicate used by ix86_function_arg_boundary.
   NOTE(review): elided listing -- the RECORD/UNION case labels,
   ARRAY_TYPE case, and return statements are on missing lines.  */
6050 contains_aligned_value_p (tree type)
6052 enum machine_mode mode = TYPE_MODE (type);
/* SSE register modes (when SSE is enabled) qualify unless the user gave
   an explicit smaller alignment.  */
6053 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6057 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6059 if (TYPE_ALIGN (type) < 128)
6062 if (AGGREGATE_TYPE_P (type))
6064 /* Walk the aggregates recursively. */
6065 switch (TREE_CODE (type))
6069 case QUAL_UNION_TYPE:
6073 /* Walk all the structure fields. */
6074 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6076 if (TREE_CODE (field) == FIELD_DECL
6077 && contains_aligned_value_p (TREE_TYPE (field)))
6084 /* Just for use if some languages passes arrays by value. */
6085 if (contains_aligned_value_p (TREE_TYPE (type)))
6096 /* Gives the alignment boundary, in bits, of an argument with the
6097 specified mode and type. */
/* NOTE(review): elided listing -- the if/else that chooses between the
   TYPE and MODE alignment sources, and the final return, sit on missing
   lines.  Result is clamped to [PARM_BOUNDARY, BIGGEST_ALIGNMENT].  */
6100 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6105 /* Since canonical type is used for call, we convert it to
6106 canonical type if needed. */
6107 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6108 type = TYPE_CANONICAL (type);
6109 align = TYPE_ALIGN (type);
6112 align = GET_MODE_ALIGNMENT (mode);
6113 if (align < PARM_BOUNDARY)
6114 align = PARM_BOUNDARY;
6115 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6116 natural boundaries. */
6117 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6119 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6120 make an exception for SSE modes since these require 128bit
6123 The handling here differs from field_alignment. ICC aligns MMX
6124 arguments to 4 byte boundaries, while structure fields are aligned
6125 to 8 byte boundaries. */
6128 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6129 align = PARM_BOUNDARY;
6133 if (!contains_aligned_value_p (type))
6134 align = PARM_BOUNDARY;
6137 if (align > BIGGEST_ALIGNMENT)
6138 align = BIGGEST_ALIGNMENT;
6142 /* Return true if N is a possible register number of function value. */
/* NOTE(review): elided listing -- the switch head, the AX/other cases
   and the default return are on missing lines; only the x87 and
   (presumably) MMX/SSE cases are visible.  */
6145 ix86_function_value_regno_p (int regno)
6152 case FIRST_FLOAT_REG:
6153 /* TODO: The function should depend on current function ABI but
6154 builtins.c would need updating then. Therefore we use the
/* MS x64 never returns values in x87 registers.  */
6156 if (TARGET_64BIT && ix86_abi == MS_ABI)
6158 return TARGET_FLOAT_RETURNS_IN_80387;
6164 if (TARGET_MACHO || TARGET_64BIT)
6172 /* Define how to find the value returned by a function.
6173 VALTYPE is the data type of the value (as a tree).
6174 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6175 otherwise, FUNC is 0. */
/* 32-bit return-register selection: pick %mm0 / %xmm0 / %ymm0 / %st(0)
   / %eax by mode, with an SSE-math override for SF/DF (elided listing;
   the "regno = AX_REG" fallback line is missing).  */
6178 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6179 const_tree fntype, const_tree fn)
6183 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6184 we normally prevent this case when mmx is not available. However
6185 some ABIs may require the result to be returned like DImode. */
6186 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6187 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6189 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6190 we prevent this case when sse is not available. However some ABIs
6191 may require the result to be returned like integer TImode. */
6192 else if (mode == TImode
6193 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6194 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6196 /* 32-byte vector modes in %ymm0. */
6197 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6198 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6200 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6201 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6202 regno = FIRST_FLOAT_REG;
6204 /* Most things go in %eax. */
6207 /* Override FP return register with %xmm0 for local functions when
6208 SSE math is enabled or for functions with sseregparm attribute. */
6209 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6211 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6212 if ((sse_level >= 1 && mode == SFmode)
6213 || (sse_level == 2 && mode == DFmode))
6214 regno = FIRST_SSE_REG;
6217 /* OImode shouldn't be used directly. */
6218 gcc_assert (mode != OImode);
6220 return gen_rtx_REG (orig_mode, regno);
/* 64-bit SysV return-value placement: libcalls (no type node) get a
   register chosen by mode, typed values go through construct_container.
   NOTE(review): elided listing -- the mode tests guarding the three
   early returns are on missing lines.  */
6224 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6229 /* Handle libcalls, which don't provide a type node. */
6230 if (valtype == NULL)
6242 return gen_rtx_REG (mode, FIRST_SSE_REG);
6245 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6249 return gen_rtx_REG (mode, AX_REG);
6253 ret = construct_container (mode, orig_mode, valtype, 1,
6254 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6255 x86_64_int_return_registers, 0);
6257 /* For zero sized structures, construct_container returns NULL, but we
6258 need to keep rest of compiler happy by returning meaningful value. */
6260 ret = gen_rtx_REG (orig_mode, AX_REG);
/* MS x64 return-value placement: default %rax; 16-byte non-complex
   integer/vector values and SF/DF scalars go in %xmm0 (elided listing;
   the case labels of the size switch are on missing lines).  */
6266 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6268 unsigned int regno = AX_REG;
6272 switch (GET_MODE_SIZE (mode))
6275 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6276 && !COMPLEX_MODE_P (mode))
6277 regno = FIRST_SSE_REG;
6281 if (mode == SFmode || mode == DFmode)
6282 regno = FIRST_SSE_REG;
6288 return gen_rtx_REG (orig_mode, regno);
/* Common return-value dispatcher: derive the function type from either
   a decl or a type, then route to the MS-x64 / SysV-x64 / 32-bit
   worker.  */
6292 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6293 enum machine_mode orig_mode, enum machine_mode mode)
6295 const_tree fn, fntype;
6298 if (fntype_or_decl && DECL_P (fntype_or_decl))
6299 fn = fntype_or_decl;
6300 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6302 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6303 return function_value_ms_64 (orig_mode, mode);
6304 else if (TARGET_64BIT)
6305 return function_value_64 (orig_mode, mode, valtype);
6307 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: normalise VALTYPE to its natural mode and
   delegate to ix86_function_value_1.  */
6311 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6312 bool outgoing ATTRIBUTE_UNUSED)
6314 enum machine_mode mode, orig_mode;
6316 orig_mode = TYPE_MODE (valtype);
6317 mode = type_natural_mode (valtype, NULL);
6318 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* LIBCALL_VALUE hook: libcalls carry no type node, so pass NULLs and let
   ix86_function_value_1 pick the register purely from MODE.  */
6322 ix86_libcall_value (enum machine_mode mode)
6324 return ix86_function_value_1 (NULL, NULL, mode, mode);
6327 /* Return true iff type is returned in memory. */
/* 32-bit memory-return predicate (1 = in memory, 0 = in registers).
   NOTE(review): elided listing -- the size tests selecting the MMX /
   SSE / AVX branches and the scalar-mode cases are on missing lines.  */
6329 static int ATTRIBUTE_UNUSED
6330 return_in_memory_32 (const_tree type, enum machine_mode mode)
6334 if (mode == BLKmode)
6337 size = int_size_in_bytes (type);
/* Small aggregates may come back in registers on MS-style targets.  */
6339 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6342 if (VECTOR_MODE_P (mode) || mode == TImode)
6344 /* User-created vectors small enough to fit in EAX. */
6348 /* MMX/3dNow values are returned in MM0,
6349 except when it doesn't exits. */
6351 return (TARGET_MMX ? 0 : 1);
6353 /* SSE values are returned in XMM0, except when it doesn't exist. */
6355 return (TARGET_SSE ? 0 : 1);
6357 /* AVX values are returned in YMM0, except when it doesn't exist. */
6359 return TARGET_AVX ? 0 : 1;
6368 /* OImode shouldn't be used directly. */
6369 gcc_assert (mode != OImode);
/* 64-bit SysV memory-return predicate: in memory exactly when
   examine_argument says no register classification exists.  */
6374 static int ATTRIBUTE_UNUSED
6375 return_in_memory_64 (const_tree type, enum machine_mode mode)
6377 int needed_intregs, needed_sseregs;
6378 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* MS x64 memory-return predicate: 16-byte non-complex int/vector values
   come back in %xmm0; everything else must be exactly 1/2/4/8 bytes to
   stay in a register.  */
6381 static int ATTRIBUTE_UNUSED
6382 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6384 HOST_WIDE_INT size = int_size_in_bytes (type);
6386 /* __m128 is returned in xmm0. */
6387 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6388 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6391 /* Otherwise, the size must be exactly in [1248]. */
6392 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: subtargets may override entirely via
   SUBTARGET_RETURN_IN_MEMORY; otherwise dispatch on ABI and bitness
   (elided listing; the TARGET_64BIT test is on a missing line).  */
6396 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6398 #ifdef SUBTARGET_RETURN_IN_MEMORY
6399 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6401 const enum machine_mode mode = type_natural_mode (type, NULL);
6405 if (ix86_function_type_abi (fntype) == MS_ABI)
6406 return return_in_memory_ms_64 (type, mode);
6408 return return_in_memory_64 (type, mode);
6411 return return_in_memory_32 (type, mode);
6415 /* Return false iff TYPE is returned in memory. This version is used
6416 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6417 but differs notably in that when MMX is available, 8-byte vectors
6418 are returned in memory, rather than in MMX registers. */
/* NOTE(review): elided listing -- the return statements for each branch
   are on missing lines; only the mode/size tests are visible.  */
6421 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6424 enum machine_mode mode = type_natural_mode (type, NULL);
6427 return return_in_memory_64 (type, mode);
6429 if (mode == BLKmode)
6432 size = int_size_in_bytes (type);
6434 if (VECTOR_MODE_P (mode))
6436 /* Return in memory only if MMX registers *are* available. This
6437 seems backwards, but it is consistent with the existing
6444 else if (mode == TImode)
6446 else if (mode == XFmode)
6452 /* When returning SSE vector types, we have a choice of either
6453 (1) being abi incompatible with a -march switch, or
6454 (2) generating an error.
6455 Given no good solution, I think the safest thing is one warning.
6456 The user won't be able to use -Werror, but....
6458 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6459 called in response to actually generating a caller or callee that
6460 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6461 via aggregate_value_p for general type probing from tree-ssa. */
/* NOTE(review): elided listing -- the warning-string tails and the final
   return value are on missing lines.  Emits one-shot diagnostics for
   vector returns without SSE/MMX on 32-bit targets.  */
6464 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6466 static bool warnedsse, warnedmmx;
6468 if (!TARGET_64BIT && type)
6470 /* Look at the return type of the function, not the function type. */
6471 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6473 if (!TARGET_SSE && !warnedsse)
6476 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6479 warning (0, "SSE vector return without SSE enabled "
6484 if (!TARGET_MMX && !warnedmmx)
6486 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6489 warning (0, "MMX vector return without MMX enabled "
6499 /* Create the va_list data type. */
6501 /* Returns the calling convention specific va_list date type.
6502 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
/* For 32-bit and MS x64 this is a plain char*; for SysV x64 it builds
   the four-field __va_list_tag record (gp_offset, fp_offset,
   overflow_arg_area, reg_save_area) wrapped in a one-element array.  */
6505 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6507 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6509 /* For i386 we use plain pointer to argument area. */
6510 if (!TARGET_64BIT || abi == MS_ABI)
6511 return build_pointer_type (char_type_node);
6513 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6514 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6516 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6517 unsigned_type_node);
6518 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6519 unsigned_type_node);
6520 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6522 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so the middle end can optimise va_arg.  */
6525 va_list_gpr_counter_field = f_gpr;
6526 va_list_fpr_counter_field = f_fpr;
6528 DECL_FIELD_CONTEXT (f_gpr) = record;
6529 DECL_FIELD_CONTEXT (f_fpr) = record;
6530 DECL_FIELD_CONTEXT (f_ovf) = record;
6531 DECL_FIELD_CONTEXT (f_sav) = record;
6533 TREE_CHAIN (record) = type_decl;
6534 TYPE_NAME (record) = type_decl;
6535 TYPE_FIELDS (record) = f_gpr;
6536 TREE_CHAIN (f_gpr) = f_fpr;
6537 TREE_CHAIN (f_fpr) = f_ovf;
6538 TREE_CHAIN (f_ovf) = f_sav;
6540 layout_type (record);
6542 /* The correct type is an array type of one element. */
6543 return build_array_type (record, build_index_type (size_zero_node));
6546 /* Setup the builtin va_list data type and for 64-bit the additional
6547 calling convention specific va_list data types. */
/* NOTE(review): elided listing -- the TARGET_64BIT guard, the else
   branches assigning `t` from `ret`, and the final return are on missing
   lines.  Populates sysv_va_list_type_node / ms_va_list_type_node with
   RECORD_TYPE variants for whichever ABI is not the default.  */
6550 ix86_build_builtin_va_list (void)
6552 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6554 /* Initialize abi specific va_list builtin types. */
6558 if (ix86_abi == MS_ABI)
6560 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6561 if (TREE_CODE (t) != RECORD_TYPE)
6562 t = build_variant_type_copy (t);
6563 sysv_va_list_type_node = t;
6568 if (TREE_CODE (t) != RECORD_TYPE)
6569 t = build_variant_type_copy (t);
6570 sysv_va_list_type_node = t;
6572 if (ix86_abi != MS_ABI)
6574 t = ix86_build_builtin_va_list_abi (MS_ABI);
6575 if (TREE_CODE (t) != RECORD_TYPE)
6576 t = build_variant_type_copy (t);
6577 ms_va_list_type_node = t;
6582 if (TREE_CODE (t) != RECORD_TYPE)
6583 t = build_variant_type_copy (t);
6584 ms_va_list_type_node = t;
6591 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* SysV x64: spill the unnamed-argument registers into the register save
   area -- GPRs with plain moves, XMM regs via the sse_prologue_save
   computed-jump template.  NOTE(review): elided listing -- local
   declarations, loop increments and several operand sub-expressions are
   on missing lines.  */
6594 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6603 int regparm = ix86_regparm;
6605 if (cum->call_abi != ix86_abi)
6606 regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6608 /* GPR size of varargs save area. */
6609 if (cfun->va_list_gpr_size)
6610 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6612 ix86_varargs_gpr_size = 0;
6614 /* FPR size of varargs save area. We don't need it if we don't pass
6615 anything in SSE registers. */
6616 if (cum->sse_nregs && cfun->va_list_fpr_size)
6617 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6619 ix86_varargs_fpr_size = 0;
/* Nothing to save at all -- both areas empty.  */
6621 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6624 save_area = frame_pointer_rtx;
6625 set = get_varargs_alias_set ();
/* Store each remaining unnamed GPR into its save-area slot.  */
6627 for (i = cum->regno;
6629 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6632 mem = gen_rtx_MEM (Pmode,
6633 plus_constant (save_area, i * UNITS_PER_WORD));
6634 MEM_NOTRAP_P (mem) = 1;
6635 set_mem_alias_set (mem, set);
6636 emit_move_insn (mem, gen_rtx_REG (Pmode,
6637 x86_64_int_parameter_registers[i]));
6640 if (ix86_varargs_fpr_size)
6642 /* Now emit code to save SSE registers. The AX parameter contains number
6643 of SSE parameter registers used to call this function. We use
6644 sse_prologue_save insn template that produces computed jump across
6645 SSE saves. We need some preparation work to get this working. */
6647 label = gen_label_rtx ();
6648 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6650 /* Compute address to jump to :
6651 label - eax*4 + nnamed_sse_arguments*4 Or
6652 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6653 tmp_reg = gen_reg_rtx (Pmode);
6654 nsse_reg = gen_reg_rtx (Pmode);
6655 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6656 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6657 gen_rtx_MULT (Pmode, nsse_reg,
6660 /* vmovaps is one byte longer than movaps. */
6662 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6663 gen_rtx_PLUS (Pmode, tmp_reg,
6669 gen_rtx_CONST (DImode,
6670 gen_rtx_PLUS (DImode,
6672 GEN_INT (cum->sse_regno
6673 * (TARGET_AVX ? 5 : 4)))));
6675 emit_move_insn (nsse_reg, label_ref);
6676 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6678 /* Compute address of memory block we save into. We always use pointer
6679 pointing 127 bytes after first byte to store - this is needed to keep
6680 instruction size limited by 4 bytes (5 bytes for AVX) with one
6681 byte displacement. */
6682 tmp_reg = gen_reg_rtx (Pmode);
6683 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6684 plus_constant (save_area,
6685 ix86_varargs_gpr_size + 127)));
6686 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6687 MEM_NOTRAP_P (mem) = 1;
6688 set_mem_alias_set (mem, set);
6689 set_mem_align (mem, BITS_PER_WORD);
6691 /* And finally do the dirty job! */
6692 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6693 GEN_INT (cum->sse_regno), label));
/* MS x64 varargs setup: the caller allocates home slots for all four
   register parameters, so simply spill each remaining unnamed register
   parameter into its slot above the incoming args pointer (elided
   listing; local declarations and braces are on missing lines).  */
6698 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6700 alias_set_type set = get_varargs_alias_set ();
6703 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6707 mem = gen_rtx_MEM (Pmode,
6708 plus_constant (virtual_incoming_args_rtx,
6709 i * UNITS_PER_WORD));
6710 MEM_NOTRAP_P (mem) = 1;
6711 set_mem_alias_set (mem, set);
6713 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6714 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: copy CUM, advance past the last
   named argument for stdarg functions, then dispatch to the MS-x64 or
   SysV-x64 worker.  NOTE(review): elided listing -- the TARGET_64BIT
   early-return and the `next_cum = *cum` copy presumably sit on the
   missing lines; confirm against the full source.  */
6719 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6720 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6723 CUMULATIVE_ARGS next_cum;
6726 /* This argument doesn't appear to be used anymore. Which is good,
6727 because the old code here didn't suppress rtl generation. */
6728 gcc_assert (!no_rtl);
6733 fntype = TREE_TYPE (current_function_decl);
6735 /* For varargs, we do not want to skip the dummy va_dcl argument.
6736 For stdargs, we do want to skip the last named argument. */
6738 if (stdarg_p (fntype))
6739 function_arg_advance (&next_cum, mode, type, 1);
6741 if (cum->call_abi == MS_ABI)
6742 setup_incoming_varargs_ms_64 (&next_cum);
6744 setup_incoming_varargs_64 (&next_cum);
6747 /* Checks if TYPE is of kind va_list char *, i.e. whether the canonical
   va_list for TYPE is the simple pointer flavor (MS ABI) rather than the
   SysV array-of-struct flavor.  */
6750 is_va_list_char_pointer (tree type)
6754 /* For 32-bit it is always true.  */
6757 canonic = ix86_canonical_va_list_type (type);
6758 return (canonic == ms_va_list_type_node
6759 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6762 /* Implement va_start.  Initialize the four fields of the SysV x86-64
   va_list (gp_offset, fp_offset, overflow_arg_area, reg_save_area) from
   the current function's incoming-argument bookkeeping.  NOTE(review):
   listing is elided; some braces/returns are missing from the excerpt.  */
6765 ix86_va_start (tree valist, rtx nextarg)
6767 HOST_WIDE_INT words, n_gpr, n_fpr;
6768 tree f_gpr, f_fpr, f_ovf, f_sav;
6769 tree gpr, fpr, ovf, sav, t;
6772 /* Only 64bit target needs something special.  */
6773 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6775 std_expand_builtin_va_start (valist, nextarg);
/* Pick apart the four FIELD_DECLs of the SysV va_list record.  */
6779 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6780 f_fpr = TREE_CHAIN (f_gpr);
6781 f_ovf = TREE_CHAIN (f_fpr);
6782 f_sav = TREE_CHAIN (f_ovf);
6784 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6785 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6786 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6787 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6788 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6790 /* Count number of gp and fp argument registers used.  */
6791 words = crtl->args.info.words;
6792 n_gpr = crtl->args.info.regno;
6793 n_fpr = crtl->args.info.sse_regno;
6795 if (cfun->va_list_gpr_size)
/* gp_offset = n_gpr * 8 (8 bytes per integer register slot).  */
6797 type = TREE_TYPE (gpr);
6798 t = build2 (MODIFY_EXPR, type,
6799 gpr, build_int_cst (type, n_gpr * 8));
6800 TREE_SIDE_EFFECTS (t) = 1;
6801 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6804 if (TARGET_SSE && cfun->va_list_fpr_size)
/* fp_offset = past the GP area: 16 bytes per SSE slot.  */
6806 type = TREE_TYPE (fpr);
6807 t = build2 (MODIFY_EXPR, type, fpr,
6808 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6809 TREE_SIDE_EFFECTS (t) = 1;
6810 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6813 /* Find the overflow area.  */
6814 type = TREE_TYPE (ovf);
6815 t = make_tree (type, crtl->args.internal_arg_pointer);
6817 t = build2 (POINTER_PLUS_EXPR, type, t,
6818 size_int (words * UNITS_PER_WORD));
6819 t = build2 (MODIFY_EXPR, type, ovf, t);
6820 TREE_SIDE_EFFECTS (t) = 1;
6821 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6823 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6825 /* Find the register save area.
6826 The prologue of the function saves it right above the stack frame.  */
6827 type = TREE_TYPE (sav);
6828 t = make_tree (type, frame_pointer_rtx);
6829 if (!ix86_varargs_gpr_size)
6830 t = build2 (POINTER_PLUS_EXPR, type, t,
6831 size_int (-8 * X86_64_REGPARM_MAX))
6832 t = build2 (MODIFY_EXPR, type, sav, t);
6833 TREE_SIDE_EFFECTS (t) = 1;
6834 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6838 /* Implement va_arg: gimplify the fetch of the next vararg of TYPE from
   VALIST.  For SysV x86-64 this emits the two-path sequence from the
   psABI: try the register save area first (guarded by gp_offset /
   fp_offset bounds checks), else fall through to the overflow (stack)
   area.  NOTE(review): this listing is elided; many original lines
   (declarations, braces, else branches) are missing from the excerpt.  */
6841 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6844 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6845 tree f_gpr, f_fpr, f_ovf, f_sav;
6846 tree gpr, fpr, ovf, sav, t;
6848 tree lab_false, lab_over = NULL_TREE;
6853 enum machine_mode nat_mode;
6856 /* Only 64bit target needs something special.  */
6857 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6858 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* Locate the four va_list fields (gp_offset, fp_offset, overflow, save).  */
6860 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6861 f_fpr = TREE_CHAIN (f_gpr);
6862 f_ovf = TREE_CHAIN (f_fpr);
6863 f_sav = TREE_CHAIN (f_ovf);
6865 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6866 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6867 valist = build_va_arg_indirect_ref (valist);
6868 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6869 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6870 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6872 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
/* When passed by reference, fetch a pointer and dereference at the end.  */
6874 type = build_pointer_type (type);
6875 size = int_size_in_bytes (type);
6876 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6878 nat_mode = type_natural_mode (type, NULL);
6887 /* Unnamed 256bit vector mode parameters are passed on stack.  */
6888 if (ix86_cfun_abi () == SYSV_ABI)
6895 container = construct_container (nat_mode, TYPE_MODE (type),
6896 type, 0, X86_64_REGPARM_MAX,
6897 X86_64_SSE_REGPARM_MAX, intreg,
6902 /* Pull the value out of the saved registers.  */
6904 addr = create_tmp_var (ptr_type_node, "addr");
6905 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6909 int needed_intregs, needed_sseregs;
6911 tree int_addr, sse_addr;
6913 lab_false = create_artificial_label ();
6914 lab_over = create_artificial_label ();
6916 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6918 need_temp = (!REG_P (container)
6919 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6920 || TYPE_ALIGN (type) > 128));
6922 /* In case we are passing structure, verify that it is consecutive block
6923 on the register save area.  If not we need to do moves.  */
6924 if (!need_temp && !REG_P (container))
6926 /* Verify that all registers are strictly consecutive  */
6927 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6931 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6933 rtx slot = XVECEXP (container, 0, i);
6934 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6935 || INTVAL (XEXP (slot, 1)) != i * 16)
6943 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6945 rtx slot = XVECEXP (container, 0, i);
6946 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6947 || INTVAL (XEXP (slot, 1)) != i * 8)
6959 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6960 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6961 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6962 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6965 /* First ensure that we fit completely in registers.  */
/* gp_offset >= limit means the GP area is exhausted: go to lab_false.  */
6968 t = build_int_cst (TREE_TYPE (gpr),
6969 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6970 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6971 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6972 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6973 gimplify_and_add (t, pre_p);
/* Likewise for fp_offset against the end of the SSE save area.  */
6977 t = build_int_cst (TREE_TYPE (fpr),
6978 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6979 + X86_64_REGPARM_MAX * 8);
6980 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6981 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6982 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6983 gimplify_and_add (t, pre_p);
6986 /* Compute index to start of area used for integer regs.  */
6989 /* int_addr = gpr + sav; */
6990 t = fold_convert (sizetype, gpr);
6991 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6992 gimplify_assign (int_addr, t, pre_p);
6996 /* sse_addr = fpr + sav; */
6997 t = fold_convert (sizetype, fpr);
6998 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6999 gimplify_assign (sse_addr, t, pre_p);
/* Value is scattered in the save area: copy it piecewise into a temp.  */
7004 tree temp = create_tmp_var (type, "va_arg_tmp");
7007 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7008 gimplify_assign (addr, t, pre_p);
7010 for (i = 0; i < XVECLEN (container, 0); i++)
7012 rtx slot = XVECEXP (container, 0, i);
7013 rtx reg = XEXP (slot, 0);
7014 enum machine_mode mode = GET_MODE (reg);
7015 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
7016 tree addr_type = build_pointer_type (piece_type);
7017 tree daddr_type = build_pointer_type_for_mode (piece_type,
7021 tree dest_addr, dest;
7023 if (SSE_REGNO_P (REGNO (reg)))
7025 src_addr = sse_addr;
7026 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7030 src_addr = int_addr;
7031 src_offset = REGNO (reg) * 8;
7033 src_addr = fold_convert (addr_type, src_addr);
7034 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7035 size_int (src_offset));
7036 src = build_va_arg_indirect_ref (src_addr);
7038 dest_addr = fold_convert (daddr_type, addr);
7039 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7040 size_int (INTVAL (XEXP (slot, 1))));
7041 dest = build_va_arg_indirect_ref (dest_addr);
7043 gimplify_assign (dest, src, pre_p);
/* Bump the offsets past the registers we just consumed.  */
7049 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7050 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7051 gimplify_assign (gpr, t, pre_p);
7056 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7057 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7058 gimplify_assign (fpr, t, pre_p);
7061 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7063 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7066 /* ... otherwise out of the overflow area.  */
7068 /* When we align parameter on stack for caller, if the parameter
7069 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7070 aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
7071 here with caller.  */
7072 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7073 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7074 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7076 /* Care for on-stack alignment if needed.  */
7077 if (arg_boundary <= 64
7078 || integer_zerop (TYPE_SIZE (type)))
/* Round ovf up to the argument's boundary: ovf = (ovf+align-1) & -align.  */
7082 HOST_WIDE_INT align = arg_boundary / 8;
7083 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7084 size_int (align - 1));
7085 t = fold_convert (sizetype, t);
7086 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7088 t = fold_convert (TREE_TYPE (ovf), t);
7090 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7091 gimplify_assign (addr, t, pre_p);
7093 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7094 size_int (rsize * UNITS_PER_WORD));
7095 gimplify_assign (unshare_expr (ovf), t, pre_p);
7098 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7100 ptrtype = build_pointer_type (type);
7101 addr = fold_convert (ptrtype, addr);
/* For pass-by-reference arguments, one extra dereference.  */
7104 addr = build_va_arg_indirect_ref (addr);
7105 return build_va_arg_indirect_ref (addr);
7108 /* Return nonzero if OPNUM's MEM should be matched
7109 in movabs* patterns.  Rejects volatile memory unless volatile_ok,
   after stripping any SUBREG wrappers around the operand.  */
7112 ix86_check_movabs (rtx insn, int opnum)
7116 set = PATTERN (insn);
7117 if (GET_CODE (set) == PARALLEL)
7118 set = XVECEXP (set, 0, 0);
7119 gcc_assert (GET_CODE (set) == SET);
7120 mem = XEXP (set, opnum);
7121 while (GET_CODE (mem) == SUBREG)
7122 mem = SUBREG_REG (mem);
7123 gcc_assert (MEM_P (mem));
7124 return (volatile_ok || !MEM_VOLATILE_P (mem));
7127 /* Initialize the table of extra 80387 mathematical constants
   (log10(2), ln(2), log2(e), log2(10), pi) used by the fldlg2/fldln2/
   fldl2e/fldl2t/fldpi instructions.  Idempotent via the
   ext_80387_constants_init flag.  */
7130 init_ext_80387_constants (void)
7132 static const char * cst[5] =
7134 "0.3010299956639811952256464283594894482", /* 0: fldlg2  */
7135 "0.6931471805599453094286904741849753009", /* 1: fldln2  */
7136 "1.4426950408889634073876517827983434472", /* 2: fldl2e  */
7137 "3.3219280948873623478083405569094566090", /* 3: fldl2t  */
7138 "3.1415926535897932385128089594061862044", /* 4: fldpi   */
7142 for (i = 0; i < 5; i++)
7144 real_from_string (&ext_80387_constants_table[i], cst[i]);
7145 /* Ensure each constant is rounded to XFmode precision.  */
7146 real_convert (&ext_80387_constants_table[i],
7147 XFmode, &ext_80387_constants_table[i]);
7150 ext_80387_constants_init = 1;
7153 /* Return true if the constant is something that can be loaded with
7154 a special instruction.  The returned code identifies which one
   (fldz, fld1, one of the extended-constant loads, or a
   fldz/fld1 + fchs split); the exact encoding of the return values is
   elided from this excerpt.  */
7157 standard_80387_constant_p (rtx x)
7159 enum machine_mode mode = GET_MODE (x);
7163 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7166 if (x == CONST0_RTX (mode))
7168 if (x == CONST1_RTX (mode))
7171 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7173 /* For XFmode constants, try to find a special 80387 instruction when
7174 optimizing for size or on those CPUs that benefit from them.  */
7176 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7180 if (! ext_80387_constants_init)
7181 init_ext_80387_constants ();
7183 for (i = 0; i < 5; i++)
7184 if (real_identical (&r, &ext_80387_constants_table[i]))
7188 /* Load of the constant -0.0 or -1.0 will be split as
7189 fldz;fchs or fld1;fchs sequence.  */
7190 if (real_isnegzero (&r))
7192 if (real_identical (&r, &dconstm1))
7198 /* Return the opcode of the special instruction to be used to load
   the constant X, keyed off standard_80387_constant_p (X).
   NOTE(review): the switch cases are elided from this excerpt.  */
7202 standard_80387_constant_opcode (rtx x)
7204 switch (standard_80387_constant_p (x))
7228 /* Return the CONST_DOUBLE representing the 80387 constant that is
7229 loaded by the specified special instruction.  The argument IDX
7230 matches the return value from standard_80387_constant_p.  */
7233 standard_80387_constant_rtx (int idx)
7237 if (! ext_80387_constants_init)
7238 init_ext_80387_constants ();
/* The IDX -> table-index mapping is elided from this excerpt.  */
7254 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7258 /* Return 1 if mode is a valid mode for sse.
   NOTE(review): the body (mode switch) is elided from this excerpt.  */
7260 standard_sse_mode_p (enum machine_mode mode)
7277 /* Return 1 if X is all 0s.  For all 1s, return 2 if X is in 128bit
7278 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7279 modes and AVX is enabled.  Negative values flag an all-ones constant
   whose required ISA extension is not enabled.  */
7282 standard_sse_constant_p (rtx x)
7284 enum machine_mode mode = GET_MODE (x);
7286 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7288 if (vector_all_ones_operand (x, mode))
7290 if (standard_sse_mode_p (mode))
7291 return TARGET_SSE2 ? 2 : -2;
7292 else if (VALID_AVX256_REG_MODE (mode))
7293 return TARGET_AVX ? 3 : -3;
7299 /* Return the opcode of the special instruction to be used to load
   the SSE/AVX constant X: an xor idiom for all-zeros, a pcmpeqd idiom
   for all-ones, selected by the insn's attribute mode.  */
7303 standard_sse_constant_opcode (rtx insn, rtx x)
7305 switch (standard_sse_constant_p (x))
/* All-zeros: register-clearing xor of the destination with itself.  */
7308 switch (get_attr_mode (insn))
7311 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7313 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7315 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7317 return "vxorps\t%x0, %x0, %x0";
7319 return "vxorpd\t%x0, %x0, %x0";
7321 return "vpxor\t%x0, %x0, %x0";
/* All-ones: compare-equal of the destination with itself.  */
7327 switch (get_attr_mode (insn))
7332 return "vpcmpeqd\t%0, %0, %0";
7338 return "pcmpeqd\t%0, %0";
7343 /* Returns 1 if OP contains a symbol reference (SYMBOL_REF or
   LABEL_REF), searching recursively through the RTX's operands and
   vectors.  */
7346 symbolic_reference_mentioned_p (rtx op)
7351 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7354 fmt = GET_RTX_FORMAT (GET_CODE (op));
7355 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
/* 'E' operands are rtx vectors; recurse into every element.  */
7361 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7362 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7366 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7373 /* Return 1 if it is appropriate to emit `ret' instructions in the
7374 body of a function.  Do this only if the epilogue is simple, needing a
7375 couple of insns.  Prior to reloading, we can't tell how many registers
7376 must be saved, so return 0 then.  Return 0 if there is no frame
7377 marker to de-allocate.  */
7380 ix86_can_use_return_insn_p (void)
7382 struct ix86_frame frame;
7384 if (! reload_completed || frame_pointer_needed)
7387 /* Don't allow more than 32 pop, since that's all we can do
7388 with one instruction.  */
7389 if (crtl->args.pops_args
7390 && crtl->args.size >= 32768)
/* OK only when nothing is allocated and no registers need restoring.  */
7393 ix86_compute_frame_layout (&frame);
7394 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7397 /* Value should be nonzero if functions must have frame pointers.
7398 Zero means the frame pointer need not be set up (and parms may
7399 be accessed via the stack pointer) in functions that seem suitable.  */
7402 ix86_frame_pointer_required (void)
7404 /* If we accessed previous frames, then the generated code expects
7405 to be able to access the saved ebp value in our frame.  */
7406 if (cfun->machine->accesses_prev_frame)
7409 /* Several x86 os'es need a frame pointer for other reasons,
7410 usually pertaining to setjmp.  */
7411 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7414 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7415 the frame pointer by default.  Turn it back on now if we've not
7416 got a leaf function.  */
7417 if (TARGET_OMIT_LEAF_FRAME_POINTER
7418 && (!current_function_is_leaf
7419 || ix86_current_function_calls_tls_descriptor))
7428 /* Record that the current function accesses previous call frames,
   e.g. via __builtin_frame_address; forces frame-pointer generation
   (see ix86_frame_pointer_required).  */
7431 ix86_setup_frame_addresses (void)
7433 cfun->machine->accesses_prev_frame = 1;
/* Use hidden-visibility comdat sections for PC-thunks when the
   assembler/OS supports them; otherwise fall back to plain labels.  */
7436 #ifndef USE_HIDDEN_LINKONCE
7437 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7438 # define USE_HIDDEN_LINKONCE 1
7440 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of register numbers whose get-PC thunk was referenced and so
   must be emitted at end of file (see ix86_file_end).  */
7444 static int pic_labels_used;
7446 /* Fills in the label name that should be used for a pc thunk for
7447 the given register.  NAME must have room for at least 32 bytes.
   Only meaningful for 32-bit code (64-bit uses RIP-relative access).  */
7450 get_pc_thunk_name (char name[32], unsigned int regno)
7452 gcc_assert (!TARGET_64BIT);
7454 if (USE_HIDDEN_LINKONCE)
7455 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7457 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7461 /* This function generates code for -fpic that loads %ebx with
7462 the return address of the caller and then returns.  Emitted once at
   end of file for every register recorded in pic_labels_used; each
   thunk is "mov (%esp), %reg; ret".  NOTE(review): listing is elided;
   some Mach-O/ELF conditionals are missing from this excerpt.  */
7465 ix86_file_end (void)
7470 for (regno = 0; regno < 8; ++regno)
7474 if (! ((pic_labels_used >> regno) & 1))
7477 get_pc_thunk_name (name, regno);
/* Darwin: weak definition in a coalesced text section.  */
7482 switch_to_section (darwin_sections[text_coal_section]);
7483 fputs ("\t.weak_definition\t", asm_out_file);
7484 assemble_name (asm_out_file, name);
7485 fputs ("\n\t.private_extern\t", asm_out_file);
7486 assemble_name (asm_out_file, name);
7487 fputs ("\n", asm_out_file);
7488 ASM_OUTPUT_LABEL (asm_out_file, name);
7492 if (USE_HIDDEN_LINKONCE)
/* ELF: one-only hidden function so duplicate thunks merge at link.  */
7496 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7498 TREE_PUBLIC (decl) = 1;
7499 TREE_STATIC (decl) = 1;
7500 DECL_ONE_ONLY (decl) = 1;
7502 (*targetm.asm_out.unique_section) (decl, 0);
7503 switch_to_section (get_named_section (decl, NULL, 0));
7505 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7506 fputs ("\t.hidden\t", asm_out_file);
7507 assemble_name (asm_out_file, name);
7508 fputc ('\n', asm_out_file);
7509 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7513 switch_to_section (text_section);
7514 ASM_OUTPUT_LABEL (asm_out_file, name);
/* The thunk body: load the return address (at the top of the stack,
   i.e. the PC of the call site's next insn) into the register.  */
7517 xops[0] = gen_rtx_REG (Pmode, regno);
7518 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7519 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7520 output_asm_insn ("ret", xops);
7523 if (NEED_INDICATE_EXEC_STACK)
7524 file_end_indicate_exec_stack ();
7527 /* Emit code for the SET_GOT patterns: materialize the GOT base address
   in DEST.  Three strategies: VxWorks RTP lookup, the classic
   call/pop sequence, or a call to a per-register get-PC thunk
   (preferred with deep branch prediction to keep the return-stack
   predictor balanced).  NOTE(review): listing is elided; some branch
   structure is missing from this excerpt.  */
7530 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7536 if (TARGET_VXWORKS_RTP && flag_pic)
7538 /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
7539 xops[2] = gen_rtx_MEM (Pmode,
7540 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7541 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7543 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7544 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7545 an unadorned address.  */
7546 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7547 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7548 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7552 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7554 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
/* call/pop idiom: call the next insn, then pop the pushed return
   address (the PC) into DEST.  */
7556 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7559 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7561 output_asm_insn ("call\t%a2", xops);
7564 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7565 is what will be referenced by the Mach-O PIC subsystem.  */
7567 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7570 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7571 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7574 output_asm_insn ("pop%z0\t%0", xops);
/* Thunk variant: call __i686.get_pc_thunk.REG and remember to emit
   the thunk at end of file.  */
7579 get_pc_thunk_name (name, REGNO (dest));
7580 pic_labels_used |= 1 << REGNO (dest);
7582 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7583 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7584 output_asm_insn ("call\t%X2", xops);
7585 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7586 is what will be referenced by the Mach-O PIC subsystem.  */
7589 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7591 targetm.asm_out.internal_label (asm_out_file, "L",
7592 CODE_LABEL_NUMBER (label));
/* Finally add the GOT symbol's offset to turn the PC into the GOT base.  */
7599 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7600 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7602 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7607 /* Generate a "push" pattern for input ARG: a SET storing ARG through a
   pre-decremented stack pointer.  NOTE(review): the function's
   definition line is elided from this excerpt — presumably
   "gen_push (rtx arg)"; confirm against the full source.  */
7612 return gen_rtx_SET (VOIDmode,
7614 gen_rtx_PRE_DEC (Pmode,
7615 stack_pointer_rtx)),
7619 /* Return >= 0 if there is an unused call-clobbered register available
7620 for the entire function.  Only possible in leaf functions that do no
   profiling and no TLS-descriptor calls; scans regs 2..0, skipping the
   DRAP register.  Returns INVALID_REGNUM when none qualifies.  */
7623 ix86_select_alt_pic_regnum (void)
7625 if (current_function_is_leaf && !crtl->profile
7626 && !ix86_current_function_calls_tls_descriptor)
7629 /* Can't use the same register for both PIC and DRAP.  */
7631 drap = REGNO (crtl->drap_reg);
7634 for (i = 2; i >= 0; --i)
7635 if (i != drap && !df_regs_ever_live_p (i))
7639 return INVALID_REGNUM;
7642 /* Return 1 if we need to save REGNO in the prologue.  MAYBE_EH_RETURN
   additionally forces saving of the EH return-data registers when the
   function calls __builtin_eh_return.  */
7644 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved when it is live, unless an unused
   call-clobbered register can serve as an alternate PIC base.  */
7646 if (pic_offset_table_rtx
7647 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7648 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7650 || crtl->calls_eh_return
7651 || crtl->uses_const_pool))
7653 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7658 if (crtl->calls_eh_return && maybe_eh_return)
7663 unsigned test = EH_RETURN_DATA_REGNO (i);
7664 if (test == INVALID_REGNUM)
/* The DRAP register is saved explicitly by the prologue.  */
7672 && regno == REGNO (crtl->drap_reg))
/* Default rule: live, call-saved, not fixed, and not the hard frame
   pointer when it already serves as the frame pointer.  */
7675 return (df_regs_ever_live_p (regno)
7676 && !call_used_regs[regno]
7677 && !fixed_regs[regno]
7678 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7681 /* Return number of saved general purpose registers.  */
7684 ix86_nsaved_regs (void)
7689 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7690 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7695 /* Return number of saved SSE registers.  Only the MS ABI has
   callee-saved SSE registers; otherwise the count is zero.  */
7698 ix86_nsaved_sseregs (void)
7703 if (ix86_cfun_abi () != MS_ABI)
7705 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7706 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7711 /* Given FROM and TO register numbers, say whether this elimination is
7712 allowed.  If stack alignment is needed, we can only replace argument
7713 pointer with hard frame pointer, or replace frame pointer with stack
7714 pointer.  Otherwise, frame pointer elimination is automatically
7715 handled and all other eliminations are valid.  */
7718 ix86_can_eliminate (int from, int to)
7720 if (stack_realign_fp)
7721 return ((from == ARG_POINTER_REGNUM
7722 && to == HARD_FRAME_POINTER_REGNUM)
7723 || (from == FRAME_POINTER_REGNUM
7724 && to == STACK_POINTER_REGNUM));
7726 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7729 /* Return the offset between two registers, one to be eliminated, and the other
7730 its replacement, at the start of a routine.  Offsets come from the
   frame layout computed by ix86_compute_frame_layout.  */
7733 ix86_initial_elimination_offset (int from, int to)
7735 struct ix86_frame frame;
7736 ix86_compute_frame_layout (&frame);
7738 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7739 return frame.hard_frame_pointer_offset;
7740 else if (from == FRAME_POINTER_REGNUM
7741 && to == HARD_FRAME_POINTER_REGNUM)
7742 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases eliminate to the stack pointer.  */
7745 gcc_assert (to == STACK_POINTER_REGNUM);
7747 if (from == ARG_POINTER_REGNUM)
7748 return frame.stack_pointer_offset;
7750 gcc_assert (from == FRAME_POINTER_REGNUM);
7751 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7755 /* In a dynamically-aligned function, we can't know the offset from
7756 stack pointer to frame pointer, so we must ensure that setjmp
7757 eliminates fp against the hard fp (%ebp) rather than trying to
7758 index from %esp up to the top of the frame across a gap that is
7759 of unknown (at compile-time) size.  */
7761 ix86_builtin_setjmp_frame_value (void)
7763 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7766 /* Fill structure ix86_frame about frame of currently computed function.
   Lays out, from the return address downward: saved GP registers,
   padding + saved SSE registers (MS ABI), the varargs save area, local
   variables, outgoing arguments, and final alignment padding; also
   decides between push/pop and mov-based register saves and applies
   the red zone.  NOTE(review): listing is elided; some lines (braces,
   debug-dump guard) are missing from this excerpt.  */
7769 ix86_compute_frame_layout (struct ix86_frame *frame)
7771 HOST_WIDE_INT total_size;
7772 unsigned int stack_alignment_needed;
7773 HOST_WIDE_INT offset;
7774 unsigned int preferred_alignment;
7775 HOST_WIDE_INT size = get_frame_size ();
7777 frame->nregs = ix86_nsaved_regs ();
7778 frame->nsseregs = ix86_nsaved_sseregs ();
7781 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7782 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7784 /* MS ABI seems to require stack alignment to be always 16 except for
   function
7786 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7788 preferred_alignment = 16;
7789 stack_alignment_needed = 16;
7790 crtl->preferred_stack_boundary = 128;
7791 crtl->stack_alignment_needed = 128;
7794 gcc_assert (!size || stack_alignment_needed);
7795 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7796 gcc_assert (preferred_alignment <= stack_alignment_needed);
7798 /* During reload iteration the amount of registers saved can change.
7799 Recompute the value as needed.  Do not recompute when amount of registers
7800 didn't change as reload does multiple calls to the function and does not
7801 expect the decision to change within single iteration.  */
7802 if (!optimize_function_for_size_p (cfun)
7803 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7805 int count = frame->nregs;
7807 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7808 /* The fast prologue uses move instead of push to save registers.  This
7809 is significantly longer, but also executes faster as modern hardware
7810 can execute the moves in parallel, but can't do that for push/pop.
7812 Be careful about choosing what prologue to emit:  When function takes
7813 many instructions to execute we may use slow version as well as in
7814 case function is known to be outside hot spot (this is known with
7815 feedback only).  Weight the size of function by number of registers
7816 to save as it is cheap to use one or two push instructions but very
7817 slow to use many of them.  */
7819 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7820 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7821 || (flag_branch_probabilities
7822 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7823 cfun->machine->use_fast_prologue_epilogue = false;
7825 cfun->machine->use_fast_prologue_epilogue
7826 = !expensive_function_p (count);
7828 if (TARGET_PROLOGUE_USING_MOVE
7829 && cfun->machine->use_fast_prologue_epilogue)
7830 frame->save_regs_using_mov = true;
7832 frame->save_regs_using_mov = false;
7835 /* Skip return address and saved base pointer.  */
7836 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7838 frame->hard_frame_pointer_offset = offset;
7840 /* Set offset to aligned because the realigned frame starts from
7842 if (stack_realign_fp)
7843 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7845 /* Register save area */
7846 offset += frame->nregs * UNITS_PER_WORD;
7848 /* Align SSE reg save area.  */
7849 if (frame->nsseregs)
7850 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7852 frame->padding0 = 0;
7854 /* SSE register save area.  */
7855 offset += frame->padding0 + frame->nsseregs * 16;
/* Varargs save area (GP + FP register dump, see ix86_va_start).  */
7858 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7859 offset += frame->va_arg_size;
7861 /* Align start of frame for local function.  */
7862 frame->padding1 = ((offset + stack_alignment_needed - 1)
7863 & -stack_alignment_needed) - offset;
7865 offset += frame->padding1;
7867 /* Frame pointer points here.  */
7868 frame->frame_pointer_offset = offset;
7872 /* Add outgoing arguments area.  Can be skipped if we eliminated
7873 all the function calls as dead code.
7874 Skipping is however impossible when function calls alloca.  Alloca
7875 expander assumes that last crtl->outgoing_args_size
7876 of stack frame are unused.  */
7877 if (ACCUMULATE_OUTGOING_ARGS
7878 && (!current_function_is_leaf || cfun->calls_alloca
7879 || ix86_current_function_calls_tls_descriptor))
7881 offset += crtl->outgoing_args_size;
7882 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7885 frame->outgoing_arguments_size = 0;
7887 /* Align stack boundary.  Only needed if we're calling another function
7889 if (!current_function_is_leaf || cfun->calls_alloca
7890 || ix86_current_function_calls_tls_descriptor)
7891 frame->padding2 = ((offset + preferred_alignment - 1)
7892 & -preferred_alignment) - offset;
7894 frame->padding2 = 0;
7896 offset += frame->padding2;
7898 /* We've reached end of stack frame.  */
7899 frame->stack_pointer_offset = offset;
7901 /* Size prologue needs to allocate.  */
7902 frame->to_allocate =
7903 (size + frame->padding1 + frame->padding2
7904 + frame->outgoing_arguments_size + frame->va_arg_size);
/* mov-based saves make no sense for trivial frames, and on 64-bit the
   displacement must fit in 32 bits.  */
7906 if ((!frame->to_allocate && frame->nregs <= 1)
7907 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7908 frame->save_regs_using_mov = false;
/* Red zone: leaf functions with an unchanging %rsp may use the 128
   bytes below the stack pointer instead of allocating.  */
7910 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7911 && current_function_is_leaf
7912 && !ix86_current_function_calls_tls_descriptor)
7914 frame->red_zone_size = frame->to_allocate;
7915 if (frame->save_regs_using_mov)
7916 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7917 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7918 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7921 frame->red_zone_size = 0;
7922 frame->to_allocate -= frame->red_zone_size;
7923 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (guard #if elided in this excerpt).  */
7925 fprintf (stderr, "\n");
7926 fprintf (stderr, "size: %ld\n", (long)size);
7927 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7928 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7929 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7930 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7931 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7932 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7933 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7934 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7935 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7936 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7937 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7938 (long)frame->hard_frame_pointer_offset);
7939 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7940 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7941 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7942 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7946 /* Emit code to save registers in the prologue using push insns,
   iterating from the highest register number downward.  */
7949 ix86_emit_save_regs (void)
7954 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7955 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7957 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7958 RTX_FRAME_RELATED_P (insn) = 1;
7962 /* Emit code to save registers using MOV insns.  First register
7963 is saved at POINTER + OFFSET; subsequent ones at word increments.  */
7965 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7970 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7971 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7973 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7975 gen_rtx_REG (Pmode, regno));
7976 RTX_FRAME_RELATED_P (insn) = 1;
7977 offset += UNITS_PER_WORD;
7981 /* Emit code to save SSE registers using MOV insns.  First register
7982 is saved at POINTER + OFFSET; each slot is 16 bytes, 128-bit aligned.  */
7984 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7990 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7991 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7993 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
7994 set_mem_align (mem, 128);
7995 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
7996 RTX_FRAME_RELATED_P (insn) = 1;
8001 /* Expand prologue or epilogue stack adjustment (DEST = SRC + OFFSET).
8002    The pattern exists to put a dependency on all ebp-based memory accesses.
8003    STYLE should be negative if instructions should be marked as frame related,
8004    zero if %r11 register is live and cannot be freely used and positive
   otherwise (presumably: %r11 may be clobbered as a scratch — confirm
   against the full definition; this view is elided).  */
8008 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
8013 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
8014 else if (x86_64_immediate_operand (offset, DImode))
8015 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
/* Offset does not fit a sign-extended 32-bit immediate: materialize it
   in %r11 first, then add via the _rex64_2 pattern.  */
8019 /* r11 is used by indirect sibcall return as well, set before the
8020    epilogue and used after the epilogue.  ATM indirect sibcall
8021    shouldn't be used together with huge frame sizes in one
8022    function because of the frame_size check in sibcall.c.  */
8024 r11 = gen_rtx_REG (DImode, R11_REG);
8025 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8027 RTX_FRAME_RELATED_P (insn) = 1;
8028 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8032 RTX_FRAME_RELATED_P (insn) = 1;
8035 /* Find an available register to be used as dynamic realign argument
8036    pointer register.  Such a register will be written in prologue and
8037    used in begin of body, so it must not be
8038    1. parameter passing register.
8040    We reuse static-chain register if it is available.  Otherwise, we
8041    use DI for i386 and R13 for x86-64.  We chose R13 since it has
   (rationale continues in the full source; elided here.)
8044    Return: the regno of chosen register.  */
8047 find_drap_reg (void)
8049 tree decl = cfun->decl;
/* 64-bit branch (presumably guarded by TARGET_64BIT in the elided code).  */
8053 /* Use R13 for nested function or function need static chain.
8054    Since function with tail call may use any caller-saved
8055    registers in epilogue, DRAP must not use caller-saved
8056    register in such case.  */
8057 if ((decl_function_context (decl)
8058 && !DECL_NO_STATIC_CHAIN (decl))
8059 || crtl->tail_call_emit)
/* 32-bit branch.  */
8066 /* Use DI for nested function or function need static chain.
8067    Since function with tail call may use any caller-saved
8068    registers in epilogue, DRAP must not use caller-saved
8069    register in such case.  */
8070 if ((decl_function_context (decl)
8071 && !DECL_NO_STATIC_CHAIN (decl))
8072 || crtl->tail_call_emit)
8075 /* Reuse static chain register if it isn't used for parameter
   passing (regparm <= 2 and not fastcall).  */
8077 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8078 && !lookup_attribute ("fastcall",
8079 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8086 /* Update incoming stack boundary and estimated stack alignment.
   Combines the command-line setting, the force_align_arg_pointer
   attribute, crtl->parm_stack_boundary, and the special case of main,
   into ix86_incoming_stack_boundary.  */
8089 ix86_update_stack_boundary (void)
8091 /* Prefer the one specified at command line. */
8092 ix86_incoming_stack_boundary
8093 = (ix86_user_incoming_stack_boundary
8094 ? ix86_user_incoming_stack_boundary
8095 : ix86_default_incoming_stack_boundary);
8097 /* Incoming stack alignment can be changed on individual functions
8098    via force_align_arg_pointer attribute.  We use the smallest
8099    incoming stack boundary.  */
8100 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8101 && lookup_attribute (ix86_force_align_arg_pointer_string,
8102 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8103 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8105 /* The incoming stack frame has to be aligned at least at
8106    parm_stack_boundary.  */
8107 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8108 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8110 /* Stack at entrance of main is aligned by runtime.  We use the
8111    smallest incoming stack boundary.  */
8112 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8113 && DECL_NAME (current_function_decl)
8114 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8115 && DECL_FILE_SCOPE_P (current_function_decl))
8116 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8118 /* x86_64 vararg needs 16byte stack alignment for register save
   area (condition partly elided in this view).  */
8122 && crtl->stack_alignment_estimated < 128)
8123 crtl->stack_alignment_estimated = 128;
8126 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
8127    needed or an rtx for DRAP otherwise.  */
8130 ix86_get_drap_rtx (void)
8132 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8133 crtl->need_drap = true;
8135 if (stack_realign_drap)
8137 /* Assign DRAP to vDRAP and return vDRAP.  The copy is emitted right
   after the function entry point so vDRAP captures the incoming
   argument pointer before anything clobbers the DRAP register.  */
8138 unsigned int regno = find_drap_reg ();
8143 arg_ptr = gen_rtx_REG (Pmode, regno);
8144 crtl->drap_reg = arg_ptr;
8147 drap_vreg = copy_to_reg (arg_ptr);
8151 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8152 RTX_FRAME_RELATED_P (insn) = 1;
8159 /* Handle the TARGET_INTERNAL_ARG_POINTER hook: the rtx used to access
   incoming arguments is simply the virtual incoming-args pointer, which
   later passes resolve against DRAP/frame/stack pointer as needed.  */
8162 ix86_internal_arg_pointer (void)
8164 return virtual_incoming_args_rtx;
8167 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8168    This is called from dwarf2out.c to emit call frame instructions
8169    for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs.
   Dispatches on the unspec code found in SET_SRC (pattern).  */
8171 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
8173 rtx unspec = SET_SRC (pattern);
8174 gcc_assert (GET_CODE (unspec) == UNSPEC);
/* Register saved into another register: emit a reg-save-reg CFI op.  */
8178 case UNSPEC_REG_SAVE:
8179 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8180 SET_DEST (pattern));
/* CFA redefined as dest-register + constant offset.  */
8182 case UNSPEC_DEF_CFA:
8183 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8184 INTVAL (XVECEXP (unspec, 0, 0)));
8191 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8192    to be generated in correct form.  Idempotent: once finalized, asserts
   the recomputed answer matches the recorded one.  */
8194 ix86_finalize_stack_realign_flags (void)
8196 /* Check if stack realign is really needed after reload, and
8197    store the result in cfun.  */
8198 unsigned int incoming_stack_boundary
8199 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8200 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
/* Leaf functions only need alignment for stack slots actually used.  */
8201 unsigned int stack_realign = (incoming_stack_boundary
8202 < (current_function_is_leaf
8203 ? crtl->max_used_stack_slot_alignment
8204 : crtl->stack_alignment_needed));
8206 if (crtl->stack_realign_finalized)
8208 /* After stack_realign_needed is finalized, we can no longer
   change it.  */
8210 gcc_assert (crtl->stack_realign_needed == stack_realign);
8214 crtl->stack_realign_needed = stack_realign;
8215 crtl->stack_realign_finalized = true;
8219 /* Expand the prologue into a bunch of separate insns: DRAP setup and
   stack realignment, frame pointer setup, register saves (push or mov),
   stack allocation (direct or probed), PIC register load, and scheduling
   barriers for the red zone.  */
8222 ix86_expand_prologue (void)
8226 struct ix86_frame frame;
8227 HOST_WIDE_INT allocate;
8229 ix86_finalize_stack_realign_flags ();
8231 /* DRAP should not coexist with stack_realign_fp */
8232 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8234 ix86_compute_frame_layout (&frame);
8236 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8237    DRAP is needed and stack realignment is really needed after reload.  */
8238 if (crtl->drap_reg && crtl->stack_realign_needed)
8241 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8242 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8243 ? 0 : UNITS_PER_WORD);
8245 gcc_assert (stack_realign_drap);
8247 /* Grab the argument pointer. */
8248 x = plus_constant (stack_pointer_rtx,
8249 (UNITS_PER_WORD + param_ptr_offset))
8252 /* Only need to push parameter pointer reg if it is caller
   saved (i.e. not call-used).  */
8254 if (!call_used_regs[REGNO (crtl->drap_reg)])
8256 /* Push arg pointer reg */
8257 insn = emit_insn (gen_push (y));
8258 RTX_FRAME_RELATED_P (insn) = 1;
8261 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8262 RTX_FRAME_RELATED_P (insn) = 1;
8264 /* Align the stack. */
8265 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8267 GEN_INT (-align_bytes)));
8268 RTX_FRAME_RELATED_P (insn) = 1;
8270 /* Replicate the return address on the stack so that return
8271    address can be reached via (argp - 1) slot.  This is needed
8272    to implement macro RETURN_ADDR_RTX and intrinsic function
8273    expand_builtin_return_addr etc.  */
8275 x = gen_frame_mem (Pmode,
8276 plus_constant (x, -UNITS_PER_WORD));
8277 insn = emit_insn (gen_push (x));
8278 RTX_FRAME_RELATED_P (insn) = 1;
8281 /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8282    slower on all targets.  Also sdb doesn't like it.  */
8284 if (frame_pointer_needed)
8286 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8287 RTX_FRAME_RELATED_P (insn) = 1;
8289 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8290 RTX_FRAME_RELATED_P (insn) = 1;
8293 if (stack_realign_fp)
8295 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8296 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8298 /* Align the stack. */
8299 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8301 GEN_INT (-align_bytes)));
8302 RTX_FRAME_RELATED_P (insn) = 1;
8305 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8307 if (!frame.save_regs_using_mov)
8308 ix86_emit_save_regs ();
/* When saving with MOV, the register save area is part of the allocation.  */
8310 allocate += frame.nregs * UNITS_PER_WORD;
8312 /* When using red zone we may start register saving before allocating
8313    the stack frame saving one cycle of the prologue.  However I will
8314    avoid doing this if I am going to have to probe the stack since
8315    at least on x86_64 the stack probe can turn into a call that clobbers
8316    a red zone location.  */
8317 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8318 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8319 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8320 && !crtl->stack_realign_needed)
8321 ? hard_frame_pointer_rtx
8322 : stack_pointer_rtx,
8323 -frame.nregs * UNITS_PER_WORD);
8327 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8328 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8329 GEN_INT (-allocate), -1);
/* Large allocation with stack probing: go through the allocate_stack
   worker, using %eax/%rax as scratch (saved around the call if live).  */
8332 /* Only valid for Win32. */
8333 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8337 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8339 if (cfun->machine->call_abi == MS_ABI)
8342 eax_live = ix86_eax_live_at_start_p ();
8346 emit_insn (gen_push (eax));
8347 allocate -= UNITS_PER_WORD;
8350 emit_move_insn (eax, GEN_INT (allocate));
8353 insn = gen_allocate_stack_worker_64 (eax, eax);
8355 insn = gen_allocate_stack_worker_32 (eax, eax);
8356 insn = emit_insn (insn);
8357 RTX_FRAME_RELATED_P (insn) = 1;
/* Describe the net sp adjustment to dwarf2out, since the worker call
   hides it from the default CFI machinery.  */
8358 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8359 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8360 add_reg_note (insn, REG_FRAME_RELATED_EXPR, t);
/* Restore eax from its stack slot if it was live at function start.  */
8364 if (frame_pointer_needed)
8365 t = plus_constant (hard_frame_pointer_rtx,
8368 - frame.nregs * UNITS_PER_WORD);
8370 t = plus_constant (stack_pointer_rtx, allocate);
8371 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* MOV-based saves that were not done early (no red zone shortcut).  */
8375 if (frame.save_regs_using_mov
8376 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8377 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8379 if (!frame_pointer_needed
8380 || !frame.to_allocate
8381 || crtl->stack_realign_needed)
8382 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8384 + frame.nsseregs * 16 + frame.padding0);
8386 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8387 -frame.nregs * UNITS_PER_WORD);
8389 if (!frame_pointer_needed
8390 || !frame.to_allocate
8391 || crtl->stack_realign_needed)
8392 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8395 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8396 - frame.nregs * UNITS_PER_WORD
8397 - frame.nsseregs * 16
/* Load the PIC register if this function needs it.  */
8400 pic_reg_used = false;
8401 if (pic_offset_table_rtx
8402 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8405 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8407 if (alt_pic_reg_used != INVALID_REGNUM)
8408 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8410 pic_reg_used = true;
8417 if (ix86_cmodel == CM_LARGE_PIC)
8419 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8420 rtx label = gen_label_rtx ();
8422 LABEL_PRESERVE_P (label) = 1;
8423 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8424 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8425 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8426 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8427 pic_offset_table_rtx, tmp_reg));
8430 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8433 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8436 /* In the pic_reg_used case, make sure that the got load isn't deleted
8437    when mcount needs it.  Blockage to avoid call movement across mcount
8438    call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
   note.  */
8440 if (crtl->profile && pic_reg_used)
8441 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8443 if (crtl->drap_reg && !crtl->stack_realign_needed)
8445 /* vDRAP is setup but after reload it turns out stack realign
8446    isn't necessary, here we will emit prologue to setup DRAP
8447    without stack realign adjustment.  */
8448 int drap_bp_offset = UNITS_PER_WORD * 2;
8449 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8450 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8453 /* Prevent instructions from being scheduled into register save push
8454    sequence when access to the redzone area is done through frame pointer.
8455    The offset between the frame pointer and the stack pointer is calculated
8456    relative to the value of the stack pointer at the end of the function
8457    prologue, and moving instructions that access redzone area via frame
8458    pointer inside push sequence violates this assumption.  */
8459 if (frame_pointer_needed && frame.red_zone_size)
8460 emit_insn (gen_memory_blockage ());
8462 /* Emit cld instruction if stringops are used in the function. */
8463 if (TARGET_CLD && ix86_current_function_needs_cld)
8464 emit_insn (gen_cld ());
8467 /* Emit code to restore saved registers using MOV insns.  First register
8468    is restored from POINTER + OFFSET.  MAYBE_EH_RETURN is forwarded to
   ix86_save_reg to decide which registers the epilogue must restore.  */
8470 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8471 int maybe_eh_return)
8474 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8476 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8477 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8479 /* Ensure that adjust_address won't be forced to produce pointer
8480    out of range allowed by x86-64 instruction set.  */
8481 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset doesn't fit a signed 32-bit displacement: compute the
   address in %r11 and restart offsets from that base.  */
8485 r11 = gen_rtx_REG (DImode, R11_REG);
8486 emit_move_insn (r11, GEN_INT (offset));
8487 emit_insn (gen_adddi3 (r11, r11, pointer));
8488 base_address = gen_rtx_MEM (Pmode, r11);
8491 emit_move_insn (gen_rtx_REG (Pmode, regno),
8492 adjust_address (base_address, Pmode, offset));
8493 offset += UNITS_PER_WORD;
8497 /* Emit code to restore saved SSE registers using MOV insns.  First
8498    register is restored from POINTER + OFFSET.  Loads are TImode and
   marked 128-bit aligned.  MAYBE_EH_RETURN is forwarded to
   ix86_save_reg.  */
8500 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8501 int maybe_eh_return)
8504 rtx base_address = gen_rtx_MEM (TImode, pointer);
8507 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8508 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8510 /* Ensure that adjust_address won't be forced to produce pointer
8511    out of range allowed by x86-64 instruction set.  */
8512 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Same %r11 rebasing trick as the integer restore path.  */
8516 r11 = gen_rtx_REG (DImode, R11_REG);
8517 emit_move_insn (r11, GEN_INT (offset));
8518 emit_insn (gen_adddi3 (r11, r11, pointer));
8519 base_address = gen_rtx_MEM (TImode, r11);
8522 mem = adjust_address (base_address, TImode, offset);
8523 set_mem_align (mem, 128);
8524 emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8529 /* Restore function stack, frame, and registers.  STYLE selects the
   epilogue flavor (style 2 appears to be the eh_return path, judging by
   the "style == 2" / "style != 2" uses below — confirm against callers,
   which are outside this view).  */
8532 ix86_expand_epilogue (int style)
8536 struct ix86_frame frame;
8537 HOST_WIDE_INT offset;
8539 ix86_finalize_stack_realign_flags ();
8541 /* When stack is realigned, SP must be valid. */
8542 sp_valid = (!frame_pointer_needed
8543 || current_function_sp_is_unchanging
8544 || stack_realign_fp);
8546 ix86_compute_frame_layout (&frame);
8548 /* See the comment about red zone and frame
8549    pointer usage in ix86_expand_prologue.  */
8550 if (frame_pointer_needed && frame.red_zone_size)
8551 emit_insn (gen_memory_blockage ());
8553 /* Calculate start of saved registers relative to ebp.  Special care
8554    must be taken for the normal return case of a function using
8555    eh_return: the eax and edx registers are marked as saved, but not
8556    restored along this path.  */
8557 offset = frame.nregs;
8558 if (crtl->calls_eh_return && style != 2)
8560 offset *= -UNITS_PER_WORD;
8561 offset -= frame.nsseregs * 16 + frame.padding0;
8563 /* If we're only restoring one register and sp is not valid then
8564    using a move instruction to restore the register is
8565    less work than reloading sp and popping the register.
8567    The default code result in stack adjustment using add/lea instruction,
8568    while this code results in LEAVE instruction (or discrete equivalent),
8569    so it is profitable in some other cases as well.  Especially when there
8570    are no registers to restore.  We also use this code when TARGET_USE_LEAVE
8571    and there is exactly one register to pop.  This heuristic may need some
8572    tuning in future.  */
8573 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8574 || (TARGET_EPILOGUE_USING_MOVE
8575 && cfun->machine->use_fast_prologue_epilogue
8576 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8577 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
8578 || (frame_pointer_needed && TARGET_USE_LEAVE
8579 && cfun->machine->use_fast_prologue_epilogue
8580 && (frame.nregs + frame.nsseregs) == 1)
8581 || crtl->calls_eh_return)
8583 /* Restore registers.  We can use ebp or esp to address the memory
8584    locations.  If both are available, default to ebp, since offsets
8585    are known to be small.  Only exception is esp pointing directly
8586    to the end of block of saved registers, where we may simplify
   addressing.
8589    If we are realigning stack with bp and sp, regs restore can't
8590    be addressed by bp.  sp must be used instead.  */
8592 if (!frame_pointer_needed
8593 || (sp_valid && !frame.to_allocate)
8594 || stack_realign_fp)
8596 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8597 frame.to_allocate, style == 2);
8598 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8600 + frame.nsseregs * 16
8601 + frame.padding0, style == 2);
8605 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8606 offset, style == 2);
8607 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8609 + frame.nsseregs * 16
8610 + frame.padding0, style == 2);
8613 /* eh_return epilogues need %ecx added to the stack pointer. */
8616 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8618 /* Stack align doesn't work with eh_return. */
8619 gcc_assert (!crtl->stack_realign_needed);
8621 if (frame_pointer_needed)
8623 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8624 tmp = plus_constant (tmp, UNITS_PER_WORD);
8625 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8627 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8628 emit_move_insn (hard_frame_pointer_rtx, tmp);
8630 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8635 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8636 tmp = plus_constant (tmp, (frame.to_allocate
8637 + frame.nregs * UNITS_PER_WORD
8638 + frame.nsseregs * 16
8640 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8643 else if (!frame_pointer_needed)
8644 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8645 GEN_INT (frame.to_allocate
8646 + frame.nregs * UNITS_PER_WORD
8647 + frame.nsseregs * 16
8650 /* If not an i386, mov & pop is faster than "leave". */
8651 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8652 || !cfun->machine->use_fast_prologue_epilogue)
8653 emit_insn ((*ix86_gen_leave) ());
8656 pro_epilogue_adjust_stack (stack_pointer_rtx,
8657 hard_frame_pointer_rtx,
8660 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* POP-based restore path.  */
8665 /* First step is to deallocate the stack frame so that we can
   pop the registers.
8668    If we realign stack with frame pointer, then stack pointer
8669    won't be able to recover via lea $offset(%bp), %sp, because
8670    there is a padding area between bp and sp for realign.
8671    "add $to_allocate, %sp" must be used instead.  */
8674 gcc_assert (frame_pointer_needed);
8675 gcc_assert (!stack_realign_fp);
8676 pro_epilogue_adjust_stack (stack_pointer_rtx,
8677 hard_frame_pointer_rtx,
8678 GEN_INT (offset), style);
8679 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8680 frame.to_allocate, style == 2);
8681 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8682 GEN_INT (frame.nsseregs * 16), style);
8684 else if (frame.to_allocate || frame.nsseregs)
8686 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8689 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8690 GEN_INT (frame.to_allocate
8691 + frame.nsseregs * 16
8692 + frame.padding0), style);
8695 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8696 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8697 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8698 if (frame_pointer_needed)
8700 /* Leave results in shorter dependency chains on CPUs that are
8701    able to grok it fast.  */
8702 if (TARGET_USE_LEAVE)
8703 emit_insn ((*ix86_gen_leave) ());
8706 /* If stack realignment really happened, recovering the stack
8707    pointer from the hard frame pointer is a must, if not using
   leave.  */
8709 if (stack_realign_fp)
8710 pro_epilogue_adjust_stack (stack_pointer_rtx,
8711 hard_frame_pointer_rtx,
8713 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Undo the DRAP prologue: pop the return-address copy (and the saved
   DRAP register if it was pushed).  */
8718 if (crtl->drap_reg && crtl->stack_realign_needed)
8720 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8721 ? 0 : UNITS_PER_WORD);
8722 gcc_assert (stack_realign_drap);
8723 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8725 GEN_INT (-(UNITS_PER_WORD
8726 + param_ptr_offset))));
8727 if (!call_used_regs[REGNO (crtl->drap_reg)])
8728 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8732 /* Sibcall epilogues don't want a return instruction. */
8736 if (crtl->args.pops_args && crtl->args.size)
8738 rtx popc = GEN_INT (crtl->args.pops_args);
8740 /* i386 can only pop 64K bytes.  If asked to pop more, pop
8741    return address, do explicit add, and jump indirectly to the
   caller.
8744 if (crtl->args.pops_args >= 65536)
8746 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8748 /* There is no "pascal" calling convention in any 64bit ABI. */
8749 gcc_assert (!TARGET_64BIT);
8751 emit_insn (gen_popsi1 (ecx));
8752 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8753 emit_jump_insn (gen_return_indirect_internal (ecx));
8756 emit_jump_insn (gen_return_pop_internal (popc));
8759 emit_jump_insn (gen_return_internal ());
8762 /* Reset state from the function's potential modifications: restore the
   canonical PIC register number, and work around a Mach-O limitation.  */
8765 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8766 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8768 if (pic_offset_table_rtx)
8769 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8771 /* Mach-O doesn't support labels at the end of objects, so if
8772    it looks like we might want one, insert a NOP.  */
8774 rtx insn = get_last_insn ();
/* Skip trailing notes other than deleted labels.  */
8777 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8778 insn = PREV_INSN (insn);
8782 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8783 fputs ("\tnop\n", file);
8789 /* Extract the parts of an RTL expression that is a valid memory address
8790    for an instruction (base, index, scale, displacement, segment) into
   *OUT.  Return 0 if the structure of the address is
8791    grossly off.  Return -1 if the address contains ASHIFT, so it is not
8792    strictly valid, but still used for computing length of lea instruction.  */
8795 ix86_decompose_address (rtx addr, struct ix86_address *out)
8797 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8798 rtx base_reg, index_reg;
8799 HOST_WIDE_INT scale = 1;
8800 rtx scale_rtx = NULL_RTX;
8802 enum ix86_address_seg seg = SEG_DEFAULT;
8804 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8806 else if (GET_CODE (addr) == PLUS)
/* Flatten the PLUS tree into an addend list, then classify each addend.  */
8816 addends[n++] = XEXP (op, 1);
8819 while (GET_CODE (op) == PLUS);
8824 for (i = n; i >= 0; --i)
8827 switch (GET_CODE (op))
8832 index = XEXP (op, 0);
8833 scale_rtx = XEXP (op, 1);
/* Thread-pointer unspec selects the TLS segment register.  */
8837 if (XINT (op, 1) == UNSPEC_TP
8838 && TARGET_TLS_DIRECT_SEG_REFS
8839 && seg == SEG_DEFAULT)
8840 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8869 else if (GET_CODE (addr) == MULT)
8871 index = XEXP (addr, 0); /* index*scale */
8872 scale_rtx = XEXP (addr, 1);
8874 else if (GET_CODE (addr) == ASHIFT)
/* Shift count encodes scale as a power of two (0..3 -> 1,2,4,8).  */
8878 /* We're called for lea too, which implements ashift on occasion. */
8879 index = XEXP (addr, 0);
8880 tmp = XEXP (addr, 1);
8881 if (!CONST_INT_P (tmp))
8883 scale = INTVAL (tmp);
8884 if ((unsigned HOST_WIDE_INT) scale > 3)
8890 disp = addr; /* displacement */
8892 /* Extract the integral value of scale. */
8895 if (!CONST_INT_P (scale_rtx))
8897 scale = INTVAL (scale_rtx);
8900 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8901 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8903 /* Allow arg pointer and stack pointer as index if there is not scaling. */
8904 if (base_reg && index_reg && scale == 1
8905 && (index_reg == arg_pointer_rtx
8906 || index_reg == frame_pointer_rtx
8907 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* Swap base and index so the special register becomes the base.  */
8910 tmp = base, base = index, index = tmp;
8911 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8914 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8915 if ((base_reg == hard_frame_pointer_rtx
8916 || base_reg == frame_pointer_rtx
8917 || base_reg == arg_pointer_rtx) && !disp)
8920 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8921    Avoid this by transforming to [%esi+0].
8922    Reload calls address legitimization without cfun defined, so we need
8923    to test cfun for being non-NULL.  */
8924 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8925 && base_reg && !index_reg && !disp
8927 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8930 /* Special case: encode reg+reg instead of reg*2. */
8931 if (!base && index && scale && scale == 2)
8932 base = index, base_reg = index_reg, scale = 1;
8934 /* Special case: scaling cannot be encoded without base or displacement. */
8935 if (!base && !disp && index && scale != 1)
8947 /* Return cost of the memory address x.
8948    For i386, it is better to use a complex address than let gcc copy
8949    the address into a reg and make a new pseudo.  But not if the address
8950    requires two regs - that would mean more pseudos with longer
   lifetimes.  */
8953 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8955 struct ix86_address parts;
8957 int ok = ix86_decompose_address (x, &parts);
8961 if (parts.base && GET_CODE (parts.base) == SUBREG)
8962 parts.base = SUBREG_REG (parts.base);
8963 if (parts.index && GET_CODE (parts.index) == SUBREG)
8964 parts.index = SUBREG_REG (parts.index);
8966 /* Attempt to minimize number of registers in the address. */
8968 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8970 && (!REG_P (parts.index)
8971 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8975 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8977 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8978 && parts.base != parts.index)
8981 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
8982    since its predecode logic can't detect the length of instructions
8983    and it degenerates to vector decoded.  Increase cost of such
8984    addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
8985    to split such addresses or even refuse such addresses at all.
8987    Following addressing modes are affected:
8992    The first and last case may be avoidable by explicitly coding the zero in
8993    memory address, but I don't have AMD-K6 machine handy to check this
   theory.  */
8997 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8998 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8999 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9005 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9006    this is used to form addresses to local data when -fPIC is in
   effect.  Recognized as an UNSPEC_MACHOPIC_OFFSET wrapper.  */
9010 darwin_local_data_pic (rtx disp)
9012 return (GET_CODE (disp) == UNSPEC
9013 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9016 /* Determine if a given RTX is a valid constant.  We already know this
9017    satisfies CONSTANT_P.  */
9020 legitimate_constant_p (rtx x)
9022 switch (GET_CODE (x))
9027 if (GET_CODE (x) == PLUS)
/* Only symbol + constant-integer offsets can be legitimate.  */
9029 if (!CONST_INT_P (XEXP (x, 1)))
9034 if (TARGET_MACHO && darwin_local_data_pic (x))
9037 /* Only some unspecs are valid as "constants". */
9038 if (GET_CODE (x) == UNSPEC)
9039 switch (XINT (x, 1))
9044 return TARGET_64BIT;
9047 x = XVECEXP (x, 0, 0);
9048 return (GET_CODE (x) == SYMBOL_REF
9049 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9051 x = XVECEXP (x, 0, 0);
9052 return (GET_CODE (x) == SYMBOL_REF
9053 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9058 /* We must have drilled down to a symbol. */
9059 if (GET_CODE (x) == LABEL_REF)
9061 if (GET_CODE (x) != SYMBOL_REF)
9066 /* TLS symbols are never valid. */
9067 if (SYMBOL_REF_TLS_MODEL (x))
9070 /* DLLIMPORT symbols are never valid. */
9071 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9072 && SYMBOL_REF_DLLIMPORT_P (x))
/* Vector/TImode constants: only standard SSE constants are allowed
   (condition partly elided in this view).  */
9077 if (GET_MODE (x) == TImode
9078 && x != CONST0_RTX (TImode)
9084 if (!standard_sse_constant_p (x))
9091 /* Otherwise we handle everything else in the move patterns. */
9095 /* Determine if it's legal to put X into the constant pool.  This
9096    is not possible for the address of thread-local symbols, which
9097    is checked above (in legitimate_constant_p).  */
9100 ix86_cannot_force_const_mem (rtx x)
9102 /* We can always put integral constants and vectors in memory. */
9103 switch (GET_CODE (x))
/* Everything else is poolable exactly when it is a legitimate constant.  */
9113 return !legitimate_constant_p (x);
9117 /* Nonzero if the constant value X is a legitimate general operand
9118    when generating PIC code.  It is given that flag_pic is on and
9119    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
9122 legitimate_pic_operand_p (rtx x)
9126 switch (GET_CODE (x))
9129 inner = XEXP (x, 0);
/* Strip a constant-integer offset before classifying.  */
9130 if (GET_CODE (inner) == PLUS
9131 && CONST_INT_P (XEXP (inner, 1)))
9132 inner = XEXP (inner, 0);
9134 /* Only some unspecs are valid as "constants". */
9135 if (GET_CODE (inner) == UNSPEC)
9136 switch (XINT (inner, 1))
9141 return TARGET_64BIT;
9143 x = XVECEXP (inner, 0, 0);
9144 return (GET_CODE (x) == SYMBOL_REF
9145 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9146 case UNSPEC_MACHOPIC_OFFSET:
9147 return legitimate_pic_address_disp_p (x);
/* Plain SYMBOL_REF/LABEL_REF fall through to the displacement check.  */
9155 return legitimate_pic_address_disp_p (x);
9162 /* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */
9166 legitimate_pic_address_disp_p (rtx disp)
9170 /* In 64bit mode we can allow direct addresses of symbols and labels
9171    when they are not dynamic symbols.  */
9174 rtx op0 = disp, op1;
9176 switch (GET_CODE (disp))
9182 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9184 op0 = XEXP (XEXP (disp, 0), 0);
9185 op1 = XEXP (XEXP (disp, 0), 1);
/* Offsets must stay within +-16MB of the symbol.  */
9186 if (!CONST_INT_P (op1)
9187 || INTVAL (op1) >= 16*1024*1024
9188 || INTVAL (op1) < -16*1024*1024)
9190 if (GET_CODE (op0) == LABEL_REF)
9192 if (GET_CODE (op0) != SYMBOL_REF)
9197 /* TLS references should always be enclosed in UNSPEC. */
9198 if (SYMBOL_REF_TLS_MODEL (op0))
9200 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9201 && ix86_cmodel != CM_LARGE_PIC)
9209 if (GET_CODE (disp) != CONST)
9211 disp = XEXP (disp, 0);
/* 64-bit: only GOT-relative unspecs may appear as displacements.  */
9215 /* We are unsafe to allow PLUS expressions.  This limit allowed distance
9216    of GOT tables.  We should not need these anyway.  */
9217 if (GET_CODE (disp) != UNSPEC
9218 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9219 && XINT (disp, 1) != UNSPEC_GOTOFF
9220 && XINT (disp, 1) != UNSPEC_PLTOFF))
9223 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9224 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9230 if (GET_CODE (disp) == PLUS)
9232 if (!CONST_INT_P (XEXP (disp, 1)))
9234 disp = XEXP (disp, 0);
9238 if (TARGET_MACHO && darwin_local_data_pic (disp))
9241 if (GET_CODE (disp) != UNSPEC)
9244 switch (XINT (disp, 1))
9249 /* We need to check for both symbols and labels because VxWorks loads
9250    text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
   details.  */
9252 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9253 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9255 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9256    While ABI specify also 32bit relocation but we don't produce it in
9257    small PIC model at all.  */
9258 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9259 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9261 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9263 case UNSPEC_GOTTPOFF:
9264 case UNSPEC_GOTNTPOFF:
9265 case UNSPEC_INDNTPOFF:
/* Initial-exec TLS references.  */
9268 disp = XVECEXP (disp, 0, 0);
9269 return (GET_CODE (disp) == SYMBOL_REF
9270 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9272 disp = XVECEXP (disp, 0, 0);
9273 return (GET_CODE (disp) == SYMBOL_REF
9274 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9276 disp = XVECEXP (disp, 0, 0);
9277 return (GET_CODE (disp) == SYMBOL_REF
9278 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9284 /* Recognizes RTL expressions that are valid memory addresses for an
9285 instruction. The MODE argument is the machine mode for the MEM
9286 expression that wants to use this address.
9288 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9289 convert common non-canonical forms to canonical form so that they will
/* Implements TARGET_LEGITIMATE_ADDRESS_P: decomposes ADDR into
   base + index*scale + disp and validates each part.  STRICT selects
   the strict (post-reload, hard regs only) register predicates.
   NOTE(review): chunk is elided — the reason/return plumbing between
   the numbered lines below is not fully visible.  */
9293 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9294 rtx addr, bool strict)
9296 struct ix86_address parts;
9297 rtx base, index, disp;
9298 HOST_WIDE_INT scale;
9299 const char *reason = NULL;
9300 rtx reason_rtx = NULL_RTX;
9302 if (ix86_decompose_address (addr, &parts) <= 0)
9304 reason = "decomposition failed";
9309 index = parts.index;
9311 scale = parts.scale;
9313 /* Validate base register.
9315 Don't allow SUBREG's that span more than a word here. It can lead to spill
9316 failures when the base is one word out of a two word structure, which is
9317 represented internally as a DImode int. */
9326 else if (GET_CODE (base) == SUBREG
9327 && REG_P (SUBREG_REG (base))
9328 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9330 reg = SUBREG_REG (base);
9333 reason = "base is not a register";
9337 if (GET_MODE (base) != Pmode)
9339 reason = "base is not in Pmode";
9343 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9344 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9346 reason = "base is not valid";
9351 /* Validate index register.
9353 Don't allow SUBREG's that span more than a word here -- same as above. */
9362 else if (GET_CODE (index) == SUBREG
9363 && REG_P (SUBREG_REG (index))
9364 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9366 reg = SUBREG_REG (index);
9369 reason = "index is not a register";
9373 if (GET_MODE (index) != Pmode)
9375 reason = "index is not in Pmode";
9379 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9380 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9382 reason = "index is not valid";
9387 /* Validate scale factor. */
/* x86 addressing allows scale 1, 2, 4 or 8 only, and only with an
   index register present.  */
9390 reason_rtx = GEN_INT (scale);
9393 reason = "scale without index";
9397 if (scale != 2 && scale != 4 && scale != 8)
9399 reason = "scale is not a valid multiplier";
9404 /* Validate displacement. */
/* Classify PIC/TLS unspec displacements; only a specific set of
   unspecs is allowed, depending on TARGET_64BIT and flag_pic.  */
9409 if (GET_CODE (disp) == CONST
9410 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9411 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9412 switch (XINT (XEXP (disp, 0), 1))
9414 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9415 used. While ABI specify also 32bit relocations, we don't produce
9416 them at all and use IP relative instead. */
9419 gcc_assert (flag_pic);
9421 goto is_legitimate_pic;
9422 reason = "64bit address unspec";
9425 case UNSPEC_GOTPCREL:
9426 gcc_assert (flag_pic);
9427 goto is_legitimate_pic;
9429 case UNSPEC_GOTTPOFF:
9430 case UNSPEC_GOTNTPOFF:
9431 case UNSPEC_INDNTPOFF:
9437 reason = "invalid address unspec";
9441 else if (SYMBOLIC_CONST (disp)
9445 && MACHOPIC_INDIRECT
9446 && !machopic_operand_p (disp)
9452 if (TARGET_64BIT && (index || base))
9454 /* foo@dtpoff(%rX) is ok. */
9455 if (GET_CODE (disp) != CONST
9456 || GET_CODE (XEXP (disp, 0)) != PLUS
9457 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9458 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9459 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9460 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9462 reason = "non-constant pic memory reference";
9466 else if (! legitimate_pic_address_disp_p (disp))
9468 reason = "displacement is an invalid pic construct";
9472 /* This code used to verify that a symbolic pic displacement
9473 includes the pic_offset_table_rtx register.
9475 While this is good idea, unfortunately these constructs may
9476 be created by "adds using lea" optimization for incorrect
9485 This code is nonsensical, but results in addressing
9486 GOT table with pic_offset_table_rtx base. We can't
9487 just refuse it easily, since it gets matched by
9488 "addsi3" pattern, that later gets split to lea in the
9489 case output register differs from input. While this
9490 can be handled by separate addsi pattern for this case
9491 that never results in lea, this seems to be easier and
9492 correct fix for crash to disable this test. */
9494 else if (GET_CODE (disp) != LABEL_REF
9495 && !CONST_INT_P (disp)
9496 && (GET_CODE (disp) != CONST
9497 || !legitimate_constant_p (disp))
9498 && (GET_CODE (disp) != SYMBOL_REF
9499 || !legitimate_constant_p (disp)))
9501 reason = "displacement is not constant";
9504 else if (TARGET_64BIT
9505 && !x86_64_immediate_operand (disp, VOIDmode))
9507 reason = "displacement is out of range";
9512 /* Everything looks valid. */
9519 /* Determine if a given RTX is a valid constant address. */
/* A constant address is any CONSTANT_P rtx that also passes the strict
   address check in Pmode.  */
9522 constant_address_p (rtx x)
9524 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
9527 /* Return a unique alias set for the GOT. */
/* Lazily creates one alias set shared by all GOT loads, so they do not
   alias ordinary memory.  NOTE(review): the guard testing SET against -1
   and the return are elided from this view.  */
9529 static alias_set_type
9530 ix86_GOT_alias_set (void)
9532 static alias_set_type set = -1;
9534 set = new_alias_set ();
9538 /* Return a legitimate reference for ORIG (an address) using the
9539 register REG. If REG is 0, a new pseudo is generated.
9541 There are two types of references that must be handled:
9543 1. Global data references must load the address from the GOT, via
9544 the PIC reg. An insn is emitted to do this load, and the reg is
9547 2. Static data references, constant pool addresses, and code labels
9548 compute the address as an offset from the GOT, whose base is in
9549 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9550 differentiate them from global data objects. The returned
9551 address is the PIC reg + an unspec constant.
9553 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
9554 reg also appears in the address. */
/* NOTE(review): chunk is elided — else-branches and some returns between
   the numbered lines below are missing from this view.  */
9557 legitimize_pic_address (rtx orig, rtx reg)
9564 if (TARGET_MACHO && !TARGET_64BIT)
9567 reg = gen_reg_rtx (Pmode);
9568 /* Use the generic Mach-O PIC machinery. */
9569 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* 64-bit: already-legitimate displacements need no rewriting.  */
9573 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9575 else if (TARGET_64BIT
9576 && ix86_cmodel != CM_SMALL_PIC
9577 && gotoff_operand (addr, Pmode))
9580 /* This symbol may be referenced via a displacement from the PIC
9581 base address (@GOTOFF). */
9583 if (reload_in_progress)
9584 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9585 if (GET_CODE (addr) == CONST)
9586 addr = XEXP (addr, 0);
9587 if (GET_CODE (addr) == PLUS)
9589 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9591 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9594 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9595 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9597 tmpreg = gen_reg_rtx (Pmode);
9600 emit_move_insn (tmpreg, new_rtx);
9604 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9605 tmpreg, 1, OPTAB_DIRECT);
9608 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
/* 32-bit @GOTOFF: PIC register plus an UNSPEC_GOTOFF constant.  */
9610 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9612 /* This symbol may be referenced via a displacement from the PIC
9613 base address (@GOTOFF). */
9615 if (reload_in_progress)
9616 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9617 if (GET_CODE (addr) == CONST)
9618 addr = XEXP (addr, 0);
9619 if (GET_CODE (addr) == PLUS)
9621 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9623 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9626 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9627 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9628 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9632 emit_move_insn (reg, new_rtx);
/* Non-TLS symbols (and, on VxWorks RTP, text labels) go through the
   GOT; dllimport symbols get their own indirection first.  */
9636 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9637 /* We can't use @GOTOFF for text labels on VxWorks;
9638 see gotoff_operand. */
9639 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9641 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9643 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9644 return legitimize_dllimport_symbol (addr, true);
9645 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9646 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9647 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9649 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9650 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* 64-bit small/medium PIC: load through @GOTPCREL.  */
9654 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9656 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9657 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9658 new_rtx = gen_const_mem (Pmode, new_rtx);
9659 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9662 reg = gen_reg_rtx (Pmode);
9663 /* Use directly gen_movsi, otherwise the address is loaded
9664 into register for CSE. We don't want to CSE this addresses,
9665 instead we CSE addresses from the GOT table, so skip this. */
9666 emit_insn (gen_movsi (reg, new_rtx));
9671 /* This symbol must be referenced via a load from the
9672 Global Offset Table (@GOT). */
9674 if (reload_in_progress)
9675 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9676 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9677 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9679 new_rtx = force_reg (Pmode, new_rtx);
9680 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9681 new_rtx = gen_const_mem (Pmode, new_rtx);
9682 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9685 reg = gen_reg_rtx (Pmode);
9686 emit_move_insn (reg, new_rtx);
/* Fallback: constants too wide for a 64-bit immediate get forced into
   a register; CONST expressions are recursed into below.  */
9692 if (CONST_INT_P (addr)
9693 && !x86_64_immediate_operand (addr, VOIDmode))
9697 emit_move_insn (reg, addr);
9701 new_rtx = force_reg (Pmode, addr);
9703 else if (GET_CODE (addr) == CONST)
9705 addr = XEXP (addr, 0);
9707 /* We must match stuff we generate before. Assume the only
9708 unspecs that can get here are ours. Not that we could do
9709 anything with them anyway.... */
9710 if (GET_CODE (addr) == UNSPEC
9711 || (GET_CODE (addr) == PLUS
9712 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9714 gcc_assert (GET_CODE (addr) == PLUS);
9716 if (GET_CODE (addr) == PLUS)
9718 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9720 /* Check first to see if this is a constant offset from a @GOTOFF
9721 symbol reference. */
9722 if (gotoff_operand (op0, Pmode)
9723 && CONST_INT_P (op1))
9727 if (reload_in_progress)
9728 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9729 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9731 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9732 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9733 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9737 emit_move_insn (reg, new_rtx);
/* Offsets outside +/-16MB (or not valid as 64-bit immediates) must
   live in registers.  */
9743 if (INTVAL (op1) < -16*1024*1024
9744 || INTVAL (op1) >= 16*1024*1024)
9746 if (!x86_64_immediate_operand (op1, Pmode))
9747 op1 = force_reg (Pmode, op1);
9748 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves, then re-associate so any
   constant ends up outermost.  */
9754 base = legitimize_pic_address (XEXP (addr, 0), reg);
9755 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9756 base == reg ? NULL_RTX : reg);
9758 if (CONST_INT_P (new_rtx))
9759 new_rtx = plus_constant (base, INTVAL (new_rtx));
9762 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9764 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9765 new_rtx = XEXP (new_rtx, 1);
9767 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9775 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* Builds an UNSPEC_TP rtx for the thread pointer; when TO_REG, emits a
   SET into a fresh pseudo and (per the elided tail) presumably returns
   that register — TODO confirm against the full source.  */
9778 get_thread_pointer (int to_reg)
9782 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9786 reg = gen_reg_rtx (Pmode);
9787 insn = gen_rtx_SET (VOIDmode, reg, tp);
9788 insn = emit_insn (insn);
9793 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
9794 false if we expect this to be used for a memory address and true if
9795 we expect to load the address into a register. */
/* Expands X (a TLS symbol) according to MODEL: global-dynamic and
   local-dynamic emit __tls_get_addr call sequences; initial-exec loads
   the offset through the GOT; local-exec adds a link-time offset to the
   thread pointer.  NOTE(review): chunk is elided — several lines between
   the numbered statements are missing from this view.  */
9798 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9800 rtx dest, base, off, pic, tp;
9805 case TLS_MODEL_GLOBAL_DYNAMIC:
9806 dest = gen_reg_rtx (Pmode);
9807 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9809 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
/* 64-bit classic GD: the call returns the address in %rax; wrap the
   sequence in a libcall block with X as the REG_EQUAL value.  */
9811 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9814 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9815 insns = get_insns ();
9818 RTL_CONST_CALL_P (insns) = 1;
9819 emit_libcall_block (insns, dest, rax, x);
9821 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9822 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9824 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9826 if (TARGET_GNU2_TLS)
9828 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9830 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9834 case TLS_MODEL_LOCAL_DYNAMIC:
9835 base = gen_reg_rtx (Pmode);
9836 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9838 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9840 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9843 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9844 insns = get_insns ();
9847 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9848 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9849 RTL_CONST_CALL_P (insns) = 1;
9850 emit_libcall_block (insns, base, rax, note);
9852 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9853 emit_insn (gen_tls_local_dynamic_base_64 (base));
9855 emit_insn (gen_tls_local_dynamic_base_32 (base));
9857 if (TARGET_GNU2_TLS)
9859 rtx x = ix86_tls_module_base ();
9861 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9862 gen_rtx_MINUS (Pmode, x, tp));
/* LD: module base plus the symbol's @DTPOFF offset.  */
9865 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9866 off = gen_rtx_CONST (Pmode, off);
9868 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9870 if (TARGET_GNU2_TLS)
9872 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9874 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9879 case TLS_MODEL_INITIAL_EXEC:
/* IE: pick the unspec/relocation flavor based on 64-bit mode, PIC,
   and GNU vs. Sun TLS dialect, then load the offset from the GOT.  */
9883 type = UNSPEC_GOTNTPOFF;
9887 if (reload_in_progress)
9888 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9889 pic = pic_offset_table_rtx;
9890 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9892 else if (!TARGET_ANY_GNU_TLS)
9894 pic = gen_reg_rtx (Pmode);
9895 emit_insn (gen_set_got (pic));
9896 type = UNSPEC_GOTTPOFF;
9901 type = UNSPEC_INDNTPOFF;
9904 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9905 off = gen_rtx_CONST (Pmode, off);
9907 off = gen_rtx_PLUS (Pmode, pic, off);
9908 off = gen_const_mem (Pmode, off);
9909 set_mem_alias_set (off, ix86_GOT_alias_set ());
9911 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9913 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9914 off = force_reg (Pmode, off);
9915 return gen_rtx_PLUS (Pmode, base, off);
9919 base = get_thread_pointer (true);
9920 dest = gen_reg_rtx (Pmode);
9921 emit_insn (gen_subsi3 (dest, base, off));
9925 case TLS_MODEL_LOCAL_EXEC:
9926 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9927 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9928 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9929 off = gen_rtx_CONST (Pmode, off);
9931 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9933 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9934 return gen_rtx_PLUS (Pmode, base, off);
9938 base = get_thread_pointer (true);
9939 dest = gen_reg_rtx (Pmode);
9940 emit_insn (gen_subsi3 (dest, base, off));
9951 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* Memoizes, per DECL, an artificial read-only VAR_DECL whose RTL is a
   GOT-alias-set memory load of the "__imp_"-prefixed symbol; cached in
   the GC-managed DLLIMPORT_MAP hash table.  */
9954 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9955 htab_t dllimport_map;
9958 get_dllimport_decl (tree decl)
9960 struct tree_map *h, in;
9964 size_t namelen, prefixlen;
9970 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9972 in.hash = htab_hash_pointer (decl);
9973 in.base.from = decl;
9974 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9975 h = (struct tree_map *) *loc;
/* Cache miss: build the import stub decl.  */
9979 *loc = h = GGC_NEW (struct tree_map);
9981 h->base.from = decl;
9982 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9983 DECL_ARTIFICIAL (to) = 1;
9984 DECL_IGNORED_P (to) = 1;
9985 DECL_EXTERNAL (to) = 1;
9986 TREE_READONLY (to) = 1;
9988 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9989 name = targetm.strip_name_encoding (name);
/* Fastcall symbols and targets without a user label prefix take
   "__imp_"; otherwise the extra underscore variant "__imp__".  */
9990 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9991 ? "*__imp_" : "*__imp__";
9992 namelen = strlen (name);
9993 prefixlen = strlen (prefix);
9994 imp_name = (char *) alloca (namelen + prefixlen + 1);
9995 memcpy (imp_name, prefix, prefixlen);
9996 memcpy (imp_name + prefixlen, name, namelen + 1);
9998 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9999 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10000 SET_SYMBOL_REF_DECL (rtl, to);
10001 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
10003 rtl = gen_const_mem (Pmode, rtl);
10004 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10006 SET_DECL_RTL (to, rtl);
10007 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
10012 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
10013 true if we require the result be a register. */
/* Looks up (or creates) the __imp_ decl for SYMBOL's decl and returns
   its RTL, forced into a register when WANT_REG.  */
10016 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
10021 gcc_assert (SYMBOL_REF_DECL (symbol));
10022 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10024 x = DECL_RTL (imp_decl);
10026 x = force_reg (Pmode, x);
10030 /* Try machine-dependent ways of modifying an illegitimate address
10031 to be legitimate. If we find one, return the new, valid address.
10032 This macro is used in only one place: `memory_address' in explow.c.
10034 OLDX is the address as it was before break_out_memory_refs was called.
10035 In some cases it is useful to look at this to decide what needs to be done.
10037 It is always safe for this macro to do nothing. It exists to recognize
10038 opportunities to optimize the output.
10040 For the 80386, we handle X+REG by loading X into a register R and
10041 using R+REG. R will go in a general reg and indexing will be used.
10042 However, if REG is a broken-out memory address or multiplication,
10043 nothing needs to be done because REG can certainly go in a general reg.
10045 When -fpic is used, special handling is needed for symbolic references.
10046 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): chunk is elided — "changed = 1;" markers, braces and
   returns between the numbered lines below are not visible here.  */
10049 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10050 enum machine_mode mode)
/* TLS symbols are rewritten first, then dllimport, then general PIC.  */
10055 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10057 return legitimize_tls_address (x, (enum tls_model) log, false);
10058 if (GET_CODE (x) == CONST
10059 && GET_CODE (XEXP (x, 0)) == PLUS
10060 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10061 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10063 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10064 (enum tls_model) log, false);
10065 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10068 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10070 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10071 return legitimize_dllimport_symbol (x, true);
10072 if (GET_CODE (x) == CONST
10073 && GET_CODE (XEXP (x, 0)) == PLUS
10074 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10075 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10077 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10078 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10082 if (flag_pic && SYMBOLIC_CONST (x))
10083 return legitimize_pic_address (x, 0);
10085 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10086 if (GET_CODE (x) == ASHIFT
10087 && CONST_INT_P (XEXP (x, 1))
10088 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10091 log = INTVAL (XEXP (x, 1));
10092 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10093 GEN_INT (1 << log));
10096 if (GET_CODE (x) == PLUS)
10098 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10100 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10101 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10102 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10105 log = INTVAL (XEXP (XEXP (x, 0), 1));
10106 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10107 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10108 GEN_INT (1 << log));
10111 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10112 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10113 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10116 log = INTVAL (XEXP (XEXP (x, 1), 1));
10117 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10118 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10119 GEN_INT (1 << log));
10122 /* Put multiply first if it isn't already. */
10123 if (GET_CODE (XEXP (x, 1)) == MULT)
10125 rtx tmp = XEXP (x, 0);
10126 XEXP (x, 0) = XEXP (x, 1);
10131 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10132 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10133 created by virtual register instantiation, register elimination, and
10134 similar optimizations. */
10135 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10138 x = gen_rtx_PLUS (Pmode,
10139 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10140 XEXP (XEXP (x, 1), 0)),
10141 XEXP (XEXP (x, 1), 1));
10145 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10146 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10147 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10148 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10149 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10150 && CONSTANT_P (XEXP (x, 1)))
10153 rtx other = NULL_RTX;
10155 if (CONST_INT_P (XEXP (x, 1)))
10157 constant = XEXP (x, 1);
10158 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10160 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10162 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10163 other = XEXP (x, 1);
10171 x = gen_rtx_PLUS (Pmode,
10172 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10173 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10174 plus_constant (other, INTVAL (constant)));
10178 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
/* Force any MULT operands into registers so they fit the index slot.  */
10181 if (GET_CODE (XEXP (x, 0)) == MULT)
10184 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10187 if (GET_CODE (XEXP (x, 1)) == MULT)
10190 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10194 && REG_P (XEXP (x, 1))
10195 && REG_P (XEXP (x, 0)))
10198 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10201 x = legitimize_pic_address (x, 0);
10204 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
/* Last resort: move the non-register half into a fresh pseudo.  */
10207 if (REG_P (XEXP (x, 0)))
10209 rtx temp = gen_reg_rtx (Pmode);
10210 rtx val = force_operand (XEXP (x, 1), temp);
10212 emit_move_insn (temp, val);
10214 XEXP (x, 1) = temp;
10218 else if (REG_P (XEXP (x, 1)))
10220 rtx temp = gen_reg_rtx (Pmode);
10221 rtx val = force_operand (XEXP (x, 0), temp);
10223 emit_move_insn (temp, val);
10225 XEXP (x, 0) = temp;
10233 /* Print an integer constant expression in assembler syntax. Addition
10234 and subtraction are the only arithmetic that may appear in these
10235 expressions. FILE is the stdio stream to write to, X is the rtx, and
10236 CODE is the operand print code from the output string. */
/* NOTE(review): chunk is elided — several case labels, `break;` lines and
   puncuation emissions between the numbered lines are not visible.  */
10239 output_pic_addr_const (FILE *file, rtx x, int code)
10243 switch (GET_CODE (x))
10246 gcc_assert (flag_pic);
10251 if (! TARGET_MACHO || TARGET_64BIT)
10252 output_addr_const (file, x);
10255 const char *name = XSTR (x, 0);
10257 /* Mark the decl as referenced so that cgraph will
10258 output the function. */
10259 if (SYMBOL_REF_DECL (x))
10260 mark_decl_referenced (SYMBOL_REF_DECL (x));
10263 if (MACHOPIC_INDIRECT
10264 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10265 name = machopic_indirection_name (x, /*stub_p=*/true);
10267 assemble_name (file, name);
/* 'P' print code on a non-local symbol requests a PLT reference.  */
10269 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10270 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10271 fputs ("@PLT", file);
10278 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10279 assemble_name (asm_out_file, buf);
10283 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10287 /* This used to output parentheses around the expression,
10288 but that does not work on the 386 (either ATT or BSD assembler). */
10289 output_pic_addr_const (file, XEXP (x, 0), code);
10293 if (GET_MODE (x) == VOIDmode)
10295 /* We can use %d if the number is <32 bits and positive. */
10296 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10297 fprintf (file, "0x%lx%08lx",
10298 (unsigned long) CONST_DOUBLE_HIGH (x),
10299 (unsigned long) CONST_DOUBLE_LOW (x));
10301 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10304 /* We can't handle floating point constants;
10305 PRINT_OPERAND must handle them. */
10306 output_operand_lossage ("floating constant misused");
10310 /* Some assemblers need integer constants to appear first. */
10311 if (CONST_INT_P (XEXP (x, 0)))
10313 output_pic_addr_const (file, XEXP (x, 0), code);
10315 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: operand order is fixed; the second operand must be const.  */
10319 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10320 output_pic_addr_const (file, XEXP (x, 1), code);
10322 output_pic_addr_const (file, XEXP (x, 0), code);
10328 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10329 output_pic_addr_const (file, XEXP (x, 0), code);
10331 output_pic_addr_const (file, XEXP (x, 1), code);
10333 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: emit the wrapped operand, then the relocation suffix.  */
10337 gcc_assert (XVECLEN (x, 0) == 1);
10338 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10339 switch (XINT (x, 1))
10342 fputs ("@GOT", file);
10344 case UNSPEC_GOTOFF:
10345 fputs ("@GOTOFF", file);
10347 case UNSPEC_PLTOFF:
10348 fputs ("@PLTOFF", file);
10350 case UNSPEC_GOTPCREL:
10351 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10352 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10354 case UNSPEC_GOTTPOFF:
10355 /* FIXME: This might be @TPOFF in Sun ld too. */
10356 fputs ("@GOTTPOFF", file);
10359 fputs ("@TPOFF", file);
10361 case UNSPEC_NTPOFF:
10363 fputs ("@TPOFF", file);
10365 fputs ("@NTPOFF", file);
10367 case UNSPEC_DTPOFF:
10368 fputs ("@DTPOFF", file);
10370 case UNSPEC_GOTNTPOFF:
10372 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10373 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10375 fputs ("@GOTNTPOFF", file);
10377 case UNSPEC_INDNTPOFF:
10378 fputs ("@INDNTPOFF", file);
10381 case UNSPEC_MACHOPIC_OFFSET:
10383 machopic_output_function_base_name (file);
10387 output_operand_lossage ("invalid UNSPEC as operand");
10393 output_operand_lossage ("invalid expression as operand");
10397 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10398 We need to emit DTP-relative relocations. */
/* Emits ".long sym@DTPOFF" (plus ", 0" padding for 8-byte requests, per
   the elided switch on SIZE); other sizes hit gcc_unreachable.  */
10400 static void ATTRIBUTE_UNUSED
10401 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10403 fputs (ASM_LONG, file);
10404 output_addr_const (file, x);
10405 fputs ("@DTPOFF", file);
10411 fputs (", 0", file);
10414 gcc_unreachable ();
10418 /* Return true if X is a representation of the PIC register. This copes
10419 with calls from ix86_find_base_term, where the register might have
10420 been replaced by a cselib value. */
10423 ix86_pic_register_p (rtx x)
/* A cselib VALUE stands for the PIC reg if it compares equal to it.  */
10425 if (GET_CODE (x) == VALUE)
10426 return (pic_offset_table_rtx
10427 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx))
10429 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10432 /* In the name of slightly smaller debug output, and to cater to
10433 general assembler lossage, recognize PIC+GOTOFF and turn it back
10434 into a direct symbol reference.
10436 On Darwin, this is necessary to avoid a crash, because Darwin
10437 has a different PIC label for each routine but the DWARF debugging
10438 information is not associated with any particular routine, so it's
10439 necessary to remove references to the PIC label from RTL stored by
10440 the DWARF output code. */
/* NOTE(review): chunk is elided — the 64-bit early path and the final
   fall-through returns are only partially visible below.  */
10443 ix86_delegitimize_address (rtx orig_x)
10446 /* reg_addend is NULL or a multiple of some register. */
10447 rtx reg_addend = NULL_RTX;
10448 /* const_addend is NULL or a const_int. */
10449 rtx const_addend = NULL_RTX;
10450 /* This is the result, or NULL. */
10451 rtx result = NULL_RTX;
/* 64-bit: a GOTPCREL load delegitimizes straight to its symbol.  */
10458 if (GET_CODE (x) != CONST
10459 || GET_CODE (XEXP (x, 0)) != UNSPEC
10460 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10461 || !MEM_P (orig_x))
10463 return XVECEXP (XEXP (x, 0), 0, 0);
10466 if (GET_CODE (x) != PLUS
10467 || GET_CODE (XEXP (x, 1)) != CONST)
10470 if (ix86_pic_register_p (XEXP (x, 0)))
10471 /* %ebx + GOT/GOTOFF */
10473 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10475 /* %ebx + %reg * scale + GOT/GOTOFF */
10476 reg_addend = XEXP (x, 0);
10477 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10478 reg_addend = XEXP (reg_addend, 1);
10479 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10480 reg_addend = XEXP (reg_addend, 0);
10483 if (!REG_P (reg_addend)
10484 && GET_CODE (reg_addend) != MULT
10485 && GET_CODE (reg_addend) != ASHIFT)
10491 x = XEXP (XEXP (x, 1), 0);
10492 if (GET_CODE (x) == PLUS
10493 && CONST_INT_P (XEXP (x, 1)))
10495 const_addend = XEXP (x, 1);
/* GOT only delegitimizes for MEM references; GOTOFF only for
   non-MEM (plain address) references.  */
10499 if (GET_CODE (x) == UNSPEC
10500 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10501 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10502 result = XVECEXP (x, 0, 0);
10504 if (TARGET_MACHO && darwin_local_data_pic (x)
10505 && !MEM_P (orig_x))
10506 result = XVECEXP (x, 0, 0);
/* Re-attach the stripped constant and register addends.  */
10512 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10514 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10518 /* If X is a machine specific address (i.e. a symbol or label being
10519 referenced as a displacement from the GOT implemented using an
10520 UNSPEC), then return the base term. Otherwise return X. */
/* NOTE(review): per the elided lines, the CONST/GOTPCREL fast path below
   appears to be 64-bit only, falling back to ix86_delegitimize_address;
   confirm against the full source.  */
10523 ix86_find_base_term (rtx x)
10529 if (GET_CODE (x) != CONST)
10531 term = XEXP (x, 0);
/* Strip a constant offset wrapped around the UNSPEC.  */
10532 if (GET_CODE (term) == PLUS
10533 && (CONST_INT_P (XEXP (term, 1))
10534 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10535 term = XEXP (term, 0);
10536 if (GET_CODE (term) != UNSPEC
10537 || XINT (term, 1) != UNSPEC_GOTPCREL)
10540 return XVECEXP (term, 0, 0);
10543 return ix86_delegitimize_address (x);
/* Prints the condition-code suffix (e.g. "e", "a", "ge") for CODE under
   flags mode MODE to FILE.  REVERSE inverts the condition; FP selects
   the fcmov-style spellings ("nbe"/"nb"/"u"/"nu") over the integer ones.
   NOTE(review): chunk is elided — most case labels of the main switch
   are missing between the numbered lines below.  */
10547 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10548 int fp, FILE *file)
10550 const char *suffix;
10552 if (mode == CCFPmode || mode == CCFPUmode)
/* FP compares must already be reduced to a single integer-style
   condition (no bypass/second code).  */
10554 enum rtx_code second_code, bypass_code;
10555 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10556 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10557 code = ix86_fp_compare_code_to_integer (code);
10561 code = reverse_condition (code);
10612 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10616 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10617 Those same assemblers have the same but opposite lossage on cmov. */
10618 if (mode == CCmode)
10619 suffix = fp ? "nbe" : "a";
10620 else if (mode == CCCmode)
10623 gcc_unreachable ();
10639 gcc_unreachable ();
10643 gcc_assert (mode == CCmode || mode == CCCmode);
10660 gcc_unreachable ();
10664 /* ??? As above. */
10665 gcc_assert (mode == CCmode || mode == CCCmode);
10666 suffix = fp ? "nb" : "ae";
10669 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10673 /* ??? As above. */
10674 if (mode == CCmode)
10676 else if (mode == CCCmode)
10677 suffix = fp ? "nb" : "ae";
10679 gcc_unreachable ();
10682 suffix = fp ? "u" : "p";
10685 suffix = fp ? "nu" : "np";
10688 gcc_unreachable ();
10690 fputs (suffix, file);
10693 /* Print the name of register X to FILE based on its machine mode and number.
10694 If CODE is 'w', pretend the mode is HImode.
10695 If CODE is 'b', pretend the mode is QImode.
10696 If CODE is 'k', pretend the mode is SImode.
10697 If CODE is 'q', pretend the mode is DImode.
10698 If CODE is 'x', pretend the mode is V4SFmode.
10699 If CODE is 't', pretend the mode is V8SFmode.
10700 If CODE is 'h', pretend the reg is the 'high' byte register.
10701 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10702 If CODE is 'd', duplicate the operand for AVX instruction.
10706 print_reg (rtx x, int code, FILE *file)
10709 bool duplicated = code == 'd' && TARGET_AVX;
/* Fixed internal registers must never reach the assembler output.  */
10711 gcc_assert (x == pc_rtx
10712 || (REGNO (x) != ARG_POINTER_REGNUM
10713 && REGNO (x) != FRAME_POINTER_REGNUM
10714 && REGNO (x) != FLAGS_REG
10715 && REGNO (x) != FPSR_REG
10716 && REGNO (x) != FPCR_REG));
10718 if (ASSEMBLER_DIALECT == ASM_ATT)
/* pc_rtx prints as "rip"; only valid in 64-bit mode.  */
10723 gcc_assert (TARGET_64BIT);
10724 fputs ("rip", file);
/* Translate the override letter into an operand size in bytes
   (elided here); otherwise take the size from the operand's mode.  */
10728 if (code == 'w' || MMX_REG_P (x))
10730 else if (code == 'b')
10732 else if (code == 'k')
10734 else if (code == 'q')
10736 else if (code == 'y')
10738 else if (code == 'h')
10740 else if (code == 'x')
10742 else if (code == 't')
10745 code = GET_MODE_SIZE (GET_MODE (x));
10747 /* Irritatingly, AMD extended registers use different naming convention
10748 from the normal registers. */
10749 if (REX_INT_REG_P (x))
10751 gcc_assert (TARGET_64BIT);
10755 error ("extended registers have no high halves");
/* r8..r15 take a size suffix letter instead of a prefix.  */
10758 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10761 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10764 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10767 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10770 error ("unsupported operand size for extended register");
10780 if (STACK_TOP_P (x))
/* 8-byte integer regs get an 'r' prefix, 4-byte get 'e'.  */
10789 if (! ANY_FP_REG_P (x))
10790 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10795 reg = hi_reg_name[REGNO (x)];
10798 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10800 reg = qi_reg_name[REGNO (x)];
10803 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10805 reg = qi_high_reg_name[REGNO (x)];
10810 gcc_assert (!duplicated);
/* Skip the leading size letter of the canonical name.  */
10812 fputs (hi_reg_name[REGNO (x)] + 1, file);
10817 gcc_unreachable ();
/* 'd': repeat the register as the extra AVX source operand.  */
10823 if (ASSEMBLER_DIALECT == ASM_ATT)
10824 fprintf (file, ", %%%s", reg);
10826 fprintf (file, ", %s", reg);
10830 /* Locate some local-dynamic symbol still in use by this function
10831 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: records the first local-dynamic TLS symbol
   found in *PX into cfun->machine->some_ld_name.  */
10835 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10839 if (GET_CODE (x) == SYMBOL_REF
10840 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10842 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return (and cache) the name of some local-dynamic TLS symbol used by
   the current function; aborts if none exists, since callers only ask
   when one must be present.  */
10849 static const char *
10850 get_some_local_dynamic_name (void)
/* Use the cached answer if a previous scan already found one.  */
10854 if (cfun->machine->some_ld_name)
10855 return cfun->machine->some_ld_name;
10857 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10859 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10860 return cfun->machine->some_ld_name;
10862 gcc_unreachable ();
10865 /* Meaning of CODE:
10866 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10867 C -- print opcode suffix for set/cmov insn.
10868 c -- like C, but print reversed condition
10869 E,e -- likewise, but for compare-and-branch fused insn.
10870 F,f -- likewise, but for floating-point.
10871 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10873 R -- print the prefix for register names.
10874 z -- print the opcode suffix for the size of the current operand.
10875 Z -- likewise, with special suffixes for x87 instructions.
10876 * -- print a star (in certain assembler syntax)
10877 A -- print an absolute memory reference.
10878 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10879 s -- print a shift double count, followed by the assemblers argument
10881 b -- print the QImode name of the register for the indicated operand.
10882 %b0 would print %al if operands[0] is reg 0.
10883 w -- likewise, print the HImode name of the register.
10884 k -- likewise, print the SImode name of the register.
10885 q -- likewise, print the DImode name of the register.
10886 x -- likewise, print the V4SFmode name of the register.
10887 t -- likewise, print the V8SFmode name of the register.
10888 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10889 y -- print "st(0)" instead of "st" as a register.
10890 d -- print duplicated register operand for AVX instruction.
10891 D -- print condition for SSE cmp instruction.
10892 P -- if PIC, print an @PLT suffix.
10893 X -- don't print any sort of PIC '@' suffix for a symbol.
10894 & -- print some in-use local-dynamic symbol name.
10895 H -- print a memory address offset by 8; used for sse high-parts
10896 Y -- print condition for SSE5 com* instruction.
10897 + -- print a branch hint as 'cs' or 'ds' prefix
10898 ; -- print a semicolon (after prefixes due to bug in older gas).
/* NOTE(review): large portions of this function's switch are elided in
   this excerpt; the comments below annotate only the visible code.  */
10902 print_operand (FILE *file, rtx x, int code)
10909 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': emit the cached local-dynamic TLS symbol name.  */
10914 assemble_name (file, get_some_local_dynamic_name ());
10918 switch (ASSEMBLER_DIALECT)
10925 /* Intel syntax. For absolute addresses, registers should not
10926 be surrounded by braces. */
10930 PRINT_OPERAND (file, x, 0);
10937 gcc_unreachable ();
10940 PRINT_OPERAND (file, x, 0);
10945 if (ASSEMBLER_DIALECT == ASM_ATT)
10950 if (ASSEMBLER_DIALECT == ASM_ATT)
10955 if (ASSEMBLER_DIALECT == ASM_ATT)
10960 if (ASSEMBLER_DIALECT == ASM_ATT)
10965 if (ASSEMBLER_DIALECT == ASM_ATT)
10970 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z': derive the AT&T size suffix from the operand's mode.  */
10975 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10977 /* Opcodes don't get size suffixes if using Intel opcodes. */
10978 if (ASSEMBLER_DIALECT == ASM_INTEL)
10981 switch (GET_MODE_SIZE (GET_MODE (x)))
11000 output_operand_lossage
11001 ("invalid operand size for operand code '%c'", code);
11006 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11008 (0, "non-integer operand used with operand code '%c'", code);
11012 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
11013 if (ASSEMBLER_DIALECT == ASM_INTEL)
11016 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11018 switch (GET_MODE_SIZE (GET_MODE (x)))
11021 #ifdef HAVE_AS_IX86_FILDS
11031 #ifdef HAVE_AS_IX86_FILDQ
11034 fputs ("ll", file);
11042 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11044 /* 387 opcodes don't get size suffixes
11045 if the operands are registers. */
11046 if (STACK_REG_P (x))
11049 switch (GET_MODE_SIZE (GET_MODE (x)))
11070 output_operand_lossage
11071 ("invalid operand type used with operand code '%c'", code);
11075 output_operand_lossage
11076 ("invalid operand size for operand code '%c'", code);
/* 's': shift-double count, printed before the other operands.  */
11093 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11095 PRINT_OPERAND (file, x, 0);
11096 fputs (", ", file);
11101 /* Little bit of braindamage here. The SSE compare instructions
11102 does use completely different names for the comparisons that the
11103 fp conditional moves. */
11106 switch (GET_CODE (x))
11109 fputs ("eq", file);
11112 fputs ("eq_us", file);
11115 fputs ("lt", file);
11118 fputs ("nge", file);
11121 fputs ("le", file);
11124 fputs ("ngt", file);
11127 fputs ("unord", file);
11130 fputs ("neq", file);
11133 fputs ("neq_oq", file);
11136 fputs ("ge", file);
11139 fputs ("nlt", file);
11142 fputs ("gt", file);
11145 fputs ("nle", file);
11148 fputs ("ord", file);
11151 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* Non-AVX variant of the 'D' comparison-name table.  */
11157 switch (GET_CODE (x))
11161 fputs ("eq", file);
11165 fputs ("lt", file);
11169 fputs ("le", file);
11172 fputs ("unord", file);
11176 fputs ("neq", file);
11180 fputs ("nlt", file);
11184 fputs ("nle", file);
11187 fputs ("ord", file);
11190 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* 'O': Sun assembler cmov size suffix ("w."/"l."/"q.").  */
11196 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11197 if (ASSEMBLER_DIALECT == ASM_ATT)
11199 switch (GET_MODE (x))
11201 case HImode: putc ('w', file); break;
11203 case SFmode: putc ('l', file); break;
11205 case DFmode: putc ('q', file); break;
11206 default: gcc_unreachable ();
/* 'C'/'F'/'c'/'f': condition-code suffixes via put_condition_code.  */
11213 if (!COMPARISON_P (x))
11215 output_operand_lossage ("operand is neither a constant nor a "
11216 "condition code, invalid operand code "
11220 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11223 if (!COMPARISON_P (x))
11225 output_operand_lossage ("operand is neither a constant nor a "
11226 "condition code, invalid operand code "
11230 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11231 if (ASSEMBLER_DIALECT == ASM_ATT)
11234 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11237 /* Like above, but reverse condition */
11239 /* Check to see if argument to %c is really a constant
11240 and not a condition code which needs to be reversed. */
11241 if (!COMPARISON_P (x))
11243 output_operand_lossage ("operand is neither a constant nor a "
11244 "condition code, invalid operand "
11248 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11251 if (!COMPARISON_P (x))
11253 output_operand_lossage ("operand is neither a constant nor a "
11254 "condition code, invalid operand "
11258 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11259 if (ASSEMBLER_DIALECT == ASM_ATT)
11262 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'E'/'e': fused compare-and-branch conditions, always CCmode.  */
11266 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11270 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
11274 /* It doesn't actually matter what mode we use here, as we're
11275 only going to use this for printing. */
11276 x = adjust_address_nv (x, DImode, 8);
/* '+': branch-prediction hint prefix, suppressed when optimizing
   for size or when hints are disabled for the target.  */
11284 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11287 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11290 int pred_val = INTVAL (XEXP (x, 0));
/* Only hint when the prediction is decisive (outside 45%-55%).  */
11292 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11293 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11295 int taken = pred_val > REG_BR_PROB_BASE / 2;
11296 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11298 /* Emit hints only in the case default branch prediction
11299 heuristics would fail. */
11300 if (taken != cputaken)
11302 /* We use 3e (DS) prefix for taken branches and
11303 2e (CS) prefix for not taken branches. */
11305 fputs ("ds ; ", file);
11307 fputs ("cs ; ", file);
/* 'Y': SSE5 com* comparison names.  */
11315 switch (GET_CODE (x))
11318 fputs ("neq", file);
11321 fputs ("eq", file);
11325 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11329 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11333 fputs ("le", file);
11337 fputs ("lt", file);
11340 fputs ("unord", file);
11343 fputs ("ord", file);
11346 fputs ("ueq", file);
11349 fputs ("nlt", file);
11352 fputs ("nle", file);
11355 fputs ("ule", file);
11358 fputs ("ult", file);
11361 fputs ("une", file);
11364 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11371 fputs (" ; ", file);
11378 output_operand_lossage ("invalid operand code '%c'", code);
/* No (or handled) code letter: print the operand by its kind.  */
11383 print_reg (x, code, file);
11385 else if (MEM_P (x))
11387 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11388 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11389 && GET_MODE (x) != BLKmode)
11392 switch (GET_MODE_SIZE (GET_MODE (x)))
11394 case 1: size = "BYTE"; break;
11395 case 2: size = "WORD"; break;
11396 case 4: size = "DWORD"; break;
11397 case 8: size = "QWORD"; break;
11398 case 12: size = "XWORD"; break;
11400 if (GET_MODE (x) == XFmode)
11406 gcc_unreachable ();
11409 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11412 else if (code == 'w')
11414 else if (code == 'k')
11417 fputs (size, file);
11418 fputs (" PTR ", file);
11422 /* Avoid (%rip) for call operands. */
11423 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11424 && !CONST_INT_P (x))
11425 output_addr_const (file, x);
11426 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11427 output_operand_lossage ("invalid constraints for operand");
11429 output_address (x);
/* SFmode constants are emitted as their raw 32-bit bit pattern.  */
11432 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11437 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11438 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11440 if (ASSEMBLER_DIALECT == ASM_ATT)
11442 fprintf (file, "0x%08lx", (long unsigned int) l);
11445 /* These float cases don't actually occur as immediate operands. */
11446 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11450 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11451 fprintf (file, "%s", dstr);
11454 else if (GET_CODE (x) == CONST_DOUBLE
11455 && GET_MODE (x) == XFmode)
11459 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11460 fprintf (file, "%s", dstr);
11465 /* We have patterns that allow zero sets of memory, for instance.
11466 In 64-bit mode, we should probably support all 8-byte vectors,
11467 since we can in fact encode that into an immediate. */
11468 if (GET_CODE (x) == CONST_VECTOR)
11470 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11476 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11478 if (ASSEMBLER_DIALECT == ASM_ATT)
11481 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11482 || GET_CODE (x) == LABEL_REF)
11484 if (ASSEMBLER_DIALECT == ASM_ATT)
11487 fputs ("OFFSET FLAT:", file);
11490 if (CONST_INT_P (x))
11491 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
/* Symbolic constants go through the PIC-aware printer under PIC.  */
11493 output_pic_addr_const (file, x, code);
11495 output_addr_const (file, x);
11499 /* Print a memory operand whose address is ADDR. */
11502 print_operand_address (FILE *file, rtx addr)
11504 struct ix86_address parts;
11505 rtx base, index, disp;
/* Split ADDR into base, index, displacement, scale and segment.  */
11507 int ok = ix86_decompose_address (addr, &parts);
11512 index = parts.index;
11514 scale = parts.scale;
/* Emit an explicit segment override for FS/GS-based addresses.  */
11522 if (ASSEMBLER_DIALECT == ASM_ATT)
11524 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11527 gcc_unreachable ();
11530 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11531 if (TARGET_64BIT && !base && !index)
11535 if (GET_CODE (disp) == CONST
11536 && GET_CODE (XEXP (disp, 0)) == PLUS
11537 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11538 symbol = XEXP (XEXP (disp, 0), 0);
/* Only labels and non-TLS symbols may be addressed RIP-relative.  */
11540 if (GET_CODE (symbol) == LABEL_REF
11541 || (GET_CODE (symbol) == SYMBOL_REF
11542 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11545 if (!base && !index)
11547 /* Displacement only requires special attention. */
11549 if (CONST_INT_P (disp))
/* Intel syntax needs an explicit ds: for a bare constant address.  */
11551 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11552 fputs ("ds:", file);
11553 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11556 output_pic_addr_const (file, disp, 0);
11558 output_addr_const (file, disp);
/* AT&T syntax: disp(base,index,scale).  */
11562 if (ASSEMBLER_DIALECT == ASM_ATT)
11567 output_pic_addr_const (file, disp, 0);
11568 else if (GET_CODE (disp) == LABEL_REF)
11569 output_asm_label (disp);
11571 output_addr_const (file, disp);
11576 print_reg (base, 0, file);
11580 print_reg (index, 0, file);
11582 fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp], symbol printed first.  */
11588 rtx offset = NULL_RTX;
11592 /* Pull out the offset of a symbol; print any symbol itself. */
11593 if (GET_CODE (disp) == CONST
11594 && GET_CODE (XEXP (disp, 0)) == PLUS
11595 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11597 offset = XEXP (XEXP (disp, 0), 1);
11598 disp = gen_rtx_CONST (VOIDmode,
11599 XEXP (XEXP (disp, 0), 0));
11603 output_pic_addr_const (file, disp, 0);
11604 else if (GET_CODE (disp) == LABEL_REF)
11605 output_asm_label (disp);
11606 else if (CONST_INT_P (disp))
11609 output_addr_const (file, disp);
11615 print_reg (base, 0, file);
11618 if (INTVAL (offset) >= 0)
11620 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11624 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11631 print_reg (index, 0, file);
11633 fprintf (file, "*%d", scale);
/* Print target-specific UNSPEC address constants (TLS relocations,
   Darwin PIC offsets) that the generic printer cannot handle.
   Returns false for anything unrecognized (elided here).  */
11641 output_addr_const_extra (FILE *file, rtx x)
11645 if (GET_CODE (x) != UNSPEC)
11648 op = XVECEXP (x, 0, 0);
11649 switch (XINT (x, 1))
11651 case UNSPEC_GOTTPOFF:
11652 output_addr_const (file, op);
11653 /* FIXME: This might be @TPOFF in Sun ld. */
11654 fputs ("@GOTTPOFF", file);
11657 output_addr_const (file, op);
11658 fputs ("@TPOFF", file);
11660 case UNSPEC_NTPOFF:
11661 output_addr_const (file, op);
/* Relocation name differs between 64-bit and 32-bit TLS models.  */
11663 fputs ("@TPOFF", file);
11665 fputs ("@NTPOFF", file);
11667 case UNSPEC_DTPOFF:
11668 output_addr_const (file, op);
11669 fputs ("@DTPOFF", file);
11671 case UNSPEC_GOTNTPOFF:
11672 output_addr_const (file, op);
11674 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11675 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11677 fputs ("@GOTNTPOFF", file);
11679 case UNSPEC_INDNTPOFF:
11680 output_addr_const (file, op);
11681 fputs ("@INDNTPOFF", file);
11684 case UNSPEC_MACHOPIC_OFFSET:
11685 output_addr_const (file, op);
11687 machopic_output_function_base_name (file);
11698 /* Split one or more DImode RTL references into pairs of SImode
11699 references. The RTL can be REG, offsettable MEM, integer constant, or
11700 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11701 split and "num" is its length. lo_half and hi_half are output arrays
11702 that parallel "operands". */
11705 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11709 rtx op = operands[num];
11711 /* simplify_subreg refuse to split volatile memory addresses,
11712 but we still have to handle it. */
/* MEM case: address the two SImode halves at offsets 0 and 4.  */
11715 lo_half[num] = adjust_address (op, SImode, 0);
11716 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM case: use subregs; VOIDmode constants are treated as DImode.  */
11720 lo_half[num] = simplify_gen_subreg (SImode, op,
11721 GET_MODE (op) == VOIDmode
11722 ? DImode : GET_MODE (op), 0);
11723 hi_half[num] = simplify_gen_subreg (SImode, op,
11724 GET_MODE (op) == VOIDmode
11725 ? DImode : GET_MODE (op), 4);
11729 /* Split one or more TImode RTL references into pairs of DImode
11730 references. The RTL can be REG, offsettable MEM, integer constant, or
11731 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11732 split and "num" is its length. lo_half and hi_half are output arrays
11733 that parallel "operands". */
11736 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11740 rtx op = operands[num];
11742 /* simplify_subreg refuse to split volatile memory addresses, but we
11743 still have to handle it. */
/* MEM case: DImode halves live at byte offsets 0 and 8.  */
11746 lo_half[num] = adjust_address (op, DImode, 0);
11747 hi_half[num] = adjust_address (op, DImode, 8);
11751 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11752 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11757 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11758 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11759 is the expression of the binary operation. The output may either be
11760 emitted here, or returned to the caller, like all output_* functions.
11762 There is no guarantee that the operands are the same mode, as they
11763 might be within FLOAT or FLOAT_EXTEND expressions. */
11765 #ifndef SYSV386_COMPAT
11766 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11767 wants to fix the assemblers because that causes incompatibility
11768 with gcc. No-one wants to fix gcc because that causes
11769 incompatibility with assemblers... You can use the option of
11770 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11771 #define SYSV386_COMPAT 1
/* NOTE(review): several mnemonic-selection lines are elided in this
   excerpt; comments annotate visible code only.  */
11775 output_387_binary_op (rtx insn, rtx *operands)
11777 static char buf[40];
11780 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11782 #ifdef ENABLE_CHECKING
11783 /* Even if we do not want to check the inputs, this documents input
11784 constraints. Which helps in understanding the following code. */
11785 if (STACK_REG_P (operands[0])
11786 && ((REG_P (operands[1])
11787 && REGNO (operands[0]) == REGNO (operands[1])
11788 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11789 || (REG_P (operands[2])
11790 && REGNO (operands[0]) == REGNO (operands[2])
11791 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11792 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11795 gcc_assert (is_sse);
/* Select the base mnemonic from the operation code; integer-mode
   operands select the fi* (integer) variants (elided).  */
11798 switch (GET_CODE (operands[3]))
11801 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11802 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11810 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11811 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11819 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11820 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11828 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11829 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11837 gcc_unreachable ();
/* SSE path: scalar single/double op; AVX uses the three-operand form.  */
11844 strcpy (buf, ssep);
11845 if (GET_MODE (operands[0]) == SFmode)
11846 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11848 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11852 strcpy (buf, ssep + 1);
11853 if (GET_MODE (operands[0]) == SFmode)
11854 strcat (buf, "ss\t{%2, %0|%0, %2}");
11856 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: pick the operand template by commutativity, which operand
   is st(0), and whether a source register dies.  */
11862 switch (GET_CODE (operands[3]))
11866 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11868 rtx temp = operands[2];
11869 operands[2] = operands[1];
11870 operands[1] = temp;
11873 /* know operands[0] == operands[1]. */
11875 if (MEM_P (operands[2]))
11881 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11883 if (STACK_TOP_P (operands[0]))
11884 /* How is it that we are storing to a dead operand[2]?
11885 Well, presumably operands[1] is dead too. We can't
11886 store the result to st(0) as st(0) gets popped on this
11887 instruction. Instead store to operands[2] (which I
11888 think has to be st(1)). st(1) will be popped later.
11889 gcc <= 2.8.1 didn't have this check and generated
11890 assembly code that the Unixware assembler rejected. */
11891 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11893 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11897 if (STACK_TOP_P (operands[0]))
11898 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11900 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11905 if (MEM_P (operands[1]))
11911 if (MEM_P (operands[2]))
11917 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11920 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11921 derived assemblers, confusingly reverse the direction of
11922 the operation for fsub{r} and fdiv{r} when the
11923 destination register is not st(0). The Intel assembler
11924 doesn't have this brain damage. Read !SYSV386_COMPAT to
11925 figure out what the hardware really does. */
11926 if (STACK_TOP_P (operands[0]))
11927 p = "{p\t%0, %2|rp\t%2, %0}";
11929 p = "{rp\t%2, %0|p\t%0, %2}";
11931 if (STACK_TOP_P (operands[0]))
11932 /* As above for fmul/fadd, we can't store to st(0). */
11933 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11935 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11940 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11943 if (STACK_TOP_P (operands[0]))
11944 p = "{rp\t%0, %1|p\t%1, %0}";
11946 p = "{p\t%1, %0|rp\t%0, %1}";
11948 if (STACK_TOP_P (operands[0]))
11949 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11951 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11956 if (STACK_TOP_P (operands[0]))
11958 if (STACK_TOP_P (operands[1]))
11959 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11961 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11964 else if (STACK_TOP_P (operands[1]))
11967 p = "{\t%1, %0|r\t%0, %1}";
11969 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11975 p = "{r\t%2, %0|\t%0, %2}";
11977 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11983 gcc_unreachable ();
11990 /* Return needed mode for entity in optimize_mode_switching pass. */
11993 ix86_mode_needed (int entity, rtx insn)
11995 enum attr_i387_cw mode;
11997 /* The mode UNINITIALIZED is used to store control word after a
11998 function call or ASM pattern. The mode ANY specify that function
11999 has no requirements on the control word and make no changes in the
12000 bits we are interested in. */
/* Calls and asm statements clobber the x87 control word.  */
12003 || (NONJUMP_INSN_P (insn)
12004 && (asm_noperands (PATTERN (insn)) >= 0
12005 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12006 return I387_CW_UNINITIALIZED;
12008 if (recog_memoized (insn) < 0)
12009 return I387_CW_ANY;
/* Otherwise the insn's i387_cw attribute names the required mode.  */
12011 mode = get_attr_i387_cw (insn);
12016 if (mode == I387_CW_TRUNC)
12021 if (mode == I387_CW_FLOOR)
12026 if (mode == I387_CW_CEIL)
12031 if (mode == I387_CW_MASK_PM)
12036 gcc_unreachable ();
12039 return I387_CW_ANY;
12042 /* Output code to initialize control word copies used by trunc?f?i and
12043 rounding patterns. CURRENT_MODE is set to current control word,
12044 while NEW_MODE is set to new control word. */
12047 emit_i387_cw_initialization (int mode)
12049 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12052 enum ix86_stack_slot slot;
12054 rtx reg = gen_reg_rtx (HImode);
/* Save the current control word, then edit a copy in REG.  */
12056 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12057 emit_move_insn (reg, copy_rtx (stored_mode));
/* Slow/compact path: full 16-bit and/or masking of the RC bits.  */
12059 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12060 || optimize_function_for_size_p (cfun))
12064 case I387_CW_TRUNC:
12065 /* round toward zero (truncate) */
12066 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12067 slot = SLOT_CW_TRUNC;
12070 case I387_CW_FLOOR:
12071 /* round down toward -oo */
12072 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12073 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12074 slot = SLOT_CW_FLOOR;
12078 /* round up toward +oo */
12079 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12080 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12081 slot = SLOT_CW_CEIL;
12084 case I387_CW_MASK_PM:
12085 /* mask precision exception for nearbyint() */
12086 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12087 slot = SLOT_CW_MASK_PM;
12091 gcc_unreachable ();
/* Fast path: overwrite only the RC field via an insv pattern.  */
12098 case I387_CW_TRUNC:
12099 /* round toward zero (truncate) */
12100 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12101 slot = SLOT_CW_TRUNC;
12104 case I387_CW_FLOOR:
12105 /* round down toward -oo */
12106 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12107 slot = SLOT_CW_FLOOR;
12111 /* round up toward +oo */
12112 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12113 slot = SLOT_CW_CEIL;
12116 case I387_CW_MASK_PM:
12117 /* mask precision exception for nearbyint() */
12118 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12119 slot = SLOT_CW_MASK_PM;
12123 gcc_unreachable ();
/* Store the edited control word into its dedicated stack slot.  */
12127 gcc_assert (slot < MAX_386_STACK_LOCALS);
12129 new_mode = assign_386_stack_local (HImode, slot);
12130 emit_move_insn (new_mode, reg);
12133 /* Output code for INSN to convert a float to a signed int. OPERANDS
12134 are the insn operands. The output may be [HSD]Imode and the input
12135 operand may be [SDX]Fmode. */
12138 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12140 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12141 int dimode_p = GET_MODE (operands[0]) == DImode;
12142 int round_mode = get_attr_i387_cw (insn);
12144 /* Jump through a hoop or two for DImode, since the hardware has no
12145 non-popping instruction. We used to do this a different way, but
12146 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate st(0) so the popping fistp/fisttp leaves it live.  */
12147 if ((dimode_p || fisttp) && !stack_top_dies)
12148 output_asm_insn ("fld\t%y1", operands);
12150 gcc_assert (STACK_TOP_P (operands[1]));
12151 gcc_assert (MEM_P (operands[0]));
12152 gcc_assert (GET_MODE (operands[1]) != TFmode);
/* fisttp (SSE3) truncates regardless of the control word.  */
12155 output_asm_insn ("fisttp%Z0\t%0", operands);
/* Classic path: temporarily switch the rounding mode if needed.  */
12158 if (round_mode != I387_CW_ANY)
12159 output_asm_insn ("fldcw\t%3", operands);
12160 if (stack_top_dies || dimode_p)
12161 output_asm_insn ("fistp%Z0\t%0", operands);
12163 output_asm_insn ("fist%Z0\t%0", operands);
12164 if (round_mode != I387_CW_ANY)
12165 output_asm_insn ("fldcw\t%2", operands);
12171 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12172 have the values zero or one, indicates the ffreep insn's operand
12173 from the OPERANDS array. */
12175 static const char *
12176 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12178 if (TARGET_USE_FFREEP)
12179 #if HAVE_AS_IX86_FFREEP
12180 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit its raw encoding (0xdf 0xc0+reg) via
   .word, patching the register digit into the template below.  */
12183 static char retval[] = ".word\t0xc_df";
12184 int regno = REGNO (operands[opno]);
12186 gcc_assert (FP_REGNO_P (regno));
12188 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not wanted: plain fstp.  */
12193 return opno ? "fstp\t%y1" : "fstp\t%y0";
12197 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12198 should be used. UNORDERED_P is true when fucom should be used. */
12201 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12203 int stack_top_dies;
12204 rtx cmp_op0, cmp_op1;
12205 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Pick the pair being compared; operand layout differs per pattern.  */
12209 cmp_op0 = operands[0];
12210 cmp_op1 = operands[1];
12214 cmp_op0 = operands[1];
12215 cmp_op1 = operands[2];
/* SSE path: (v)ucomis[sd]/(v)comis[sd]; the +1 skips the leading 'v'
   when AVX is unavailable.  */
12220 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12221 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12222 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12223 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12225 if (GET_MODE (operands[0]) == SFmode)
12227 return &ucomiss[TARGET_AVX ? 0 : 1];
12229 return &comiss[TARGET_AVX ? 0 : 1];
12232 return &ucomisd[TARGET_AVX ? 0 : 1];
12234 return &comisd[TARGET_AVX ? 0 : 1];
/* x87 path: first operand must already be st(0).  */
12237 gcc_assert (STACK_TOP_P (cmp_op0));
12239 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero uses ftst.  */
12241 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12243 if (stack_top_dies)
12245 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12246 return output_387_ffreep (operands, 1);
12249 return "ftst\n\tfnstsw\t%0";
12252 if (STACK_REG_P (cmp_op1)
12254 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12255 && REGNO (cmp_op1) != FIRST_STACK_REG)
12257 /* If both the top of the 387 stack dies, and the other operand
12258 is also a stack register that dies, then this must be a
12259 `fcompp' float compare */
12263 /* There is no double popping fcomi variant. Fortunately,
12264 eflags is immune from the fstp's cc clobbering. */
12266 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12268 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12269 return output_387_ffreep (operands, 0);
12274 return "fucompp\n\tfnstsw\t%0";
12276 return "fcompp\n\tfnstsw\t%0";
12281 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12283 static const char * const alt[16] =
12285 "fcom%Z2\t%y2\n\tfnstsw\t%0",
12286 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12287 "fucom%Z2\t%y2\n\tfnstsw\t%0",
12288 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12290 "ficom%Z2\t%y2\n\tfnstsw\t%0",
12291 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12295 "fcomi\t{%y1, %0|%0, %y1}",
12296 "fcomip\t{%y1, %0|%0, %y1}",
12297 "fucomi\t{%y1, %0|%0, %y1}",
12298 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into the template table above.  */
12309 mask = eflags_p << 3;
12310 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12311 mask |= unordered_p << 1;
12312 mask |= stack_top_dies;
12314 gcc_assert (mask < 16);
/* Emit one jump-table element: the address of local label LPREFIX<value>,
   as .long (or .quad when the case-vector mode requires it).  */
12323 ix86_output_addr_vec_elt (FILE *file, int value)
12325 const char *directive = ASM_LONG;
12329 directive = ASM_QUAD;
12331 gcc_assert (!TARGET_64BIT);
12334 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one PIC jump-table element for label VALUE, expressed relative
   to label REL, the GOT, or (Darwin) the function base, depending on
   the target configuration.  */
12338 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12340 const char *directive = ASM_LONG;
12343 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12344 directive = ASM_QUAD;
12346 gcc_assert (!TARGET_64BIT);
12348 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12349 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12350 fprintf (file, "%s%s%d-%s%d\n",
12351 directive, LPREFIX, value, LPREFIX, rel);
12352 else if (HAVE_AS_GOTOFF_IN_DATA)
12353 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12355 else if (TARGET_MACHO)
12357 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12358 machopic_output_function_base_name (file);
12359 fprintf(file, "\n");
/* Fallback: express the entry relative to the GOT symbol.  */
12363 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12364 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12367 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Emit code to zero register DEST, as either "mov $0, reg" or
   "xor reg, reg".  The xor form clobbers the flags, so it is wrapped
   in a PARALLEL with a FLAGS_REG clobber.  */
12371 ix86_expand_clear (rtx dest)
12375 /* We play register width games, which are only valid after reload.  */
12376 gcc_assert (reload_completed);
12378 /* Avoid HImode and its attendant prefix byte.  */
12379 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12380 dest = gen_rtx_REG (SImode, REGNO (dest));
12381 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12383 /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
12384 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
/* xor path: attach the flags clobber the xor pattern requires.  */
12386 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12387 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12393 /* X is an unchanging MEM. If it is a constant pool reference, return
12394 the constant pool rtx, else NULL. */
/* X is an unchanging MEM (see comment above).  Strip any PIC/GOT
   decoration from its address; if what remains is a constant-pool
   SYMBOL_REF, return the pool constant, else NULL (elided fallthrough).  */
12397 maybe_get_pool_constant (rtx x)
12399 x = ix86_delegitimize_address (XEXP (x, 0));
12401 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12402 return get_pool_constant (x);
/* Expand a scalar move of MODE from operands[1] to operands[0],
   legitimizing TLS, dllimport, and PIC references and forcing
   awkward operands (mem->mem, large 64-bit immediates, FP constants)
   into registers or the constant pool as needed.  */
12408 ix86_expand_move (enum machine_mode mode, rtx operands[])
12411 enum tls_model model;
/* Case 1: bare SYMBOL_REF source -- may need TLS or dllimport fixup.  */
12416 if (GET_CODE (op1) == SYMBOL_REF)
12418 model = SYMBOL_REF_TLS_MODEL (op1);
12421 op1 = legitimize_tls_address (op1, model, true);
12422 op1 = force_operand (op1, op0);
12426 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12427 && SYMBOL_REF_DLLIMPORT_P (op1))
12428 op1 = legitimize_dllimport_symbol (op1, false);
/* Case 2: (const (plus SYMBOL_REF addend)) -- legitimize the symbol,
   then re-add the addend.  */
12430 else if (GET_CODE (op1) == CONST
12431 && GET_CODE (XEXP (op1, 0)) == PLUS
12432 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12434 rtx addend = XEXP (XEXP (op1, 0), 1);
12435 rtx symbol = XEXP (XEXP (op1, 0), 0);
12438 model = SYMBOL_REF_TLS_MODEL (symbol);
12440 tmp = legitimize_tls_address (symbol, model, true);
12441 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12442 && SYMBOL_REF_DLLIMPORT_P (symbol))
12443 tmp = legitimize_dllimport_symbol (symbol, true);
12447 tmp = force_operand (tmp, NULL);
12448 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12449 op0, 1, OPTAB_DIRECT);
/* PIC addressing of symbolic operands.  */
12455 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12457 if (TARGET_MACHO && !TARGET_64BIT)
/* Darwin: reuse op0 as scratch when it is a register (and, per the
   elided condition, presumably safe to clobber -- TODO confirm).  */
12462 rtx temp = ((reload_in_progress
12463 || ((op0 && REG_P (op0))
12465 ? op0 : gen_reg_rtx (Pmode));
12466 op1 = machopic_indirect_data_reference (op1, temp);
12467 op1 = machopic_legitimize_pic_address (op1, mode,
12468 temp == op1 ? 0 : temp);
12470 else if (MACHOPIC_INDIRECT)
12471 op1 = machopic_indirect_data_reference (op1, 0);
12479 op1 = force_reg (Pmode, op1);
12480 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
/* During/after reload we cannot make pseudos; reuse op0.  */
12482 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12483 op1 = legitimize_pic_address (op1, reg);
/* Non-PIC path: force source to a register when the destination
   cannot take it directly (e.g. mem->mem, or an unpushable push).  */
12492 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12493 || !push_operand (op0, mode))
12495 op1 = force_reg (mode, op1);
12497 if (push_operand (op0, mode)
12498 && ! general_no_elim_operand (op1, mode))
12499 op1 = copy_to_mode_reg (mode, op1);
12501 /* Force large constants in 64bit compilation into register
12502 to get them CSEed.  */
12503 if (can_create_pseudo_p ()
12504 && (mode == DImode) && TARGET_64BIT
12505 && immediate_operand (op1, mode)
12506 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12507 && !register_operand (op0, mode)
12509 op1 = copy_to_mode_reg (mode, op1);
12511 if (can_create_pseudo_p ()
12512 && FLOAT_MODE_P (mode)
12513 && GET_CODE (op1) == CONST_DOUBLE)
12515 /* If we are loading a floating point constant to a register,
12516 force the value to memory now, since we'll get better code
12517 out the back end.  */
12519 op1 = validize_mem (force_const_mem (mode, op1));
12520 if (!register_operand (op0, mode))
/* mem destination: stage the pool load through a fresh register.  */
12522 rtx temp = gen_reg_rtx (mode);
12523 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12524 emit_move_insn (op0, temp);
/* Finally emit the (possibly rewritten) move.  */
12530 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector move of MODE from operands[1] to operands[0].
   Forces non-trivial constants to the constant pool, and routes
   under-aligned SSE memory operands through the misaligned-move
   expander.  */
12534 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12536 rtx op0 = operands[0], op1 = operands[1];
12537 unsigned int align = GET_MODE_ALIGNMENT (mode);
12539 /* Force constants other than zero into memory.  We do not know how
12540 the instructions used to build constants modify the upper 64 bits
12541 of the register, once we have that information we may be able
12542 to handle some of them more efficiently.  */
12543 if (can_create_pseudo_p ()
12544 && register_operand (op0, mode)
12545 && (CONSTANT_P (op1)
12546 || (GET_CODE (op1) == SUBREG
12547 && CONSTANT_P (SUBREG_REG (op1))))
/* <= 0 means "not a special SSE constant the hw can materialize".  */
12548 && standard_sse_constant_p (op1) <= 0)
12549 op1 = validize_mem (force_const_mem (mode, op1));
12551 /* We need to check memory alignment for SSE mode since attribute
12552 can make operands unaligned.  */
12553 if (can_create_pseudo_p ()
12554 && SSE_REG_MODE_P (mode)
12555 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12556 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12560 /* ix86_expand_vector_move_misalign() does not like constants ... */
12561 if (CONSTANT_P (op1)
12562 || (GET_CODE (op1) == SUBREG
12563 && CONSTANT_P (SUBREG_REG (op1))))
12564 op1 = validize_mem (force_const_mem (mode, op1));
12566 /* ... nor both arguments in memory.  */
12567 if (!register_operand (op0, mode)
12568 && !register_operand (op1, mode))
12569 op1 = force_reg (mode, op1);
12571 tmp[0] = op0; tmp[1] = op1;
12572 ix86_expand_vector_move_misalign (mode, tmp);
12576 /* Make operand1 a register if it isn't already.  */
12577 if (can_create_pseudo_p ()
12578 && !register_operand (op0, mode)
12579 && !register_operand (op1, mode))
12581 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
/* Aligned / register case: a plain SET suffices.  */
12585 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12588 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12589 straight to ix86_expand_vector_move. */
12590 /* Code generation for scalar reg-reg moves of single and double precision data:
12591 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12595 if (x86_sse_partial_reg_dependency == true)
12600 Code generation for scalar loads of double precision data:
12601 if (x86_sse_split_regs == true)
12602 movlpd mem, reg (gas syntax)
12606 Code generation for unaligned packed loads of single precision data
12607 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12608 if (x86_sse_unaligned_move_optimal)
12611 if (x86_sse_partial_reg_dependency == true)
12623 Code generation for unaligned packed loads of double precision data
12624 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12625 if (x86_sse_unaligned_move_optimal)
12628 if (x86_sse_split_regs == true)
/* Expand a misaligned vector move (the movmisalign pattern): pick
   movups/movupd/movdqu, or split loads/stores into low/high halves,
   according to mode, target tuning flags, and which operand is MEM.
   See the strategy table in the comment block above.  */
12641 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* AVX path (elided guard above): unaligned moves are cheap, use the
   full-width unaligned instruction for every mode.  */
12650 switch (GET_MODE_CLASS (mode))
12652 case MODE_VECTOR_INT:
12654 switch (GET_MODE_SIZE (mode))
12657 op0 = gen_lowpart (V16QImode, op0);
12658 op1 = gen_lowpart (V16QImode, op1);
12659 emit_insn (gen_avx_movdqu (op0, op1));
12662 op0 = gen_lowpart (V32QImode, op0);
12663 op1 = gen_lowpart (V32QImode, op1);
12664 emit_insn (gen_avx_movdqu256 (op0, op1));
12667 gcc_unreachable ();
12670 case MODE_VECTOR_FLOAT:
12671 op0 = gen_lowpart (mode, op0);
12672 op1 = gen_lowpart (mode, op1);
12677 emit_insn (gen_avx_movups (op0, op1));
12680 emit_insn (gen_avx_movups256 (op0, op1));
12683 emit_insn (gen_avx_movupd (op0, op1));
12686 emit_insn (gen_avx_movupd256 (op0, op1));
12689 gcc_unreachable ();
12694 gcc_unreachable ();
/* Non-AVX, MEM source (elided MEM_P (op1) guard -- TODO confirm).  */
12702 /* If we're optimizing for size, movups is the smallest.  */
12703 if (optimize_insn_for_size_p ())
12705 op0 = gen_lowpart (V4SFmode, op0);
12706 op1 = gen_lowpart (V4SFmode, op1);
12707 emit_insn (gen_sse_movups (op0, op1));
12711 /* ??? If we have typed data, then it would appear that using
12712 movdqu is the only way to get unaligned data loaded with
12714 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12716 op0 = gen_lowpart (V16QImode, op0);
12717 op1 = gen_lowpart (V16QImode, op1);
12718 emit_insn (gen_sse2_movdqu (op0, op1));
12722 if (TARGET_SSE2 && mode == V2DFmode)
12726 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12728 op0 = gen_lowpart (V2DFmode, op0);
12729 op1 = gen_lowpart (V2DFmode, op1);
12730 emit_insn (gen_sse2_movupd (op0, op1));
12734 /* When SSE registers are split into halves, we can avoid
12735 writing to the top half twice.  */
12736 if (TARGET_SSE_SPLIT_REGS)
12738 emit_clobber (op0);
12743 /* ??? Not sure about the best option for the Intel chips.
12744 The following would seem to satisfy; the register is
12745 entirely cleared, breaking the dependency chain.  We
12746 then store to the upper half, with a dependency depth
12747 of one.  A rumor has it that Intel recommends two movsd
12748 followed by an unpacklpd, but this is unconfirmed.  And
12749 given that the dependency depth of the unpacklpd would
12750 still be one, I'm not sure why this would be better.  */
12751 zero = CONST0_RTX (V2DFmode);
/* Split double load: loadlpd then loadhpd, 8 bytes apart.  */
12754 m = adjust_address (op1, DFmode, 0);
12755 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12756 m = adjust_address (op1, DFmode, 8);
12757 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12761 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12763 op0 = gen_lowpart (V4SFmode, op0);
12764 op1 = gen_lowpart (V4SFmode, op1);
12765 emit_insn (gen_sse_movups (op0, op1));
/* Break the false dependency on the destination's old value:
   either clear it or just clobber it, per tuning.  */
12769 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12770 emit_move_insn (op0, CONST0_RTX (mode));
12772 emit_clobber (op0);
/* Split float load: loadlps then loadhps, 8 bytes apart.  */
12774 if (mode != V4SFmode)
12775 op0 = gen_lowpart (V4SFmode, op0);
12776 m = adjust_address (op1, V2SFmode, 0);
12777 emit_insn (gen_sse_loadlps (op0, op0, m));
12778 m = adjust_address (op1, V2SFmode, 8);
12779 emit_insn (gen_sse_loadhps (op0, op0, m));
12782 else if (MEM_P (op0))
12784 /* If we're optimizing for size, movups is the smallest.  */
12785 if (optimize_insn_for_size_p ())
12787 op0 = gen_lowpart (V4SFmode, op0);
12788 op1 = gen_lowpart (V4SFmode, op1);
12789 emit_insn (gen_sse_movups (op0, op1));
12793 /* ??? Similar to above, only less clear because of quote
12794 typeless stores unquote.  */
12795 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12796 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12798 op0 = gen_lowpart (V16QImode, op0);
12799 op1 = gen_lowpart (V16QImode, op1);
12800 emit_insn (gen_sse2_movdqu (op0, op1));
12804 if (TARGET_SSE2 && mode == V2DFmode)
/* Split double store: low then high 8 bytes.  */
12806 m = adjust_address (op0, DFmode, 0);
12807 emit_insn (gen_sse2_storelpd (m, op1));
12808 m = adjust_address (op0, DFmode, 8);
12809 emit_insn (gen_sse2_storehpd (m, op1));
/* Split float store: low then high 8 bytes.  */
12813 if (mode != V4SFmode)
12814 op1 = gen_lowpart (V4SFmode, op1);
12815 m = adjust_address (op0, V2SFmode, 0);
12816 emit_insn (gen_sse_storelps (m, op1));
12817 m = adjust_address (op0, V2SFmode, 8);
12818 emit_insn (gen_sse_storehps (m, op1));
12822 gcc_unreachable ();
12825 /* Expand a push in MODE. This is some mode for which we do not support
12826 proper push instructions, at least from the registers that we expect
12827 the value to live in. */
/* Expand a push of X in MODE by hand: decrement the stack pointer by
   the mode size, then store X through it.  Used for modes with no
   native push instruction (see comment above).  */
12830 ix86_expand_push (enum machine_mode mode, rtx x)
12834 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12835 GEN_INT (-GET_MODE_SIZE (mode)),
12836 stack_pointer_rtx, 1, OPTAB_DIRECT);
/* expand_simple_binop may have produced a new pseudo; copy it back.  */
12837 if (tmp != stack_pointer_rtx)
12838 emit_move_insn (stack_pointer_rtx, tmp);
12840 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12842 /* When we push an operand onto stack, it has to be aligned at least
12843 at the function argument boundary.  However since we don't have
12844 the argument type, we can't determine the actual argument
12846 emit_move_insn (tmp, x);
12849 /* Helper function of ix86_fixup_binary_operands to canonicalize
12850 operand order. Returns true if the operands should be swapped. */
/* Decide whether operands[1] and operands[2] of a CODE binop should be
   swapped to canonicalize operand order (only legal for commutative
   CODEs).  Priorities: src matching dst first, then immediates second,
   then memory operands second.  */
12853 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12856 rtx dst = operands[0];
12857 rtx src1 = operands[1];
12858 rtx src2 = operands[2];
12860 /* If the operation is not commutative, we can't do anything.  */
12861 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12864 /* Highest priority is that src1 should match dst.  */
12865 if (rtx_equal_p (dst, src1))
12867 if (rtx_equal_p (dst, src2))
12870 /* Next highest priority is that immediate constants come second.  */
12871 if (immediate_operand (src2, mode))
12873 if (immediate_operand (src1, mode))
12876 /* Lowest priority is that memory references should come second.  */
12886 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12887 destination to use for the operation. If different from the true
12888 destination in operands[0], a copy operation will be required. */
/* Rewrite operands[] of a CODE binop so they satisfy
   ix86_binary_operator_ok: canonical order, at most one memory source,
   non-constant matching src1.  Returns the dst to use; if it differs
   from operands[0] the caller must emit a copy afterwards.  */
12891 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12894 rtx dst = operands[0];
12895 rtx src1 = operands[1];
12896 rtx src2 = operands[2];
12898 /* Canonicalize operand order.  */
12899 if (ix86_swap_binary_operands_p (code, mode, operands))
12903 /* It is invalid to swap operands of different modes.  */
12904 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12911 /* Both source operands cannot be in memory.  */
12912 if (MEM_P (src1) && MEM_P (src2))
12914 /* Optimization: Only read from memory once.  */
12915 if (rtx_equal_p (src1, src2))
12917 src2 = force_reg (mode, src2);
12921 src2 = force_reg (mode, src2);
12924 /* If the destination is memory, and we do not have matching source
12925 operands, do things in registers.  */
12926 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12927 dst = gen_reg_rtx (mode);
12929 /* Source 1 cannot be a constant.  */
12930 if (CONSTANT_P (src1))
12931 src1 = force_reg (mode, src1);
12933 /* Source 1 cannot be a non-matching memory.  */
12934 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12935 src1 = force_reg (mode, src1);
/* Write back the (possibly replaced) sources.  */
12937 operands[1] = src1;
12938 operands[2] = src2;
12942 /* Similarly, but assume that the destination has already been
12943 set up properly. */
/* As ix86_fixup_binary_operands, but the destination is known to be
   usable as-is, so assert no replacement dst was needed.  */
12946 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12947 enum machine_mode mode, rtx operands[])
12949 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12950 gcc_assert (dst == operands[0]);
12953 /* Attempt to expand a binary operator. Make the expansion closer to the
12954 actual machine, then just general_operand, which will allow 3 separate
12955 memory references (one output, two input) in a single insn. */
/* Expand a binary operator CODE in MODE on operands[]: fix up the
   operands, emit the SET (with a flags clobber outside reload), and
   copy to the real destination if fixup substituted a temporary.  */
12958 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12961 rtx src1, src2, dst, op, clob;
12963 dst = ix86_fixup_binary_operands (code, mode, operands);
12964 src1 = operands[1];
12965 src2 = operands[2];
12967 /* Emit the instruction.  */
12969 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12970 if (reload_in_progress)
12972 /* Reload doesn't know about the flags register, and doesn't know that
12973 it doesn't want to clobber it.  We can only do this with PLUS.  */
12974 gcc_assert (code == PLUS);
/* Normal case: the insn pattern clobbers the flags.  */
12979 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12980 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12983 /* Fix up the destination if needed.  */
12984 if (dst != operands[0])
12985 emit_move_insn (operands[0], dst);
12988 /* Return TRUE or FALSE depending on whether the binary operator meets the
12989 appropriate constraints. */
/* Predicate: do operands[] already satisfy the machine constraints for
   a CODE binop?  Mirrors the fixups in ix86_fixup_binary_operands
   (after notional canonicalization; note the swap here does not
   actually modify operands[], per the elided swap body -- TODO confirm).  */
12992 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12995 rtx dst = operands[0];
12996 rtx src1 = operands[1];
12997 rtx src2 = operands[2];
12999 /* Both source operands cannot be in memory.  */
13000 if (MEM_P (src1) && MEM_P (src2))
13003 /* Canonicalize operand order for commutative operators.  */
13004 if (ix86_swap_binary_operands_p (code, mode, operands))
13011 /* If the destination is memory, we must have a matching source operand.  */
13012 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13015 /* Source 1 cannot be a constant.  */
13016 if (CONSTANT_P (src1))
13019 /* Source 1 cannot be a non-matching memory.  */
13020 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13026 /* Attempt to expand a unary operator. Make the expansion closer to the
13027 actual machine, then just general_operand, which will allow 2 separate
13028 memory references (one output, one input) in a single insn. */
/* Expand a unary operator CODE in MODE on operands[]; like the binary
   expander, redirects a non-matching memory destination through a
   register and adds a flags clobber for NOT outside reload.  */
13031 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13034 int matching_memory;
13035 rtx src, dst, op, clob;
13040 /* If the destination is memory, and we do not have matching source
13041 operands, do things in registers.  */
13042 matching_memory = 0;
13045 if (rtx_equal_p (dst, src))
13046 matching_memory = 1;
13048 dst = gen_reg_rtx (mode);
13051 /* When source operand is memory, destination must match.  */
13052 if (MEM_P (src) && !matching_memory)
13053 src = force_reg (mode, src);
13055 /* Emit the instruction.  */
13057 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13058 if (reload_in_progress || code == NOT)
13060 /* Reload doesn't know about the flags register, and doesn't know that
13061 it doesn't want to clobber it.  */
13062 gcc_assert (code == NOT);
/* NEG etc. clobber the flags; attach the clobber.  */
13067 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13068 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13071 /* Fix up the destination if needed.  */
13072 if (dst != operands[0])
13073 emit_move_insn (operands[0], dst);
13076 #define LEA_SEARCH_THRESHOLD 12
13078 /* Search backward for non-agu definition of register number REGNO1
13079 or register number REGNO2 in INSN's basic block until
13080 1. Pass LEA_SEARCH_THRESHOLD instructions, or
13081 2. Reach BB boundary, or
13082 3. Reach agu definition.
13083 Returns the distance between the non-agu definition point and INSN.
13084 If no definition point, returns -1. */
/* Scan backwards from INSN (within its BB, then optionally around a
   single-predecessor self-loop) for a non-LEA definition of REGNO1 or
   REGNO2, giving up after LEA_SEARCH_THRESHOLD insns.  Returns the
   distance in insns, or -1 (elided) if none found.  Used by the
   Atom AGU-stall heuristic below.  */
13087 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13090 basic_block bb = BLOCK_FOR_INSN (insn);
13093 enum attr_type insn_type;
/* Phase 1: walk backwards inside INSN's own basic block.  */
13095 if (insn != BB_HEAD (bb))
13097 rtx prev = PREV_INSN (insn);
13098 while (prev && distance < LEA_SEARCH_THRESHOLD)
/* Does PREV define either register of interest?  */
13103 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13104 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13105 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13106 && (regno1 == DF_REF_REGNO (*def_rec)
13107 || regno2 == DF_REF_REGNO (*def_rec)))
13109 insn_type = get_attr_type (prev);
/* A non-LEA definer is what we are looking for; an LEA definer
   terminates the search (elided returns).  */
13110 if (insn_type != TYPE_LEA)
13114 if (prev == BB_HEAD (bb))
13116 prev = PREV_INSN (prev);
/* Phase 2: if the threshold was not exhausted, continue around a
   simple loop edge (BB is its own predecessor).  */
13120 if (distance < LEA_SEARCH_THRESHOLD)
13124 bool simple_loop = false;
13126 FOR_EACH_EDGE (e, ei, bb->preds)
13129 simple_loop = true;
13135 rtx prev = BB_END (bb);
13138 && distance < LEA_SEARCH_THRESHOLD)
13143 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13144 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13145 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13146 && (regno1 == DF_REF_REGNO (*def_rec)
13147 || regno2 == DF_REF_REGNO (*def_rec)))
13149 insn_type = get_attr_type (prev);
13150 if (insn_type != TYPE_LEA)
13154 prev = PREV_INSN (prev);
13162 /* get_attr_type may modify recog data.  We want to make sure
13163 that recog data is valid for instruction INSN, on which
13164 distance_non_agu_define is called.  INSN is unchanged here.  */
13165 extract_insn_cached (insn);
13169 /* Return the distance between INSN and the next insn that uses
13170 register number REGNO0 in memory address. Return -1 if no such
13171 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
/* Scan forwards from INSN (within its BB, then optionally around a
   single-successor self-loop) for the next insn that uses REGNO0 in a
   memory address.  Returns the distance in insns; -1 if REGNO0 is
   redefined first or nothing is found within LEA_SEARCH_THRESHOLD.
   Mirror image of distance_non_agu_define.  */
13174 distance_agu_use (unsigned int regno0, rtx insn)
13176 basic_block bb = BLOCK_FOR_INSN (insn);
/* Phase 1: walk forwards inside INSN's own basic block.  */
13181 if (insn != BB_END (bb))
13183 rtx next = NEXT_INSN (insn);
13184 while (next && distance < LEA_SEARCH_THRESHOLD)
13190 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13191 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13192 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13193 && regno0 == DF_REF_REGNO (*use_rec))
13195 /* Return DISTANCE if OP0 is used in memory
13196 address in NEXT.  */
13200 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13201 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13202 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13203 && regno0 == DF_REF_REGNO (*def_rec))
13205 /* Return -1 if OP0 is set in NEXT.  */
13209 if (next == BB_END (bb))
13211 next = NEXT_INSN (next);
/* Phase 2: continue around a simple loop edge (BB is its own
   successor) if the threshold permits.  */
13215 if (distance < LEA_SEARCH_THRESHOLD)
13219 bool simple_loop = false;
13221 FOR_EACH_EDGE (e, ei, bb->succs)
13224 simple_loop = true;
13230 rtx next = BB_HEAD (bb);
13233 && distance < LEA_SEARCH_THRESHOLD)
13239 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13240 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13241 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13242 && regno0 == DF_REF_REGNO (*use_rec))
13244 /* Return DISTANCE if OP0 is used in memory
13245 address in NEXT.  */
13249 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13250 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13251 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13252 && regno0 == DF_REF_REGNO (*def_rec))
13254 /* Return -1 if OP0 is set in NEXT.  */
13259 next = NEXT_INSN (next);
13267 /* Define this macro to tune LEA priority vs ADD, it take effect when
13268 there is a dilemma of choicing LEA or ADD
13269 Negative value: ADD is more preferred than LEA
13271 Positive value: LEA is more preferred than ADD*/
13272 #define IX86_LEA_PRIORITY 2
13274 /* Return true if it is ok to optimize an ADD operation to LEA
13275 operation to avoid flag register consumation. For the processors
13276 like ATOM, if the destination register of LEA holds an actual
13277 address which will be used soon, LEA is better and otherwise ADD
/* Decide whether an ADD should be emitted as LEA (to spare the flags
   register / exploit the AGU on Atom-class cores).  Returns true when
   LEA is the right choice; see the comment block above.  */
13281 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13282 rtx insn, rtx operands[])
13284 unsigned int regno0 = true_regnum (operands[0]);
13285 unsigned int regno1 = true_regnum (operands[1]);
13286 unsigned int regno2;
/* Without AGU tuning (or when optimizing for size) use LEA only when
   a plain two-operand ADD cannot express the operation.  */
13288 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13289 return regno0 != regno1;
13291 regno2 = true_regnum (operands[2]);
13293 /* If a = b + c, (a!=b && a!=c), must use lea form.  */
13294 if (regno0 != regno1 && regno0 != regno2)
/* Otherwise weigh the nearest non-AGU definition of the inputs
   against the nearest address use of the result.  */
13298 int dist_define, dist_use;
13299 dist_define = distance_non_agu_define (regno1, regno2, insn);
13300 if (dist_define <= 0)
13303 /* If this insn has both backward non-agu dependence and forward
13304 agu dependence, the one with short distance take effect.  */
13305 dist_use = distance_agu_use (regno0, insn);
/* IX86_LEA_PRIORITY biases the comparison toward LEA.  */
13307 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13314 /* Return true if destination reg of SET_BODY is shift count of
/* Return true if the destination register of SET_BODY is used as the
   shift/rotate count of USE_BODY.  Recurses through PARALLELs on
   both sides.  */
13318 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13324 /* Retrieve destination of SET_BODY.  */
13325 switch (GET_CODE (set_body))
13328 set_dest = SET_DEST (set_body);
/* Only a plain register destination can feed a shift count.  */
13329 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: any member SET may establish the dependence.  */
13333 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13334 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13342 /* Retrieve shift count of USE_BODY.  */
13343 switch (GET_CODE (use_body))
13346 shift_rtx = XEXP (use_body, 1);
13349 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13350 if (ix86_dep_by_shift_count_body (set_body,
13351 XVECEXP (use_body, 0, i)))
/* Accept any shift or rotate RTX code.  */
13359 && (GET_CODE (shift_rtx) == ASHIFT
13360 || GET_CODE (shift_rtx) == LSHIFTRT
13361 || GET_CODE (shift_rtx) == ASHIFTRT
13362 || GET_CODE (shift_rtx) == ROTATE
13363 || GET_CODE (shift_rtx) == ROTATERT))
13365 rtx shift_count = XEXP (shift_rtx, 1);
13367 /* Return true if shift count is dest of SET_BODY.  */
13368 if (REG_P (shift_count)
13369 && true_regnum (set_dest) == true_regnum (shift_count))
13376 /* Return true if destination reg of SET_INSN is shift count of
/* Insn-level wrapper: does SET_INSN's destination feed USE_INSN's
   shift count?  Delegates to the body-level checker on the PATTERNs.  */
13380 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13382 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13383 PATTERN (use_insn));
13386 /* Return TRUE or FALSE depending on whether the unary operator meets the
13387 appropriate constraints. */
/* Predicate for unary operators: when either operand is memory the
   source and destination must be the same location (x86 unary insns
   are read-modify-write).  CODE and MODE are unused.  */
13390 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13391 enum machine_mode mode ATTRIBUTE_UNUSED,
13392 rtx operands[2] ATTRIBUTE_UNUSED)
13394 /* If one of operands is memory, source and destination must match.  */
13395 if ((MEM_P (operands[0])
13396 || MEM_P (operands[1]))
13397 && ! rtx_equal_p (operands[0], operands[1]))
13402 /* Post-reload splitter for converting an SF or DFmode value in an
13403 SSE register into an unsigned SImode. */
/* Post-reload splitter: convert an SF/DF value in an SSE register to
   unsigned SImode.  Values >= 2^31 are handled by conditionally
   subtracting 2^31 before the signed cvtt, then xoring the sign bit
   back in.  operands[] = { result, large-mask scratch, zero_or_two31
   scratch, input, 2^31 constant }.  */
13406 ix86_split_convert_uns_si_sse (rtx operands[])
13408 enum machine_mode vecmode;
13409 rtx value, large, zero_or_two31, input, two31, x;
13411 large = operands[1];
13412 zero_or_two31 = operands[2];
13413 input = operands[3];
13414 two31 = operands[4];
13415 vecmode = GET_MODE (large);
13416 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13418 /* Load up the value into the low element.  We must ensure that the other
13419 elements are valid floats -- zero is the easiest such value.  */
/* Memory input: insert into a zeroed vector.  */
13422 if (vecmode == V4SFmode)
13423 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13425 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Register input: zero VALUE then move the scalar in.  */
13429 input = gen_rtx_REG (vecmode, REGNO (input));
13430 emit_move_insn (value, CONST0_RTX (vecmode));
13431 if (vecmode == V4SFmode)
13432 emit_insn (gen_sse_movss (value, value, input));
13434 emit_insn (gen_sse2_movsd (value, value, input));
13437 emit_move_insn (large, two31);
13438 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (2^31 <= value) ? all-ones : zero (compare mask).  */
13440 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13441 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* zero_or_two31 = mask ? 2^31 : 0.  */
13443 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13444 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
/* value -= zero_or_two31, bringing it into signed range.  */
13446 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13447 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the mask into the integer sign bit (1 << 31) per lane.  */
13449 large = gen_rtx_REG (V4SImode, REGNO (large));
13450 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13452 x = gen_rtx_REG (V4SImode, REGNO (value));
13453 if (vecmode == V4SFmode)
13454 emit_insn (gen_sse2_cvttps2dq (x, value));
13456 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* Re-add 2^31 where it was subtracted, via xor of the sign bit.  */
13459 emit_insn (gen_xorv4si3 (value, value, large));
13462 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13463 Expects the 64-bit DImode to be supplied in a pair of integral
13464 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13465 -mfpmath=sse, !optimize_size only. */
/* Convert unsigned DImode INPUT to DFmode TARGET using only SSE:
   splice the two 32-bit halves into doubles biased by 2^52 / 2^84,
   subtract the biases, and sum.  See comment above for constraints
   (SSE2 required, SSE3 used when available).  */
13468 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13470 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13471 rtx int_xmm, fp_xmm;
13472 rtx biases, exponents;
/* Get the 64-bit INPUT into the low half of an XMM register, by
   whichever route the tuning flags favor.  */
13475 int_xmm = gen_reg_rtx (V4SImode);
13476 if (TARGET_INTER_UNIT_MOVES)
13477 emit_insn (gen_movdi_to_sse (int_xmm, input));
13478 else if (TARGET_SSE_SPLIT_REGS)
13480 emit_clobber (int_xmm);
13481 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13485 x = gen_reg_rtx (V2DImode);
13486 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13487 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Exponent words for the 2^52 and 2^84 biased doubles.  */
13490 x = gen_rtx_CONST_VECTOR (V4SImode,
13491 gen_rtvec (4, GEN_INT (0x43300000UL),
13492 GEN_INT (0x45300000UL),
13493 const0_rtx, const0_rtx));
13494 exponents = validize_mem (force_const_mem (V4SImode, x));
13496 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13497 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13499 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13500 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13501 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13502 (0x1.0p84 + double(fp_value_hi_xmm)).
13503 Note these exponents differ by 32.  */
13505 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13507 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13508 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
13509 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13510 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13511 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13512 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13513 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13514 biases = validize_mem (force_const_mem (V2DFmode, biases));
13515 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13517 /* Add the upper and lower DFmode values together.  */
/* SSE3 haddpd in one insn, else unpack-high plus add.  */
13519 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13522 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13523 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13524 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13527 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13530 /* Not used, but eases macroization of patterns. */
/* Stub for unsigned-SImode-to-XFmode-via-SSE; never called, exists
   only so the conversion patterns macroize uniformly (see comment
   above).  */
13532 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13533 rtx input ATTRIBUTE_UNUSED)
13535 gcc_unreachable ();
13538 /* Convert an unsigned SImode value into a DFmode. Only currently used
13539 for SSE, but applicable anywhere. */
/* Convert unsigned SImode INPUT to DFmode TARGET: bias the input by
   -2^31 (wrapping into signed range), use the signed SI->DF convert,
   then add 2^31.0 back.  Exact because DFmode holds 32-bit integers
   exactly.  */
13542 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13544 REAL_VALUE_TYPE TWO31r;
/* x = input - 2^31 (as signed wraparound; -2147483647-1 avoids
   writing the unrepresentable literal -2147483648).  */
13547 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13548 NULL, 1, OPTAB_DIRECT);
13550 fp = gen_reg_rtx (DFmode);
13551 emit_insn (gen_floatsidf2 (fp, x));
13553 real_ldexp (&TWO31r, &dconst1, 31);
13554 x = const_double_from_real_value (TWO31r, DFmode);
13556 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
/* expand_simple_binop may not have used TARGET directly.  */
13558 emit_move_insn (target, x);
13561 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13562 32-bit mode; otherwise we have a direct convert instruction. */
/* Convert signed DImode INPUT to DFmode TARGET on 32-bit SSE:
   result = double(high32) * 2^32 + double(unsigned low32).  */
13565 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13567 REAL_VALUE_TYPE TWO32r;
13568 rtx fp_lo, fp_hi, x;
13570 fp_lo = gen_reg_rtx (DFmode);
13571 fp_hi = gen_reg_rtx (DFmode);
/* Signed convert of the high word carries the overall sign.  */
13573 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13575 real_ldexp (&TWO32r, &dconst1, 32);
13576 x = const_double_from_real_value (TWO32r, DFmode);
13577 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* Low word is treated as unsigned.  */
13579 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13581 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13584 emit_move_insn (target, x);
13587 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13588 For x86_32, -mfpmath=sse, !optimize_size only. */
/* Convert unsigned SImode INPUT to SFmode TARGET using only SSE:
   split into 16-bit halves, convert each exactly, and combine as
   double(hi) * 2^16 + double(lo) so no intermediate overflows the
   signed converter.  */
13590 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13592 REAL_VALUE_TYPE ONE16r;
13593 rtx fp_hi, fp_lo, int_hi, int_lo, x;
13595 real_ldexp (&ONE16r, &dconst1, 16);
13596 x = const_double_from_real_value (ONE16r, SFmode);
13597 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13598 NULL, 0, OPTAB_DIRECT);
13599 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13600 NULL, 0, OPTAB_DIRECT);
13601 fp_hi = gen_reg_rtx (SFmode);
13602 fp_lo = gen_reg_rtx (SFmode);
/* Both halves fit in 16 bits, so the signed converts are exact.  */
13603 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13604 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13605 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13607 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
/* expand_simple_binop may have chosen a different result reg.  */
13609 if (!rtx_equal_p (target, fp_hi))
13610 emit_move_insn (target, fp_hi);
13613 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
13614 then replicate the value for all elements of the vector
/* Build a CONST_VECTOR of MODE whose first element is VALUE.  If VECT,
   VALUE is replicated into every lane; otherwise (FP modes) the other
   lanes are zero.  Helper for ix86_build_signbit_mask.  */
13618 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
/* SImode lanes are always replicated (integer masks).  */
13625 v = gen_rtvec (4, value, value, value, value);
13626 return gen_rtx_CONST_VECTOR (V4SImode, v);
13630 v = gen_rtvec (2, value, value);
13631 return gen_rtx_CONST_VECTOR (V2DImode, v);
/* SFmode: replicate or pad with zeros per VECT.  */
13635 v = gen_rtvec (4, value, value, value, value);
13637 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13638 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13639 return gen_rtx_CONST_VECTOR (V4SFmode, v);
/* DFmode: likewise, two lanes.  */
13643 v = gen_rtvec (2, value, value);
13645 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13646 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13649 gcc_unreachable ();
13653 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13654 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13655 for an SSE register. If VECT is true, then replicate the mask for
13656 all elements of the vector register. If INVERT is true, then create
13657 a mask excluding the sign bit. */
/* Build (in a register) a mask for the sign bit of MODE, as used by
   the SSE absneg/copysign expanders.  VECT replicates the mask to all
   lanes; INVERT produces the complement (all bits BUT the sign).  The
   mask is assembled as a HOST_WIDE_INT pair (lo, hi) so it works when
   HOST_WIDE_INT is only 32 bits.  */
13660 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13662 enum machine_mode vec_mode, imode;
13663 HOST_WIDE_INT hi, lo;
13668 /* Find the sign bit, sign extended to 2*HWI.  */
/* SF/SI: bit 31.  */
13674 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13675 lo = 0x80000000, hi = lo < 0;
/* DF/DI: bit 63, split across (lo, hi) on 32-bit hosts.  */
13681 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13682 if (HOST_BITS_PER_WIDE_INT >= 64)
13683 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13685 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* TF/XF-style wide case: no vector mode; mask built in IMODE.  */
13690 vec_mode = VOIDmode;
13691 if (HOST_BITS_PER_WIDE_INT >= 64)
13694 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
/* Narrow-host sub-case; see also the V2DI route below which pairs
   the mask with a zero word (elided control flow -- TODO confirm).  */
13701 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13705 lo = ~lo, hi = ~hi;
13711 mask = immed_double_const (lo, hi, imode);
13713 vec = gen_rtvec (2, v, mask);
13714 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13715 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13722 gcc_unreachable ();
/* INVERT flips every bit of the mask.  */
13726 lo = ~lo, hi = ~hi;
13728 /* Force this value into the low part of a fp vector constant.  */
13729 mask = immed_double_const (lo, hi, imode);
13730 mask = gen_lowpart (mode, mask);
13732 if (vec_mode == VOIDmode)
13733 return force_reg (mode, mask);
13735 v = ix86_build_const_vector (mode, vect, mask);
13736 return force_reg (vec_mode, v);
13739 /* Generate code for floating point ABS or NEG.  */
/* NOTE(review): excerpt drops lines (numbering jumps 13742 -> 13745 and
   elsewhere): the second parameter line, braces, and several branch
   bodies are missing.  Verify against the full file.  */
13742 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13745 rtx mask, set, use, clob, dst, src;
13746 bool use_sse = false;
13747 bool vector_mode = VECTOR_MODE_P (mode);
13748 enum machine_mode elt_mode = mode;
13752 elt_mode = GET_MODE_INNER (mode);
13755 else if (mode == TFmode)
13757 else if (TARGET_SSE_MATH)
13758 use_sse = SSE_FLOAT_MODE_P (mode);
13760 /* NEG and ABS performed with SSE use bitwise mask operations.
13761    Create the appropriate mask now.  */
/* ABS needs the inverted mask (clear sign bit); NEG XORs the sign bit.  */
13763 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13772 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13773 set = gen_rtx_SET (VOIDmode, dst, set);
/* Non-SSE path: emit the plain ABS/NEG rtx.  */
13778 set = gen_rtx_fmt_e (code, mode, src);
13779 set = gen_rtx_SET (VOIDmode, dst, set);
/* Attach a USE of the mask and a flags clobber in one PARALLEL.  */
13782 use = gen_rtx_USE (VOIDmode, mask);
13783 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13784 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13785 gen_rtvec (3, set, use, clob)));
13792 /* Expand a copysign operation.  Special case operand 0 being a constant.  */
/* NOTE(review): listing omits lines (numbering jumps); braces, operand
   loads (op0/op1) and the SSE-availability test are missing here.  */
13795 ix86_expand_copysign (rtx operands[])
13797 enum machine_mode mode;
13798 rtx dest, op0, op1, mask, nmask;
13800 dest = operands[0];
13804 mode = GET_MODE (dest);
/* Constant magnitude operand: fold its sign away and use the
   _const copysign pattern with a single sign mask.  */
13806 if (GET_CODE (op0) == CONST_DOUBLE)
13808 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
13810 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13811 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13813 if (mode == SFmode || mode == DFmode)
13815 enum machine_mode vmode;
13817 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13819 if (op0 == CONST0_RTX (mode))
13820 op0 = CONST0_RTX (vmode);
/* Non-zero constant: place it in lane 0 of a vector constant.  */
13825 if (mode == SFmode)
13826 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13827 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13829 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13831 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13834 else if (op0 != CONST0_RTX (mode))
13835 op0 = force_reg (mode, op0);
13837 mask = ix86_build_signbit_mask (mode, 0, 0);
13839 if (mode == SFmode)
13840 copysign_insn = gen_copysignsf3_const;
13841 else if (mode == DFmode)
13842 copysign_insn = gen_copysigndf3_const;
13844 copysign_insn = gen_copysigntf3_const;
13846 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: need both the sign mask and its complement.  */
13850 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13852 nmask = ix86_build_signbit_mask (mode, 0, 1);
13853 mask = ix86_build_signbit_mask (mode, 0, 0);
13855 if (mode == SFmode)
13856 copysign_insn = gen_copysignsf3_var;
13857 else if (mode == DFmode)
13858 copysign_insn = gen_copysigndf3_var;
13860 copysign_insn = gen_copysigntf3_var;
13862 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13866 /* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
13867    be a constant, and so has already been expanded into a vector constant.  */
/* NOTE(review): listing omits lines (the op0/op1 loads, braces); code
   below kept byte-identical.  */
13870 ix86_split_copysign_const (rtx operands[])
13872 enum machine_mode mode, vmode;
13873 rtx dest, op0, op1, mask, x;
13875 dest = operands[0];
13878 mask = operands[3];
13880 mode = GET_MODE (dest);
13881 vmode = GET_MODE (mask);
/* dest = (dest & sign-mask) -- keep only the sign of op1.  */
13883 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13884 x = gen_rtx_AND (vmode, dest, mask);
13885 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* OR in the constant magnitude unless it is zero.  */
13887 if (op0 != CONST0_RTX (vmode))
13889 x = gen_rtx_IOR (vmode, dest, op0);
13890 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13894 /* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
13895    so we have to do two masks.  */
/* NOTE(review): listing omits lines (braces, op0/op1 loads, early
   return after the rtx_equal_p shortcut); code kept byte-identical.  */
13898 ix86_split_copysign_var (rtx operands[])
13900 enum machine_mode mode, vmode;
13901 rtx dest, scratch, op0, op1, mask, nmask, x;
13903 dest = operands[0];
13904 scratch = operands[1];
13907 nmask = operands[4];
13908 mask = operands[5];
13910 mode = GET_MODE (dest);
13911 vmode = GET_MODE (mask);
13913 if (rtx_equal_p (op0, op1))
13915 /* Shouldn't happen often (it's useless, obviously), but when it does
13916    we'd generate incorrect code if we continue below.  */
13917 emit_move_insn (dest, op0);
/* The register-allocation alternatives below mirror the constraints
   of the copysign*_var insn patterns.  */
13921 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13923 gcc_assert (REGNO (op1) == REGNO (scratch));
13925 x = gen_rtx_AND (vmode, scratch, mask);
13926 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13929 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13930 x = gen_rtx_NOT (vmode, dest);
13931 x = gen_rtx_AND (vmode, x, op0);
13932 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13936 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13938 x = gen_rtx_AND (vmode, scratch, mask);
13940 else /* alternative 2,4 */
13942 gcc_assert (REGNO (mask) == REGNO (scratch));
13943 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13944 x = gen_rtx_AND (vmode, scratch, op1);
13946 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13948 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13950 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13951 x = gen_rtx_AND (vmode, dest, nmask);
13953 else /* alternative 3,4 */
13955 gcc_assert (REGNO (nmask) == REGNO (dest));
13957 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13958 x = gen_rtx_AND (vmode, dest, op0);
13960 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine sign bits (scratch) with magnitude bits (dest).  */
13963 x = gen_rtx_IOR (vmode, dest, scratch);
13964 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13967 /* Return TRUE or FALSE depending on whether the first SET in INSN
13968    has source and destination with matching CC modes, and that the
13969    CC mode is at least as constrained as REQ_MODE.  */
/* NOTE(review): listing omits lines (braces, the switch on set_mode
   and several case labels/returns); code kept byte-identical.  */
13972 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13975 enum machine_mode set_mode;
13977 set = PATTERN (insn);
13978 if (GET_CODE (set) == PARALLEL)
13979 set = XVECEXP (set, 0, 0);
13980 gcc_assert (GET_CODE (set) == SET);
13981 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13983 set_mode = GET_MODE (SET_DEST (set));
/* CCmode is the least constrained; it only matches a full compare
   against a non-zero second operand.  */
13987 if (req_mode != CCNOmode
13988 && (req_mode != CCmode
13989 || XEXP (SET_SRC (set), 1) != const0_rtx))
13993 if (req_mode == CCGCmode)
13997 if (req_mode == CCGOCmode || req_mode == CCNOmode)
14001 if (req_mode == CCZmode)
14012 gcc_unreachable ();
14015 return (GET_MODE (SET_SRC (set)) == set_mode);
14018 /* Generate insn patterns to do an integer compare of OPERANDS.  */
/* NOTE(review): listing omits the return type line and braces.  */
14021 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14023 enum machine_mode cmpmode;
/* Pick the least constrained CC mode that still captures CODE.  */
14026 cmpmode = SELECT_CC_MODE (code, op0, op1);
14027 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14029 /* This is very simple, but making the interface the same as in the
14030    FP case makes the rest of the code easier.  */
14031 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14032 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14034 /* Return the test that should be put into the flags user, i.e.
14035    the bcc, scc, or cmov instruction.  */
14036 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
14039 /* Figure out whether to use ordered or unordered fp comparisons.
14040    Return the appropriate mode to use.  */
/* NOTE(review): return type line and braces omitted by this listing.  */
14043 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14045 /* ??? In order to make all comparisons reversible, we do all comparisons
14046    non-trapping when compiling for IEEE.  Once gcc is able to distinguish
14047    all forms trapping and nontrapping comparisons, we can make inequality
14048    comparisons trapping again, since it results in better code when using
14049    FCOM based compares.  */
14050 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode needed to represent comparison CODE of OP0/OP1.
   NOTE(review): listing omits lines (the switch statement, the
   returned mode on each branch, braces); code kept byte-identical.  */
14054 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14056 enum machine_mode mode = GET_MODE (op0);
14058 if (SCALAR_FLOAT_MODE_P (mode))
14060 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14061 return ix86_fp_compare_mode (code);
14066 /* Only zero flag is needed.  */
14067 case EQ: /* ZF=0 */
14068 case NE: /* ZF!=0 */
14070 /* Codes needing carry flag.  */
14071 case GEU: /* CF=0 */
14072 case LTU: /* CF=1 */
14073 /* Detect overflow checks.  They need just the carry flag.  */
14074 if (GET_CODE (op0) == PLUS
14075 && rtx_equal_p (op1, XEXP (op0, 0)))
14079 case GTU: /* CF=0 & ZF=0 */
14080 case LEU: /* CF=1 | ZF=1 */
14081 /* Detect overflow checks.  They need just the carry flag.  */
14082 if (GET_CODE (op0) == MINUS
14083 && rtx_equal_p (op1, XEXP (op0, 0)))
14087 /* Codes possibly doable only with sign flag when
14088    comparing against zero.  */
14089 case GE: /* SF=OF or SF=0 */
14090 case LT: /* SF<>OF or SF=1 */
14091 if (op1 == const0_rtx)
14094 /* For other cases Carry flag is not required.  */
14096 /* Codes doable only with sign flag when comparing
14097    against zero, but we miss jump instruction for it
14098    so we need to use relational tests against overflow
14099    that thus needs to be zero.  */
14100 case GT: /* ZF=0 & SF=OF */
14101 case LE: /* ZF=1 | SF<>OF */
14102 if (op1 == const0_rtx)
14106 /* strcmp pattern do (use flags) and combine may ask us for proper
14111 gcc_unreachable ();
14115 /* Return the fixed registers used for condition codes.  */
/* NOTE(review): the function body is entirely absent from this listing
   (original lines 14119+ dropped); only the signature survives.  */
14118 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14125 /* If two condition code modes are compatible, return a condition code
14126    mode which is compatible with both.  Otherwise, return
/* NOTE(review): listing drops lines (comment tail, braces, the m1==m2
   shortcut and the switch body); code kept byte-identical.  */
14129 static enum machine_mode
14130 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
14135 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGCmode and CCGOCmode merge to the more constrained CCGCmode.  */
14138 if ((m1 == CCGCmode && m2 == CCGOCmode)
14139 || (m1 == CCGOCmode && m2 == CCGCmode))
14145 gcc_unreachable ();
14175 /* These are only compatible with themselves, which we already
14181 /* Split comparison code CODE into comparisons we can do using branch
14182    instructions.  BYPASS_CODE is comparison code for branch that will
14183    branch around FIRST_CODE and SECOND_CODE.  If some of branches
14184    is not required, set value to UNKNOWN.
14185    We never require more than two branches.  */
/* NOTE(review): listing omits lines (return type, braces, the switch
   header, `break's and the flag-meaning table at 14197-14205).  */
14188 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
14189 enum rtx_code *first_code,
14190 enum rtx_code *second_code)
14192 *first_code = code;
14193 *bypass_code = UNKNOWN;
14194 *second_code = UNKNOWN;
14196 /* The fcomi comparison sets flags as follows:
/* These codes map directly onto one unsigned flag test.  */
14206 case GT: /* GTU - CF=0 & ZF=0 */
14207 case GE: /* GEU - CF=0 */
14208 case ORDERED: /* PF=0 */
14209 case UNORDERED: /* PF=1 */
14210 case UNEQ: /* EQ - ZF=1 */
14211 case UNLT: /* LTU - CF=1 */
14212 case UNLE: /* LEU - CF=1 | ZF=1 */
14213 case LTGT: /* EQ - ZF=0 */
/* The remaining codes need a bypass or second branch to get the
   IEEE unordered behavior right.  */
14215 case LT: /* LTU - CF=1 - fails on unordered */
14216 *first_code = UNLT;
14217 *bypass_code = UNORDERED;
14219 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
14220 *first_code = UNLE;
14221 *bypass_code = UNORDERED;
14223 case EQ: /* EQ - ZF=1 - fails on unordered */
14224 *first_code = UNEQ;
14225 *bypass_code = UNORDERED;
14227 case NE: /* NE - ZF=0 - fails on unordered */
14228 *first_code = LTGT;
14229 *second_code = UNORDERED;
14231 case UNGE: /* GEU - CF=0 - fails on unordered */
14233 *second_code = UNORDERED;
14235 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
14237 *second_code = UNORDERED;
14240 gcc_unreachable ();
/* Without IEEE math the extra branches are unnecessary.  */
14242 if (!TARGET_IEEE_FP)
14244 *second_code = UNKNOWN;
14245 *bypass_code = UNKNOWN;
14249 /* Return cost of comparison done fcom + arithmetics operations on AX.
14250    All following functions do use number of instructions as a cost metrics.
14251    In future this should be tweaked to compute bytes for optimize_size and
14252    take into account performance of various instructions on various CPUs.  */
/* NOTE(review): the per-code switch (orig. lines 14259-14281) is
   entirely missing from this listing.  */
14254 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
14256 if (!TARGET_IEEE_FP)
14258 /* The cost of code output by ix86_expand_fp_compare.  */
14282 gcc_unreachable ();
14286 /* Return cost of comparison done using fcomi operation.
14287    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
/* NOTE(review): the TARGET_CMOVE availability test (orig. 14294-14295)
   is missing from this listing.  */
14289 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
14291 enum rtx_code bypass_code, first_code, second_code;
14292 /* Return arbitrarily high cost when instruction is not supported - this
14293    prevents gcc from using it.  */
/* Base cost 2; +1 when an extra bypass/second branch is needed.  */
14296 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14297 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
14300 /* Return cost of comparison done using sahf operation.
14301    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
14303 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14305 enum rtx_code bypass_code, first_code, second_code;
14306 /* Return arbitrarily high cost when instruction is not preferred - this
14307    avoids gcc from using it.  */
14308 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
/* Base cost 3 (fnstsw + sahf + jcc); +1 for an extra branch.  */
14310 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14311 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
14314 /* Compute cost of the comparison done using any method.
14315    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
/* NOTE(review): listing omits braces, the `min' updates inside the two
   `if's, and the final return (orig. lines between those shown).  */
14317 ix86_fp_comparison_cost (enum rtx_code code)
14319 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
14322 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14323 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Pick the cheapest of the three strategies.  */
14325 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14326 if (min > sahf_cost)
14328 if (min > fcomi_cost)
14333 /* Return true if we should use an FCOMI instruction for this
/* fcomi is chosen when it is the cheapest strategy for CODE or for
   its swapped form (the comparison may later be swapped).  */
14337 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14339 enum rtx_code swapped_code = swap_condition (code);
14341 return ((ix86_fp_comparison_cost (code)
14342 == ix86_fp_comparison_fcomi_cost (code))
14343 || (ix86_fp_comparison_cost (swapped_code)
14344 == ix86_fp_comparison_fcomi_cost (swapped_code)));
14347 /* Swap, force into registers, or otherwise massage the two operands
14348    to a fp comparison.  The operands are updated in place; the new
14349    comparison code is returned.  */
/* NOTE(review): listing drops many lines (braces, the SSE branch body,
   several conditions); code below kept byte-identical.  */
14351 static enum rtx_code
14352 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14354 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14355 rtx op0 = *pop0, op1 = *pop1;
14356 enum machine_mode op_mode = GET_MODE (op0);
14357 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14359 /* All of the unordered compare instructions only work on registers.
14360    The same is true of the fcomi compare instructions.  The XFmode
14361    compare instructions require registers except when comparing
14362    against zero or when converting operand 1 from fixed point to
14366 && (fpcmp_mode == CCFPUmode
14367 || (op_mode == XFmode
14368 && ! (standard_80387_constant_p (op0) == 1
14369 || standard_80387_constant_p (op1) == 1)
14370 && GET_CODE (op1) != FLOAT)
14371 || ix86_use_fcomi_compare (code)))
14373 op0 = force_reg (op_mode, op0);
14374 op1 = force_reg (op_mode, op1);
14378 /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
14379    things around if they appear profitable, otherwise force op0
14380    into a register.  */
14382 if (standard_80387_constant_p (op0) == 0
14384 && ! (standard_80387_constant_p (op1) == 0
14388 tmp = op0, op0 = op1, op1 = tmp;
14389 code = swap_condition (code);
14393 op0 = force_reg (op_mode, op0);
14395 if (CONSTANT_P (op1))
14397 int tmp = standard_80387_constant_p (op1);
/* Non-standard constants must live in memory for fcom.  */
14399 op1 = validize_mem (force_const_mem (op_mode, op1));
14403 op1 = force_reg (op_mode, op1);
14406 op1 = force_reg (op_mode, op1);
14410 /* Try to rearrange the comparison to make it cheaper.  */
14411 if (ix86_fp_comparison_cost (code)
14412 > ix86_fp_comparison_cost (swap_condition (code))
14413 && (REG_P (op1) || can_create_pseudo_p ()))
14416 tmp = op0, op0 = op1, op1 = tmp;
14417 code = swap_condition (code);
14419 op0 = force_reg (op_mode, op0);
14427 /* Convert comparison codes we use to represent FP comparison to integer
14428    code that will result in proper branch.  Return UNKNOWN if no such code
/* NOTE(review): the function body (a switch over CODE, orig. lines
   14433-14460) is entirely missing from this listing.  */
14432 ix86_fp_compare_code_to_integer (enum rtx_code code)
14461 /* Generate insn patterns to do a floating point compare of OPERANDS.  */
/* NOTE(review): listing drops many lines (braces, the switch over CODE,
   `break's, intcmp_mode assignments on several arms); code below is
   kept byte-identical to the excerpt.  */
14464 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14465 rtx *second_test, rtx *bypass_test)
14467 enum machine_mode fpcmp_mode, intcmp_mode;
14469 int cost = ix86_fp_comparison_cost (code);
14470 enum rtx_code bypass_code, first_code, second_code;
14472 fpcmp_mode = ix86_fp_compare_mode (code);
14473 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14476 *second_test = NULL_RTX;
14478 *bypass_test = NULL_RTX;
14480 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14482 /* Do fcomi/sahf based test when profitable.  */
14483 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14484 && (bypass_code == UNKNOWN || bypass_test)
14485 && (second_code == UNKNOWN || second_test))
14487 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14488 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14494 gcc_assert (TARGET_SAHF);
/* sahf path needs an HImode scratch for the status word.  */
14497 scratch = gen_reg_rtx (HImode);
14498 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14500 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14503 /* The FP codes work out to act like unsigned.  */
14504 intcmp_mode = fpcmp_mode;
14506 if (bypass_code != UNKNOWN)
14507 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14508 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14510 if (second_code != UNKNOWN)
14511 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14512 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Arithmetic path: fnstsw + bit tests on AH.  */
14517 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
14518 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14519 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14521 scratch = gen_reg_rtx (HImode);
14522 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14524 /* In the unordered case, we have to check C2 for NaN's, which
14525    doesn't happen to work out to anything nice combination-wise.
14526    So do some bit twiddling on the value we've got in AH to come
14527    up with an appropriate set of condition codes.  */
14529 intcmp_mode = CCNOmode;
/* 0x45 = C3|C2|C0; 0x44 = C3|C2; 0x40 = C3; 0x05 = C2|C0;
   0x04 = C2; 0x01 = C0 -- masks over the FPU status flags in AH.  */
14534 if (code == GT || !TARGET_IEEE_FP)
14536 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14541 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14542 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14543 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14544 intcmp_mode = CCmode;
14550 if (code == LT && TARGET_IEEE_FP)
14552 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14553 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14554 intcmp_mode = CCmode;
14559 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14565 if (code == GE || !TARGET_IEEE_FP)
14567 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14572 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14573 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14580 if (code == LE && TARGET_IEEE_FP)
14582 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14583 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14584 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14585 intcmp_mode = CCmode;
14590 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14596 if (code == EQ && TARGET_IEEE_FP)
14598 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14599 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14600 intcmp_mode = CCmode;
14605 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14612 if (code == NE && TARGET_IEEE_FP)
14614 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14615 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14621 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14627 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14631 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14636 gcc_unreachable ();
14640 /* Return the test that should be put into the flags user, i.e.
14641    the bcc, scc, or cmov instruction.  */
14642 return gen_rtx_fmt_ee (code, VOIDmode,
14643 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Emit the compare of the global ix86_compare_op0/op1 and return the
   flags-user rtx; FP compares may also set *SECOND_TEST/*BYPASS_TEST.
   NOTE(review): listing drops lines (braces, locals, final return).  */
14648 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14651 op0 = ix86_compare_op0;
14652 op1 = ix86_compare_op1;
14655 *second_test = NULL_RTX;
14657 *bypass_test = NULL_RTX;
/* A MODE_CC op0 means the compare was already emitted.  */
14659 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14660 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14662 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14664 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14665 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14666 second_test, bypass_test);
14669 ret = ix86_expand_int_compare (code, op0, op1);
14674 /* Return true if the CODE will result in nontrivial jump sequence.  */
/* Nontrivial == needs a bypass or second branch per
   ix86_fp_comparison_codes.  NOTE(review): listing omits the early
   TARGET_CMOVE shortcut and braces (numbering jumps 14678 -> 14681).  */
14676 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14678 enum rtx_code bypass_code, first_code, second_code;
14681 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14682 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Expand a conditional branch on CODE (using the global
   ix86_compare_op0/op1) to LABEL.  Dispatches on the operand mode:
   narrow integer modes branch directly; FP modes may need compound
   sequences; DImode/TImode is split into word compares.
   NOTE(review): listing drops many lines (switch header, case labels,
   braces, `return's); code below kept byte-identical.  */
14686 ix86_expand_branch (enum rtx_code code, rtx label)
14690 switch (GET_MODE (ix86_compare_op0))
/* Simple case: one compare + one conditional jump.  */
14696 tmp = ix86_expand_compare (code, NULL, NULL);
14697 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14698 gen_rtx_LABEL_REF (VOIDmode, label),
14700 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* FP modes.  */
14709 enum rtx_code bypass_code, first_code, second_code;
14711 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14712 &ix86_compare_op1);
14714 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14716 /* Check whether we will use the natural sequence with one jump.  If
14717    so, we can expand jump early.  Otherwise delay expansion by
14718    creating compound insn to not confuse optimizers.  */
14719 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14721 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14722 gen_rtx_LABEL_REF (VOIDmode, label),
14723 pc_rtx, NULL_RTX, NULL_RTX);
/* Compound FP branch: keep as one PARALLEL with clobbers so later
   passes can split it.  */
14727 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14728 ix86_compare_op0, ix86_compare_op1);
14729 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14730 gen_rtx_LABEL_REF (VOIDmode, label),
14732 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14734 use_fcomi = ix86_use_fcomi_compare (code);
14735 vec = rtvec_alloc (3 + !use_fcomi);
14736 RTVEC_ELT (vec, 0) = tmp;
14738 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14740 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14743 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14745 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14754 /* Expand DImode branch into multiple compare+branch.  */
14756 rtx lo[2], hi[2], label2;
14757 enum rtx_code code1, code2, code3;
14758 enum machine_mode submode;
14760 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14762 tmp = ix86_compare_op0;
14763 ix86_compare_op0 = ix86_compare_op1;
14764 ix86_compare_op1 = tmp;
14765 code = swap_condition (code);
14767 if (GET_MODE (ix86_compare_op0) == DImode)
14769 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14770 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14775 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14776 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14780 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14781    avoid two branches.  This costs one extra insn, so disable when
14782    optimizing for size.  */
14784 if ((code == EQ || code == NE)
14785 && (!optimize_insn_for_size_p ()
14786 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14791 if (hi[1] != const0_rtx)
14792 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14793 NULL_RTX, 0, OPTAB_WIDEN);
14796 if (lo[1] != const0_rtx)
14797 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14798 NULL_RTX, 0, OPTAB_WIDEN);
14800 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14801 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse with the single-word OR result vs. zero.  */
14803 ix86_compare_op0 = tmp;
14804 ix86_compare_op1 = const0_rtx;
14805 ix86_expand_branch (code, label);
14809 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14810    op1 is a constant and the low word is zero, then we can just
14811    examine the high word.  Similarly for low word -1 and
14812    less-or-equal-than or greater-than.  */
14814 if (CONST_INT_P (hi[1]))
14817 case LT: case LTU: case GE: case GEU:
14818 if (lo[1] == const0_rtx)
14820 ix86_compare_op0 = hi[0];
14821 ix86_compare_op1 = hi[1];
14822 ix86_expand_branch (code, label);
14826 case LE: case LEU: case GT: case GTU:
14827 if (lo[1] == constm1_rtx)
14829 ix86_compare_op0 = hi[0];
14830 ix86_compare_op1 = hi[1];
14831 ix86_expand_branch (code, label);
14839 /* Otherwise, we need two or three jumps.  */
14841 label2 = gen_label_rtx ();
14844 code2 = swap_condition (code);
14845 code3 = unsigned_condition (code);
14849 case LT: case GT: case LTU: case GTU:
14852 case LE: code1 = LT; code2 = GT; break;
14853 case GE: code1 = GT; code2 = LT; break;
14854 case LEU: code1 = LTU; code2 = GTU; break;
14855 case GEU: code1 = GTU; code2 = LTU; break;
14857 case EQ: code1 = UNKNOWN; code2 = NE; break;
14858 case NE: code2 = UNKNOWN; break;
14861 gcc_unreachable ();
14866 * if (hi(a) < hi(b)) goto true;
14867 * if (hi(a) > hi(b)) goto false;
14868 * if (lo(a) < lo(b)) goto true;
14872 ix86_compare_op0 = hi[0];
14873 ix86_compare_op1 = hi[1];
14875 if (code1 != UNKNOWN)
14876 ix86_expand_branch (code1, label);
14877 if (code2 != UNKNOWN)
14878 ix86_expand_branch (code2, label2);
14880 ix86_compare_op0 = lo[0];
14881 ix86_compare_op1 = lo[1];
14882 ix86_expand_branch (code3, label);
14884 if (code2 != UNKNOWN)
14885 emit_label (label2);
14890 /* If we have already emitted a compare insn, go straight to simple.
14891    ix86_expand_compare won't emit anything if ix86_compare_emitted
14893 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
14898 /* Split branch based on floating point condition.  */
/* Emits up to three jumps: an optional BYPASS jump around the main
   test, the main conditional jump, and an optional SECOND jump; branch
   probabilities from split_branch_probability are attached as
   REG_BR_PROB notes.  NOTE(review): listing drops lines (braces,
   locals, the target swap under the reversed condition).  */
14900 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14901 rtx target1, rtx target2, rtx tmp, rtx pushed)
14903 rtx second, bypass;
14904 rtx label = NULL_RTX;
14906 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the taken target is target1.  */
14909 if (target2 != pc_rtx)
14912 code = reverse_condition_maybe_unordered (code);
14917 condition = ix86_expand_fp_compare (code, op1, op2,
14918 tmp, &second, &bypass);
14920 /* Remove pushed operand from stack.  */
14922 ix86_free_from_memory (GET_MODE (pushed));
14924 if (split_branch_probability >= 0)
14926 /* Distribute the probabilities across the jumps.
14927    Assume the BYPASS and SECOND to be always test
14929 probability = split_branch_probability;
14931 /* Value of 1 is low enough to make no need for probability
14932    to be updated.  Later we may run some experiments and see
14933    if unordered values are more frequent in practice.  */
14935 bypass_probability = 1;
14937 second_probability = 1;
14939 if (bypass != NULL_RTX)
14941 label = gen_label_rtx ();
14942 i = emit_jump_insn (gen_rtx_SET
14944 gen_rtx_IF_THEN_ELSE (VOIDmode,
14946 gen_rtx_LABEL_REF (VOIDmode,
14949 if (bypass_probability >= 0)
14950 add_reg_note (i, REG_BR_PROB, GEN_INT (bypass_probability));
/* The main conditional jump.  */
14952 i = emit_jump_insn (gen_rtx_SET
14954 gen_rtx_IF_THEN_ELSE (VOIDmode,
14955 condition, target1, target2)));
14956 if (probability >= 0)
14957 add_reg_note (i, REG_BR_PROB, GEN_INT (probability));
14958 if (second != NULL_RTX)
14960 i = emit_jump_insn (gen_rtx_SET
14962 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14964 if (second_probability >= 0)
14965 add_reg_note (i, REG_BR_PROB, GEN_INT (second_probability));
14967 if (label != NULL_RTX)
14968 emit_label (label);
/* Expand a setcc of the global comparison into QImode DEST; when the
   FP compare produced a second or bypass test, combine the two QImode
   results with AND/OR.  NOTE(review): listing omits lines (braces,
   tmp/tmpreg setup, the AND-vs-IOR selection condition).  */
14972 ix86_expand_setcc (enum rtx_code code, rtx dest)
14974 rtx ret, tmp, tmpreg, equiv;
14975 rtx second_test, bypass_test;
14977 gcc_assert (GET_MODE (dest) == QImode);
14979 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14980 PUT_MODE (ret, QImode);
14985 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
14986 if (bypass_test || second_test)
14988 rtx test = second_test;
14990 rtx tmp2 = gen_reg_rtx (QImode);
/* bypass_test must be applied inverted; it cannot coexist with
   second_test (per ix86_fp_comparison_codes).  */
14993 gcc_assert (!second_test);
14994 test = bypass_test;
14996 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14998 PUT_MODE (test, QImode);
14999 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
15002 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
15004 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
15007 /* Attach a REG_EQUAL note describing the comparison result.  */
15008 if (ix86_compare_op0 && ix86_compare_op1)
15010 equiv = simplify_gen_relational (code, QImode,
15011 GET_MODE (ix86_compare_op0),
15012 ix86_compare_op0, ix86_compare_op1);
15013 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
15017 /* Expand comparison setting or clearing carry flag.  Return true when
15018    successful and set pop for the operation.  */
/* NOTE(review): listing drops many lines (braces, the integer switch
   header and case labels, operand swaps, `return's); code below is
   kept byte-identical to the excerpt.  */
15020 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
15022 enum machine_mode mode =
15023 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15025 /* Do not handle DImode compares that go through special path.  */
15026 if (mode == (TARGET_64BIT ? TImode : DImode))
15029 if (SCALAR_FLOAT_MODE_P (mode))
15031 rtx second_test = NULL, bypass_test = NULL;
15032 rtx compare_op, compare_seq;
15034 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15036 /* Shortcut: following common codes never translate
15037    into carry flag compares.  */
15038 if (code == EQ || code == NE || code == UNEQ || code == LTGT
15039 || code == ORDERED || code == UNORDERED)
15042 /* These comparisons require zero flag; swap operands so they won't.  */
15043 if ((code == GT || code == UNLE || code == LE || code == UNGT)
15044 && !TARGET_IEEE_FP)
15049 code = swap_condition (code);
15052 /* Try to expand the comparison and verify that we end up with
15053    carry flag based comparison.  This fails to be true only when
15054    we decide to expand comparison using arithmetic that is not
15055    too common scenario.  */
15057 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
15058 &second_test, &bypass_test);
15059 compare_seq = get_insns ();
15062 if (second_test || bypass_test)
15065 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15066 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15067 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15069 code = GET_CODE (compare_op);
/* Only carry-flag codes (LTU/GEU) qualify.  */
15071 if (code != LTU && code != GEU)
15074 emit_insn (compare_seq);
15079 if (!INTEGRAL_MODE_P (mode))
15088 /* Convert a==0 into (unsigned)a<1.  */
15091 if (op1 != const0_rtx)
15094 code = (code == EQ ? LTU : GEU);
15097 /* Convert a>b into b<a or a>=b-1.  */
15100 if (CONST_INT_P (op1))
15102 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15103 /* Bail out on overflow.  We still can swap operands but that
15104    would force loading of the constant into register.  */
15105 if (op1 == const0_rtx
15106 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15108 code = (code == GTU ? GEU : LTU);
15115 code = (code == GTU ? LTU : GEU);
15119 /* Convert a>=0 into (unsigned)a<0x80000000.  */
15122 if (mode == DImode || op1 != const0_rtx)
15124 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15125 code = (code == LT ? GEU : LTU);
15129 if (mode == DImode || op1 != constm1_rtx)
15131 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15132 code = (code == LE ? GEU : LTU);
15138 /* Swapping operands may cause constant to appear as first operand.  */
15139 if (!nonimmediate_operand (op0, VOIDmode))
15141 if (!can_create_pseudo_p ())
15143 op0 = force_reg (mode, op0);
15145 ix86_compare_op0 = op0;
15146 ix86_compare_op1 = op1;
15147 *pop = ix86_expand_compare (code, NULL, NULL);
15148 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move:
     operands[0] = destination
     operands[1] = comparison rtx (code with ix86_compare_op0/op1 operands)
     operands[2] = value on true, operands[3] = value on false.
   Returns 1 when an insn sequence was emitted, 0 on failure (caller
   falls back to generic movcc expansion).
   NOTE(review): this extraction is elided -- interior lines are missing
   (see the jumps in the embedded line numbers), so only visible logic is
   documented here.  */
15153 ix86_expand_int_movcc (rtx operands[])
15155 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15156 rtx compare_seq, compare_op;
15157 rtx second_test, bypass_test;
15158 enum machine_mode mode = GET_MODE (operands[0]);
/* Fixed: stray second semicolon after the initializer removed.  */
15159 bool sign_bit_compare_p = false;
15162 ix86_compare_op0 = XEXP (operands[1], 0);
15163 ix86_compare_op1 = XEXP (operands[1], 1);
15164 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15165 compare_seq = get_insns ();
15168 compare_code = GET_CODE (compare_op);
/* A compare against 0 for GE/LT (or against -1 for GT/LE) only needs
   the sign bit, which can be produced with a shift instead of setcc.  */
15170 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15171 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15172 sign_bit_compare_p = true;
15174 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15175 HImode insns, we'd be swallowed in word prefix ops. */
/* Case 1: both arms are integer constants -- try branchless sbb/setcc
   arithmetic sequences before falling back to cmov.  */
15177 if ((mode != HImode || TARGET_FAST_PREFIX)
15178 && (mode != (TARGET_64BIT ? TImode : DImode))
15179 && CONST_INT_P (operands[2])
15180 && CONST_INT_P (operands[3]))
15182 rtx out = operands[0];
15183 HOST_WIDE_INT ct = INTVAL (operands[2]);
15184 HOST_WIDE_INT cf = INTVAL (operands[3]);
15185 HOST_WIDE_INT diff;
15188 /* Sign bit compares are better done using shifts than we do by using
15190 if (sign_bit_compare_p
15191 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15192 ix86_compare_op1, &compare_op))
15194 /* Detect overlap between destination and compare sources. */
15197 if (!sign_bit_compare_p)
15199 bool fpcmp = false;
15201 compare_code = GET_CODE (compare_op);
15203 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15204 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15207 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15210 /* To simplify rest of code, restrict to the GEU case. */
15211 if (compare_code == LTU)
15213 HOST_WIDE_INT tmp = ct;
15216 compare_code = reverse_condition (compare_code);
15217 code = reverse_condition (code);
15222 PUT_CODE (compare_op,
15223 reverse_condition_maybe_unordered
15224 (GET_CODE (compare_op)));
15226 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15230 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15231 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15232 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb reg,reg pattern).  */
15234 if (mode == DImode)
15235 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15237 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15241 if (code == GT || code == GE)
15242 code = reverse_condition (code);
15245 HOST_WIDE_INT tmp = ct;
15250 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15251 ix86_compare_op1, VOIDmode, 0, -1);
15264 tmp = expand_simple_binop (mode, PLUS,
15266 copy_rtx (tmp), 1, OPTAB_DIRECT);
15277 tmp = expand_simple_binop (mode, IOR,
15279 copy_rtx (tmp), 1, OPTAB_DIRECT);
15281 else if (diff == -1 && ct)
15291 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15293 tmp = expand_simple_binop (mode, PLUS,
15294 copy_rtx (tmp), GEN_INT (cf),
15295 copy_rtx (tmp), 1, OPTAB_DIRECT);
15303 * andl cf - ct, dest
15313 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15316 tmp = expand_simple_binop (mode, AND,
15318 gen_int_mode (cf - ct, mode),
15319 copy_rtx (tmp), 1, OPTAB_DIRECT);
15321 tmp = expand_simple_binop (mode, PLUS,
15322 copy_rtx (tmp), GEN_INT (ct),
15323 copy_rtx (tmp), 1, OPTAB_DIRECT);
15326 if (!rtx_equal_p (tmp, out))
15327 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15329 return 1; /* DONE */
15334 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15337 tmp = ct, ct = cf, cf = tmp;
15340 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15342 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15344 /* We may be reversing unordered compare to normal compare, that
15345 is not valid in general (we may convert non-trapping condition
15346 to trapping one), however on i386 we currently emit all
15347 comparisons unordered. */
15348 compare_code = reverse_condition_maybe_unordered (compare_code);
15349 code = reverse_condition_maybe_unordered (code);
15353 compare_code = reverse_condition (compare_code);
15354 code = reverse_condition (code);
15358 compare_code = UNKNOWN;
15359 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15360 && CONST_INT_P (ix86_compare_op1))
15362 if (ix86_compare_op1 == const0_rtx
15363 && (code == LT || code == GE))
15364 compare_code = code;
15365 else if (ix86_compare_op1 == constm1_rtx)
15369 else if (code == GT)
15374 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15375 if (compare_code != UNKNOWN
15376 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15377 && (cf == -1 || ct == -1))
15379 /* If lea code below could be used, only optimize
15380 if it results in a 2 insn sequence. */
15382 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15383 || diff == 3 || diff == 5 || diff == 9)
15384 || (compare_code == LT && ct == -1)
15385 || (compare_code == GE && cf == -1))
15388 * notl op1 (if necessary)
15396 code = reverse_condition (code);
15399 out = emit_store_flag (out, code, ix86_compare_op0,
15400 ix86_compare_op1, VOIDmode, 0, -1);
15402 out = expand_simple_binop (mode, IOR,
15404 out, 1, OPTAB_DIRECT);
15405 if (out != operands[0])
15406 emit_move_insn (operands[0], out);
15408 return 1; /* DONE */
/* Scaled-difference case: when ct-cf fits an lea addressing form
   (1,2,3,4,5,8,9), compute cf + setcc*diff with setcc + lea.  */
15413 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15414 || diff == 3 || diff == 5 || diff == 9)
15415 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15417 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15423 * lea cf(dest*(ct-cf)),dest
15427 * This also catches the degenerate setcc-only case.
15433 out = emit_store_flag (out, code, ix86_compare_op0,
15434 ix86_compare_op1, VOIDmode, 0, 1);
15437 /* On x86_64 the lea instruction operates on Pmode, so we need
15438 to get arithmetics done in proper mode to match. */
15440 tmp = copy_rtx (out);
15444 out1 = copy_rtx (out);
15445 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15449 tmp = gen_rtx_PLUS (mode, tmp, out1);
15455 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15458 if (!rtx_equal_p (tmp, out))
15461 out = force_operand (tmp, copy_rtx (out));
15463 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15465 if (!rtx_equal_p (out, operands[0]))
15466 emit_move_insn (operands[0], copy_rtx (out));
15468 return 1; /* DONE */
15472 * General case: Jumpful:
15473 * xorl dest,dest cmpl op1, op2
15474 * cmpl op1, op2 movl ct, dest
15475 * setcc dest jcc 1f
15476 * decl dest movl cf, dest
15477 * andl (cf-ct),dest 1:
15480 * Size 20. Size 14.
15482 * This is reasonably steep, but branch mispredict costs are
15483 * high on modern cpus, so consider failing only if optimizing
/* No cmov available (or QImode partial-register stall): emit the
   branchless setcc/dec/and sequence when branches are expensive.  */
15487 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15488 && BRANCH_COST (optimize_insn_for_speed_p (),
15493 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15498 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15500 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15502 /* We may be reversing unordered compare to normal compare,
15503 that is not valid in general (we may convert non-trapping
15504 condition to trapping one), however on i386 we currently
15505 emit all comparisons unordered. */
15506 code = reverse_condition_maybe_unordered (code);
15510 code = reverse_condition (code);
15511 if (compare_code != UNKNOWN)
15512 compare_code = reverse_condition (compare_code);
15516 if (compare_code != UNKNOWN)
15518 /* notl op1 (if needed)
15523 For x < 0 (resp. x <= -1) there will be no notl,
15524 so if possible swap the constants to get rid of the
15526 True/false will be -1/0 while code below (store flag
15527 followed by decrement) is 0/-1, so the constants need
15528 to be exchanged once more. */
15530 if (compare_code == GE || !cf)
15532 code = reverse_condition (code);
15537 HOST_WIDE_INT tmp = cf;
15542 out = emit_store_flag (out, code, ix86_compare_op0,
15543 ix86_compare_op1, VOIDmode, 0, -1);
15547 out = emit_store_flag (out, code, ix86_compare_op0,
15548 ix86_compare_op1, VOIDmode, 0, 1);
15550 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15551 copy_rtx (out), 1, OPTAB_DIRECT);
15554 out = expand_simple_binop (mode, AND, copy_rtx (out),
15555 gen_int_mode (cf - ct, mode),
15556 copy_rtx (out), 1, OPTAB_DIRECT);
15558 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15559 copy_rtx (out), 1, OPTAB_DIRECT);
15560 if (!rtx_equal_p (out, operands[0]))
15561 emit_move_insn (operands[0], copy_rtx (out));
15563 return 1; /* DONE */
/* Case 2: one constant arm, one variable arm, and no cmov -- recurse
   to load 0/-1 for the constant, then AND/IOR the variable in.  */
15567 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15569 /* Try a few things more with specific constants and a variable. */
15572 rtx var, orig_out, out, tmp;
15574 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15575 return 0; /* FAIL */
15577 /* If one of the two operands is an interesting constant, load a
15578 constant with the above and mask it in with a logical operation. */
15580 if (CONST_INT_P (operands[2]))
15583 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15584 operands[3] = constm1_rtx, op = and_optab;
15585 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15586 operands[3] = const0_rtx, op = ior_optab;
15588 return 0; /* FAIL */
15590 else if (CONST_INT_P (operands[3]))
15593 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15594 operands[2] = constm1_rtx, op = and_optab;
/* Fixed: guard must test operands[2] (the arm being replaced), not
   operands[3], which is already known to be -1 here; mirrors the
   symmetric CONST_INT_P (operands[2]) branch above.  */
15595 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
15596 operands[2] = const0_rtx, op = ior_optab;
15598 return 0; /* FAIL */
15601 return 0; /* FAIL */
15603 orig_out = operands[0];
15604 tmp = gen_reg_rtx (mode);
15607 /* Recurse to get the constant loaded. */
15608 if (ix86_expand_int_movcc (operands) == 0)
15609 return 0; /* FAIL */
15611 /* Mask in the interesting variable. */
15612 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15614 if (!rtx_equal_p (out, orig_out))
15615 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15617 return 1; /* DONE */
/* Case 3: cmov is available -- emit up to three conditional moves
   (main compare plus optional bypass/second fp tests).  */
15621 * For comparison with above,
15631 if (! nonimmediate_operand (operands[2], mode))
15632 operands[2] = force_reg (mode, operands[2]);
15633 if (! nonimmediate_operand (operands[3], mode))
15634 operands[3] = force_reg (mode, operands[3]);
15636 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15638 rtx tmp = gen_reg_rtx (mode);
15639 emit_move_insn (tmp, operands[3]);
15642 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15644 rtx tmp = gen_reg_rtx (mode);
15645 emit_move_insn (tmp, operands[2]);
15649 if (! register_operand (operands[2], VOIDmode)
15651 || ! register_operand (operands[3], VOIDmode)))
15652 operands[2] = force_reg (mode, operands[2]);
15655 && ! register_operand (operands[3], VOIDmode))
15656 operands[3] = force_reg (mode, operands[3]);
15658 emit_insn (compare_seq);
15659 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15660 gen_rtx_IF_THEN_ELSE (mode,
15661 compare_op, operands[2],
15664 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15665 gen_rtx_IF_THEN_ELSE (mode,
15667 copy_rtx (operands[3]),
15668 copy_rtx (operands[0]))));
15670 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15671 gen_rtx_IF_THEN_ELSE (mode,
15673 copy_rtx (operands[2]),
15675 copy_rtx (operands[0]))));
15676 return 1; /* DONE */
15679 /* Swap, force into registers, or otherwise massage the two operands
15680 to an sse comparison with a mask result. Thus we differ a bit from
15681 ix86_prepare_fp_compare_args which expects to produce a flags result.
15683 The DEST operand exists to help determine whether to commute commutative
15684 operators. The POP0/POP1 operands are updated in place. The new
15685 comparison code is returned, or UNKNOWN if not implementable. */
/* Massage *POP0/*POP1 in place so CODE can be implemented as an SSE
   mask-producing compare; returns the (possibly swapped) code, or
   UNKNOWN when not implementable (see header comment above).
   NOTE(review): extraction is elided -- the switch dispatch and case
   labels between the visible comments are missing here.  */
15687 static enum rtx_code
15688 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15689 rtx *pop0, rtx *pop1)
15697 /* We have no LTGT as an operator. We could implement it with
15698 NE & ORDERED, but this requires an extra temporary. It's
15699 not clear that it's worth it. */
15706 /* These are supported directly. */
15713 /* For commutative operators, try to canonicalize the destination
15714 operand to be first in the comparison - this helps reload to
15715 avoid extra moves. */
/* Presumably swaps *pop0/*pop1 when DEST matches *pop1 -- the swap
   body itself is not visible in this extraction.  */
15716 if (!dest || !rtx_equal_p (dest, *pop1))
15724 /* These are not supported directly. Swap the comparison operands
15725 to transform into something that is supported. */
15729 code = swap_condition (code);
/* Any comparison code not handled by a case above is a caller bug.  */
15733 gcc_unreachable ();
15739 /* Detect conditional moves that exactly match min/max operational
15740 semantics. Note that this is IEEE safe, as long as we don't
15741 interchange the operands.
15743 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15744 and TRUE if the operation is successful and instructions are emitted. */
/* Recognize a conditional move that is exactly an SSE min/max and emit
   it directly (see header comment above).  Operand order is preserved,
   keeping the IEEE NaN/signed-zero semantics of minss/maxss.
   NOTE(review): extraction is elided; the GE/is_min setup lines and
   several returns are missing from this view.  */
15747 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15748 rtx cmp_op1, rtx if_true, rtx if_false)
15750 enum machine_mode mode;
15756 else if (code == UNGE)
/* Swap the arms so the compare matches the min/max operand order.  */
15759 if_true = if_false;
/* The cmove matches min/max only when the compared values are the
   same rtxes as the selected values (in either order).  */
15765 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15767 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15772 mode = GET_MODE (dest);
15774 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15775 but MODE may be a vector mode and thus not appropriate. */
15776 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Strict-math path: wrap in an UNSPEC so the optimizers cannot
   commute the IEEE-sensitive operand order.  */
15778 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15781 if_true = force_reg (mode, if_true);
15782 v = gen_rtvec (2, if_true, if_false);
15783 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math path: plain SMIN/SMAX rtx is fine.  */
15787 code = is_min ? SMIN : SMAX;
15788 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15791 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15795 /* Expand an sse vector comparison. Return the register with the result. */
/* Emit DEST = (CMP_OP0 code CMP_OP1) as an SSE vector compare producing
   an all-ones/all-zeros mask; returns the register holding the result
   (a fresh one if DEST overlaps any input).  */
15798 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15799 rtx op_true, rtx op_false)
15801 enum machine_mode mode = GET_MODE (dest);
15804 cmp_op0 = force_reg (mode, cmp_op0);
15805 if (!nonimmediate_operand (cmp_op1, mode))
15806 cmp_op1 = force_reg (mode, cmp_op1);
/* Don't clobber DEST while OP_TRUE/OP_FALSE (still to be blended by
   the caller) may read it.  */
15809 || reg_overlap_mentioned_p (dest, op_true)
15810 || reg_overlap_mentioned_p (dest, op_false))
15811 dest = gen_reg_rtx (mode);
15813 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15814 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15819 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15820 operations. This is used for both scalar and vector conditional moves. */
/* Blend OP_TRUE/OP_FALSE under mask CMP into DEST using AND/ANDNOT/IOR
   (or a single SSE5 pcmov); used for scalar and vector cmoves alike.  */
15823 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15825 enum machine_mode mode = GET_MODE (dest);
/* Special case: false arm is zero -- a single AND suffices.  */
15828 if (op_false == CONST0_RTX (mode))
15830 op_true = force_reg (mode, op_true);
15831 x = gen_rtx_AND (mode, cmp, op_true);
15832 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Special case: true arm is zero -- ANDNOT suffices.  */
15834 else if (op_true == CONST0_RTX (mode))
15836 op_false = force_reg (mode, op_false);
15837 x = gen_rtx_NOT (mode, cmp);
15838 x = gen_rtx_AND (mode, x, op_false);
15839 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a real conditional-move (pcmov) insn.  */
15841 else if (TARGET_SSE5)
15843 rtx pcmov = gen_rtx_SET (mode, dest,
15844 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* General case: dest = (true & cmp) | (false & ~cmp).  */
15851 op_true = force_reg (mode, op_true);
15852 op_false = force_reg (mode, op_false);
15854 t2 = gen_reg_rtx (mode);
15856 t3 = gen_reg_rtx (mode);
15860 x = gen_rtx_AND (mode, op_true, cmp);
15861 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15863 x = gen_rtx_NOT (mode, cmp);
15864 x = gen_rtx_AND (mode, x, op_false);
15865 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15867 x = gen_rtx_IOR (mode, t3, t2);
15868 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15872 /* Expand a floating-point conditional move. Return true if successful. */
/* Expand a floating-point conditional move (operands as in
   ix86_expand_int_movcc).  Uses SSE mask/blend when the data lives in
   SSE registers, otherwise x87/integer fcmov.  Returns nonzero on
   success.  NOTE(review): extraction is elided; several early-return
   and condition lines are missing from this view.  */
15875 ix86_expand_fp_movcc (rtx operands[])
15877 enum machine_mode mode = GET_MODE (operands[0]);
15878 enum rtx_code code = GET_CODE (operands[1]);
15879 rtx tmp, compare_op, second_test, bypass_test;
15881 ix86_compare_op0 = XEXP (operands[1], 0);
15882 ix86_compare_op1 = XEXP (operands[1], 1);
15883 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15885 enum machine_mode cmode;
15887 /* Since we've no cmove for sse registers, don't force bad register
15888 allocation just to gain access to it. Deny movcc when the
15889 comparison mode doesn't match the move mode. */
15890 cmode = GET_MODE (ix86_compare_op0);
15891 if (cmode == VOIDmode)
15892 cmode = GET_MODE (ix86_compare_op1);
15896 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15898 &ix86_compare_op1);
15899 if (code == UNKNOWN)
/* Prefer a direct minss/maxss when the cmove matches min/max.  */
15902 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15903 ix86_compare_op1, operands[2],
/* Otherwise: compare to a mask, then blend the two arms.  */
15907 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15908 ix86_compare_op1, operands[2], operands[3]);
15909 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15913 /* The floating point conditional move instructions don't directly
15914 support conditions resulting from a signed integer comparison. */
15916 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15918 /* The floating point conditional move instructions don't directly
15919 support signed integer comparisons. */
/* Reduce an unsupported signed compare to setcc + (result != 0).  */
15921 if (!fcmov_comparison_operator (compare_op, VOIDmode))
15923 gcc_assert (!second_test && !bypass_test);
15924 tmp = gen_reg_rtx (QImode);
15925 ix86_expand_setcc (code, tmp);
15927 ix86_compare_op0 = tmp;
15928 ix86_compare_op1 = const0_rtx;
15929 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy arms that overlap the destination before the extra fcmov
   passes below would read a clobbered value.  */
15931 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15933 tmp = gen_reg_rtx (mode);
15934 emit_move_insn (tmp, operands[3]);
15937 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15939 tmp = gen_reg_rtx (mode);
15940 emit_move_insn (tmp, operands[2]);
15944 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15945 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15946 operands[2], operands[3])));
15948 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15949 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15950 operands[3], operands[0])));
15952 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15953 gen_rtx_IF_THEN_ELSE (mode, second_test,
15954 operands[2], operands[0])));
15959 /* Expand a floating-point vector conditional move; a vcond operation
15960 rather than a movcc operation. */
/* Expand a floating-point vcond: operands[0] = dest, operands[1]/[2] =
   the two arms, operands[3] = comparison, operands[4]/[5] = compared
   values.  Mirrors the SSE path of ix86_expand_fp_movcc.  */
15963 ix86_expand_fp_vcond (rtx operands[])
15965 enum rtx_code code = GET_CODE (operands[3]);
15968 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15969 &operands[4], &operands[5]);
15970 if (code == UNKNOWN)
/* Direct min/max when the pattern matches; otherwise compare+blend.  */
15973 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15974 operands[5], operands[1], operands[2]))
15977 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15978 operands[1], operands[2]);
15979 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15983 /* Expand a signed/unsigned integral vector conditional move. */
/* Expand an integral vector vcond (same operand layout as
   ix86_expand_fp_vcond).  Canonicalizes to EQ/GT/GTU and lowers
   unsigned compares, which the pre-SSE5 hardware lacks.
   NOTE(review): extraction is elided; the switch labels and the
   `negate` uses between visible lines are missing from this view.  */
15986 ix86_expand_int_vcond (rtx operands[])
15988 enum machine_mode mode = GET_MODE (operands[0]);
15989 enum rtx_code code = GET_CODE (operands[3]);
15990 bool negate = false;
15993 cop0 = operands[4];
15994 cop1 = operands[5];
15996 /* SSE5 supports all of the comparisons on all vector int types. */
15999 /* Canonicalize the comparison to EQ, GT, GTU. */
16010 code = reverse_condition (code);
16016 code = reverse_condition (code);
16022 code = swap_condition (code);
16023 x = cop0, cop0 = cop1, cop1 = x;
16027 gcc_unreachable ();
16030 /* Only SSE4.1/SSE4.2 supports V2DImode. */
16031 if (mode == V2DImode)
16036 /* SSE4.1 supports EQ. */
16037 if (!TARGET_SSE4_1)
16043 /* SSE4.2 supports GT/GTU. */
16044 if (!TARGET_SSE4_2)
16049 gcc_unreachable ();
16053 /* Unsigned parallel compare is not supported by the hardware. Play some
16054 tricks to turn this into a signed comparison against 0. */
16057 cop0 = force_reg (mode, cop0);
16066 /* Perform a parallel modulo subtraction. */
16067 t1 = gen_reg_rtx (mode);
16068 emit_insn ((mode == V4SImode
16070 : gen_subv2di3) (t1, cop0, cop1));
16072 /* Extract the original sign bit of op0. */
16073 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16075 t2 = gen_reg_rtx (mode);
16076 emit_insn ((mode == V4SImode
16078 : gen_andv2di3) (t2, cop0, mask));
16080 /* XOR it back into the result of the subtraction. This results
16081 in the sign bit set iff we saw unsigned underflow. */
16082 x = gen_reg_rtx (mode);
16083 emit_insn ((mode == V4SImode
16085 : gen_xorv2di3) (x, t1, t2));
16093 /* Perform a parallel unsigned saturating subtraction. */
16094 x = gen_reg_rtx (mode);
16095 emit_insn (gen_rtx_SET (VOIDmode, x,
16096 gen_rtx_US_MINUS (mode, cop0, cop1)));
16103 gcc_unreachable ();
16107 cop1 = CONST0_RTX (mode);
/* When the canonicalization inverted the condition, `negate` swaps the
   arms by indexing operands[1+negate]/operands[2-negate].  */
16111 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16112 operands[1+negate], operands[2-negate]);
16114 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16115 operands[2-negate]);
16119 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16120 true if we should do zero extension, else sign extension. HIGH_P is
16121 true if we want the N/2 high elements, else the low elements. */
/* Widen operands[1] to the next wider integer vector in operands[0]
   using punpckl/punpckh with either zeros (UNSIGNED_P) or a computed
   sign-mask as the second interleave source (see comment above).  */
16124 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16126 enum machine_mode imode = GET_MODE (operands[1]);
16127 rtx (*unpack)(rtx, rtx, rtx);
/* Select the interleave insn by source element width and half.  */
16134 unpack = gen_vec_interleave_highv16qi;
16136 unpack = gen_vec_interleave_lowv16qi;
16140 unpack = gen_vec_interleave_highv8hi;
16142 unpack = gen_vec_interleave_lowv8hi;
16146 unpack = gen_vec_interleave_highv4si;
16148 unpack = gen_vec_interleave_lowv4si;
16151 gcc_unreachable ();
16154 dest = gen_lowpart (imode, operands[0]);
/* Zero-extend interleaves with 0; sign-extend interleaves with a
   0/-1 mask computed as (0 > src).  */
16157 se = force_reg (imode, CONST0_RTX (imode));
16159 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16160 operands[1], pc_rtx, pc_rtx);
16162 emit_insn (unpack (dest, operands[1], se));
16165 /* This function performs the same task as ix86_expand_sse_unpack,
16166 but with SSE4.1 instructions. */
/* SSE4.1 variant of ix86_expand_sse_unpack: use pmovzx/pmovsx, which
   extend the low half directly; the high half is first shifted down.  */
16169 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16171 enum machine_mode imode = GET_MODE (operands[1]);
16172 rtx (*unpack)(rtx, rtx);
/* Pick the zero/sign-extending insn by element width.  */
16179 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16181 unpack = gen_sse4_1_extendv8qiv8hi2;
16185 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16187 unpack = gen_sse4_1_extendv4hiv4si2;
16191 unpack = gen_sse4_1_zero_extendv2siv2di2;
16193 unpack = gen_sse4_1_extendv2siv2di2;
16196 gcc_unreachable ();
16199 dest = operands[0];
16202 /* Shift higher 8 bytes to lower 8 bytes. */
16203 src = gen_reg_rtx (imode);
16204 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16205 gen_lowpart (TImode, operands[1]),
16211 emit_insn (unpack (dest, src));
16214 /* This function performs the same task as ix86_expand_sse_unpack,
16215 but with sse5 instructions. */
/* SSE5 variant of ix86_expand_sse_unpack: build a 16-byte pperm control
   vector that interleaves source bytes with zero bytes (UNSIGNED_P) or
   replicated sign bytes, then emit one pperm insn per width.
   NOTE(review): extraction is elided; the switch labels selecting the
   three width cases are missing from this view.  */
16218 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16220 enum machine_mode imode = GET_MODE (operands[1]);
16221 int pperm_bytes[16];
/* h selects the high (8) or low (0) half of the source bytes.  */
16223 int h = (high_p) ? 8 : 0;
16226 rtvec v = rtvec_alloc (16);
16229 rtx op0 = operands[0], op1 = operands[1];
/* --- V16QI -> V8HI: each byte followed by its zero/sign byte.  */
16234 vs = rtvec_alloc (8);
16235 h2 = (high_p) ? 8 : 0;
16236 for (i = 0; i < 8; i++)
16238 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16239 pperm_bytes[2*i+1] = ((unsigned_p)
16241 : PPERM_SIGN | PPERM_SRC2 | i | h);
16244 for (i = 0; i < 16; i++)
16245 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16247 for (i = 0; i < 8; i++)
16248 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16250 p = gen_rtx_PARALLEL (VOIDmode, vs);
16251 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16253 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16255 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* --- V8HI -> V4SI: two data bytes then two extension bytes.  */
16259 vs = rtvec_alloc (4);
16260 h2 = (high_p) ? 4 : 0;
16261 for (i = 0; i < 4; i++)
16263 sign_extend = ((unsigned_p)
16265 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16266 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16267 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16268 pperm_bytes[4*i+2] = sign_extend;
16269 pperm_bytes[4*i+3] = sign_extend;
16272 for (i = 0; i < 16; i++)
16273 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16275 for (i = 0; i < 4; i++)
16276 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16278 p = gen_rtx_PARALLEL (VOIDmode, vs);
16279 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16281 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16283 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* --- V4SI -> V2DI: four data bytes then four extension bytes.  */
16287 vs = rtvec_alloc (2);
16288 h2 = (high_p) ? 2 : 0;
16289 for (i = 0; i < 2; i++)
16291 sign_extend = ((unsigned_p)
16293 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16294 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16295 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16296 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16297 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16298 pperm_bytes[8*i+4] = sign_extend;
16299 pperm_bytes[8*i+5] = sign_extend;
16300 pperm_bytes[8*i+6] = sign_extend;
16301 pperm_bytes[8*i+7] = sign_extend;
16304 for (i = 0; i < 16; i++)
16305 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16307 for (i = 0; i < 2; i++)
16308 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16310 p = gen_rtx_PARALLEL (VOIDmode, vs);
16311 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16313 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16315 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16319 gcc_unreachable ();
16325 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16326 next narrower integer vector type */
/* Pack operands[1] (low half of result) and operands[2] (high half)
   into the next narrower integer vector via one SSE5 pperm whose
   control vector selects the low bytes of each wider element.
   NOTE(review): extraction is elided; the switch labels selecting the
   three width cases are missing from this view.  */
16328 ix86_expand_sse5_pack (rtx operands[3])
16330 enum machine_mode imode = GET_MODE (operands[0]);
16331 int pperm_bytes[16];
16333 rtvec v = rtvec_alloc (16);
16335 rtx op0 = operands[0];
16336 rtx op1 = operands[1];
16337 rtx op2 = operands[2];
/* --- V8HI pair -> V16QI: keep the low byte of every 16-bit lane.  */
16342 for (i = 0; i < 8; i++)
16344 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16345 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16348 for (i = 0; i < 16; i++)
16349 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16351 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16352 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* --- V4SI pair -> V8HI: keep the low 2 bytes of every 32-bit lane.  */
16356 for (i = 0; i < 4; i++)
16358 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16359 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16360 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16361 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16364 for (i = 0; i < 16; i++)
16365 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16367 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16368 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* --- V2DI pair -> V4SI: keep the low 4 bytes of every 64-bit lane.  */
16372 for (i = 0; i < 2; i++)
16374 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16375 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16376 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16377 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16378 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16379 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16380 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16381 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16384 for (i = 0; i < 16; i++)
16385 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16387 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16388 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16392 gcc_unreachable ();
16398 /* Expand conditional increment or decrement using adc/sbb instructions.
16399 The default case using setcc followed by the conditional move can be
16400 done by generic code. */
/* Expand operands[0] = operands[2] +/- 1 conditionally, i.e. a
   conditional increment/decrement, using adc/sbb with a carry-flag
   compare.  operands[1] is the comparison, operands[3] is +1 or -1.
   Returns 1 on success, 0 to let generic code handle it.
   NOTE(review): extraction is elided; the early `return 0` lines and
   some case labels are missing from this view.  */
16402 ix86_expand_int_addcc (rtx operands[])
16404 enum rtx_code code = GET_CODE (operands[1]);
16406 rtx val = const0_rtx;
16407 bool fpcmp = false;
16408 enum machine_mode mode = GET_MODE (operands[0]);
16410 ix86_compare_op0 = XEXP (operands[1], 0);
16411 ix86_compare_op1 = XEXP (operands[1], 1);
/* Only +/-1 adjustments can be folded into adc/sbb's carry input.  */
16412 if (operands[3] != const1_rtx
16413 && operands[3] != constm1_rtx)
16415 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16416 ix86_compare_op1, &compare_op))
16418 code = GET_CODE (compare_op);
16420 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16421 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16424 code = ix86_fp_compare_code_to_integer (code);
/* Reverse the condition in place when LTU/GEU orientation does not
   match the requested adjustment direction.  */
16431 PUT_CODE (compare_op,
16432 reverse_condition_maybe_unordered
16433 (GET_CODE (compare_op)));
16435 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16437 PUT_MODE (compare_op, mode);
16439 /* Construct either adc or sbb insn. */
16440 if ((code == LTU) == (operands[3] == constm1_rtx))
16442 switch (GET_MODE (operands[0]))
16445 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16448 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16451 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16454 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16457 gcc_unreachable ();
16462 switch (GET_MODE (operands[0]))
16465 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16468 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16471 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16474 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16477 gcc_unreachable ();
16480 return 1; /* DONE */
16484 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16485 works for floating point parameters and nonoffsetable memories.
16486 For pushes, it returns just stack offsets; the values will be saved
16487 in the right order. Maximally three parts are generated. */
/* Split OPERAND of mode MODE into word-sized pieces stored in PARTS
   (SImode words on ia32, DImode words on x86_64); returns the part
   count via `size` (2..4, XFmode counts as 3 on ia32).  See the header
   comment above for push handling.  NOTE(review): extraction is
   elided; the TARGET_64BIT if/else structure and `return size;` are
   missing from this view.  */
16490 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
16495 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16497 size = (GET_MODE_SIZE (mode) + 4) / 8;
/* MMX registers cannot be split into SImode parts.  */
16499 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16500 gcc_assert (size >= 2 && size <= 4);
16502 /* Optimize constant pool reference to immediates. This is used by fp
16503 moves, that force all constants to memory to allow combining. */
16504 if (MEM_P (operand) && MEM_READONLY_P (operand))
16506 rtx tmp = maybe_get_pool_constant (operand);
16511 if (MEM_P (operand) && !offsettable_memref_p (operand))
16513 /* The only non-offsetable memories we handle are pushes. */
16514 int ok = push_operand (operand, VOIDmode);
/* For a push, every part is the same auto-modified address; the
   caller relies on emission order for correctness.  */
16518 operand = copy_rtx (operand);
16519 PUT_MODE (operand, Pmode);
16520 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16524 if (GET_CODE (operand) == CONST_VECTOR)
16526 enum machine_mode imode = int_mode_for_mode (mode);
16527 /* Caution: if we looked through a constant pool memory above,
16528 the operand may actually have a different mode now. That's
16529 ok, since we want to pun this all the way back to an integer. */
16530 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16531 gcc_assert (operand != NULL);
/* --- 32-bit target: split into SImode words.  */
16537 if (mode == DImode)
16538 split_di (&operand, 1, &parts[0], &parts[1]);
16543 if (REG_P (operand))
16545 gcc_assert (reload_completed);
16546 for (i = 0; i < size; i++)
16547 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16549 else if (offsettable_memref_p (operand))
16551 operand = adjust_address (operand, SImode, 0);
16552 parts[0] = operand;
16553 for (i = 1; i < size; i++)
16554 parts[i] = adjust_address (operand, SImode, 4 * i);
16556 else if (GET_CODE (operand) == CONST_DOUBLE)
16561 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16565 real_to_target (l, &r, mode);
16566 parts[3] = gen_int_mode (l[3], SImode);
16567 parts[2] = gen_int_mode (l[2], SImode);
16570 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16571 parts[2] = gen_int_mode (l[2], SImode);
16574 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16577 gcc_unreachable ();
16579 parts[1] = gen_int_mode (l[1], SImode);
16580 parts[0] = gen_int_mode (l[0], SImode);
16583 gcc_unreachable ();
/* --- 64-bit target: split into DImode words (SImode upper word for
   XFmode).  */
16588 if (mode == TImode)
16589 split_ti (&operand, 1, &parts[0], &parts[1]);
16590 if (mode == XFmode || mode == TFmode)
16592 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16593 if (REG_P (operand))
16595 gcc_assert (reload_completed);
16596 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16597 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16599 else if (offsettable_memref_p (operand))
16601 operand = adjust_address (operand, DImode, 0);
16602 parts[0] = operand;
16603 parts[1] = adjust_address (operand, upper_mode, 8);
16605 else if (GET_CODE (operand) == CONST_DOUBLE)
16610 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16611 real_to_target (l, &r, mode);
16613 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16614 if (HOST_BITS_PER_WIDE_INT >= 64)
16617 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16618 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16621 parts[0] = immed_double_const (l[0], l[1], DImode);
16623 if (upper_mode == SImode)
16624 parts[1] = gen_int_mode (l[2], SImode);
16625 else if (HOST_BITS_PER_WIDE_INT >= 64)
16628 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16629 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16632 parts[1] = immed_double_const (l[2], l[3], DImode);
16635 gcc_unreachable ();
16642 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16643 Return false when normal moves are needed; true when all required
16644 insns have been emitted.  Operands 2..2+nparts-1 receive the output
16645 locations and operands 6..6+nparts-1 the input values, in the order
in which they must be copied (see the loops writing operands[2 + i]
and operands[6 + i] below; nparts can be up to 4).
NOTE(review): interior lines (braces, some declarations and else
branches) are elided from this listing.  */
16648 ix86_split_long_move (rtx operands[])
16653 int collisions = 0;
16654 enum machine_mode mode = GET_MODE (operands[0]);
16655 bool collisionparts[4];
16657 /* The DFmode expanders may ask us to move double.
16658 For 64bit target this is single move.  By hiding the fact
16659 here we simplify i386.md splitters.  */
16660 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16662 /* Optimize constant pool reference to immediates.  This is used by
16663 fp moves, that force all constants to memory to allow combining.  */
16665 if (MEM_P (operands[1])
16666 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16667 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16668 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16669 if (push_operand (operands[0], VOIDmode))
16671 operands[0] = copy_rtx (operands[0]);
16672 PUT_MODE (operands[0], Pmode);
16675 operands[0] = gen_lowpart (DImode, operands[0]);
16676 operands[1] = gen_lowpart (DImode, operands[1]);
16677 emit_move_insn (operands[0], operands[1]);
16681 /* The only non-offsettable memory we handle is push.  */
16682 if (push_operand (operands[0], VOIDmode))
16685 gcc_assert (!MEM_P (operands[0])
16686 || offsettable_memref_p (operands[0]));
16688 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16689 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16691 /* When emitting push, take care for source operands on the stack.  */
16692 if (push && MEM_P (operands[1])
16693 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
/* Pushing moves SP, so re-anchor each earlier source part on the
   address of the following part.  */
16694 for (i = 0; i < nparts - 1; i++)
16695 part[1][i] = change_address (part[1][i],
16696 GET_MODE (part[1][i]),
16697 XEXP (part[1][i + 1], 0));
16699 /* We need to do copy in the right order in case an address register
16700 of the source overlaps the destination.  */
16701 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16705 for (i = 0; i < nparts; i++)
16708 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16709 if (collisionparts[i])
16713 /* Collision in the middle part can be handled by reordering.  */
16714 if (collisions == 1 && nparts == 3 && collisionparts [1])
16716 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16717 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16719 else if (collisions == 1
16721 && (collisionparts [1] || collisionparts [2]))
16723 if (collisionparts [1])
16725 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16726 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16730 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16731 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16735 /* If there are more collisions, we can't handle it by reordering.
16736 Do an lea to the last part and use only one colliding move.  */
16737 else if (collisions > 1)
16743 base = part[0][nparts - 1];
16745 /* Handle the case when the last part isn't valid for lea.
16746 Happens in 64-bit mode storing the 12-byte XFmode.  */
16747 if (GET_MODE (base) != Pmode)
16748 base = gen_rtx_REG (Pmode, REGNO (base));
16750 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16751 part[1][0] = replace_equiv_address (part[1][0], base);
16752 for (i = 1; i < nparts; i++)
16754 tmp = plus_constant (base, UNITS_PER_WORD * i);
16755 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path (surrounding test elided): XFmode on 32-bit needs a
   4-byte stack adjustment so the 12-byte value fills 16 bytes.  */
16766 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16767 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16768 emit_move_insn (part[0][2], part[1][2]);
16770 else if (nparts == 4)
16772 emit_move_insn (part[0][3], part[1][3]);
16773 emit_move_insn (part[0][2], part[1][2]);
16778 /* In 64bit mode we don't have 32bit push available.  In case this is
16779 register, it is OK - we will just use larger counterpart.  We also
16780 retype memory - these comes from attempt to avoid REX prefix on
16781 moving of second half of TFmode value.  */
16782 if (GET_MODE (part[1][1]) == SImode)
16784 switch (GET_CODE (part[1][1]))
16787 part[1][1] = adjust_address (part[1][1], DImode, 0);
16791 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16795 gcc_unreachable ();
16798 if (GET_MODE (part[1][0]) == SImode)
16799 part[1][0] = part[1][1];
16802 emit_move_insn (part[0][1], part[1][1]);
16803 emit_move_insn (part[0][0], part[1][0]);
16807 /* Choose correct order to not overwrite the source before it is copied.  */
16808 if ((REG_P (part[0][0])
16809 && REG_P (part[1][1])
16810 && (REGNO (part[0][0]) == REGNO (part[1][1])
16812 && REGNO (part[0][0]) == REGNO (part[1][2]))
16814 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16816 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Colliding case: emit the parts highest-first.  */
16818 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16820 operands[2 + i] = part[0][j];
16821 operands[6 + i] = part[1][j];
16826 for (i = 0; i < nparts; i++)
16828 operands[2 + i] = part[0][i];
16829 operands[6 + i] = part[1][i];
16833 /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
16834 if (optimize_insn_for_size_p ())
16836 for (j = 0; j < nparts - 1; j++)
16837 if (CONST_INT_P (operands[6 + j])
16838 && operands[6 + j] != const0_rtx
16839 && REG_P (operands[2 + j]))
16840 for (i = j; i < nparts - 1; i++)
16841 if (CONST_INT_P (operands[7 + i])
16842 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16843 operands[7 + i] = operands[2 + j];
16846 for (i = 0; i < nparts; i++)
16847 emit_move_insn (operands[2 + i], operands[6 + i]);
16852 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16853 left shift of OPERAND by the constant COUNT, either using a single shift
16854 or a sequence of add instructions, whichever is cheaper.  MODE is the
double-word mode being split (DImode or TImode); OPERAND is one word of
it.  NOTE(review): the elided arms of the conditionals below select the
SImode generators when MODE == DImode — confirm against full source.  */
16857 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* count == 1: a single add (x + x == x << 1).  */
16861 emit_insn ((mode == DImode
16863 : gen_adddi3) (operand, operand, operand));
/* Small counts: COUNT adds when their total cost beats one shift.  */
16865 else if (!optimize_insn_for_size_p ()
16866 && count * ix86_cost->add <= ix86_cost->shift_const)
16869 for (i=0; i<count; i++)
16871 emit_insn ((mode == DImode
16873 : gen_adddi3) (operand, operand, operand));
/* Otherwise a plain immediate shift.  */
16877 emit_insn ((mode == DImode
16879 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (operands[0] = operands[1] << operands[2];
   MODE is DImode on 32-bit, TImode on 64-bit) into single-word operations.
   SCRATCH, when available with CMOV, avoids a runtime branch on the
   count's high bit.  NOTE(review): brace-only/elided lines are omitted
   from this listing.  */
16883 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16885 rtx low[2], high[2];
16887 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count: emit the exact word moves and shifts.  */
16889 if (CONST_INT_P (operands[2]))
16891 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16892 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16894 if (count >= single_width)
16896 emit_move_insn (high[0], low[1]);
16897 emit_move_insn (low[0], const0_rtx);
16899 if (count > single_width)
16900 ix86_expand_ashl_const (high[0], count - single_width, mode);
16904 if (!rtx_equal_p (operands[0], operands[1]))
16905 emit_move_insn (operands[0], operands[1]);
16906 emit_insn ((mode == DImode
16908 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16909 ix86_expand_ashl_const (low[0], count, mode);
/* Variable shift count.  */
16914 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16916 if (operands[1] == const1_rtx)
16918 /* Assuming we've chosen a QImode capable registers, then 1 << N
16919 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
16920 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16922 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
/* Set low = (N < single_width), high = !(N < single_width) via the
   ZF produced by testing the count's width bit, then shift both.  */
16924 ix86_expand_clear (low[0]);
16925 ix86_expand_clear (high[0]);
16926 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16928 d = gen_lowpart (QImode, low[0]);
16929 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16930 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16931 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16933 d = gen_lowpart (QImode, high[0]);
16934 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16935 s = gen_rtx_NE (QImode, flags, const0_rtx);
16936 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16939 /* Otherwise, we can get the same results by manually performing
16940 a bit extract operation on bit 5/6, and then performing the two
16941 shifts.  The two methods of getting 0/1 into low/high are exactly
16942 the same size.  Avoiding the shift in the bit extract case helps
16943 pentium4 a bit; no one else seems to care much either way.  */
16948 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16949 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16951 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16952 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) / bit 6 (TImode) of the count into high.  */
16954 emit_insn ((mode == DImode
16956 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16957 emit_insn ((mode == DImode
16959 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16960 emit_move_insn (low[0], high[0]);
16961 emit_insn ((mode == DImode
16963 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16966 emit_insn ((mode == DImode
16968 : gen_ashldi3) (low[0], low[0], operands[2]));
16969 emit_insn ((mode == DImode
16971 : gen_ashldi3) (high[0], high[0], operands[2]));
16975 if (operands[1] == constm1_rtx)
16977 /* For -1 << N, we can avoid the shld instruction, because we
16978 know that we're shifting 0...31/63 ones into a -1.  */
16979 emit_move_insn (low[0], constm1_rtx);
16980 if (optimize_insn_for_size_p ())
16981 emit_move_insn (high[0], low[0]);
16983 emit_move_insn (high[0], constm1_rtx);
/* General case: shld for the high word, plain shift for the low word,
   then fix up counts >= single_width.  */
16987 if (!rtx_equal_p (operands[0], operands[1]))
16988 emit_move_insn (operands[0], operands[1]);
16990 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16991 emit_insn ((mode == DImode
16993 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16996 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
16998 if (TARGET_CMOVE && scratch)
17000 ix86_expand_clear (scratch);
17001 emit_insn ((mode == DImode
17002 ? gen_x86_shift_adj_1
17003 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
17007 emit_insn ((mode == DImode
17008 ? gen_x86_shift_adj_2
17009 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into single-word operations;
   same operand conventions as ix86_split_ashl above.  */
17013 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
17015 rtx low[2], high[2];
17017 const int single_width = mode == DImode ? 32 : 64;
17019 if (CONST_INT_P (operands[2]))
17021 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17022 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Full-width shift: both words become the sign.  */
17024 if (count == single_width * 2 - 1)
17026 emit_move_insn (high[0], high[1]);
17027 emit_insn ((mode == DImode
17029 : gen_ashrdi3) (high[0], high[0],
17030 GEN_INT (single_width - 1)));
17031 emit_move_insn (low[0], high[0]);
/* count >= word size: low gets the (shifted) old high word, high is
   filled with sign bits.  */
17034 else if (count >= single_width)
17036 emit_move_insn (low[0], high[1]);
17037 emit_move_insn (high[0], low[0]);
17038 emit_insn ((mode == DImode
17040 : gen_ashrdi3) (high[0], high[0],
17041 GEN_INT (single_width - 1)));
17042 if (count > single_width)
17043 emit_insn ((mode == DImode
17045 : gen_ashrdi3) (low[0], low[0],
17046 GEN_INT (count - single_width)));
/* count < word size: shrd the low word, arithmetic-shift the high.  */
17050 if (!rtx_equal_p (operands[0], operands[1]))
17051 emit_move_insn (operands[0], operands[1]);
17052 emit_insn ((mode == DImode
17054 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17055 emit_insn ((mode == DImode
17057 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count.  */
17062 if (!rtx_equal_p (operands[0], operands[1]))
17063 emit_move_insn (operands[0], operands[1]);
17065 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17067 emit_insn ((mode == DImode
17069 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17070 emit_insn ((mode == DImode
17072 : gen_ashrdi3) (high[0], high[0], operands[2]));
17074 if (TARGET_CMOVE && scratch)
/* SCRATCH holds the sign word for the cmov-based fix-up.  */
17076 emit_move_insn (scratch, high[0]);
17077 emit_insn ((mode == DImode
17079 : gen_ashrdi3) (scratch, scratch,
17080 GEN_INT (single_width - 1)));
17081 emit_insn ((mode == DImode
17082 ? gen_x86_shift_adj_1
17083 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17087 emit_insn ((mode == DImode
17088 ? gen_x86_shift_adj_3
17089 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into single-word operations;
   same operand conventions as ix86_split_ashl above.  */
17094 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17096 rtx low[2], high[2];
17098 const int single_width = mode == DImode ? 32 : 64;
17100 if (CONST_INT_P (operands[2]))
17102 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17103 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* count >= word size: low gets the old high word, high becomes 0.  */
17105 if (count >= single_width)
17107 emit_move_insn (low[0], high[1]);
17108 ix86_expand_clear (high[0]);
17110 if (count > single_width)
17111 emit_insn ((mode == DImode
17113 : gen_lshrdi3) (low[0], low[0],
17114 GEN_INT (count - single_width)));
/* count < word size: shrd the low word, logical-shift the high.  */
17118 if (!rtx_equal_p (operands[0], operands[1]))
17119 emit_move_insn (operands[0], operands[1]);
17120 emit_insn ((mode == DImode
17122 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17123 emit_insn ((mode == DImode
17125 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count.  */
17130 if (!rtx_equal_p (operands[0], operands[1]))
17131 emit_move_insn (operands[0], operands[1]);
17133 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17135 emit_insn ((mode == DImode
17137 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17138 emit_insn ((mode == DImode
17140 : gen_lshrdi3) (high[0], high[0], operands[2]));
17142 /* Heh.  By reversing the arguments, we can reuse this pattern.  */
17143 if (TARGET_CMOVE && scratch)
17145 ix86_expand_clear (scratch);
17146 emit_insn ((mode == DImode
17147 ? gen_x86_shift_adj_1
17148 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17152 emit_insn ((mode == DImode
17153 ? gen_x86_shift_adj_2
17154 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17158 /* Predict just emitted jump instruction to be taken with probability PROB,
which is scaled by REG_BR_PROB_BASE.  The last emitted insn must be a
jump; the prediction is attached as a REG_BR_PROB note.  */
17160 predict_jump (int prob)
17162 rtx insn = get_last_insn ();
17163 gcc_assert (JUMP_P (insn));
17164 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17167 /* Helper function for the string operations below.  Tests whether
17168 VARIABLE has the VALUE alignment bit(s) clear by AND-ing and comparing
against zero; if so, jumps to a fresh label, which is returned so the
caller can emit it after the fix-up code.  EPILOGUE selects the branch
probability (epilogue tests are even odds, prologue tests are likely
taken).  */
17170 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17172 rtx label = gen_label_rtx ();
17173 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17174 if (GET_MODE (variable) == DImode)
17175 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17177 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17178 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17181 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17183 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17187 /* Adjust COUNTREG downward by VALUE (emits countreg -= VALUE in the
register's own mode).  */
17189 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17191 if (GET_MODE (countreg) == DImode)
17192 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)))%;
17197 /* Zero extend possibly SImode EXP to a Pmode register and return it.
VOIDmode means EXP is a constant; a Pmode value is simply copied into
a fresh register; otherwise an SImode value is zero-extended to DImode
(this path is only reachable when Pmode is DImode).  */
17199 ix86_zero_extend_to_Pmode (rtx exp)
17202 if (GET_MODE (exp) == VOIDmode)
17203 return force_reg (Pmode, exp);
17204 if (GET_MODE (exp) == Pmode)
17205 return copy_to_mode_reg (Pmode, exp);
17206 r = gen_reg_rtx (Pmode);
17207 emit_insn (gen_zero_extendsidi2 (r, exp));
17211 /* Divide COUNTREG by SCALE (a power of two — see the exact_log2 use
below).  Constant counts are folded at compile time; otherwise a
logical right shift by log2(SCALE) is emitted.  */
17213 scale_counter (rtx countreg, int scale)
17216 rtx piece_size_mask;
17220 if (CONST_INT_P (countreg))
17221 return GEN_INT (INTVAL (countreg) / scale);
17222 gcc_assert (REG_P (countreg));
17224 piece_size_mask = GEN_INT (scale - 1);
17225 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17226 GEN_INT (exact_log2 (scale)),
17227 NULL, 1, OPTAB_DIRECT);
17231 /* Return mode for the memcpy/memset loop counter.  Prefer SImode over
17232 DImode for constant loop counts; non-constant counts keep their own
mode.  NOTE(review): the return statements for the DImode/SImode
constant cases are among the lines elided from this listing.  */
17234 static enum machine_mode
17235 counter_mode (rtx count_exp)
17237 if (GET_MODE (count_exp) != VOIDmode)
17238 return GET_MODE (count_exp);
17239 if (!CONST_INT_P (count_exp))
/* Constants wider than 32 bits need DImode on 64-bit targets.  */
17241 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17246 /* When SRCPTR is non-NULL, output a simple loop that copies memory
17247 from SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
17248 the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
17249 output the equivalent loop that sets memory to VALUE (supposed to be
in MODE).
17251 The size is rounded down to whole number of chunk size moved at once.
17252 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.  */
17256 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17257 rtx destptr, rtx srcptr, rtx value,
17258 rtx count, enum machine_mode mode, int unroll,
17261 rtx out_label, top_label, iter, tmp;
17262 enum machine_mode iter_mode = counter_mode (count);
17263 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
/* Mask that rounds COUNT down to a multiple of the chunk size.  */
17264 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17270 top_label = gen_label_rtx ();
17271 out_label = gen_label_rtx ();
17272 iter = gen_reg_rtx (iter_mode);
17274 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17275 NULL, 1, OPTAB_DIRECT);
17276 /* Those two should combine.  */
17277 if (piece_size == const1_rtx)
/* Byte loop may run zero times; skip it when the size is 0.  */
17279 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17281 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17283 emit_move_insn (iter, const0_rtx);
17285 emit_label (top_label);
/* Address each chunk as ptr + iter (iter widened to Pmode).  */
17287 tmp = convert_modes (Pmode, iter_mode, iter, true);
17288 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17289 destmem = change_address (destmem, mode, x_addr);
17293 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17294 srcmem = change_address (srcmem, mode, y_addr);
17296 /* When unrolling for chips that reorder memory reads and writes,
17297 we can save registers by using single temporary.
17298 Also using 4 temporaries is overkill in 32bit mode.  */
17299 if (!TARGET_64BIT && 0)
17301 for (i = 0; i < unroll; i++)
17306 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17308 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17310 emit_move_insn (destmem, srcmem);
/* Copy path: read all chunks into temporaries first, then store them,
   so loads and stores can be scheduled apart.  */
17316 gcc_assert (unroll <= 4);
17317 for (i = 0; i < unroll; i++)
17319 tmpreg[i] = gen_reg_rtx (mode);
17323 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17325 emit_move_insn (tmpreg[i], srcmem);
17327 for (i = 0; i < unroll; i++)
17332 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17334 emit_move_insn (destmem, tmpreg[i]);
/* Set path: store VALUE to each chunk.  */
17339 for (i = 0; i < unroll; i++)
17343 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17344 emit_move_insn (destmem, value);
17347 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17348 true, OPTAB_LIB_WIDEN);
17350 emit_move_insn (iter, tmp);
17352 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Predict the back-edge from the expected trip count when known.  */
17354 if (expected_size != -1)
17356 expected_size /= GET_MODE_SIZE (mode) * unroll;
17357 if (expected_size == 0)
17359 else if (expected_size > REG_BR_PROB_BASE)
17360 predict_jump (REG_BR_PROB_BASE - 1);
17362 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17365 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the pointers past the copied/set region.  */
17366 iter = ix86_zero_extend_to_Pmode (iter);
17367 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17368 true, OPTAB_LIB_WIDEN);
17369 if (tmp != destptr)
17370 emit_move_insn (destptr, tmp);
17373 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17374 true, OPTAB_LIB_WIDEN);
17376 emit_move_insn (srcptr, tmp);
17378 emit_label (out_label);
17381 /* Output "rep; mov" instruction.
17382 Arguments have the same meaning as for the previous function.  */
17384 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17385 rtx destptr, rtx srcptr,
17387 enum machine_mode mode)
17393 /* If the size is known, it is shorter to use rep movs.  */
17394 if (mode == QImode && CONST_INT_P (count)
17395 && !(INTVAL (count) & 3))
/* Re-anchor both MEMs as BLKmode at the current pointers so aliasing
   info matches what rep;mov actually touches.  */
17398 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17399 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17400 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17401 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17402 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Final pointer values: ptr + countreg * chunk-size, expressed with a
   shift for multi-byte chunks.  */
17403 if (mode != QImode)
17405 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17406 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17407 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17408 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17409 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17410 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17414 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17415 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
/* Record the exact byte size on the MEMs when known; otherwise drop
   any stale size info.  */
17417 if (CONST_INT_P (count))
17419 count = GEN_INT (INTVAL (count)
17420 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17421 destmem = shallow_copy_rtx (destmem);
17422 srcmem = shallow_copy_rtx (srcmem);
17423 set_mem_size (destmem, count);
17424 set_mem_size (srcmem, count);
17428 if (MEM_SIZE (destmem))
17429 set_mem_size (destmem, NULL_RTX);
17430 if (MEM_SIZE (srcmem))
17431 set_mem_size (srcmem, NULL_RTX);
17433 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17437 /* Output "rep; stos" instruction.
17438 Arguments have the same meaning as for the previous function.  */
17440 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17441 rtx count, enum machine_mode mode,
17447 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17448 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* VALUE must be in a register of the store chunk's mode.  */
17449 value = force_reg (mode, gen_lowpart (mode, value));
17450 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Final destination pointer: destptr + countreg * chunk-size.  */
17451 if (mode != QImode)
17453 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17454 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17455 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17458 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Only a known zero store with known count keeps precise MEM size
   info (a nonzero pattern widened to MODE may not describe the MEM).  */
17459 if (orig_value == const0_rtx && CONST_INT_P (count))
17461 count = GEN_INT (INTVAL (count)
17462 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17463 destmem = shallow_copy_rtx (destmem);
17464 set_mem_size (destmem, count);
17466 else if (MEM_SIZE (destmem))
17467 set_mem_size (destmem, NULL_RTX);
17468 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single MODE-sized string-move from SRCMEM+OFFSET to
   DESTMEM+OFFSET, advancing DESTPTR and SRCPTR via the strmov pattern.  */
17472 emit_strmov (rtx destmem, rtx srcmem,
17473 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17475 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17476 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17477 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17480 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.
NOTE(review): brace-only and some interior lines are elided from this
listing.  */
17482 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17483 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Known count: emit straight-line moves for each set bit of the
   remainder, largest chunk first.  */
17486 if (CONST_INT_P (count))
17488 HOST_WIDE_INT countval = INTVAL (count);
17491 if ((countval & 0x10) && max_size > 16)
17495 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17496 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17499 gcc_unreachable ();
17502 if ((countval & 0x08) && max_size > 8)
17505 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17508 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17509 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17513 if ((countval & 0x04) && max_size > 4)
17515 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17518 if ((countval & 0x02) && max_size > 2)
17520 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17523 if ((countval & 0x01) && max_size > 1)
17525 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large unknown remainder: fall back to a byte loop.  */
17532 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17533 count, 1, OPTAB_DIRECT);
17534 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17535 count, QImode, 1, 4);
17539 /* When there are stringops, we can cheaply increase dest and src pointers.
17540 Otherwise we save code size by maintaining offset (zero is readily
17541 available from preceding rep operation) and using x86 addressing modes.
17543 if (TARGET_SINGLE_STRINGOP)
/* Test each remainder bit (4, 2, 1) and emit a conditional strmov.  */
17547 rtx label = ix86_expand_aligntest (count, 4, true);
17548 src = change_address (srcmem, SImode, srcptr);
17549 dest = change_address (destmem, SImode, destptr);
17550 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17551 emit_label (label);
17552 LABEL_NUSES (label) = 1;
17556 rtx label = ix86_expand_aligntest (count, 2, true);
17557 src = change_address (srcmem, HImode, srcptr);
17558 dest = change_address (destmem, HImode, destptr);
17559 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17560 emit_label (label);
17561 LABEL_NUSES (label) = 1;
17565 rtx label = ix86_expand_aligntest (count, 1, true);
17566 src = change_address (srcmem, QImode, srcptr);
17567 dest = change_address (destmem, QImode, destptr);
17568 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17569 emit_label (label);
17570 LABEL_NUSES (label) = 1;
/* No single-insn stringops: keep a running OFFSET register instead of
   bumping both pointers.  */
17575 rtx offset = force_reg (Pmode, const0_rtx);
17580 rtx label = ix86_expand_aligntest (count, 4, true);
17581 src = change_address (srcmem, SImode, srcptr);
17582 dest = change_address (destmem, SImode, destptr);
17583 emit_move_insn (dest, src);
17584 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17585 true, OPTAB_LIB_WIDEN);
17587 emit_move_insn (offset, tmp);
17588 emit_label (label);
17589 LABEL_NUSES (label) = 1;
17593 rtx label = ix86_expand_aligntest (count, 2, true);
17594 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17595 src = change_address (srcmem, HImode, tmp);
17596 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17597 dest = change_address (destmem, HImode, tmp);
17598 emit_move_insn (dest, src);
17599 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17600 true, OPTAB_LIB_WIDEN);
17602 emit_move_insn (offset, tmp);
17603 emit_label (label);
17604 LABEL_NUSES (label) = 1;
17608 rtx label = ix86_expand_aligntest (count, 1, true);
17609 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17610 src = change_address (srcmem, QImode, tmp);
17611 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17612 dest = change_address (destmem, QImode, tmp);
17613 emit_move_insn (dest, src);
17614 emit_label (label);
17615 LABEL_NUSES (label) = 1;
17620 /* Output code to set at most count & (max_size - 1) bytes starting at DEST,
using a QImode byte loop (mask the count, then delegate to the generic
set/move loop with a NULL source).  */
17622 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17623 rtx count, int max_size)
17626 expand_simple_binop (counter_mode (count), AND, count,
17627 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17628 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17629 gen_lowpart (QImode, value), count, QImode,
17633 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.
NOTE(review): brace-only and some interior lines are elided from this
listing; VALUE is assumed already promoted to a wide register where the
plain (non-gen_lowpart) stores below use it — TODO confirm against full
source.  */
17635 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
/* Known count: straight-line stores for each set bit, largest first.  */
17639 if (CONST_INT_P (count))
17641 HOST_WIDE_INT countval = INTVAL (count);
17644 if ((countval & 0x10) && max_size > 16)
17648 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17649 emit_insn (gen_strset (destptr, dest, value));
17650 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17651 emit_insn (gen_strset (destptr, dest, value));
17654 gcc_unreachable ();
17657 if ((countval & 0x08) && max_size > 8)
17661 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17662 emit_insn (gen_strset (destptr, dest, value));
17666 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17667 emit_insn (gen_strset (destptr, dest, value));
17668 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17669 emit_insn (gen_strset (destptr, dest, value));
17673 if ((countval & 0x04) && max_size > 4)
17675 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17676 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17679 if ((countval & 0x02) && max_size > 2)
17681 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17682 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17685 if ((countval & 0x01) && max_size > 1)
17687 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17688 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Large unknown remainder: fall back to the byte loop.  */
17695 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Unknown count: test each remainder bit (16, 8, 4, 2, 1) and emit
   conditional strset stores.  */
17700 rtx label = ix86_expand_aligntest (count, 16, true);
17703 dest = change_address (destmem, DImode, destptr);
17704 emit_insn (gen_strset (destptr, dest, value));
17705 emit_insn (gen_strset (destptr, dest, value));
17709 dest = change_address (destmem, SImode, destptr);
17710 emit_insn (gen_strset (destptr, dest, value));
17711 emit_insn (gen_strset (destptr, dest, value));
17712 emit_insn (gen_strset (destptr, dest, value));
17713 emit_insn (gen_strset (destptr, dest, value));
17715 emit_label (label);
17716 LABEL_NUSES (label) = 1;
17720 rtx label = ix86_expand_aligntest (count, 8, true);
17723 dest = change_address (destmem, DImode, destptr);
17724 emit_insn (gen_strset (destptr, dest, value));
17728 dest = change_address (destmem, SImode, destptr);
17729 emit_insn (gen_strset (destptr, dest, value));
17730 emit_insn (gen_strset (destptr, dest, value));
17732 emit_label (label);
17733 LABEL_NUSES (label) = 1;
17737 rtx label = ix86_expand_aligntest (count, 4, true);
17738 dest = change_address (destmem, SImode, destptr);
17739 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17740 emit_label (label);
17741 LABEL_NUSES (label) = 1;
17745 rtx label = ix86_expand_aligntest (count, 2, true);
17746 dest = change_address (destmem, HImode, destptr);
17747 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17748 emit_label (label);
17749 LABEL_NUSES (label) = 1;
17753 rtx label = ix86_expand_aligntest (count, 1, true);
17754 dest = change_address (destmem, QImode, destptr);
17755 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17756 emit_label (label);
17757 LABEL_NUSES (label) = 1;
/* Copy enough bytes from SRC to DEST to raise the known alignment of DEST
   from ALIGN up to DESIRED_ALIGNMENT.  Each step is guarded by a runtime
   test of the low bits of DESTPTR, copies a single QI/HI/SI chunk via
   gen_strmov (which also bumps both pointers), and shrinks COUNT to match.
   Callers guarantee the block is at least DESIRED_ALIGNMENT - ALIGN bytes,
   so these conditional copies cannot run past the end.  */
17764 expand_movmem_prologue (rtx destmem, rtx srcmem,
17765 rtx destptr, rtx srcptr, rtx count,
17766 int align, int desired_alignment)
/* Make DEST 2-byte aligned: copy one byte when the address is odd.  */
17768 if (align <= 1 && desired_alignment > 1)
17770 rtx label = ix86_expand_aligntest (destptr, 1, false);
17771 srcmem = change_address (srcmem, QImode, srcptr);
17772 destmem = change_address (destmem, QImode, destptr);
17773 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17774 ix86_adjust_counter (count, 1);
17775 emit_label (label);
17776 LABEL_NUSES (label) = 1;
/* Make DEST 4-byte aligned: copy one halfword when bit 1 is set.  */
17778 if (align <= 2 && desired_alignment > 2)
17780 rtx label = ix86_expand_aligntest (destptr, 2, false);
17781 srcmem = change_address (srcmem, HImode, srcptr);
17782 destmem = change_address (destmem, HImode, destptr);
17783 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17784 ix86_adjust_counter (count, 2);
17785 emit_label (label);
17786 LABEL_NUSES (label) = 1;
/* Make DEST 8-byte aligned: copy one word when bit 2 is set.  */
17788 if (align <= 4 && desired_alignment > 4)
17790 rtx label = ix86_expand_aligntest (destptr, 4, false);
17791 srcmem = change_address (srcmem, SImode, srcptr);
17792 destmem = change_address (destmem, SImode, destptr);
17793 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17794 ix86_adjust_counter (count, 4);
17795 emit_label (label);
17796 LABEL_NUSES (label) = 1;
/* This ladder only handles alignments up to 8 bytes.  */
17798 gcc_assert (desired_alignment <= 8);
17801 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
17802 ALIGN_BYTES is how many bytes need to be copied. */
17804 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17805 int desired_align, int align_bytes)
17808 rtx src_size, dst_size;
17810 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17811 if (src_align_bytes >= 0)
17812 src_align_bytes = desired_align - src_align_bytes;
17813 src_size = MEM_SIZE (src);
17814 dst_size = MEM_SIZE (dst);
17815 if (align_bytes & 1)
17817 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17818 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17820 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17822 if (align_bytes & 2)
17824 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17825 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17826 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17827 set_mem_align (dst, 2 * BITS_PER_UNIT);
17828 if (src_align_bytes >= 0
17829 && (src_align_bytes & 1) == (align_bytes & 1)
17830 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17831 set_mem_align (src, 2 * BITS_PER_UNIT);
17833 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17835 if (align_bytes & 4)
17837 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17838 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17839 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17840 set_mem_align (dst, 4 * BITS_PER_UNIT);
17841 if (src_align_bytes >= 0)
17843 unsigned int src_align = 0;
17844 if ((src_align_bytes & 3) == (align_bytes & 3))
17846 else if ((src_align_bytes & 1) == (align_bytes & 1))
17848 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17849 set_mem_align (src, src_align * BITS_PER_UNIT);
17852 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17854 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17855 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17856 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17857 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17858 if (src_align_bytes >= 0)
17860 unsigned int src_align = 0;
17861 if ((src_align_bytes & 7) == (align_bytes & 7))
17863 else if ((src_align_bytes & 3) == (align_bytes & 3))
17865 else if ((src_align_bytes & 1) == (align_bytes & 1))
17867 if (src_align > (unsigned int) desired_align)
17868 src_align = desired_align;
17869 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17870 set_mem_align (src, src_align * BITS_PER_UNIT);
17873 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17875 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
/* Store enough copies of VALUE into DEST to raise the known alignment of
   DEST from ALIGN up to DESIRED_ALIGNMENT.  Mirrors expand_movmem_prologue:
   each QI/HI/SI store is guarded by a runtime alignment test of DESTPTR
   (gen_strset also advances the pointer) and COUNT is shrunk to match.  */
17883 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17884 int align, int desired_alignment)
/* Make DEST 2-byte aligned: store one byte when the address is odd.  */
17886 if (align <= 1 && desired_alignment > 1)
17888 rtx label = ix86_expand_aligntest (destptr, 1, false);
17889 destmem = change_address (destmem, QImode, destptr);
17890 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17891 ix86_adjust_counter (count, 1);
17892 emit_label (label);
17893 LABEL_NUSES (label) = 1;
/* Make DEST 4-byte aligned: store one halfword when bit 1 is set.  */
17895 if (align <= 2 && desired_alignment > 2)
17897 rtx label = ix86_expand_aligntest (destptr, 2, false);
17898 destmem = change_address (destmem, HImode, destptr);
17899 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17900 ix86_adjust_counter (count, 2);
17901 emit_label (label);
17902 LABEL_NUSES (label) = 1;
/* Make DEST 8-byte aligned: store one word when bit 2 is set.  */
17904 if (align <= 4 && desired_alignment > 4)
17906 rtx label = ix86_expand_aligntest (destptr, 4, false);
17907 destmem = change_address (destmem, SImode, destptr);
17908 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17909 ix86_adjust_counter (count, 4);
17910 emit_label (label);
17911 LABEL_NUSES (label) = 1;
/* This ladder only handles alignments up to 8 bytes.  */
17913 gcc_assert (desired_alignment <= 8);
/* Store enough copies of VALUE into DST to raise its known alignment to
   DESIRED_ALIGN.  ALIGN_BYTES is the exact (compile-time known) number of
   bytes to store, so the stores are emitted unconditionally and the MEM's
   alignment and size attributes are kept accurate.  Returns the adjusted
   DST (rewound to BLKmode past the stored prefix).  */
17919 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17920 int desired_align, int align_bytes)
17923 rtx dst_size = MEM_SIZE (dst);
/* One byte brings DST to 2-byte alignment.  */
17924 if (align_bytes & 1)
17926 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17928 emit_insn (gen_strset (destreg, dst,
17929 gen_lowpart (QImode, value)));
/* One halfword brings DST to 4-byte alignment.  */
17931 if (align_bytes & 2)
17933 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17934 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17935 set_mem_align (dst, 2 * BITS_PER_UNIT);
17937 emit_insn (gen_strset (destreg, dst,
17938 gen_lowpart (HImode, value)));
/* One word brings DST to 8-byte alignment.  */
17940 if (align_bytes & 4)
17942 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17943 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17944 set_mem_align (dst, 4 * BITS_PER_UNIT);
17946 emit_insn (gen_strset (destreg, dst,
17947 gen_lowpart (SImode, value)));
/* Record the new alignment and shrink the recorded size by the prefix.  */
17949 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17950 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17951 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17953 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17957 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
17958 static enum stringop_alg
17959 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17960 int *dynamic_check)
17962 const struct stringop_algs * algs;
17963 bool optimize_for_speed;
17964 /* Algorithms using the rep prefix want at least edi and ecx;
17965 additionally, memset wants eax and memcpy wants esi. Don't
17966 consider such algorithms if the user has appropriated those
17967 registers for their own purposes. */
17968 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17970 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17972 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17973 || (alg != rep_prefix_1_byte \
17974 && alg != rep_prefix_4_byte \
17975 && alg != rep_prefix_8_byte))
17976 const struct processor_costs *cost;
17978 /* Even if the string operation call is cold, we still might spend a lot
17979 of time processing large blocks. */
17980 if (optimize_function_for_size_p (cfun)
17981 || (optimize_insn_for_size_p ()
17982 && expected_size != -1 && expected_size < 256))
17983 optimize_for_speed = false;
17985 optimize_for_speed = true;
17987 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17989 *dynamic_check = -1;
17991 algs = &cost->memset[TARGET_64BIT != 0];
17993 algs = &cost->memcpy[TARGET_64BIT != 0];
17994 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17995 return stringop_alg;
17996 /* rep; movq or rep; movl is the smallest variant. */
17997 else if (!optimize_for_speed)
17999 if (!count || (count & 3))
18000 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
18002 return rep_prefix_usable ? rep_prefix_4_byte : loop;
18004 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
18006 else if (expected_size != -1 && expected_size < 4)
18007 return loop_1_byte;
18008 else if (expected_size != -1)
18011 enum stringop_alg alg = libcall;
18012 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
18014 /* We get here if the algorithms that were not libcall-based
18015 were rep-prefix based and we are unable to use rep prefixes
18016 based on global register usage. Break out of the loop and
18017 use the heuristic below. */
18018 if (algs->size[i].max == 0)
18020 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18022 enum stringop_alg candidate = algs->size[i].alg;
18024 if (candidate != libcall && ALG_USABLE_P (candidate))
18026 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18027 last non-libcall inline algorithm. */
18028 if (TARGET_INLINE_ALL_STRINGOPS)
18030 /* When the current size is best to be copied by a libcall,
18031 but we are still forced to inline, run the heuristic below
18032 that will pick code for medium sized blocks. */
18033 if (alg != libcall)
18037 else if (ALG_USABLE_P (candidate))
18041 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18043 /* When asked to inline the call anyway, try to pick meaningful choice.
18044 We look for maximal size of block that is faster to copy by hand and
18045 take blocks of at most of that size guessing that average size will
18046 be roughly half of the block.
18048 If this turns out to be bad, we might simply specify the preferred
18049 choice in ix86_costs. */
18050 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18051 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18054 enum stringop_alg alg;
18056 bool any_alg_usable_p = true;
18058 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
18060 enum stringop_alg candidate = algs->size[i].alg;
18061 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18063 if (candidate != libcall && candidate
18064 && ALG_USABLE_P (candidate))
18065 max = algs->size[i].max;
18067 /* If there aren't any usable algorithms, then recursing on
18068 smaller sizes isn't going to find anything. Just return the
18069 simple byte-at-a-time copy loop. */
18070 if (!any_alg_usable_p)
18072 /* Pick something reasonable. */
18073 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18074 *dynamic_check = 128;
18075 return loop_1_byte;
18079 alg = decide_alg (count, max / 2, memset, dynamic_check);
18080 gcc_assert (*dynamic_check == -1);
18081 gcc_assert (alg != libcall);
18082 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18083 *dynamic_check = max;
18086 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18087 #undef ALG_USABLE_P
18090 /* Decide on alignment.  We know that the operand is already aligned to ALIGN
18091 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
18093 decide_alignment (int align,
18094 enum stringop_alg alg,
18097 int desired_align = 0;
/* Pick a per-algorithm target alignment for the destination.  */
18101 gcc_unreachable ();
18103 case unrolled_loop:
18104 desired_align = GET_MODE_SIZE (Pmode);
18106 case rep_prefix_8_byte:
18109 case rep_prefix_4_byte:
18110 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18111 copying whole cacheline at once. */
18112 if (TARGET_PENTIUMPRO)
18117 case rep_prefix_1_byte:
18118 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18119 copying whole cacheline at once. */
18120 if (TARGET_PENTIUMPRO)
/* Never request less than the alignment we already have, and don't
   bother aligning blocks expected to be tiny.  */
18134 if (desired_align < align)
18135 desired_align = align;
18136 if (expected_size != -1 && expected_size < 4)
18137 desired_align = align;
18138 return desired_align;
/* Return the smallest power of 2 greater than VAL; used to round the
   epilogue size guard up to a power of two.  */
18143 smallest_pow2_greater_than (int val)
18151 /* Expand string move (memcpy) operation. Use i386 string operations when
18152 profitable. expand_setmem contains similar code. The code depends upon
18153 architecture, block size and alignment, but always has the same
18156 1) Prologue guard: Conditional that jumps up to epilogues for small
18157 blocks that can be handled by epilogue alone. This is faster but
18158 also needed for correctness, since the prologue assumes the block is larger
18159 than the desired alignment.
18161 Optional dynamic check for size and libcall for large
18162 blocks is emitted here too, with -minline-stringops-dynamically.
18164 2) Prologue: copy first few bytes in order to get destination aligned
18165 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18166 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18167 We emit either a jump tree on power of two sized blocks, or a byte loop.
18169 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18170 with specified algorithm.
18172 4) Epilogue: code copying tail of the block that is too small to be
18173 handled by main body (or up to size guarded by prologue guard). */
/* Expand a memcpy-style block move of COUNT_EXP bytes from SRC to DST.
   ALIGN_EXP is the known common alignment; EXPECTED_ALIGN_EXP and
   EXPECTED_SIZE_EXP are profile-based hints.  Follows the four steps
   described in the comment above (guard, alignment prologue, main loop,
   epilogue).  */
18176 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18177 rtx expected_align_exp, rtx expected_size_exp)
18183 rtx jump_around_label = NULL;
18184 HOST_WIDE_INT align = 1;
18185 unsigned HOST_WIDE_INT count = 0;
18186 HOST_WIDE_INT expected_size = -1;
18187 int size_needed = 0, epilogue_size_needed;
18188 int desired_align = 0, align_bytes = 0;
18189 enum stringop_alg alg;
18191 bool need_zero_guard = false;
18193 if (CONST_INT_P (align_exp))
18194 align = INTVAL (align_exp);
18195 /* i386 can do misaligned access on reasonably increased cost. */
18196 if (CONST_INT_P (expected_align_exp)
18197 && INTVAL (expected_align_exp) > align)
18198 align = INTVAL (expected_align_exp);
18199 /* ALIGN is the minimum of destination and source alignment, but we care here
18200 just about destination alignment. */
18201 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18202 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18204 if (CONST_INT_P (count_exp))
18205 count = expected_size = INTVAL (count_exp);
18206 if (CONST_INT_P (expected_size_exp) && count == 0)
18207 expected_size = INTVAL (expected_size_exp);
18209 /* Make sure we don't need to care about overflow later on. */
18210 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18213 /* Step 0: Decide on preferred algorithm, desired alignment and
18214 size of chunks to be copied by main loop. */
18216 alg = decide_alg (count, expected_size, false, &dynamic_check);
18217 desired_align = decide_alignment (align, alg, expected_size);
18219 if (!TARGET_ALIGN_STRINGOPS)
18220 align = desired_align;
18222 if (alg == libcall)
18224 gcc_assert (alg != no_stringop);
18226 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18227 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18228 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* SIZE_NEEDED is the chunk size of the main loop; a zero guard is needed
   for algorithms that would misbehave when entered with COUNT below one
   full chunk.  */
18233 gcc_unreachable ();
18235 need_zero_guard = true;
18236 size_needed = GET_MODE_SIZE (Pmode);
18238 case unrolled_loop:
18239 need_zero_guard = true;
18240 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18242 case rep_prefix_8_byte:
18245 case rep_prefix_4_byte:
18248 case rep_prefix_1_byte:
18252 need_zero_guard = true;
18257 epilogue_size_needed = size_needed;
18259 /* Step 1: Prologue guard. */
18261 /* Alignment code needs count to be in register. */
18262 if (CONST_INT_P (count_exp) && desired_align > align)
18264 if (INTVAL (count_exp) > desired_align
18265 && INTVAL (count_exp) > size_needed)
18268 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18269 if (align_bytes <= 0)
18272 align_bytes = desired_align - align_bytes;
18274 if (align_bytes == 0)
18275 count_exp = force_reg (counter_mode (count_exp), count_exp);
18277 gcc_assert (desired_align >= 1 && align >= 1);
18279 /* Ensure that alignment prologue won't copy past end of block. */
18280 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18282 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18283 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
18284 Make sure it is power of 2. */
18285 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18289 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18291 /* If main algorithm works on QImode, no epilogue is needed.
18292 For small sizes just don't align anything. */
18293 if (size_needed == 1)
18294 desired_align = align;
/* Runtime guard: jump to the epilogue for blocks too small for the
   prologue + main loop.  */
18301 label = gen_label_rtx ();
18302 emit_cmp_and_jump_insns (count_exp,
18303 GEN_INT (epilogue_size_needed),
18304 LTU, 0, counter_mode (count_exp), 1, label);
18305 if (expected_size == -1 || expected_size < epilogue_size_needed)
18306 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18308 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18312 /* Emit code to decide on runtime whether library call or inline should be
18314 if (dynamic_check != -1)
18316 if (CONST_INT_P (count_exp))
18318 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18320 emit_block_move_via_libcall (dst, src, count_exp, false);
18321 count_exp = const0_rtx;
18327 rtx hot_label = gen_label_rtx ();
18328 jump_around_label = gen_label_rtx ();
18329 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18330 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18331 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18332 emit_block_move_via_libcall (dst, src, count_exp, false);
18333 emit_jump (jump_around_label);
18334 emit_label (hot_label);
18338 /* Step 2: Alignment prologue. */
18340 if (desired_align > align)
18342 if (align_bytes == 0)
18344 /* Except for the first move in epilogue, we no longer know
18345 constant offset in aliasing info. It don't seems to worth
18346 the pain to maintain it for the first move, so throw away
18348 src = change_address (src, BLKmode, srcreg);
18349 dst = change_address (dst, BLKmode, destreg);
18350 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18355 /* If we know how many bytes need to be stored before dst is
18356 sufficiently aligned, maintain aliasing info accurately. */
18357 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18358 desired_align, align_bytes);
18359 count_exp = plus_constant (count_exp, -align_bytes);
18360 count -= align_bytes;
18362 if (need_zero_guard
18363 && (count < (unsigned HOST_WIDE_INT) size_needed
18364 || (align_bytes == 0
18365 && count < ((unsigned HOST_WIDE_INT) size_needed
18366 + desired_align - align))))
18368 /* It is possible that we copied enough so the main loop will not
18370 gcc_assert (size_needed > 1);
18371 if (label == NULL_RTX)
18372 label = gen_label_rtx ();
18373 emit_cmp_and_jump_insns (count_exp,
18374 GEN_INT (size_needed),
18375 LTU, 0, counter_mode (count_exp), 1, label);
18376 if (expected_size == -1
18377 || expected_size < (desired_align - align) / 2 + size_needed)
18378 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18380 predict_jump (REG_BR_PROB_BASE * 60 / 100);
/* A QImode main body subsumes the epilogue, so the small-block guard can
   land directly here.  */
18383 if (label && size_needed == 1)
18385 emit_label (label);
18386 LABEL_NUSES (label) = 1;
18388 epilogue_size_needed = 1;
18390 else if (label == NULL_RTX)
18391 epilogue_size_needed = size_needed;
18393 /* Step 3: Main loop. */
18399 gcc_unreachable ();
18401 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18402 count_exp, QImode, 1, expected_size);
18405 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18406 count_exp, Pmode, 1, expected_size);
18408 case unrolled_loop:
18409 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18410 registers for 4 temporaries anyway. */
18411 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18412 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18415 case rep_prefix_8_byte:
18416 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18419 case rep_prefix_4_byte:
18420 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18423 case rep_prefix_1_byte:
18424 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18428 /* Adjust properly the offset of src and dest memory for aliasing. */
18429 if (CONST_INT_P (count_exp))
18431 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18432 (count / size_needed) * size_needed);
18433 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18434 (count / size_needed) * size_needed);
18438 src = change_address (src, BLKmode, srcreg);
18439 dst = change_address (dst, BLKmode, destreg);
18442 /* Step 4: Epilogue to copy the remaining bytes. */
18446 /* When the main loop is done, COUNT_EXP might hold original count,
18447 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18448 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18449 bytes. Compensate if needed. */
18451 if (size_needed < epilogue_size_needed)
18454 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18455 GEN_INT (size_needed - 1), count_exp, 1,
18457 if (tmp != count_exp)
18458 emit_move_insn (count_exp, tmp);
18460 emit_label (label);
18461 LABEL_NUSES (label) = 1;
18464 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18465 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18466 epilogue_size_needed);
18467 if (jump_around_label)
18468 emit_label (jump_around_label);
18472 /* Helper function for memcpy. For QImode value 0xXY produce
18473 0xXYXYXYXY of wide specified by MODE. This is essentially
18474 a * 0x10101010, but we can do slightly better than
18475 synth_mult by unwinding the sequence by hand on CPUs with
/* NOTE(review): despite the "memcpy" wording above, the visible callers
   reach this via promote_duplicated_reg_to_size from ix86_expand_setmem.  */
18478 promote_duplicated_reg (enum machine_mode mode, rtx val)
18480 enum machine_mode valmode = GET_MODE (val);
18482 int nops = mode == DImode ? 3 : 2;
18484 gcc_assert (mode == SImode || mode == DImode);
18485 if (val == const0_rtx)
18486 return copy_to_mode_reg (mode, const0_rtx);
/* Constant case: replicate the low byte at compile time.  */
18487 if (CONST_INT_P (val))
18489 HOST_WIDE_INT v = INTVAL (val) & 255;
18493 if (mode == DImode)
18494 v |= (v << 16) << 16;
18495 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18498 if (valmode == VOIDmode)
18500 if (valmode != QImode)
18501 val = gen_lowpart (QImode, val);
18502 if (mode == QImode)
/* Choose between multiplying by the 0x01010101 pattern and a
   shift/or ladder, based on the target's cost tables.  */
18504 if (!TARGET_PARTIAL_REG_STALL)
18506 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18507 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18508 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18509 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18511 rtx reg = convert_modes (mode, QImode, val, true);
18512 tmp = promote_duplicated_reg (mode, const1_rtx);
18513 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18518 rtx reg = convert_modes (mode, QImode, val, true);
/* Shift/or ladder: duplicate byte -> halfword -> word (-> doubleword).  */
18520 if (!TARGET_PARTIAL_REG_STALL)
18521 if (mode == SImode)
18522 emit_insn (gen_movsi_insv_1 (reg, reg));
18524 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18527 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18528 NULL, 1, OPTAB_DIRECT);
18530 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18532 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18533 NULL, 1, OPTAB_DIRECT);
18534 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* SImode is done at this point; only DImode needs the final 32-bit step.  */
18535 if (mode == SImode)
18537 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18538 NULL, 1, OPTAB_DIRECT);
18539 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18544 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18545 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18546 alignment from ALIGN to DESIRED_ALIGN. */
18548 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
/* Pick the widest mode any consumer (main loop or alignment prologue)
   will store, and fall through to narrower ones.  */
18553 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18554 promoted_val = promote_duplicated_reg (DImode, val);
18555 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18556 promoted_val = promote_duplicated_reg (SImode, val);
18557 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18558 promoted_val = promote_duplicated_reg (HImode, val);
/* Byte-sized stores can use VAL as-is.  */
18560 promoted_val = val;
18562 return promoted_val;
18565 /* Expand string clear operation (bzero). Use i386 string operations when
18566 profitable. See expand_movmem comment for explanation of individual
18567 steps performed. */
18569 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18570 rtx expected_align_exp, rtx expected_size_exp)
18575 rtx jump_around_label = NULL;
18576 HOST_WIDE_INT align = 1;
18577 unsigned HOST_WIDE_INT count = 0;
18578 HOST_WIDE_INT expected_size = -1;
18579 int size_needed = 0, epilogue_size_needed;
18580 int desired_align = 0, align_bytes = 0;
18581 enum stringop_alg alg;
18582 rtx promoted_val = NULL;
/* Set when the epilogue must re-expand VAL byte-wise because the promoted
   (replicated) value was skipped for small blocks.  */
18583 bool force_loopy_epilogue = false;
18585 bool need_zero_guard = false;
18587 if (CONST_INT_P (align_exp))
18588 align = INTVAL (align_exp);
18589 /* i386 can do misaligned access on reasonably increased cost. */
18590 if (CONST_INT_P (expected_align_exp)
18591 && INTVAL (expected_align_exp) > align)
18592 align = INTVAL (expected_align_exp);
18593 if (CONST_INT_P (count_exp))
18594 count = expected_size = INTVAL (count_exp);
18595 if (CONST_INT_P (expected_size_exp) && count == 0)
18596 expected_size = INTVAL (expected_size_exp);
18598 /* Make sure we don't need to care about overflow later on. */
18599 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18602 /* Step 0: Decide on preferred algorithm, desired alignment and
18603 size of chunks to be copied by main loop. */
18605 alg = decide_alg (count, expected_size, true, &dynamic_check);
18606 desired_align = decide_alignment (align, alg, expected_size);
18608 if (!TARGET_ALIGN_STRINGOPS)
18609 align = desired_align;
18611 if (alg == libcall)
18613 gcc_assert (alg != no_stringop);
18615 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18616 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* SIZE_NEEDED is the store chunk size of the main loop.  */
18621 gcc_unreachable ();
18623 need_zero_guard = true;
18624 size_needed = GET_MODE_SIZE (Pmode);
18626 case unrolled_loop:
18627 need_zero_guard = true;
18628 size_needed = GET_MODE_SIZE (Pmode) * 4;
18630 case rep_prefix_8_byte:
18633 case rep_prefix_4_byte:
18636 case rep_prefix_1_byte:
18640 need_zero_guard = true;
18644 epilogue_size_needed = size_needed;
18646 /* Step 1: Prologue guard. */
18648 /* Alignment code needs count to be in register. */
18649 if (CONST_INT_P (count_exp) && desired_align > align)
18651 if (INTVAL (count_exp) > desired_align
18652 && INTVAL (count_exp) > size_needed)
18655 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18656 if (align_bytes <= 0)
18659 align_bytes = desired_align - align_bytes;
18661 if (align_bytes == 0)
18663 enum machine_mode mode = SImode;
18664 if (TARGET_64BIT && (count & ~0xffffffff))
18666 count_exp = force_reg (mode, count_exp);
18669 /* Do the cheap promotion to allow better CSE across the
18670 main loop and epilogue (ie one load of the big constant in the
18671 front of all code. */
18672 if (CONST_INT_P (val_exp))
18673 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18674 desired_align, align);
18675 /* Ensure that alignment prologue won't copy past end of block. */
18676 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18678 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18679 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18680 Make sure it is power of 2. */
18681 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18683 /* To improve performance of small blocks, we jump around the VAL
18684 promoting mode. This mean that if the promoted VAL is not constant,
18685 we might not use it in the epilogue and have to use byte
18687 if (epilogue_size_needed > 2 && !promoted_val)
18688 force_loopy_epilogue = true;
18691 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18693 /* If main algorithm works on QImode, no epilogue is needed.
18694 For small sizes just don't align anything. */
18695 if (size_needed == 1)
18696 desired_align = align;
/* Runtime guard: jump to the epilogue for blocks too small for the
   prologue + main loop.  */
18703 label = gen_label_rtx ();
18704 emit_cmp_and_jump_insns (count_exp,
18705 GEN_INT (epilogue_size_needed),
18706 LTU, 0, counter_mode (count_exp), 1, label);
18707 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18708 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18710 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Runtime size dispatch to the library for large blocks
   (-minline-stringops-dynamically).  */
18713 if (dynamic_check != -1)
18715 rtx hot_label = gen_label_rtx ();
18716 jump_around_label = gen_label_rtx ();
18717 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18718 LEU, 0, counter_mode (count_exp), 1, hot_label);
18719 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18720 set_storage_via_libcall (dst, count_exp, val_exp, false);
18721 emit_jump (jump_around_label);
18722 emit_label (hot_label);
18725 /* Step 2: Alignment prologue. */
18727 /* Do the expensive promotion once we branched off the small blocks. */
18729 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18730 desired_align, align);
18731 gcc_assert (desired_align >= 1 && align >= 1);
18733 if (desired_align > align)
18735 if (align_bytes == 0)
18737 /* Except for the first move in epilogue, we no longer know
18738 constant offset in aliasing info. It don't seems to worth
18739 the pain to maintain it for the first move, so throw away
18741 dst = change_address (dst, BLKmode, destreg);
18742 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18747 /* If we know how many bytes need to be stored before dst is
18748 sufficiently aligned, maintain aliasing info accurately. */
18749 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18750 desired_align, align_bytes);
18751 count_exp = plus_constant (count_exp, -align_bytes);
18752 count -= align_bytes;
18754 if (need_zero_guard
18755 && (count < (unsigned HOST_WIDE_INT) size_needed
18756 || (align_bytes == 0
18757 && count < ((unsigned HOST_WIDE_INT) size_needed
18758 + desired_align - align))))
18760 /* It is possible that we copied enough so the main loop will not
18762 gcc_assert (size_needed > 1);
18763 if (label == NULL_RTX)
18764 label = gen_label_rtx ();
18765 emit_cmp_and_jump_insns (count_exp,
18766 GEN_INT (size_needed),
18767 LTU, 0, counter_mode (count_exp), 1, label);
18768 if (expected_size == -1
18769 || expected_size < (desired_align - align) / 2 + size_needed)
18770 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18772 predict_jump (REG_BR_PROB_BASE * 60 / 100);
/* A QImode main body subsumes the epilogue; fall back to the unpromoted
   value for it.  */
18775 if (label && size_needed == 1)
18777 emit_label (label);
18778 LABEL_NUSES (label) = 1;
18780 promoted_val = val_exp;
18781 epilogue_size_needed = 1;
18783 else if (label == NULL_RTX)
18784 epilogue_size_needed = size_needed;
18786 /* Step 3: Main loop. */
18792 gcc_unreachable ();
18794 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18795 count_exp, QImode, 1, expected_size);
18798 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18799 count_exp, Pmode, 1, expected_size);
18801 case unrolled_loop:
18802 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18803 count_exp, Pmode, 4, expected_size);
18805 case rep_prefix_8_byte:
18806 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18809 case rep_prefix_4_byte:
18810 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18813 case rep_prefix_1_byte:
18814 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18818 /* Adjust properly the offset of src and dest memory for aliasing. */
18819 if (CONST_INT_P (count_exp))
18820 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18821 (count / size_needed) * size_needed);
18823 dst = change_address (dst, BLKmode, destreg);
18825 /* Step 4: Epilogue to copy the remaining bytes. */
18829 /* When the main loop is done, COUNT_EXP might hold original count,
18830 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18831 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18832 bytes. Compensate if needed. */
18834 if (size_needed < epilogue_size_needed)
18837 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18838 GEN_INT (size_needed - 1), count_exp, 1,
18840 if (tmp != count_exp)
18841 emit_move_insn (count_exp, tmp);
18843 emit_label (label);
18844 LABEL_NUSES (label) = 1;
18847 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18849 if (force_loopy_epilogue)
18850 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18851 epilogue_size_needed);
18853 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18854 epilogue_size_needed);
18856 if (jump_around_label)
18857 emit_label (jump_around_label);
18861 /* Expand the appropriate insns for doing strlen if not just doing
18864 out = result, initialized with the start address
18865 align_rtx = alignment of the address.
18866 scratch = scratch register, initialized with the startaddress when
18867 not aligned, otherwise undefined
18869 This is just the body. It needs the initializations mentioned above and
18870 some address computing at the end. These things are done in i386.md. */
18873 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18877 rtx align_2_label = NULL_RTX;
18878 rtx align_3_label = NULL_RTX;
18879 rtx align_4_label = gen_label_rtx ();
18880 rtx end_0_label = gen_label_rtx ();
18882 rtx tmpreg = gen_reg_rtx (SImode);
18883 rtx scratch = gen_reg_rtx (SImode);
18887 if (CONST_INT_P (align_rtx))
18888 align = INTVAL (align_rtx);
18890 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18892 /* Is there a known alignment and is it less than 4? */
18895 rtx scratch1 = gen_reg_rtx (Pmode);
18896 emit_move_insn (scratch1, out);
18897 /* Is there a known alignment and is it not 2? */
18900 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18901 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18903 /* Leave just the 3 lower bits. */
18904 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18905 NULL_RTX, 0, OPTAB_WIDEN);
18907 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18908 Pmode, 1, align_4_label);
18909 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18910 Pmode, 1, align_2_label);
18911 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18912 Pmode, 1, align_3_label);
18916 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18917 check if is aligned to 4 - byte. */
18919 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18920 NULL_RTX, 0, OPTAB_WIDEN);
18922 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18923 Pmode, 1, align_4_label);
18926 mem = change_address (src, QImode, out);
18928 /* Now compare the bytes. */
18930 /* Compare the first n unaligned byte on a byte per byte basis. */
18931 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18932 QImode, 1, end_0_label);
18934 /* Increment the address. */
18935 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18937 /* Not needed with an alignment of 2 */
18940 emit_label (align_2_label);
18942 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18945 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18947 emit_label (align_3_label);
18950 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18953 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18956 /* Generate loop to check 4 bytes at a time. It is not a good idea to
18957 align this loop. It gives only huge programs, but does not help to
18959 emit_label (align_4_label);
18961 mem = change_address (src, SImode, out);
18962 emit_move_insn (scratch, mem);
18963 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18965 /* This formula yields a nonzero result iff one of the bytes is zero.
18966 This saves three branches inside loop and many cycles. */
18968 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18969 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18970 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18971 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18972 gen_int_mode (0x80808080, SImode)));
18973 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
18978 rtx reg = gen_reg_rtx (SImode);
18979 rtx reg2 = gen_reg_rtx (Pmode);
18980 emit_move_insn (reg, tmpreg);
18981 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18983 /* If zero is not in the first two bytes, move two bytes forward. */
18984 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18985 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18986 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18987 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18988 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18991 /* Emit lea manually to avoid clobbering of flags. */
18992 emit_insn (gen_rtx_SET (SImode, reg2,
18993 gen_rtx_PLUS (Pmode, out, const2_rtx)));
18995 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18996 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18997 emit_insn (gen_rtx_SET (VOIDmode, out,
18998 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
19005 rtx end_2_label = gen_label_rtx ();
19006 /* Is zero in the first two bytes? */
19008 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19009 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19010 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19011 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19012 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
19014 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19015 JUMP_LABEL (tmp) = end_2_label;
19017 /* Not in the first two. Move two bytes forward. */
19018 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19019 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
19021 emit_label (end_2_label);
19025 /* Avoid branch in fixing the byte. */
19026 tmpreg = gen_lowpart (QImode, tmpreg);
19027 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19028 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
19029 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
19031 emit_label (end_0_label);
/* NOTE(review): numbering gaps — some lines of this function (returns,
   braces, declarations) are not visible in this listing.  Two strategies:
   the unrolled inline expansion via ix86_expand_strlensi_unroll_1 when the
   terminator is 0, alignment is low and we optimize for speed; otherwise
   the rep-scasb (UNSPEC_SCAS) sequence, which needs eax/ecx/edi free.  */
19034 /* Expand strlen. */
19037 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19039 rtx addr, scratch1, scratch2, scratch3, scratch4;
19041 /* The generic case of strlen expander is long. Avoid it's
19042 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
19044 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19045 && !TARGET_INLINE_ALL_STRINGOPS
19046 && !optimize_insn_for_size_p ()
19047 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19050 addr = force_reg (Pmode, XEXP (src, 0));
19051 scratch1 = gen_reg_rtx (Pmode);
19053 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19054 && !optimize_insn_for_size_p ())
19056 /* Well it seems that some optimizer does not combine a call like
19057 foo(strlen(bar), strlen(bar));
19058 when the move and the subtraction is done here. It does calculate
19059 the length just once when these instructions are done inside of
19060 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
19061 often used and I use one fewer register for the lifetime of
19062 output_strlen_unroll() this is better. */
19064 emit_move_insn (out, addr);
19066 ix86_expand_strlensi_unroll_1 (out, src, align);
19068 /* strlensi_unroll_1 returns the address of the zero at the end of
19069 the string, like memchr(), so compute the length by subtracting
19070 the start address. */
19071 emit_insn ((*ix86_gen_sub3) (out, out, addr));
19077 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19078 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19081 scratch2 = gen_reg_rtx (Pmode);
19082 scratch3 = gen_reg_rtx (Pmode);
/* scas decrements ecx from -1; the arithmetic below recovers the length.  */
19083 scratch4 = force_reg (Pmode, constm1_rtx);
19085 emit_move_insn (scratch3, addr);
19086 eoschar = force_reg (QImode, eoschar);
19088 src = replace_equiv_address_nv (src, scratch3);
19090 /* If .md starts supporting :P, this can be done in .md. */
19091 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19092 scratch4), UNSPEC_SCAS);
19093 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19094 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19095 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
/* NOTE(review): the trailing lines of this function (return of TMP and
   closing brace) fall in a numbering gap of this listing.  Visible body:
   materialize CONST(UNSPEC_PLTOFF(symbol)) into a fresh register and add
   the PIC base — valid only for the large x86-64 PIC code model, as the
   asserts require.  */
19100 /* For given symbol (function) construct code to compute address of it's PLT
19101 entry in large x86-64 PIC model. */
19103 construct_plt_address (rtx symbol)
19105 rtx tmp = gen_reg_rtx (Pmode);
19106 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19108 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19109 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19111 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19112 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* NOTE(review): numbering gaps — the function head (return type, CALLARG2
   parameter), several braces and the final return are not visible here.
   Emits a CALL insn: handles Mach-O indirection, the PIC register USE,
   the AL vararg SSE count for 64-bit SYSV, large-PIC PLT addressing,
   sibcall address constraints (r11 on 64-bit), an optional stack-pop SET,
   and the MS->SYSV ABI transition clobber list.  */
19117 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19119 rtx pop, int sibcall)
19121 rtx use = NULL, call;
19123 if (pop == const0_rtx)
/* 64-bit calling conventions never pop the stack in the callee.  */
19125 gcc_assert (!TARGET_64BIT || !pop);
19127 if (TARGET_MACHO && !TARGET_64BIT)
19130 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19131 fnaddr = machopic_indirect_call_target (fnaddr);
19136 /* Static functions and indirect calls don't need the pic register. */
19137 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19138 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19139 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19140 use_reg (&use, pic_offset_table_rtx)
/* 64-bit SYSV varargs: AL carries the number of SSE registers used.  */
19143 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19145 rtx al = gen_rtx_REG (QImode, AX_REG);
19146 emit_move_insn (al, callarg2);
19147 use_reg (&use, al);
19150 if (ix86_cmodel == CM_LARGE_PIC
19152 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19153 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19154 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19155 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
19157 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19158 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* Sibcalls on 64-bit must go through r11: the only call-clobbered
   register not used for argument passing.  */
19160 if (sibcall && TARGET_64BIT
19161 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
19164 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19165 fnaddr = gen_rtx_REG (Pmode, R11_REG);
19166 emit_move_insn (fnaddr, addr);
19167 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19170 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19172 call = gen_rtx_SET (VOIDmode, retval, call);
19175 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19176 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19177 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19180 && ix86_cfun_abi () == MS_ABI
19181 && (!callarg2 || INTVAL (callarg2) != -2))
19183 /* We need to represent that SI and DI registers are clobbered
19185 static int clobbered_registers[] = {
19186 XMM6_REG, XMM7_REG, XMM8_REG,
19187 XMM9_REG, XMM10_REG, XMM11_REG,
19188 XMM12_REG, XMM13_REG, XMM14_REG,
19189 XMM15_REG, SI_REG, DI_REG
19192 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19193 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19194 UNSPEC_MS_TO_SYSV_CALL);
19198 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19199 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19202 (SSE_REGNO_P (clobbered_registers[i])
19204 clobbered_registers[i]));
19206 call = gen_rtx_PARALLEL (VOIDmode,
19207 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19211 call = emit_call_insn (call);
19213 CALL_INSN_FUNCTION_USAGE (call) = use;
/* NOTE(review): the `return f;` / closing brace fall in a numbering gap.
   Allocates and zero-initializes the per-function machine_function record
   (GGC_CNEW zeroes it), then seeds the few fields with non-zero defaults.  */
19217 /* Clear stack slot assignments remembered from previous functions.
19218 This is called from INIT_EXPANDERS once before RTL is emitted for each
19221 static struct machine_function *
19222 ix86_init_machine_status (void)
19224 struct machine_function *f;
19226 f = GGC_CNEW (struct machine_function);
/* -1 means "not computed yet" for the fast prologue/epilogue heuristic.  */
19227 f->use_fast_prologue_epilogue_nregs = -1;
19228 f->tls_descriptor_call_expanded_p = 0;
19229 f->call_abi = ix86_abi;
/* NOTE(review): numbering gaps hide the return type line, `s->mode`/`s->n`
   assignments and the final `return copy_rtx (s->rtl);`.  Visible logic:
   look the (mode, n) slot up in the per-function cache ix86_stack_locals;
   on miss, allocate a new entry and a new stack slot and push it onto the
   list.  copy_rtx on the cached RTL keeps callers from sharing structure.  */
19234 /* Return a MEM corresponding to a stack slot with mode MODE.
19235 Allocate a new slot if necessary.
19237 The RTL for a function can have several slots available: N is
19238 which slot to use. */
19241 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19243 struct stack_local_entry *s;
19245 gcc_assert (n < MAX_386_STACK_LOCALS);
19247 /* Virtual slot is valid only before vregs are instantiated. */
19248 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
19250 for (s = ix86_stack_locals; s; s = s->next)
19251 if (s->mode == mode && s->n == n)
19252 return copy_rtx (s->rtl);
19254 s = (struct stack_local_entry *)
19255 ggc_alloc (sizeof (struct stack_local_entry));
19258 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19260 s->next = ix86_stack_locals;
19261 ix86_stack_locals = s;
/* Lazily create and cache (in GC-rooted ix86_tls_symbol) the SYMBOL_REF
   for the TLS helper.  NOTE(review): a numbering gap hides a condition on
   line 19276 (presumably `&& !TARGET_64BIT` — the triple-underscore
   ___tls_get_addr is the 32-bit GNU-TLS entry point; verify upstream).  */
19265 /* Construct the SYMBOL_REF for the tls_get_addr function. */
19267 static GTY(()) rtx ix86_tls_symbol;
19269 ix86_tls_get_addr (void)
19272 if (!ix86_tls_symbol)
19274 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19275 (TARGET_ANY_GNU_TLS
19277 ? "___tls_get_addr"
19278 : "__tls_get_addr");
19281 return ix86_tls_symbol;
/* Lazily create and cache the _TLS_MODULE_BASE_ SYMBOL_REF used by the
   TLS descriptor scheme, marking it global-dynamic so the rest of the
   backend treats it as a TLS symbol.  */
19284 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19286 static GTY(()) rtx ix86_tls_module_base_symbol;
19288 ix86_tls_module_base (void)
19291 if (!ix86_tls_module_base_symbol)
19293 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19294 "_TLS_MODULE_BASE_");
19295 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19296 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19299 return ix86_tls_module_base_symbol;
/* NOTE(review): numbering gaps hide the return statements and several
   assignments (`len = ...`, `base = parts.base;`, `disp = parts.disp;`).
   Visible logic computes the encoded length of ADDR's mod/rm + SIB +
   displacement, special-casing esp/ebp/r12/r13 which force a SIB byte or
   a mandatory displacement, and RIP-relative symbolic displacements.  */
19302 /* Calculate the length of the memory address in the instruction
19303 encoding. Does not include the one-byte modrm, opcode, or prefix. */
19306 memory_address_length (rtx addr)
19308 struct ix86_address parts;
19309 rtx base, index, disp;
/* Autoinc addresses (push/pop style) have no modrm-encoded address.  */
19313 if (GET_CODE (addr) == PRE_DEC
19314 || GET_CODE (addr) == POST_INC
19315 || GET_CODE (addr) == PRE_MODIFY
19316 || GET_CODE (addr) == POST_MODIFY)
19319 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the REGNO checks below see the hard registers.  */
19322 if (parts.base && GET_CODE (parts.base) == SUBREG)
19323 parts.base = SUBREG_REG (parts.base);
19324 if (parts.index && GET_CODE (parts.index) == SUBREG)
19325 parts.index = SUBREG_REG (parts.index);
19328 index = parts.index;
19333 - esp as the base always wants an index,
19334 - ebp as the base always wants a displacement,
19335 - r12 as the base always wants an index,
19336 - r13 as the base always wants a displacement. */
19338 /* Register Indirect. */
19339 if (base && !index && !disp)
19341 /* esp (for its index) and ebp (for its displacement) need
19342 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
19345 && (addr == arg_pointer_rtx
19346 || addr == frame_pointer_rtx
19347 || REGNO (addr) == SP_REG
19348 || REGNO (addr) == BP_REG
19349 || REGNO (addr) == R12_REG
19350 || REGNO (addr) == R13_REG))
19354 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
19355 is not disp32, but disp32(%rip), so for disp32
19356 SIB byte is needed, unless print_operand_address
19357 optimizes it into disp32(%rip) or (%rip) is implied
19359 else if (disp && !base && !index)
19366 if (GET_CODE (disp) == CONST)
19367 symbol = XEXP (disp, 0);
19368 if (GET_CODE (symbol) == PLUS
19369 && CONST_INT_P (XEXP (symbol, 1)))
19370 symbol = XEXP (symbol, 0);
/* Non-RIP-encodable symbolic operands need the extra SIB byte.  */
19372 if (GET_CODE (symbol) != LABEL_REF
19373 && (GET_CODE (symbol) != SYMBOL_REF
19374 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
19375 && (GET_CODE (symbol) != UNSPEC
19376 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
19377 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
19384 /* Find the length of the displacement constant. */
/* Constraint K = signed 8-bit immediate, i.e. a 1-byte displacement.  */
19387 if (base && satisfies_constraint_K (disp))
19392 /* ebp always wants a displacement. Similarly r13. */
19393 else if (REG_P (base)
19394 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
19397 /* An index requires the two-byte modrm form.... */
19399 /* ...like esp (or r12), which always wants an index. */
19400 || base == arg_pointer_rtx
19401 || base == frame_pointer_rtx
19403 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
/* NOTE(review): heavy numbering gaps — the switch on get_attr_mode and
   its case labels/returns are missing from this listing.  Scans the
   insn's operands for the constant immediate and returns its encoded
   byte length; with SHORTFORM set, a value that truncates into -128..127
   for the insn's mode counts as a 1-byte immediate.  */
19420 /* Compute default value for "length_immediate" attribute. When SHORTFORM
19421 is set, expect that insn have 8bit immediate alternative. */
19423 ix86_attr_length_immediate_default (rtx insn, int shortform)
19427 extract_insn_cached (insn);
19428 for (i = recog_data.n_operands - 1; i >= 0; --i)
19429 if (CONSTANT_P (recog_data.operand[i]))
19431 enum attr_mode mode = get_attr_mode (insn);
19434 if (shortform && CONST_INT_P (recog_data.operand[i]))
19436 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
19443 ival = trunc_int_for_mode (ival, HImode);
19446 ival = trunc_int_for_mode (ival, SImode);
19451 if (IN_RANGE (ival, -128, 127))
19468 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
19473 fatal_insn ("unknown insn mode", insn);
/* NOTE(review): gaps hide the default `return 0;` paths and loop braces.
   For an LEA the address is the SET_SRC (unwrapping the 64-bit zero-extend
   form); otherwise the first MEM operand whose constraint in the matched
   alternative is not 'X' (ignored) supplies the address to measure.  */
19478 /* Compute default value for "length_address" attribute. */
19480 ix86_attr_length_address_default (rtx insn)
19484 if (get_attr_type (insn) == TYPE_LEA)
19486 rtx set = PATTERN (insn), addr;
19488 if (GET_CODE (set) == PARALLEL)
19489 set = XVECEXP (set, 0, 0);
19491 gcc_assert (GET_CODE (set) == SET);
19493 addr = SET_SRC (set);
19494 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
19496 if (GET_CODE (addr) == ZERO_EXTEND)
19497 addr = XEXP (addr, 0);
19498 if (GET_CODE (addr) == SUBREG)
19499 addr = SUBREG_REG (addr);
19502 return memory_address_length (addr);
19505 extract_insn_cached (insn);
19506 for (i = recog_data.n_operands - 1; i >= 0; --i)
19507 if (MEM_P (recog_data.operand[i]))
19509 constrain_operands_cached (reload_completed);
19510 if (which_alternative != -1)
19512 const char *constraints = recog_data.constraints[i];
19513 int alt = which_alternative;
/* Step past modifiers, then past ALT comma-separated alternatives to
   reach this operand's constraint in the matched alternative.  */
19515 while (*constraints == '=' || *constraints == '+')
19518 while (*constraints++ != ',')
19520 /* Skip ignored operands. */
19521 if (*constraints == 'X')
19524 return memory_address_length (XEXP (recog_data.operand[i], 0));
/* NOTE(review): gaps hide the HAS_VEX_W parameter's declaration line and
   the `return 3;` / `return 4;` statements.  Decides between the 2-byte
   and 3-byte VEX prefix: 3 bytes whenever the opcode is not in the 0f
   map, VEX.W is set, or (in 64-bit) a REX.W/R/X/B bit would be needed.  */
19529 /* Compute default value for "length_vex" attribute. It includes
19530 2 or 3 byte VEX prefix and 1 opcode byte. */
19533 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19538 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
19539 byte VEX prefix. */
19540 if (!has_0f_opcode || has_vex_w)
19543 /* We can always use 2 byte VEX prefix in 32bit. */
19547 extract_insn_cached (insn);
19549 for (i = recog_data.n_operands - 1; i >= 0; --i)
19550 if (REG_P (recog_data.operand[i]))
19552 /* REX.W bit uses 3 byte VEX prefix. */
19553 if (GET_MODE (recog_data.operand[i]) == DImode
19554 && GENERAL_REG_P (recog_data.operand[i]))
19559 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19560 if (MEM_P (recog_data.operand[i])
19561 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
/* NOTE(review): the switch head, `return` statements and default case sit
   in numbering gaps.  Groups processors by issue width for the scheduler:
   the first group historically returns 2, the large middle group 3, and
   Core 2 more — confirm the exact values against the full source.  */
19568 /* Return the maximum number of instructions a cpu can issue. */
19571 ix86_issue_rate (void)
19575 case PROCESSOR_PENTIUM:
19576 case PROCESSOR_ATOM:
19580 case PROCESSOR_PENTIUMPRO:
19581 case PROCESSOR_PENTIUM4:
19582 case PROCESSOR_ATHLON:
19584 case PROCESSOR_AMDFAM10:
19585 case PROCESSOR_NOCONA:
19586 case PROCESSOR_GENERIC32:
19587 case PROCESSOR_GENERIC64:
19590 case PROCESSOR_CORE2:
19598 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19599 by DEP_INSN and nothing set by DEP_INSN. */
19602 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19606 /* Simplify the test for uninteresting insns. */
19607 if (insn_type != TYPE_SETCC
19608 && insn_type != TYPE_ICMOV
19609 && insn_type != TYPE_FCMOV
19610 && insn_type != TYPE_IBR)
19613 if ((set = single_set (dep_insn)) != 0)
19615 set = SET_DEST (set);
19618 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19619 && XVECLEN (PATTERN (dep_insn), 0) == 2
19620 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19621 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19623 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19624 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19629 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19632 /* This test is true if the dependent insn reads the flags but
19633 not any other potentially set register. */
19634 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19637 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
/* NOTE(review): the return type line, loop braces and the trailing
   `return false;` fall in numbering gaps.  Reports an address-generation
   dependence: true iff USE_INSN's first MEM operand has an address that
   SET_INSN modifies.  Only the first MEM found is examined (the early
   return), which matches the one-memory-operand shape of x86 insns.  */
19643 /* Return true iff USE_INSN has a memory address with operands set by
19647 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19650 extract_insn_cached (use_insn);
19651 for (i = recog_data.n_operands - 1; i >= 0; --i)
19652 if (MEM_P (recog_data.operand[i]))
19654 rtx addr = XEXP (recog_data.operand[i], 0);
19655 return modified_in_p (addr, set_insn) != 0;
/* TARGET_SCHED_ADJUST_COST hook: tweak the scheduler's COST of the
   dependence LINK between DEP_INSN and INSN for the tuned processor.
   NOTE(review): numbering gaps hide the return type, `switch (ix86_tune)`
   head, most `cost += / return` statements, the K6 case label and the
   final `return cost;` — per-CPU adjustments below are partial views.  */
19661 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19663 enum attr_type insn_type, dep_insn_type;
19664 enum attr_memory memory;
19666 int dep_insn_code_number;
19668 /* Anti and output dependencies have zero cost on all CPUs. */
19669 if (REG_NOTE_KIND (link) != 0)
19672 dep_insn_code_number = recog_memoized (dep_insn);
19674 /* If we can't recognize the insns, we can't really do anything. */
19675 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19678 insn_type = get_attr_type (insn);
19679 dep_insn_type = get_attr_type (dep_insn);
19683 case PROCESSOR_PENTIUM:
19684 /* Address Generation Interlock adds a cycle of latency. */
19685 if (insn_type == TYPE_LEA)
19687 rtx addr = PATTERN (insn);
19689 if (GET_CODE (addr) == PARALLEL)
19690 addr = XVECEXP (addr, 0, 0);
19692 gcc_assert (GET_CODE (addr) == SET);
19694 addr = SET_SRC (addr);
19695 if (modified_in_p (addr, dep_insn))
19698 else if (ix86_agi_dependent (dep_insn, insn))
19701 /* ??? Compares pair with jump/setcc. */
19702 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19705 /* Floating point stores require value to be ready one cycle earlier. */
19706 if (insn_type == TYPE_FMOV
19707 && get_attr_memory (insn) == MEMORY_STORE
19708 && !ix86_agi_dependent (dep_insn, insn))
19712 case PROCESSOR_PENTIUMPRO:
19713 memory = get_attr_memory (insn);
19715 /* INT->FP conversion is expensive. */
19716 if (get_attr_fp_int_src (dep_insn))
19719 /* There is one cycle extra latency between an FP op and a store. */
19720 if (insn_type == TYPE_FMOV
19721 && (set = single_set (dep_insn)) != NULL_RTX
19722 && (set2 = single_set (insn)) != NULL_RTX
19723 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19724 && MEM_P (SET_DEST (set2)))
19727 /* Show ability of reorder buffer to hide latency of load by executing
19728 in parallel with previous instruction in case
19729 previous instruction is not needed to compute the address. */
19730 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19731 && !ix86_agi_dependent (dep_insn, insn))
19733 /* Claim moves to take one cycle, as core can issue one load
19734 at time and the next load can start cycle later. */
19735 if (dep_insn_type == TYPE_IMOV
19736 || dep_insn_type == TYPE_FMOV)
/* NOTE(review): the case label before this line (presumably PROCESSOR_K6)
   sits in a numbering gap.  */
19744 memory = get_attr_memory (insn);
19746 /* The esp dependency is resolved before the instruction is really
19748 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19749 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19752 /* INT->FP conversion is expensive. */
19753 if (get_attr_fp_int_src (dep_insn))
19756 /* Show ability of reorder buffer to hide latency of load by executing
19757 in parallel with previous instruction in case
19758 previous instruction is not needed to compute the address. */
19759 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19760 && !ix86_agi_dependent (dep_insn, insn))
19762 /* Claim moves to take one cycle, as core can issue one load
19763 at time and the next load can start cycle later. */
19764 if (dep_insn_type == TYPE_IMOV
19765 || dep_insn_type == TYPE_FMOV)
19774 case PROCESSOR_ATHLON:
19776 case PROCESSOR_AMDFAM10:
19777 case PROCESSOR_ATOM:
19778 case PROCESSOR_GENERIC32:
19779 case PROCESSOR_GENERIC64:
19780 memory = get_attr_memory (insn);
19782 /* Show ability of reorder buffer to hide latency of load by executing
19783 in parallel with previous instruction in case
19784 previous instruction is not needed to compute the address. */
19785 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19786 && !ix86_agi_dependent (dep_insn, insn))
19788 enum attr_unit unit = get_attr_unit (insn);
19791 /* Because of the difference between the length of integer and
19792 floating unit pipeline preparation stages, the memory operands
19793 for floating point are cheaper.
19795 ??? For Athlon it the difference is most probably 2. */
19796 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19799 loadcost = TARGET_ATHLON ? 2 : 0;
/* Subtract the hidden load latency, clamping at zero (else branch in gap).  */
19801 if (cost >= loadcost)
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD hook.
   NOTE(review): the switch head, return values and default case fall in
   numbering gaps; only the per-CPU case labels are visible here.  */
19814 /* How many alternative schedules to try. This should be as wide as the
19815 scheduling freedom in the DFA, but no wider. Making this value too
19816 large results extra work for the scheduler. */
19819 ia32_multipass_dfa_lookahead (void)
19823 case PROCESSOR_PENTIUM:
19826 case PROCESSOR_PENTIUMPRO:
/* Raise the alignment of constants placed in memory: doubles to 64 bits,
   128-bit-mode constants to 128 bits, and long string constants to a word
   boundary (helps the string functions) unless optimizing for size.
   NOTE(review): the `return 64;`/`return 128;` bodies and the trailing
   `return align;` sit in numbering gaps of this listing.  */
19836 /* Compute the alignment given to a constant that is being placed in memory.
19837 EXP is the constant and ALIGN is the alignment that the object would
19839 The value of this function is used instead of that alignment to align
19843 ix86_constant_alignment (tree exp, int align)
19845 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19846 || TREE_CODE (exp) == INTEGER_CST)
19848 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19850 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19853 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19854 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19855 return BITS_PER_WORD;
/* Raise the alignment of static data: large aggregates up to MAX_ALIGN
   (256 bits unless optimizing for size), x86-64 ABI arrays >= 16 bytes to
   128 bits, and DF/128-bit element types of arrays, complex, records and
   scalars to their natural 64/128-bit boundaries.
   NOTE(review): the `return` statements and the x86-64 guard (presumably
   `if (TARGET_64BIT)`) fall in numbering gaps of this listing.  */
19860 /* Compute the alignment for a static variable.
19861 TYPE is the data type, and ALIGN is the alignment that
19862 the object would ordinarily have. The value of this function is used
19863 instead of that alignment to align the object. */
19866 ix86_data_alignment (tree type, int align)
19868 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19870 if (AGGREGATE_TYPE_P (type)
19871 && TYPE_SIZE (type)
19872 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19873 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19874 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19875 && align < max_align)
19878 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19879 to 16byte boundary. */
19882 if (AGGREGATE_TYPE_P (type)
19883 && TYPE_SIZE (type)
19884 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19885 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19886 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19890 if (TREE_CODE (type) == ARRAY_TYPE)
19892 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19894 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19897 else if (TREE_CODE (type) == COMPLEX_TYPE)
19900 if (TYPE_MODE (type) == DCmode && align < 64)
19902 if ((TYPE_MODE (type) == XCmode
19903 || TYPE_MODE (type) == TCmode) && align < 128)
/* For records/unions only the first field's mode is inspected here.  */
19906 else if ((TREE_CODE (type) == RECORD_TYPE
19907 || TREE_CODE (type) == UNION_TYPE
19908 || TREE_CODE (type) == QUAL_UNION_TYPE)
19909 && TYPE_FIELDS (type))
19911 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19913 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19916 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19917 || TREE_CODE (type) == INTEGER_TYPE)
19919 if (TYPE_MODE (type) == DFmode && align < 64)
19921 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* Compute the alignment of a local variable or stack slot, mirroring
   ix86_data_alignment but with stack-specific rules: avoid forcing
   dynamic realignment for DImode with -mpreferred-stack-boundary=2, and
   round caller-save XFmode slots up to DFmode alignment.
   NOTE(review): the `return` statements, the decl/type split and several
   guards fall in numbering gaps of this listing.  */
19928 /* Compute the alignment for a local variable or a stack slot. EXP is
19929 the data type or decl itself, MODE is the widest mode available and
19930 ALIGN is the alignment that the object would ordinarily have. The
19931 value of this macro is used instead of that alignment to align the
19935 ix86_local_alignment (tree exp, enum machine_mode mode,
19936 unsigned int align)
19940 if (exp && DECL_P (exp))
19942 type = TREE_TYPE (exp);
19951 /* Don't do dynamic stack realignment for long long objects with
19952 -mpreferred-stack-boundary=2. */
19955 && ix86_preferred_stack_boundary < 64
19956 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19957 && (!type || !TYPE_USER_ALIGN (type))
19958 && (!decl || !DECL_USER_ALIGN (decl)))
19961 /* If TYPE is NULL, we are allocating a stack slot for caller-save
19962 register in MODE. We will return the largest alignment of XF
19966 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19967 align = GET_MODE_ALIGNMENT (DFmode);
19971 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19972 to 16byte boundary. */
19975 if (AGGREGATE_TYPE_P (type)
19976 && TYPE_SIZE (type)
19977 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19978 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19979 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19982 if (TREE_CODE (type) == ARRAY_TYPE)
19984 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19986 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19989 else if (TREE_CODE (type) == COMPLEX_TYPE)
19991 if (TYPE_MODE (type) == DCmode && align < 64)
19993 if ((TYPE_MODE (type) == XCmode
19994 || TYPE_MODE (type) == TCmode) && align < 128)
19997 else if ((TREE_CODE (type) == RECORD_TYPE
19998 || TREE_CODE (type) == UNION_TYPE
19999 || TREE_CODE (type) == QUAL_UNION_TYPE)
20000 && TYPE_FIELDS (type))
20002 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20004 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20007 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20008 || TREE_CODE (type) == INTEGER_TYPE)
20011 if (TYPE_MODE (type) == DFmode && align < 64)
20013 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* Write the variable parts of a trampoline at TRAMP.
   32-bit: 0xb9 imm32 (mov $cxt, %ecx) followed by 0xe9 rel32 (jmp fnaddr,
   displacement computed relative to the end of the 10-byte sequence).
   64-bit: load FNADDR into r11 (short 41 bb movl when it zero-extends,
   otherwise 49 bb movabs), CXT into r10 via 49 ba movabs, then 49 ff e3
   (jmpq *%r11).  NOTE(review): numbering gaps hide the 32/64-bit branch,
   `offset += ...` bookkeeping and closing braces.  */
20019 /* Emit RTL insns to initialize the variable parts of a trampoline.
20020 FNADDR is an RTX for the address of the function's pure code.
20021 CXT is an RTX for the static chain value for the function. */
20023 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
20027 /* Compute offset from the end of the jmp to the target function. */
20028 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
20029 plus_constant (tramp, 10),
20030 NULL_RTX, 1, OPTAB_DIRECT);
20031 emit_move_insn (gen_rtx_MEM (QImode, tramp),
20032 gen_int_mode (0xb9, QImode));
20033 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
20034 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
20035 gen_int_mode (0xe9, QImode));
20036 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
20041 /* Try to load address using shorter movl instead of movabs.
20042 We may want to support movq for kernel mode, but kernel does not use
20043 trampolines at the moment. */
20044 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
20046 fnaddr = copy_to_mode_reg (DImode, fnaddr);
20047 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20048 gen_int_mode (0xbb41, HImode));
20049 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
20050 gen_lowpart (SImode, fnaddr));
20055 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20056 gen_int_mode (0xbb49, HImode));
20057 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20061 /* Load static chain using movabs to r10. */
20062 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20063 gen_int_mode (0xba49, HImode));
20064 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20067 /* Jump to the r11 */
20068 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20069 gen_int_mode (0xff49, HImode));
20070 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
20071 gen_int_mode (0xe3, QImode));
20073 gcc_assert (offset <= TRAMPOLINE_SIZE);
20076 #ifdef ENABLE_EXECUTE_STACK
/* Platforms with non-executable stacks need a runtime helper to make the
   trampoline page executable.  */
20077 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
20078 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
20082 /* Codes for all the SSE/MMX builtins. */
20085 IX86_BUILTIN_ADDPS,
20086 IX86_BUILTIN_ADDSS,
20087 IX86_BUILTIN_DIVPS,
20088 IX86_BUILTIN_DIVSS,
20089 IX86_BUILTIN_MULPS,
20090 IX86_BUILTIN_MULSS,
20091 IX86_BUILTIN_SUBPS,
20092 IX86_BUILTIN_SUBSS,
20094 IX86_BUILTIN_CMPEQPS,
20095 IX86_BUILTIN_CMPLTPS,
20096 IX86_BUILTIN_CMPLEPS,
20097 IX86_BUILTIN_CMPGTPS,
20098 IX86_BUILTIN_CMPGEPS,
20099 IX86_BUILTIN_CMPNEQPS,
20100 IX86_BUILTIN_CMPNLTPS,
20101 IX86_BUILTIN_CMPNLEPS,
20102 IX86_BUILTIN_CMPNGTPS,
20103 IX86_BUILTIN_CMPNGEPS,
20104 IX86_BUILTIN_CMPORDPS,
20105 IX86_BUILTIN_CMPUNORDPS,
20106 IX86_BUILTIN_CMPEQSS,
20107 IX86_BUILTIN_CMPLTSS,
20108 IX86_BUILTIN_CMPLESS,
20109 IX86_BUILTIN_CMPNEQSS,
20110 IX86_BUILTIN_CMPNLTSS,
20111 IX86_BUILTIN_CMPNLESS,
20112 IX86_BUILTIN_CMPNGTSS,
20113 IX86_BUILTIN_CMPNGESS,
20114 IX86_BUILTIN_CMPORDSS,
20115 IX86_BUILTIN_CMPUNORDSS,
20117 IX86_BUILTIN_COMIEQSS,
20118 IX86_BUILTIN_COMILTSS,
20119 IX86_BUILTIN_COMILESS,
20120 IX86_BUILTIN_COMIGTSS,
20121 IX86_BUILTIN_COMIGESS,
20122 IX86_BUILTIN_COMINEQSS,
20123 IX86_BUILTIN_UCOMIEQSS,
20124 IX86_BUILTIN_UCOMILTSS,
20125 IX86_BUILTIN_UCOMILESS,
20126 IX86_BUILTIN_UCOMIGTSS,
20127 IX86_BUILTIN_UCOMIGESS,
20128 IX86_BUILTIN_UCOMINEQSS,
20130 IX86_BUILTIN_CVTPI2PS,
20131 IX86_BUILTIN_CVTPS2PI,
20132 IX86_BUILTIN_CVTSI2SS,
20133 IX86_BUILTIN_CVTSI642SS,
20134 IX86_BUILTIN_CVTSS2SI,
20135 IX86_BUILTIN_CVTSS2SI64,
20136 IX86_BUILTIN_CVTTPS2PI,
20137 IX86_BUILTIN_CVTTSS2SI,
20138 IX86_BUILTIN_CVTTSS2SI64,
20140 IX86_BUILTIN_MAXPS,
20141 IX86_BUILTIN_MAXSS,
20142 IX86_BUILTIN_MINPS,
20143 IX86_BUILTIN_MINSS,
20145 IX86_BUILTIN_LOADUPS,
20146 IX86_BUILTIN_STOREUPS,
20147 IX86_BUILTIN_MOVSS,
20149 IX86_BUILTIN_MOVHLPS,
20150 IX86_BUILTIN_MOVLHPS,
20151 IX86_BUILTIN_LOADHPS,
20152 IX86_BUILTIN_LOADLPS,
20153 IX86_BUILTIN_STOREHPS,
20154 IX86_BUILTIN_STORELPS,
20156 IX86_BUILTIN_MASKMOVQ,
20157 IX86_BUILTIN_MOVMSKPS,
20158 IX86_BUILTIN_PMOVMSKB,
20160 IX86_BUILTIN_MOVNTPS,
20161 IX86_BUILTIN_MOVNTQ,
20163 IX86_BUILTIN_LOADDQU,
20164 IX86_BUILTIN_STOREDQU,
20166 IX86_BUILTIN_PACKSSWB,
20167 IX86_BUILTIN_PACKSSDW,
20168 IX86_BUILTIN_PACKUSWB,
20170 IX86_BUILTIN_PADDB,
20171 IX86_BUILTIN_PADDW,
20172 IX86_BUILTIN_PADDD,
20173 IX86_BUILTIN_PADDQ,
20174 IX86_BUILTIN_PADDSB,
20175 IX86_BUILTIN_PADDSW,
20176 IX86_BUILTIN_PADDUSB,
20177 IX86_BUILTIN_PADDUSW,
20178 IX86_BUILTIN_PSUBB,
20179 IX86_BUILTIN_PSUBW,
20180 IX86_BUILTIN_PSUBD,
20181 IX86_BUILTIN_PSUBQ,
20182 IX86_BUILTIN_PSUBSB,
20183 IX86_BUILTIN_PSUBSW,
20184 IX86_BUILTIN_PSUBUSB,
20185 IX86_BUILTIN_PSUBUSW,
20188 IX86_BUILTIN_PANDN,
20192 IX86_BUILTIN_PAVGB,
20193 IX86_BUILTIN_PAVGW,
20195 IX86_BUILTIN_PCMPEQB,
20196 IX86_BUILTIN_PCMPEQW,
20197 IX86_BUILTIN_PCMPEQD,
20198 IX86_BUILTIN_PCMPGTB,
20199 IX86_BUILTIN_PCMPGTW,
20200 IX86_BUILTIN_PCMPGTD,
20202 IX86_BUILTIN_PMADDWD,
20204 IX86_BUILTIN_PMAXSW,
20205 IX86_BUILTIN_PMAXUB,
20206 IX86_BUILTIN_PMINSW,
20207 IX86_BUILTIN_PMINUB,
20209 IX86_BUILTIN_PMULHUW,
20210 IX86_BUILTIN_PMULHW,
20211 IX86_BUILTIN_PMULLW,
20213 IX86_BUILTIN_PSADBW,
20214 IX86_BUILTIN_PSHUFW,
20216 IX86_BUILTIN_PSLLW,
20217 IX86_BUILTIN_PSLLD,
20218 IX86_BUILTIN_PSLLQ,
20219 IX86_BUILTIN_PSRAW,
20220 IX86_BUILTIN_PSRAD,
20221 IX86_BUILTIN_PSRLW,
20222 IX86_BUILTIN_PSRLD,
20223 IX86_BUILTIN_PSRLQ,
20224 IX86_BUILTIN_PSLLWI,
20225 IX86_BUILTIN_PSLLDI,
20226 IX86_BUILTIN_PSLLQI,
20227 IX86_BUILTIN_PSRAWI,
20228 IX86_BUILTIN_PSRADI,
20229 IX86_BUILTIN_PSRLWI,
20230 IX86_BUILTIN_PSRLDI,
20231 IX86_BUILTIN_PSRLQI,
20233 IX86_BUILTIN_PUNPCKHBW,
20234 IX86_BUILTIN_PUNPCKHWD,
20235 IX86_BUILTIN_PUNPCKHDQ,
20236 IX86_BUILTIN_PUNPCKLBW,
20237 IX86_BUILTIN_PUNPCKLWD,
20238 IX86_BUILTIN_PUNPCKLDQ,
20240 IX86_BUILTIN_SHUFPS,
20242 IX86_BUILTIN_RCPPS,
20243 IX86_BUILTIN_RCPSS,
20244 IX86_BUILTIN_RSQRTPS,
20245 IX86_BUILTIN_RSQRTPS_NR,
20246 IX86_BUILTIN_RSQRTSS,
20247 IX86_BUILTIN_RSQRTF,
20248 IX86_BUILTIN_SQRTPS,
20249 IX86_BUILTIN_SQRTPS_NR,
20250 IX86_BUILTIN_SQRTSS,
20252 IX86_BUILTIN_UNPCKHPS,
20253 IX86_BUILTIN_UNPCKLPS,
20255 IX86_BUILTIN_ANDPS,
20256 IX86_BUILTIN_ANDNPS,
20258 IX86_BUILTIN_XORPS,
20261 IX86_BUILTIN_LDMXCSR,
20262 IX86_BUILTIN_STMXCSR,
20263 IX86_BUILTIN_SFENCE,
20265 /* 3DNow! Original */
20266 IX86_BUILTIN_FEMMS,
20267 IX86_BUILTIN_PAVGUSB,
20268 IX86_BUILTIN_PF2ID,
20269 IX86_BUILTIN_PFACC,
20270 IX86_BUILTIN_PFADD,
20271 IX86_BUILTIN_PFCMPEQ,
20272 IX86_BUILTIN_PFCMPGE,
20273 IX86_BUILTIN_PFCMPGT,
20274 IX86_BUILTIN_PFMAX,
20275 IX86_BUILTIN_PFMIN,
20276 IX86_BUILTIN_PFMUL,
20277 IX86_BUILTIN_PFRCP,
20278 IX86_BUILTIN_PFRCPIT1,
20279 IX86_BUILTIN_PFRCPIT2,
20280 IX86_BUILTIN_PFRSQIT1,
20281 IX86_BUILTIN_PFRSQRT,
20282 IX86_BUILTIN_PFSUB,
20283 IX86_BUILTIN_PFSUBR,
20284 IX86_BUILTIN_PI2FD,
20285 IX86_BUILTIN_PMULHRW,
20287 /* 3DNow! Athlon Extensions */
20288 IX86_BUILTIN_PF2IW,
20289 IX86_BUILTIN_PFNACC,
20290 IX86_BUILTIN_PFPNACC,
20291 IX86_BUILTIN_PI2FW,
20292 IX86_BUILTIN_PSWAPDSI,
20293 IX86_BUILTIN_PSWAPDSF,
20296 IX86_BUILTIN_ADDPD,
20297 IX86_BUILTIN_ADDSD,
20298 IX86_BUILTIN_DIVPD,
20299 IX86_BUILTIN_DIVSD,
20300 IX86_BUILTIN_MULPD,
20301 IX86_BUILTIN_MULSD,
20302 IX86_BUILTIN_SUBPD,
20303 IX86_BUILTIN_SUBSD,
20305 IX86_BUILTIN_CMPEQPD,
20306 IX86_BUILTIN_CMPLTPD,
20307 IX86_BUILTIN_CMPLEPD,
20308 IX86_BUILTIN_CMPGTPD,
20309 IX86_BUILTIN_CMPGEPD,
20310 IX86_BUILTIN_CMPNEQPD,
20311 IX86_BUILTIN_CMPNLTPD,
20312 IX86_BUILTIN_CMPNLEPD,
20313 IX86_BUILTIN_CMPNGTPD,
20314 IX86_BUILTIN_CMPNGEPD,
20315 IX86_BUILTIN_CMPORDPD,
20316 IX86_BUILTIN_CMPUNORDPD,
20317 IX86_BUILTIN_CMPEQSD,
20318 IX86_BUILTIN_CMPLTSD,
20319 IX86_BUILTIN_CMPLESD,
20320 IX86_BUILTIN_CMPNEQSD,
20321 IX86_BUILTIN_CMPNLTSD,
20322 IX86_BUILTIN_CMPNLESD,
20323 IX86_BUILTIN_CMPORDSD,
20324 IX86_BUILTIN_CMPUNORDSD,
20326 IX86_BUILTIN_COMIEQSD,
20327 IX86_BUILTIN_COMILTSD,
20328 IX86_BUILTIN_COMILESD,
20329 IX86_BUILTIN_COMIGTSD,
20330 IX86_BUILTIN_COMIGESD,
20331 IX86_BUILTIN_COMINEQSD,
20332 IX86_BUILTIN_UCOMIEQSD,
20333 IX86_BUILTIN_UCOMILTSD,
20334 IX86_BUILTIN_UCOMILESD,
20335 IX86_BUILTIN_UCOMIGTSD,
20336 IX86_BUILTIN_UCOMIGESD,
20337 IX86_BUILTIN_UCOMINEQSD,
20339 IX86_BUILTIN_MAXPD,
20340 IX86_BUILTIN_MAXSD,
20341 IX86_BUILTIN_MINPD,
20342 IX86_BUILTIN_MINSD,
20344 IX86_BUILTIN_ANDPD,
20345 IX86_BUILTIN_ANDNPD,
20347 IX86_BUILTIN_XORPD,
20349 IX86_BUILTIN_SQRTPD,
20350 IX86_BUILTIN_SQRTSD,
20352 IX86_BUILTIN_UNPCKHPD,
20353 IX86_BUILTIN_UNPCKLPD,
20355 IX86_BUILTIN_SHUFPD,
20357 IX86_BUILTIN_LOADUPD,
20358 IX86_BUILTIN_STOREUPD,
20359 IX86_BUILTIN_MOVSD,
20361 IX86_BUILTIN_LOADHPD,
20362 IX86_BUILTIN_LOADLPD,
20364 IX86_BUILTIN_CVTDQ2PD,
20365 IX86_BUILTIN_CVTDQ2PS,
20367 IX86_BUILTIN_CVTPD2DQ,
20368 IX86_BUILTIN_CVTPD2PI,
20369 IX86_BUILTIN_CVTPD2PS,
20370 IX86_BUILTIN_CVTTPD2DQ,
20371 IX86_BUILTIN_CVTTPD2PI,
20373 IX86_BUILTIN_CVTPI2PD,
20374 IX86_BUILTIN_CVTSI2SD,
20375 IX86_BUILTIN_CVTSI642SD,
20377 IX86_BUILTIN_CVTSD2SI,
20378 IX86_BUILTIN_CVTSD2SI64,
20379 IX86_BUILTIN_CVTSD2SS,
20380 IX86_BUILTIN_CVTSS2SD,
20381 IX86_BUILTIN_CVTTSD2SI,
20382 IX86_BUILTIN_CVTTSD2SI64,
20384 IX86_BUILTIN_CVTPS2DQ,
20385 IX86_BUILTIN_CVTPS2PD,
20386 IX86_BUILTIN_CVTTPS2DQ,
20388 IX86_BUILTIN_MOVNTI,
20389 IX86_BUILTIN_MOVNTPD,
20390 IX86_BUILTIN_MOVNTDQ,
20392 IX86_BUILTIN_MOVQ128,
20395 IX86_BUILTIN_MASKMOVDQU,
20396 IX86_BUILTIN_MOVMSKPD,
20397 IX86_BUILTIN_PMOVMSKB128,
20399 IX86_BUILTIN_PACKSSWB128,
20400 IX86_BUILTIN_PACKSSDW128,
20401 IX86_BUILTIN_PACKUSWB128,
20403 IX86_BUILTIN_PADDB128,
20404 IX86_BUILTIN_PADDW128,
20405 IX86_BUILTIN_PADDD128,
20406 IX86_BUILTIN_PADDQ128,
20407 IX86_BUILTIN_PADDSB128,
20408 IX86_BUILTIN_PADDSW128,
20409 IX86_BUILTIN_PADDUSB128,
20410 IX86_BUILTIN_PADDUSW128,
20411 IX86_BUILTIN_PSUBB128,
20412 IX86_BUILTIN_PSUBW128,
20413 IX86_BUILTIN_PSUBD128,
20414 IX86_BUILTIN_PSUBQ128,
20415 IX86_BUILTIN_PSUBSB128,
20416 IX86_BUILTIN_PSUBSW128,
20417 IX86_BUILTIN_PSUBUSB128,
20418 IX86_BUILTIN_PSUBUSW128,
20420 IX86_BUILTIN_PAND128,
20421 IX86_BUILTIN_PANDN128,
20422 IX86_BUILTIN_POR128,
20423 IX86_BUILTIN_PXOR128,
20425 IX86_BUILTIN_PAVGB128,
20426 IX86_BUILTIN_PAVGW128,
20428 IX86_BUILTIN_PCMPEQB128,
20429 IX86_BUILTIN_PCMPEQW128,
20430 IX86_BUILTIN_PCMPEQD128,
20431 IX86_BUILTIN_PCMPGTB128,
20432 IX86_BUILTIN_PCMPGTW128,
20433 IX86_BUILTIN_PCMPGTD128,
20435 IX86_BUILTIN_PMADDWD128,
20437 IX86_BUILTIN_PMAXSW128,
20438 IX86_BUILTIN_PMAXUB128,
20439 IX86_BUILTIN_PMINSW128,
20440 IX86_BUILTIN_PMINUB128,
20442 IX86_BUILTIN_PMULUDQ,
20443 IX86_BUILTIN_PMULUDQ128,
20444 IX86_BUILTIN_PMULHUW128,
20445 IX86_BUILTIN_PMULHW128,
20446 IX86_BUILTIN_PMULLW128,
20448 IX86_BUILTIN_PSADBW128,
20449 IX86_BUILTIN_PSHUFHW,
20450 IX86_BUILTIN_PSHUFLW,
20451 IX86_BUILTIN_PSHUFD,
20453 IX86_BUILTIN_PSLLDQI128,
20454 IX86_BUILTIN_PSLLWI128,
20455 IX86_BUILTIN_PSLLDI128,
20456 IX86_BUILTIN_PSLLQI128,
20457 IX86_BUILTIN_PSRAWI128,
20458 IX86_BUILTIN_PSRADI128,
20459 IX86_BUILTIN_PSRLDQI128,
20460 IX86_BUILTIN_PSRLWI128,
20461 IX86_BUILTIN_PSRLDI128,
20462 IX86_BUILTIN_PSRLQI128,
20464 IX86_BUILTIN_PSLLDQ128,
20465 IX86_BUILTIN_PSLLW128,
20466 IX86_BUILTIN_PSLLD128,
20467 IX86_BUILTIN_PSLLQ128,
20468 IX86_BUILTIN_PSRAW128,
20469 IX86_BUILTIN_PSRAD128,
20470 IX86_BUILTIN_PSRLW128,
20471 IX86_BUILTIN_PSRLD128,
20472 IX86_BUILTIN_PSRLQ128,
20474 IX86_BUILTIN_PUNPCKHBW128,
20475 IX86_BUILTIN_PUNPCKHWD128,
20476 IX86_BUILTIN_PUNPCKHDQ128,
20477 IX86_BUILTIN_PUNPCKHQDQ128,
20478 IX86_BUILTIN_PUNPCKLBW128,
20479 IX86_BUILTIN_PUNPCKLWD128,
20480 IX86_BUILTIN_PUNPCKLDQ128,
20481 IX86_BUILTIN_PUNPCKLQDQ128,
20483 IX86_BUILTIN_CLFLUSH,
20484 IX86_BUILTIN_MFENCE,
20485 IX86_BUILTIN_LFENCE,
20488 IX86_BUILTIN_ADDSUBPS,
20489 IX86_BUILTIN_HADDPS,
20490 IX86_BUILTIN_HSUBPS,
20491 IX86_BUILTIN_MOVSHDUP,
20492 IX86_BUILTIN_MOVSLDUP,
20493 IX86_BUILTIN_ADDSUBPD,
20494 IX86_BUILTIN_HADDPD,
20495 IX86_BUILTIN_HSUBPD,
20496 IX86_BUILTIN_LDDQU,
20498 IX86_BUILTIN_MONITOR,
20499 IX86_BUILTIN_MWAIT,
20502 IX86_BUILTIN_PHADDW,
20503 IX86_BUILTIN_PHADDD,
20504 IX86_BUILTIN_PHADDSW,
20505 IX86_BUILTIN_PHSUBW,
20506 IX86_BUILTIN_PHSUBD,
20507 IX86_BUILTIN_PHSUBSW,
20508 IX86_BUILTIN_PMADDUBSW,
20509 IX86_BUILTIN_PMULHRSW,
20510 IX86_BUILTIN_PSHUFB,
20511 IX86_BUILTIN_PSIGNB,
20512 IX86_BUILTIN_PSIGNW,
20513 IX86_BUILTIN_PSIGND,
20514 IX86_BUILTIN_PALIGNR,
20515 IX86_BUILTIN_PABSB,
20516 IX86_BUILTIN_PABSW,
20517 IX86_BUILTIN_PABSD,
20519 IX86_BUILTIN_PHADDW128,
20520 IX86_BUILTIN_PHADDD128,
20521 IX86_BUILTIN_PHADDSW128,
20522 IX86_BUILTIN_PHSUBW128,
20523 IX86_BUILTIN_PHSUBD128,
20524 IX86_BUILTIN_PHSUBSW128,
20525 IX86_BUILTIN_PMADDUBSW128,
20526 IX86_BUILTIN_PMULHRSW128,
20527 IX86_BUILTIN_PSHUFB128,
20528 IX86_BUILTIN_PSIGNB128,
20529 IX86_BUILTIN_PSIGNW128,
20530 IX86_BUILTIN_PSIGND128,
20531 IX86_BUILTIN_PALIGNR128,
20532 IX86_BUILTIN_PABSB128,
20533 IX86_BUILTIN_PABSW128,
20534 IX86_BUILTIN_PABSD128,
20536 /* AMDFAM10 - SSE4A New Instructions. */
20537 IX86_BUILTIN_MOVNTSD,
20538 IX86_BUILTIN_MOVNTSS,
20539 IX86_BUILTIN_EXTRQI,
20540 IX86_BUILTIN_EXTRQ,
20541 IX86_BUILTIN_INSERTQI,
20542 IX86_BUILTIN_INSERTQ,
20545 IX86_BUILTIN_BLENDPD,
20546 IX86_BUILTIN_BLENDPS,
20547 IX86_BUILTIN_BLENDVPD,
20548 IX86_BUILTIN_BLENDVPS,
20549 IX86_BUILTIN_PBLENDVB128,
20550 IX86_BUILTIN_PBLENDW128,
20555 IX86_BUILTIN_INSERTPS128,
20557 IX86_BUILTIN_MOVNTDQA,
20558 IX86_BUILTIN_MPSADBW128,
20559 IX86_BUILTIN_PACKUSDW128,
20560 IX86_BUILTIN_PCMPEQQ,
20561 IX86_BUILTIN_PHMINPOSUW128,
20563 IX86_BUILTIN_PMAXSB128,
20564 IX86_BUILTIN_PMAXSD128,
20565 IX86_BUILTIN_PMAXUD128,
20566 IX86_BUILTIN_PMAXUW128,
20568 IX86_BUILTIN_PMINSB128,
20569 IX86_BUILTIN_PMINSD128,
20570 IX86_BUILTIN_PMINUD128,
20571 IX86_BUILTIN_PMINUW128,
20573 IX86_BUILTIN_PMOVSXBW128,
20574 IX86_BUILTIN_PMOVSXBD128,
20575 IX86_BUILTIN_PMOVSXBQ128,
20576 IX86_BUILTIN_PMOVSXWD128,
20577 IX86_BUILTIN_PMOVSXWQ128,
20578 IX86_BUILTIN_PMOVSXDQ128,
20580 IX86_BUILTIN_PMOVZXBW128,
20581 IX86_BUILTIN_PMOVZXBD128,
20582 IX86_BUILTIN_PMOVZXBQ128,
20583 IX86_BUILTIN_PMOVZXWD128,
20584 IX86_BUILTIN_PMOVZXWQ128,
20585 IX86_BUILTIN_PMOVZXDQ128,
20587 IX86_BUILTIN_PMULDQ128,
20588 IX86_BUILTIN_PMULLD128,
20590 IX86_BUILTIN_ROUNDPD,
20591 IX86_BUILTIN_ROUNDPS,
20592 IX86_BUILTIN_ROUNDSD,
20593 IX86_BUILTIN_ROUNDSS,
20595 IX86_BUILTIN_PTESTZ,
20596 IX86_BUILTIN_PTESTC,
20597 IX86_BUILTIN_PTESTNZC,
20599 IX86_BUILTIN_VEC_INIT_V2SI,
20600 IX86_BUILTIN_VEC_INIT_V4HI,
20601 IX86_BUILTIN_VEC_INIT_V8QI,
20602 IX86_BUILTIN_VEC_EXT_V2DF,
20603 IX86_BUILTIN_VEC_EXT_V2DI,
20604 IX86_BUILTIN_VEC_EXT_V4SF,
20605 IX86_BUILTIN_VEC_EXT_V4SI,
20606 IX86_BUILTIN_VEC_EXT_V8HI,
20607 IX86_BUILTIN_VEC_EXT_V2SI,
20608 IX86_BUILTIN_VEC_EXT_V4HI,
20609 IX86_BUILTIN_VEC_EXT_V16QI,
20610 IX86_BUILTIN_VEC_SET_V2DI,
20611 IX86_BUILTIN_VEC_SET_V4SF,
20612 IX86_BUILTIN_VEC_SET_V4SI,
20613 IX86_BUILTIN_VEC_SET_V8HI,
20614 IX86_BUILTIN_VEC_SET_V4HI,
20615 IX86_BUILTIN_VEC_SET_V16QI,
20617 IX86_BUILTIN_VEC_PACK_SFIX,
20620 IX86_BUILTIN_CRC32QI,
20621 IX86_BUILTIN_CRC32HI,
20622 IX86_BUILTIN_CRC32SI,
20623 IX86_BUILTIN_CRC32DI,
20625 IX86_BUILTIN_PCMPESTRI128,
20626 IX86_BUILTIN_PCMPESTRM128,
20627 IX86_BUILTIN_PCMPESTRA128,
20628 IX86_BUILTIN_PCMPESTRC128,
20629 IX86_BUILTIN_PCMPESTRO128,
20630 IX86_BUILTIN_PCMPESTRS128,
20631 IX86_BUILTIN_PCMPESTRZ128,
20632 IX86_BUILTIN_PCMPISTRI128,
20633 IX86_BUILTIN_PCMPISTRM128,
20634 IX86_BUILTIN_PCMPISTRA128,
20635 IX86_BUILTIN_PCMPISTRC128,
20636 IX86_BUILTIN_PCMPISTRO128,
20637 IX86_BUILTIN_PCMPISTRS128,
20638 IX86_BUILTIN_PCMPISTRZ128,
20640 IX86_BUILTIN_PCMPGTQ,
20642 /* AES instructions */
20643 IX86_BUILTIN_AESENC128,
20644 IX86_BUILTIN_AESENCLAST128,
20645 IX86_BUILTIN_AESDEC128,
20646 IX86_BUILTIN_AESDECLAST128,
20647 IX86_BUILTIN_AESIMC128,
20648 IX86_BUILTIN_AESKEYGENASSIST128,
20650 /* PCLMUL instruction */
20651 IX86_BUILTIN_PCLMULQDQ128,
20654 IX86_BUILTIN_ADDPD256,
20655 IX86_BUILTIN_ADDPS256,
20656 IX86_BUILTIN_ADDSUBPD256,
20657 IX86_BUILTIN_ADDSUBPS256,
20658 IX86_BUILTIN_ANDPD256,
20659 IX86_BUILTIN_ANDPS256,
20660 IX86_BUILTIN_ANDNPD256,
20661 IX86_BUILTIN_ANDNPS256,
20662 IX86_BUILTIN_BLENDPD256,
20663 IX86_BUILTIN_BLENDPS256,
20664 IX86_BUILTIN_BLENDVPD256,
20665 IX86_BUILTIN_BLENDVPS256,
20666 IX86_BUILTIN_DIVPD256,
20667 IX86_BUILTIN_DIVPS256,
20668 IX86_BUILTIN_DPPS256,
20669 IX86_BUILTIN_HADDPD256,
20670 IX86_BUILTIN_HADDPS256,
20671 IX86_BUILTIN_HSUBPD256,
20672 IX86_BUILTIN_HSUBPS256,
20673 IX86_BUILTIN_MAXPD256,
20674 IX86_BUILTIN_MAXPS256,
20675 IX86_BUILTIN_MINPD256,
20676 IX86_BUILTIN_MINPS256,
20677 IX86_BUILTIN_MULPD256,
20678 IX86_BUILTIN_MULPS256,
20679 IX86_BUILTIN_ORPD256,
20680 IX86_BUILTIN_ORPS256,
20681 IX86_BUILTIN_SHUFPD256,
20682 IX86_BUILTIN_SHUFPS256,
20683 IX86_BUILTIN_SUBPD256,
20684 IX86_BUILTIN_SUBPS256,
20685 IX86_BUILTIN_XORPD256,
20686 IX86_BUILTIN_XORPS256,
20687 IX86_BUILTIN_CMPSD,
20688 IX86_BUILTIN_CMPSS,
20689 IX86_BUILTIN_CMPPD,
20690 IX86_BUILTIN_CMPPS,
20691 IX86_BUILTIN_CMPPD256,
20692 IX86_BUILTIN_CMPPS256,
20693 IX86_BUILTIN_CVTDQ2PD256,
20694 IX86_BUILTIN_CVTDQ2PS256,
20695 IX86_BUILTIN_CVTPD2PS256,
20696 IX86_BUILTIN_CVTPS2DQ256,
20697 IX86_BUILTIN_CVTPS2PD256,
20698 IX86_BUILTIN_CVTTPD2DQ256,
20699 IX86_BUILTIN_CVTPD2DQ256,
20700 IX86_BUILTIN_CVTTPS2DQ256,
20701 IX86_BUILTIN_EXTRACTF128PD256,
20702 IX86_BUILTIN_EXTRACTF128PS256,
20703 IX86_BUILTIN_EXTRACTF128SI256,
20704 IX86_BUILTIN_VZEROALL,
20705 IX86_BUILTIN_VZEROUPPER,
20706 IX86_BUILTIN_VZEROUPPER_REX64,
20707 IX86_BUILTIN_VPERMILVARPD,
20708 IX86_BUILTIN_VPERMILVARPS,
20709 IX86_BUILTIN_VPERMILVARPD256,
20710 IX86_BUILTIN_VPERMILVARPS256,
20711 IX86_BUILTIN_VPERMILPD,
20712 IX86_BUILTIN_VPERMILPS,
20713 IX86_BUILTIN_VPERMILPD256,
20714 IX86_BUILTIN_VPERMILPS256,
20715 IX86_BUILTIN_VPERM2F128PD256,
20716 IX86_BUILTIN_VPERM2F128PS256,
20717 IX86_BUILTIN_VPERM2F128SI256,
20718 IX86_BUILTIN_VBROADCASTSS,
20719 IX86_BUILTIN_VBROADCASTSD256,
20720 IX86_BUILTIN_VBROADCASTSS256,
20721 IX86_BUILTIN_VBROADCASTPD256,
20722 IX86_BUILTIN_VBROADCASTPS256,
20723 IX86_BUILTIN_VINSERTF128PD256,
20724 IX86_BUILTIN_VINSERTF128PS256,
20725 IX86_BUILTIN_VINSERTF128SI256,
20726 IX86_BUILTIN_LOADUPD256,
20727 IX86_BUILTIN_LOADUPS256,
20728 IX86_BUILTIN_STOREUPD256,
20729 IX86_BUILTIN_STOREUPS256,
20730 IX86_BUILTIN_LDDQU256,
20731 IX86_BUILTIN_MOVNTDQ256,
20732 IX86_BUILTIN_MOVNTPD256,
20733 IX86_BUILTIN_MOVNTPS256,
20734 IX86_BUILTIN_LOADDQU256,
20735 IX86_BUILTIN_STOREDQU256,
20736 IX86_BUILTIN_MASKLOADPD,
20737 IX86_BUILTIN_MASKLOADPS,
20738 IX86_BUILTIN_MASKSTOREPD,
20739 IX86_BUILTIN_MASKSTOREPS,
20740 IX86_BUILTIN_MASKLOADPD256,
20741 IX86_BUILTIN_MASKLOADPS256,
20742 IX86_BUILTIN_MASKSTOREPD256,
20743 IX86_BUILTIN_MASKSTOREPS256,
20744 IX86_BUILTIN_MOVSHDUP256,
20745 IX86_BUILTIN_MOVSLDUP256,
20746 IX86_BUILTIN_MOVDDUP256,
20748 IX86_BUILTIN_SQRTPD256,
20749 IX86_BUILTIN_SQRTPS256,
20750 IX86_BUILTIN_SQRTPS_NR256,
20751 IX86_BUILTIN_RSQRTPS256,
20752 IX86_BUILTIN_RSQRTPS_NR256,
20754 IX86_BUILTIN_RCPPS256,
20756 IX86_BUILTIN_ROUNDPD256,
20757 IX86_BUILTIN_ROUNDPS256,
20759 IX86_BUILTIN_UNPCKHPD256,
20760 IX86_BUILTIN_UNPCKLPD256,
20761 IX86_BUILTIN_UNPCKHPS256,
20762 IX86_BUILTIN_UNPCKLPS256,
20764 IX86_BUILTIN_SI256_SI,
20765 IX86_BUILTIN_PS256_PS,
20766 IX86_BUILTIN_PD256_PD,
20767 IX86_BUILTIN_SI_SI256,
20768 IX86_BUILTIN_PS_PS256,
20769 IX86_BUILTIN_PD_PD256,
20771 IX86_BUILTIN_VTESTZPD,
20772 IX86_BUILTIN_VTESTCPD,
20773 IX86_BUILTIN_VTESTNZCPD,
20774 IX86_BUILTIN_VTESTZPS,
20775 IX86_BUILTIN_VTESTCPS,
20776 IX86_BUILTIN_VTESTNZCPS,
20777 IX86_BUILTIN_VTESTZPD256,
20778 IX86_BUILTIN_VTESTCPD256,
20779 IX86_BUILTIN_VTESTNZCPD256,
20780 IX86_BUILTIN_VTESTZPS256,
20781 IX86_BUILTIN_VTESTCPS256,
20782 IX86_BUILTIN_VTESTNZCPS256,
20783 IX86_BUILTIN_PTESTZ256,
20784 IX86_BUILTIN_PTESTC256,
20785 IX86_BUILTIN_PTESTNZC256,
20787 IX86_BUILTIN_MOVMSKPD256,
20788 IX86_BUILTIN_MOVMSKPS256,
20790 /* TFmode support builtins. */
20792 IX86_BUILTIN_HUGE_VALQ,
20793 IX86_BUILTIN_FABSQ,
20794 IX86_BUILTIN_COPYSIGNQ,
20796 /* SSE5 instructions */
20797 IX86_BUILTIN_FMADDSS,
20798 IX86_BUILTIN_FMADDSD,
20799 IX86_BUILTIN_FMADDPS,
20800 IX86_BUILTIN_FMADDPD,
20801 IX86_BUILTIN_FMSUBSS,
20802 IX86_BUILTIN_FMSUBSD,
20803 IX86_BUILTIN_FMSUBPS,
20804 IX86_BUILTIN_FMSUBPD,
20805 IX86_BUILTIN_FNMADDSS,
20806 IX86_BUILTIN_FNMADDSD,
20807 IX86_BUILTIN_FNMADDPS,
20808 IX86_BUILTIN_FNMADDPD,
20809 IX86_BUILTIN_FNMSUBSS,
20810 IX86_BUILTIN_FNMSUBSD,
20811 IX86_BUILTIN_FNMSUBPS,
20812 IX86_BUILTIN_FNMSUBPD,
20813 IX86_BUILTIN_PCMOV,
20814 IX86_BUILTIN_PCMOV_V2DI,
20815 IX86_BUILTIN_PCMOV_V4SI,
20816 IX86_BUILTIN_PCMOV_V8HI,
20817 IX86_BUILTIN_PCMOV_V16QI,
20818 IX86_BUILTIN_PCMOV_V4SF,
20819 IX86_BUILTIN_PCMOV_V2DF,
20820 IX86_BUILTIN_PPERM,
20821 IX86_BUILTIN_PERMPS,
20822 IX86_BUILTIN_PERMPD,
20823 IX86_BUILTIN_PMACSSWW,
20824 IX86_BUILTIN_PMACSWW,
20825 IX86_BUILTIN_PMACSSWD,
20826 IX86_BUILTIN_PMACSWD,
20827 IX86_BUILTIN_PMACSSDD,
20828 IX86_BUILTIN_PMACSDD,
20829 IX86_BUILTIN_PMACSSDQL,
20830 IX86_BUILTIN_PMACSSDQH,
20831 IX86_BUILTIN_PMACSDQL,
20832 IX86_BUILTIN_PMACSDQH,
20833 IX86_BUILTIN_PMADCSSWD,
20834 IX86_BUILTIN_PMADCSWD,
20835 IX86_BUILTIN_PHADDBW,
20836 IX86_BUILTIN_PHADDBD,
20837 IX86_BUILTIN_PHADDBQ,
20838 IX86_BUILTIN_PHADDWD,
20839 IX86_BUILTIN_PHADDWQ,
20840 IX86_BUILTIN_PHADDDQ,
20841 IX86_BUILTIN_PHADDUBW,
20842 IX86_BUILTIN_PHADDUBD,
20843 IX86_BUILTIN_PHADDUBQ,
20844 IX86_BUILTIN_PHADDUWD,
20845 IX86_BUILTIN_PHADDUWQ,
20846 IX86_BUILTIN_PHADDUDQ,
20847 IX86_BUILTIN_PHSUBBW,
20848 IX86_BUILTIN_PHSUBWD,
20849 IX86_BUILTIN_PHSUBDQ,
20850 IX86_BUILTIN_PROTB,
20851 IX86_BUILTIN_PROTW,
20852 IX86_BUILTIN_PROTD,
20853 IX86_BUILTIN_PROTQ,
20854 IX86_BUILTIN_PROTB_IMM,
20855 IX86_BUILTIN_PROTW_IMM,
20856 IX86_BUILTIN_PROTD_IMM,
20857 IX86_BUILTIN_PROTQ_IMM,
20858 IX86_BUILTIN_PSHLB,
20859 IX86_BUILTIN_PSHLW,
20860 IX86_BUILTIN_PSHLD,
20861 IX86_BUILTIN_PSHLQ,
20862 IX86_BUILTIN_PSHAB,
20863 IX86_BUILTIN_PSHAW,
20864 IX86_BUILTIN_PSHAD,
20865 IX86_BUILTIN_PSHAQ,
20866 IX86_BUILTIN_FRCZSS,
20867 IX86_BUILTIN_FRCZSD,
20868 IX86_BUILTIN_FRCZPS,
20869 IX86_BUILTIN_FRCZPD,
20870 IX86_BUILTIN_CVTPH2PS,
20871 IX86_BUILTIN_CVTPS2PH,
20873 IX86_BUILTIN_COMEQSS,
20874 IX86_BUILTIN_COMNESS,
20875 IX86_BUILTIN_COMLTSS,
20876 IX86_BUILTIN_COMLESS,
20877 IX86_BUILTIN_COMGTSS,
20878 IX86_BUILTIN_COMGESS,
20879 IX86_BUILTIN_COMUEQSS,
20880 IX86_BUILTIN_COMUNESS,
20881 IX86_BUILTIN_COMULTSS,
20882 IX86_BUILTIN_COMULESS,
20883 IX86_BUILTIN_COMUGTSS,
20884 IX86_BUILTIN_COMUGESS,
20885 IX86_BUILTIN_COMORDSS,
20886 IX86_BUILTIN_COMUNORDSS,
20887 IX86_BUILTIN_COMFALSESS,
20888 IX86_BUILTIN_COMTRUESS,
20890 IX86_BUILTIN_COMEQSD,
20891 IX86_BUILTIN_COMNESD,
20892 IX86_BUILTIN_COMLTSD,
20893 IX86_BUILTIN_COMLESD,
20894 IX86_BUILTIN_COMGTSD,
20895 IX86_BUILTIN_COMGESD,
20896 IX86_BUILTIN_COMUEQSD,
20897 IX86_BUILTIN_COMUNESD,
20898 IX86_BUILTIN_COMULTSD,
20899 IX86_BUILTIN_COMULESD,
20900 IX86_BUILTIN_COMUGTSD,
20901 IX86_BUILTIN_COMUGESD,
20902 IX86_BUILTIN_COMORDSD,
20903 IX86_BUILTIN_COMUNORDSD,
20904 IX86_BUILTIN_COMFALSESD,
20905 IX86_BUILTIN_COMTRUESD,
20907 IX86_BUILTIN_COMEQPS,
20908 IX86_BUILTIN_COMNEPS,
20909 IX86_BUILTIN_COMLTPS,
20910 IX86_BUILTIN_COMLEPS,
20911 IX86_BUILTIN_COMGTPS,
20912 IX86_BUILTIN_COMGEPS,
20913 IX86_BUILTIN_COMUEQPS,
20914 IX86_BUILTIN_COMUNEPS,
20915 IX86_BUILTIN_COMULTPS,
20916 IX86_BUILTIN_COMULEPS,
20917 IX86_BUILTIN_COMUGTPS,
20918 IX86_BUILTIN_COMUGEPS,
20919 IX86_BUILTIN_COMORDPS,
20920 IX86_BUILTIN_COMUNORDPS,
20921 IX86_BUILTIN_COMFALSEPS,
20922 IX86_BUILTIN_COMTRUEPS,
20924 IX86_BUILTIN_COMEQPD,
20925 IX86_BUILTIN_COMNEPD,
20926 IX86_BUILTIN_COMLTPD,
20927 IX86_BUILTIN_COMLEPD,
20928 IX86_BUILTIN_COMGTPD,
20929 IX86_BUILTIN_COMGEPD,
20930 IX86_BUILTIN_COMUEQPD,
20931 IX86_BUILTIN_COMUNEPD,
20932 IX86_BUILTIN_COMULTPD,
20933 IX86_BUILTIN_COMULEPD,
20934 IX86_BUILTIN_COMUGTPD,
20935 IX86_BUILTIN_COMUGEPD,
20936 IX86_BUILTIN_COMORDPD,
20937 IX86_BUILTIN_COMUNORDPD,
20938 IX86_BUILTIN_COMFALSEPD,
20939 IX86_BUILTIN_COMTRUEPD,
20941 IX86_BUILTIN_PCOMEQUB,
20942 IX86_BUILTIN_PCOMNEUB,
20943 IX86_BUILTIN_PCOMLTUB,
20944 IX86_BUILTIN_PCOMLEUB,
20945 IX86_BUILTIN_PCOMGTUB,
20946 IX86_BUILTIN_PCOMGEUB,
20947 IX86_BUILTIN_PCOMFALSEUB,
20948 IX86_BUILTIN_PCOMTRUEUB,
20949 IX86_BUILTIN_PCOMEQUW,
20950 IX86_BUILTIN_PCOMNEUW,
20951 IX86_BUILTIN_PCOMLTUW,
20952 IX86_BUILTIN_PCOMLEUW,
20953 IX86_BUILTIN_PCOMGTUW,
20954 IX86_BUILTIN_PCOMGEUW,
20955 IX86_BUILTIN_PCOMFALSEUW,
20956 IX86_BUILTIN_PCOMTRUEUW,
20957 IX86_BUILTIN_PCOMEQUD,
20958 IX86_BUILTIN_PCOMNEUD,
20959 IX86_BUILTIN_PCOMLTUD,
20960 IX86_BUILTIN_PCOMLEUD,
20961 IX86_BUILTIN_PCOMGTUD,
20962 IX86_BUILTIN_PCOMGEUD,
20963 IX86_BUILTIN_PCOMFALSEUD,
20964 IX86_BUILTIN_PCOMTRUEUD,
20965 IX86_BUILTIN_PCOMEQUQ,
20966 IX86_BUILTIN_PCOMNEUQ,
20967 IX86_BUILTIN_PCOMLTUQ,
20968 IX86_BUILTIN_PCOMLEUQ,
20969 IX86_BUILTIN_PCOMGTUQ,
20970 IX86_BUILTIN_PCOMGEUQ,
20971 IX86_BUILTIN_PCOMFALSEUQ,
20972 IX86_BUILTIN_PCOMTRUEUQ,
20974 IX86_BUILTIN_PCOMEQB,
20975 IX86_BUILTIN_PCOMNEB,
20976 IX86_BUILTIN_PCOMLTB,
20977 IX86_BUILTIN_PCOMLEB,
20978 IX86_BUILTIN_PCOMGTB,
20979 IX86_BUILTIN_PCOMGEB,
20980 IX86_BUILTIN_PCOMFALSEB,
20981 IX86_BUILTIN_PCOMTRUEB,
20982 IX86_BUILTIN_PCOMEQW,
20983 IX86_BUILTIN_PCOMNEW,
20984 IX86_BUILTIN_PCOMLTW,
20985 IX86_BUILTIN_PCOMLEW,
20986 IX86_BUILTIN_PCOMGTW,
20987 IX86_BUILTIN_PCOMGEW,
20988 IX86_BUILTIN_PCOMFALSEW,
20989 IX86_BUILTIN_PCOMTRUEW,
20990 IX86_BUILTIN_PCOMEQD,
20991 IX86_BUILTIN_PCOMNED,
20992 IX86_BUILTIN_PCOMLTD,
20993 IX86_BUILTIN_PCOMLED,
20994 IX86_BUILTIN_PCOMGTD,
20995 IX86_BUILTIN_PCOMGED,
20996 IX86_BUILTIN_PCOMFALSED,
20997 IX86_BUILTIN_PCOMTRUED,
20998 IX86_BUILTIN_PCOMEQQ,
20999 IX86_BUILTIN_PCOMNEQ,
21000 IX86_BUILTIN_PCOMLTQ,
21001 IX86_BUILTIN_PCOMLEQ,
21002 IX86_BUILTIN_PCOMGTQ,
21003 IX86_BUILTIN_PCOMGEQ,
21004 IX86_BUILTIN_PCOMFALSEQ,
21005 IX86_BUILTIN_PCOMTRUEQ,
21010 /* Table for the ix86 builtin decls. */
21011 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
21013 /* Table of all of the builtin functions that are possible with different ISA's
21014 but are waiting to be built until a function is declared to use that
21016 struct GTY(()) builtin_isa {
21017 tree type; /* builtin type to use in the declaration */
21018 const char *name; /* function name */
21019 int isa; /* isa_flags this builtin is defined for */
21020 bool const_p; /* true if the declaration is constant */
21023 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
21026 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
21027 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
21028 * function decl in the ix86_builtins array. Returns the function decl or
21029 * NULL_TREE, if the builtin was not added.
21031 * If the front end has a special hook for builtin functions, delay adding
21032 * builtin functions that aren't in the current ISA until the ISA is changed
21033 * with function specific optimization. Doing so, can save about 300K for the
21034 * default compiler. When the builtin is expanded, check at that time whether
21037 * If the front end doesn't have a special hook, record all builtins, even if
21038 * it isn't an instruction set in the current ISA in case the user uses
21039 * function specific options for a different ISA, so that we don't get scope
21040 * errors if a builtin is added in the middle of a function scope. */
/* Register the ix86 builtin NAME with signature TYPE under enum value CODE.
   MASK is the set of OPTION_MASK_ISA_* flags the builtin requires; it is
   recorded in ix86_builtins_isa[] so the builtin can be materialized later
   if the ISA is enabled via function-specific options.  Returns the decl,
   or NULL_TREE when registration is deferred/rejected.
   NOTE(review): this excerpt is fragmented — braces, the `else` branch
   introduction and the final return are among the missing lines.  */
21043 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
21045   tree decl = NULL_TREE;
/* 64-bit-only builtins are dropped entirely on 32-bit targets.  */
21047   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
21049       ix86_builtins_isa[(int) code].isa = mask;
/* Build the decl now if the current ISA supports it, or if the front end
   has no special ext-scope hook (in which case deferral isn't possible).  */
21051       if ((mask & ix86_isa_flags) != 0
21052 	  || (lang_hooks.builtin_function
21053 	      == lang_hooks.builtin_function_ext_scope))
21056 	  decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
21058 	  ix86_builtins[(int) code] = decl;
/* type == NULL_TREE marks the slot as "already built" for
   ix86_add_new_builtins.  */
21059 	  ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Deferred path: stash name/type so ix86_add_new_builtins can create the
   decl later when the ISA becomes available.  */
21063 	  ix86_builtins[(int) code] = NULL_TREE;
21064 	  ix86_builtins_isa[(int) code].const_p = false;
21065 	  ix86_builtins_isa[(int) code].type = type;
21066 	  ix86_builtins_isa[(int) code].name = name;
21073 /* Like def_builtin, but also marks the function decl "const". */
/* "const" here means TREE_READONLY: the builtin has no side effects and
   its result depends only on its arguments, enabling CSE of calls.
   NOTE(review): fragmented excerpt — the `if (decl)` / `else` lines
   guarding these two statements are not visible here.  */
21076 def_builtin_const (int mask, const char *name, tree type,
21077 		   enum ix86_builtins code)
21079   tree decl = def_builtin (mask, name, type, code);
/* Decl was built immediately: mark it read-only now.  */
21081     TREE_READONLY (decl) = 1;
/* Decl was deferred: remember const-ness so ix86_add_new_builtins can
   apply it when the decl is finally created.  */
21083     ix86_builtins_isa[(int) code].const_p = true;
21088 /* Add any new builtin functions for a given ISA that may not have been
21089 declared. This saves a bit of space compared to adding all of the
21090 declarations to the tree, even if we didn't use them. */
/* Walk the deferred-builtin table and materialize every builtin whose
   required ISA bits intersect ISA.  Called when function-specific options
   enable an ISA that wasn't active at initialization time.
   NOTE(review): fragmented excerpt — the declarations of `i` and `decl`
   and several braces are among the missing lines.  */
21093 ix86_add_new_builtins (int isa)
21098   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
/* type != NULL_TREE identifies entries def_builtin deferred rather than
   built immediately.  */
21100       if ((ix86_builtins_isa[i].isa & isa) != 0
21101 	  && ix86_builtins_isa[i].type != NULL_TREE)
/* ext_scope variant: the decl must be visible regardless of the current
   lexical scope, since we may be in the middle of a function body.  */
21103 	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
21104 						 ix86_builtins_isa[i].type,
21105 						 i, BUILT_IN_MD, NULL,
21108 	  ix86_builtins[i] = decl;
/* Clear type so the entry is not materialized a second time.  */
21109 	  ix86_builtins_isa[i].type = NULL_TREE;
/* Propagate the const-ness recorded by def_builtin_const.  */
21110 	  if (ix86_builtins_isa[i].const_p)
21111 	    TREE_READONLY (decl) = 1;
21116 /* Bits for builtin_description.flag. */
21118 /* Set when we don't support the comparison natively, and should
21119 swap_comparison in order to support it. */
21120 #define BUILTIN_DESC_SWAP_OPERANDS 1
21122 struct builtin_description
21124 const unsigned int mask;
21125 const enum insn_code icode;
21126 const char *const name;
21127 const enum ix86_builtins code;
21128 const enum rtx_code comparison;
21132 static const struct builtin_description bdesc_comi[] =
21134 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21135 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21136 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21137 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21138 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21139 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21140 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21141 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21142 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21143 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21144 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21145 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
21146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
21160 static const struct builtin_description bdesc_pcmpestr[] =
21163 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21164 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21165 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21166 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21167 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21168 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21169 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 implicit-length packed string-compare builtins
   (PCMPISTRI/PCMPISTRM).  Same flag-field convention as
   bdesc_pcmpestr: 0 for the index/mask forms, a CC mode cast to int
   for the forms that return one condition flag.  */
21172 static const struct builtin_description bdesc_pcmpistr[] =
21175 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21176 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21177 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21178 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21179 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21180 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21181 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21184 /* Special builtin types */
/* Function-prototype codes for the "special" builtins registered in
   bdesc_special_args below: operations that take or return a pointer,
   or that return void.  Naming is RET_FTYPE_ARGS; on an argument a
   'P' prefix denotes a pointer type and 'PC' a pointer-to-const, so
   e.g. V4SF_FTYPE_PCFLOAT reads "v4sf f (const float *)" and
   VOID_FTYPE_PFLOAT_V4SF reads "void f (float *, v4sf)".  */
21185 enum ix86_special_builtin_type
21187 SPECIAL_FTYPE_UNKNOWN,
/* Load-style prototypes: sources are const pointers, possibly
   combined with a vector operand (partial loads, masked loads).  */
21189 V32QI_FTYPE_PCCHAR,
21190 V16QI_FTYPE_PCCHAR,
21192 V8SF_FTYPE_PCFLOAT,
21194 V4DF_FTYPE_PCDOUBLE,
21195 V4SF_FTYPE_PCFLOAT,
21196 V2DF_FTYPE_PCDOUBLE,
21197 V8SF_FTYPE_PCV8SF_V8SF,
21198 V4DF_FTYPE_PCV4DF_V4DF,
21199 V4SF_FTYPE_V4SF_PCV2SF,
21200 V4SF_FTYPE_PCV4SF_V4SF,
21201 V2DF_FTYPE_V2DF_PCDOUBLE,
21202 V2DF_FTYPE_PCV2DF_V2DF,
/* Store-style prototypes: void result, destination pointer first,
   optionally followed by a mask vector (masked stores).  */
21204 VOID_FTYPE_PV2SF_V4SF,
21205 VOID_FTYPE_PV4DI_V4DI,
21206 VOID_FTYPE_PV2DI_V2DI,
21207 VOID_FTYPE_PCHAR_V32QI,
21208 VOID_FTYPE_PCHAR_V16QI,
21209 VOID_FTYPE_PFLOAT_V8SF,
21210 VOID_FTYPE_PFLOAT_V4SF,
21211 VOID_FTYPE_PDOUBLE_V4DF,
21212 VOID_FTYPE_PDOUBLE_V2DF,
21214 VOID_FTYPE_PINT_INT,
21215 VOID_FTYPE_PV8SF_V8SF_V8SF,
21216 VOID_FTYPE_PV4DF_V4DF_V4DF,
21217 VOID_FTYPE_PV4SF_V4SF_V4SF,
21218 VOID_FTYPE_PV2DF_V2DF_V2DF
21221 /* Builtin types */
/* Prototype codes for the ordinary (value-only) builtins, named
   RET_FTYPE_ARGS.  Suffix conventions, as inferred from the use
   sites in bdesc_args below -- NOTE(review): confirm against the
   argument-builtin expander:
   _COUNT      last operand is a shift count (psll/psrl/psra entries),
   _SWAP       operands are swapped at expansion time (used by the
               reversed compares, e.g. cmpgtps expanded via LT),
   _VEC_MERGE  scalar operation merged into the destination vector
               (sqrtss/rsqrtss/rcpss entries),
   _PTEST      ptest-style comparison producing an int flag result.  */
21222 enum ix86_builtin_type
21225 FLOAT128_FTYPE_FLOAT128,
21227 FLOAT128_FTYPE_FLOAT128_FLOAT128,
21228 INT_FTYPE_V8SF_V8SF_PTEST,
21229 INT_FTYPE_V4DI_V4DI_PTEST,
21230 INT_FTYPE_V4DF_V4DF_PTEST,
21231 INT_FTYPE_V4SF_V4SF_PTEST,
21232 INT_FTYPE_V2DI_V2DI_PTEST,
21233 INT_FTYPE_V2DF_V2DF_PTEST,
21265 V4SF_FTYPE_V4SF_VEC_MERGE,
21274 V2DF_FTYPE_V2DF_VEC_MERGE,
/* Two-operand vector prototypes (arithmetic, logic, compares,
   packs/unpacks, conversions, shifts by vector or scalar count).  */
21285 V16QI_FTYPE_V16QI_V16QI,
21286 V16QI_FTYPE_V8HI_V8HI,
21287 V8QI_FTYPE_V8QI_V8QI,
21288 V8QI_FTYPE_V4HI_V4HI,
21289 V8HI_FTYPE_V8HI_V8HI,
21290 V8HI_FTYPE_V8HI_V8HI_COUNT,
21291 V8HI_FTYPE_V16QI_V16QI,
21292 V8HI_FTYPE_V4SI_V4SI,
21293 V8HI_FTYPE_V8HI_SI_COUNT,
21294 V8SF_FTYPE_V8SF_V8SF,
21295 V8SF_FTYPE_V8SF_V8SI,
21296 V4SI_FTYPE_V4SI_V4SI,
21297 V4SI_FTYPE_V4SI_V4SI_COUNT,
21298 V4SI_FTYPE_V8HI_V8HI,
21299 V4SI_FTYPE_V4SF_V4SF,
21300 V4SI_FTYPE_V2DF_V2DF,
21301 V4SI_FTYPE_V4SI_SI_COUNT,
21302 V4HI_FTYPE_V4HI_V4HI,
21303 V4HI_FTYPE_V4HI_V4HI_COUNT,
21304 V4HI_FTYPE_V8QI_V8QI,
21305 V4HI_FTYPE_V2SI_V2SI,
21306 V4HI_FTYPE_V4HI_SI_COUNT,
21307 V4DF_FTYPE_V4DF_V4DF,
21308 V4DF_FTYPE_V4DF_V4DI,
21309 V4SF_FTYPE_V4SF_V4SF,
21310 V4SF_FTYPE_V4SF_V4SF_SWAP,
21311 V4SF_FTYPE_V4SF_V4SI,
21312 V4SF_FTYPE_V4SF_V2SI,
21313 V4SF_FTYPE_V4SF_V2DF,
21314 V4SF_FTYPE_V4SF_DI,
21315 V4SF_FTYPE_V4SF_SI,
21316 V2DI_FTYPE_V2DI_V2DI,
21317 V2DI_FTYPE_V2DI_V2DI_COUNT,
21318 V2DI_FTYPE_V16QI_V16QI,
21319 V2DI_FTYPE_V4SI_V4SI,
21320 V2DI_FTYPE_V2DI_V16QI,
21321 V2DI_FTYPE_V2DF_V2DF,
21322 V2DI_FTYPE_V2DI_SI_COUNT,
21323 V2SI_FTYPE_V2SI_V2SI,
21324 V2SI_FTYPE_V2SI_V2SI_COUNT,
21325 V2SI_FTYPE_V4HI_V4HI,
21326 V2SI_FTYPE_V2SF_V2SF,
21327 V2SI_FTYPE_V2SI_SI_COUNT,
21328 V2DF_FTYPE_V2DF_V2DF,
21329 V2DF_FTYPE_V2DF_V2DF_SWAP,
21330 V2DF_FTYPE_V2DF_V4SF,
21331 V2DF_FTYPE_V2DF_V2DI,
21332 V2DF_FTYPE_V2DF_DI,
21333 V2DF_FTYPE_V2DF_SI,
21334 V2SF_FTYPE_V2SF_V2SF,
21335 V1DI_FTYPE_V1DI_V1DI,
21336 V1DI_FTYPE_V1DI_V1DI_COUNT,
21337 V1DI_FTYPE_V8QI_V8QI,
21338 V1DI_FTYPE_V2SI_V2SI,
21339 V1DI_FTYPE_V1DI_SI_COUNT,
21340 UINT64_FTYPE_UINT64_UINT64,
21341 UINT_FTYPE_UINT_UINT,
21342 UINT_FTYPE_UINT_USHORT,
21343 UINT_FTYPE_UINT_UCHAR,
/* Vector-with-immediate prototypes (shuffles, inserts/extracts,
   immediate shifts).  */
21344 V8HI_FTYPE_V8HI_INT,
21345 V4SI_FTYPE_V4SI_INT,
21346 V4HI_FTYPE_V4HI_INT,
21347 V8SF_FTYPE_V8SF_INT,
21348 V4SI_FTYPE_V8SI_INT,
21349 V4SF_FTYPE_V8SF_INT,
21350 V2DF_FTYPE_V4DF_INT,
21351 V4DF_FTYPE_V4DF_INT,
21352 V4SF_FTYPE_V4SF_INT,
21353 V2DI_FTYPE_V2DI_INT,
21354 V2DI2TI_FTYPE_V2DI_INT,
21355 V2DF_FTYPE_V2DF_INT,
/* Three-operand prototypes (blends, fused selects, two-source
   shuffles with immediate selector).  */
21356 V16QI_FTYPE_V16QI_V16QI_V16QI,
21357 V8SF_FTYPE_V8SF_V8SF_V8SF,
21358 V4DF_FTYPE_V4DF_V4DF_V4DF,
21359 V4SF_FTYPE_V4SF_V4SF_V4SF,
21360 V2DF_FTYPE_V2DF_V2DF_V2DF,
21361 V16QI_FTYPE_V16QI_V16QI_INT,
21362 V8SI_FTYPE_V8SI_V8SI_INT,
21363 V8SI_FTYPE_V8SI_V4SI_INT,
21364 V8HI_FTYPE_V8HI_V8HI_INT,
21365 V8SF_FTYPE_V8SF_V8SF_INT,
21366 V8SF_FTYPE_V8SF_V4SF_INT,
21367 V4SI_FTYPE_V4SI_V4SI_INT,
21368 V4DF_FTYPE_V4DF_V4DF_INT,
21369 V4DF_FTYPE_V4DF_V2DF_INT,
21370 V4SF_FTYPE_V4SF_V4SF_INT,
21371 V2DI_FTYPE_V2DI_V2DI_INT,
21372 V2DI2TI_FTYPE_V2DI_V2DI_INT,
21373 V1DI2DI_FTYPE_V1DI_V1DI_INT,
21374 V2DF_FTYPE_V2DF_V2DF_INT,
21375 V2DI_FTYPE_V2DI_UINT_UINT,
21376 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21379 /* Special builtins with variable number of arguments. */
/* Descriptor table for the "special" builtins: loads, stores,
   non-temporal moves, fences and AVX state control -- anything whose
   prototype involves pointers or a void result.  Each entry gives the
   enabling ISA mask, the insn code, the builtin's user-visible name
   (0 means no name is registered from this table -- presumably such
   builtins are defined elsewhere; verify against the builtin-setup
   code), the IX86_BUILTIN_* identifier, a comparison code (UNKNOWN
   when unused), and the ix86_special_builtin_type code cast to int.  */
21380 static const struct builtin_description bdesc_special_args[] =
/* MMX */
21383 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* 3DNow! */
21386 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE */
21389 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21390 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21391 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21393 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21394 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21395 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21396 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21398 /* SSE or 3DNow!A */
21399 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21400 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
/* SSE2 */
21403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
/* SSE3 */
21417 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
/* SSE4.1 */
21420 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
/* SSE4A */
21423 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21424 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
/* AVX */
21427 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21428 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21429 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21431 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21432 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21433 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21434 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21435 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21437 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21438 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21439 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21440 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21441 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21442 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21443 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21445 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21446 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21447 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21449 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21450 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21451 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21452 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21453 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21454 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21455 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21456 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21459 /* Builtins with variable number of arguments. */
21460 static const struct builtin_description bdesc_args[] =
21463 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21464 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21465 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21466 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21467 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21468 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21470 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21471 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21472 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21473 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21474 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21475 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21476 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21477 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21479 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21480 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21482 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21483 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21484 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21485 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21487 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21488 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21489 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21490 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21491 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21492 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21494 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21495 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21496 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21497 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21498 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21499 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21501 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21502 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21503 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21505 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21507 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21508 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21509 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21510 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21511 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21512 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21514 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21515 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21516 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21517 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21518 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21519 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21521 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21522 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21523 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21524 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21527 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21528 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21529 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21530 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21532 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21533 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21534 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21535 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21536 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21537 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21538 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21539 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21540 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21541 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21542 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21543 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21544 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21545 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21546 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21549 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21550 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21551 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21552 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21553 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21554 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21557 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21558 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21559 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21560 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21561 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21562 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21563 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21564 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21565 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21566 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21567 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21568 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21570 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21572 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21573 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21574 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21575 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21576 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21577 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21578 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21579 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21581 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21582 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21583 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21584 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21585 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21586 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21587 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21588 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21589 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21590 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21591 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21592 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21593 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21594 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21595 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21596 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21597 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21598 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21599 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21600 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21601 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21602 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21604 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21605 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21606 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21607 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21609 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21610 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21611 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21612 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21614 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21615 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21616 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21617 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21618 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21620 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21621 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21622 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
21624 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21626 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21627 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21628 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21630 /* SSE MMX or 3Dnow!A */
21631 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21632 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21633 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21635 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21636 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21637 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21638 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21640 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21641 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21643 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21646 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21648 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21649 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21652 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21663 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21664 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21665 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21667 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21668 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21669 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21671 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21672 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21673 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21674 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21675 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21676 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21677 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21678 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21680 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21681 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21682 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21683 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21684 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21685 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21687 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21689 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21690 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21691 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21692 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21693 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21694 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21696 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21697 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21701 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21702 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21704 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21706 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21707 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21708 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21709 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21711 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21712 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21713 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21715 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21717 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21718 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21719 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21720 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21721 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21722 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21723 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21724 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21726 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21727 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21728 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21729 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21730 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21731 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21732 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21733 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21735 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21736 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
21738 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21739 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21740 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21741 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21743 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21744 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21746 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21747 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21748 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21749 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21750 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21751 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21753 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21754 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21755 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21759 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21760 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21761 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21762 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21763 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21764 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21765 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21767 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21768 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21769 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21771 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21772 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21774 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21775 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21777 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21779 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21780 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21781 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21782 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21784 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21785 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21786 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21787 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21788 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21789 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21790 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21792 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21793 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21794 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21795 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21796 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21797 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21798 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21800 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21801 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21802 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21803 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21805 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21806 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21807 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21809 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21811 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21812 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21814 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21817 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21818 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21821 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
21822 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21824 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21825 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21826 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21827 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21828 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21829 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21832 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21833 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21834 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21835 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21836 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21837 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21839 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21840 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21841 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21842 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21843 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21844 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21845 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21846 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21847 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21848 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21849 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21850 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21851 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21852 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21853 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21854 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21855 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21856 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21857 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21858 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21859 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21860 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21861 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21862 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21865 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21866 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
21869 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21870 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21871 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21872 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21873 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21874 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21875 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21876 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21877 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21878 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21880 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21881 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21882 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21883 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21884 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21885 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21886 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21887 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21888 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21889 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21890 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21891 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21892 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21894 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21895 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21896 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21897 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21898 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21899 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21900 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21901 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21902 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21903 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21904 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21905 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21907 /* Shared by SSE4.1 and SSE5: these use OPTION_MASK_ISA_ROUND, so they are enabled by either ISA.  */
21908 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21909 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21910 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21911 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21913 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21914 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21915 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21918 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21919 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21920 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21921 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21922 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
21925 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21926 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21927 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21928 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21931 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21932 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21934 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21935 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21936 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21937 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21940 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
21943 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21944 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21945 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21946 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21947 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21948 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21949 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21950 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21951 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21952 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21953 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21954 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21955 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21956 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21957 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21958 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21959 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21960 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21961 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21962 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21963 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21964 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21965 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21966 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21967 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21968 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21970 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21971 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21972 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21973 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
21975 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21976 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21977 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21978 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21979 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21980 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21981 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21982 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21983 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21984 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21985 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21986 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21987 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21988 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21989 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21990 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
21991 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21992 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21993 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21994 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21995 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21996 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21997 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
22002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
22007 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
22008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
22010 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22011 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22012 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22014 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22015 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22016 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22017 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22018 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22020 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22022 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22023 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22025 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22026 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22027 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22028 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22030 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
22031 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
22032 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
22033 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
22034 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
22035 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
22037 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22038 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22039 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22040 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22041 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22042 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22043 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22044 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22045 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22046 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22047 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22048 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22049 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22050 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22051 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22053 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
22054 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
22058 enum multi_arg_type {
22068 MULTI_ARG_3_PERMPS,
22069 MULTI_ARG_3_PERMPD,
22076 MULTI_ARG_2_DI_IMM,
22077 MULTI_ARG_2_SI_IMM,
22078 MULTI_ARG_2_HI_IMM,
22079 MULTI_ARG_2_QI_IMM,
22080 MULTI_ARG_2_SF_CMP,
22081 MULTI_ARG_2_DF_CMP,
22082 MULTI_ARG_2_DI_CMP,
22083 MULTI_ARG_2_SI_CMP,
22084 MULTI_ARG_2_HI_CMP,
22085 MULTI_ARG_2_QI_CMP,
22108 static const struct builtin_description bdesc_multi_arg[] =
22110 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22111 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22112 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22113 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22114 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22115 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22116 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22117 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22118 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22119 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22120 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22121 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22122 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22123 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22124 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22125 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22126 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
22127 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
22128 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
22129 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
22130 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
22131 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
22132 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
22133 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
22134 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, UNKNOWN, (int)MULTI_ARG_3_PERMPS },
22135 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, UNKNOWN, (int)MULTI_ARG_3_PERMPD },
22136 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22137 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22138 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22139 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22140 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22141 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22142 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22143 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22144 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22145 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22146 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22147 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22148 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22149 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
22150 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
22151 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
22152 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
22153 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
22154 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
22155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
22156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22157 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
22158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
22159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
22160 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22161 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
22162 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
22163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
22164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
22165 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
22166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
22167 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
22168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)MULTI_ARG_1_PH2PS },
22169 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int)MULTI_ARG_1_PS2PH },
22170 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22174 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22176 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22178 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22184 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22186 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22187 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22188 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22189 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22190 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22191 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22192 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22193 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22194 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22195 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22196 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22197 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22198 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22199 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22200 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22201 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22203 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22204 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22205 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22206 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22207 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22208 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22209 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22210 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22211 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22212 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22213 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22214 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22215 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22216 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22217 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22218 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22220 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22221 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22222 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22223 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22224 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22225 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22226 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22227 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22228 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22229 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22230 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22231 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22232 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22233 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22234 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22235 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22237 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22238 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22239 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22240 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22241 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22242 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22243 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22244 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22245 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22246 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22247 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22248 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22249 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22250 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22251 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22252 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22254 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22255 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22256 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22257 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22258 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22259 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22260 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22262 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22263 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22264 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22265 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22266 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22267 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22268 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22270 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22271 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22272 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22273 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22274 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22275 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22276 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22278 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22279 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22280 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22281 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22282 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22283 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22284 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22286 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22287 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22288 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22289 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22290 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22291 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22292 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22294 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22295 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22296 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22297 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22298 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22299 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22300 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22302 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22303 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22304 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22305 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22306 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22307 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22308 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22310 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22311 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22312 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22313 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22314 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22315 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22316 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22318 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22319 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22320 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22321 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22322 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22323 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22324 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22325 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22327 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22328 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22329 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22330 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22331 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22332 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22333 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22334 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22336 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22337 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22338 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22339 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22340 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22341 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22342 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22343 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22346 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
22347 in the current target ISA to allow the user to compile particular modules
22348 with different target specific options that differ from the command line
22351 ix86_init_mmx_sse_builtins (void)
22353 const struct builtin_description * d;
22356 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22357 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22358 tree V1DI_type_node
22359 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22360 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22361 tree V2DI_type_node
22362 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22363 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22364 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22365 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22366 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22367 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22368 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22370 tree pchar_type_node = build_pointer_type (char_type_node);
22371 tree pcchar_type_node
22372 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22373 tree pfloat_type_node = build_pointer_type (float_type_node);
22374 tree pcfloat_type_node
22375 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22376 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22377 tree pcv2sf_type_node
22378 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22379 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22380 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22383 tree int_ftype_v4sf_v4sf
22384 = build_function_type_list (integer_type_node,
22385 V4SF_type_node, V4SF_type_node, NULL_TREE);
22386 tree v4si_ftype_v4sf_v4sf
22387 = build_function_type_list (V4SI_type_node,
22388 V4SF_type_node, V4SF_type_node, NULL_TREE);
22389 /* MMX/SSE/integer conversions. */
22390 tree int_ftype_v4sf
22391 = build_function_type_list (integer_type_node,
22392 V4SF_type_node, NULL_TREE);
22393 tree int64_ftype_v4sf
22394 = build_function_type_list (long_long_integer_type_node,
22395 V4SF_type_node, NULL_TREE);
22396 tree int_ftype_v8qi
22397 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22398 tree v4sf_ftype_v4sf_int
22399 = build_function_type_list (V4SF_type_node,
22400 V4SF_type_node, integer_type_node, NULL_TREE);
22401 tree v4sf_ftype_v4sf_int64
22402 = build_function_type_list (V4SF_type_node,
22403 V4SF_type_node, long_long_integer_type_node,
22405 tree v4sf_ftype_v4sf_v2si
22406 = build_function_type_list (V4SF_type_node,
22407 V4SF_type_node, V2SI_type_node, NULL_TREE);
22409 /* Miscellaneous. */
22410 tree v8qi_ftype_v4hi_v4hi
22411 = build_function_type_list (V8QI_type_node,
22412 V4HI_type_node, V4HI_type_node, NULL_TREE);
22413 tree v4hi_ftype_v2si_v2si
22414 = build_function_type_list (V4HI_type_node,
22415 V2SI_type_node, V2SI_type_node, NULL_TREE);
22416 tree v4sf_ftype_v4sf_v4sf_int
22417 = build_function_type_list (V4SF_type_node,
22418 V4SF_type_node, V4SF_type_node,
22419 integer_type_node, NULL_TREE);
22420 tree v2si_ftype_v4hi_v4hi
22421 = build_function_type_list (V2SI_type_node,
22422 V4HI_type_node, V4HI_type_node, NULL_TREE);
22423 tree v4hi_ftype_v4hi_int
22424 = build_function_type_list (V4HI_type_node,
22425 V4HI_type_node, integer_type_node, NULL_TREE);
22426 tree v2si_ftype_v2si_int
22427 = build_function_type_list (V2SI_type_node,
22428 V2SI_type_node, integer_type_node, NULL_TREE);
22429 tree v1di_ftype_v1di_int
22430 = build_function_type_list (V1DI_type_node,
22431 V1DI_type_node, integer_type_node, NULL_TREE);
22433 tree void_ftype_void
22434 = build_function_type (void_type_node, void_list_node);
22435 tree void_ftype_unsigned
22436 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22437 tree void_ftype_unsigned_unsigned
22438 = build_function_type_list (void_type_node, unsigned_type_node,
22439 unsigned_type_node, NULL_TREE);
22440 tree void_ftype_pcvoid_unsigned_unsigned
22441 = build_function_type_list (void_type_node, const_ptr_type_node,
22442 unsigned_type_node, unsigned_type_node,
22444 tree unsigned_ftype_void
22445 = build_function_type (unsigned_type_node, void_list_node);
22446 tree v2si_ftype_v4sf
22447 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22448 /* Loads/stores. */
22449 tree void_ftype_v8qi_v8qi_pchar
22450 = build_function_type_list (void_type_node,
22451 V8QI_type_node, V8QI_type_node,
22452 pchar_type_node, NULL_TREE);
22453 tree v4sf_ftype_pcfloat
22454 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22455 tree v4sf_ftype_v4sf_pcv2sf
22456 = build_function_type_list (V4SF_type_node,
22457 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22458 tree void_ftype_pv2sf_v4sf
22459 = build_function_type_list (void_type_node,
22460 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22461 tree void_ftype_pfloat_v4sf
22462 = build_function_type_list (void_type_node,
22463 pfloat_type_node, V4SF_type_node, NULL_TREE);
22464 tree void_ftype_pdi_di
22465 = build_function_type_list (void_type_node,
22466 pdi_type_node, long_long_unsigned_type_node,
22468 tree void_ftype_pv2di_v2di
22469 = build_function_type_list (void_type_node,
22470 pv2di_type_node, V2DI_type_node, NULL_TREE);
22471 /* Normal vector unops. */
22472 tree v4sf_ftype_v4sf
22473 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22474 tree v16qi_ftype_v16qi
22475 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22476 tree v8hi_ftype_v8hi
22477 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22478 tree v4si_ftype_v4si
22479 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22480 tree v8qi_ftype_v8qi
22481 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22482 tree v4hi_ftype_v4hi
22483 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22485 /* Normal vector binops. */
22486 tree v4sf_ftype_v4sf_v4sf
22487 = build_function_type_list (V4SF_type_node,
22488 V4SF_type_node, V4SF_type_node, NULL_TREE);
22489 tree v8qi_ftype_v8qi_v8qi
22490 = build_function_type_list (V8QI_type_node,
22491 V8QI_type_node, V8QI_type_node, NULL_TREE);
22492 tree v4hi_ftype_v4hi_v4hi
22493 = build_function_type_list (V4HI_type_node,
22494 V4HI_type_node, V4HI_type_node, NULL_TREE);
22495 tree v2si_ftype_v2si_v2si
22496 = build_function_type_list (V2SI_type_node,
22497 V2SI_type_node, V2SI_type_node, NULL_TREE);
22498 tree v1di_ftype_v1di_v1di
22499 = build_function_type_list (V1DI_type_node,
22500 V1DI_type_node, V1DI_type_node, NULL_TREE);
22501 tree v1di_ftype_v1di_v1di_int
22502 = build_function_type_list (V1DI_type_node,
22503 V1DI_type_node, V1DI_type_node,
22504 integer_type_node, NULL_TREE);
22505 tree v2si_ftype_v2sf
22506 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22507 tree v2sf_ftype_v2si
22508 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22509 tree v2si_ftype_v2si
22510 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22511 tree v2sf_ftype_v2sf
22512 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22513 tree v2sf_ftype_v2sf_v2sf
22514 = build_function_type_list (V2SF_type_node,
22515 V2SF_type_node, V2SF_type_node, NULL_TREE);
22516 tree v2si_ftype_v2sf_v2sf
22517 = build_function_type_list (V2SI_type_node,
22518 V2SF_type_node, V2SF_type_node, NULL_TREE);
22519 tree pint_type_node = build_pointer_type (integer_type_node);
22520 tree pdouble_type_node = build_pointer_type (double_type_node);
22521 tree pcdouble_type_node = build_pointer_type (
22522 build_type_variant (double_type_node, 1, 0));
22523 tree int_ftype_v2df_v2df
22524 = build_function_type_list (integer_type_node,
22525 V2DF_type_node, V2DF_type_node, NULL_TREE);
22527 tree void_ftype_pcvoid
22528 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22529 tree v4sf_ftype_v4si
22530 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22531 tree v4si_ftype_v4sf
22532 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22533 tree v2df_ftype_v4si
22534 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22535 tree v4si_ftype_v2df
22536 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22537 tree v4si_ftype_v2df_v2df
22538 = build_function_type_list (V4SI_type_node,
22539 V2DF_type_node, V2DF_type_node, NULL_TREE);
22540 tree v2si_ftype_v2df
22541 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22542 tree v4sf_ftype_v2df
22543 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22544 tree v2df_ftype_v2si
22545 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22546 tree v2df_ftype_v4sf
22547 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22548 tree int_ftype_v2df
22549 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22550 tree int64_ftype_v2df
22551 = build_function_type_list (long_long_integer_type_node,
22552 V2DF_type_node, NULL_TREE);
22553 tree v2df_ftype_v2df_int
22554 = build_function_type_list (V2DF_type_node,
22555 V2DF_type_node, integer_type_node, NULL_TREE);
22556 tree v2df_ftype_v2df_int64
22557 = build_function_type_list (V2DF_type_node,
22558 V2DF_type_node, long_long_integer_type_node,
22560 tree v4sf_ftype_v4sf_v2df
22561 = build_function_type_list (V4SF_type_node,
22562 V4SF_type_node, V2DF_type_node, NULL_TREE);
22563 tree v2df_ftype_v2df_v4sf
22564 = build_function_type_list (V2DF_type_node,
22565 V2DF_type_node, V4SF_type_node, NULL_TREE);
22566 tree v2df_ftype_v2df_v2df_int
22567 = build_function_type_list (V2DF_type_node,
22568 V2DF_type_node, V2DF_type_node,
22571 tree v2df_ftype_v2df_pcdouble
22572 = build_function_type_list (V2DF_type_node,
22573 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22574 tree void_ftype_pdouble_v2df
22575 = build_function_type_list (void_type_node,
22576 pdouble_type_node, V2DF_type_node, NULL_TREE);
22577 tree void_ftype_pint_int
22578 = build_function_type_list (void_type_node,
22579 pint_type_node, integer_type_node, NULL_TREE);
22580 tree void_ftype_v16qi_v16qi_pchar
22581 = build_function_type_list (void_type_node,
22582 V16QI_type_node, V16QI_type_node,
22583 pchar_type_node, NULL_TREE);
22584 tree v2df_ftype_pcdouble
22585 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22586 tree v2df_ftype_v2df_v2df
22587 = build_function_type_list (V2DF_type_node,
22588 V2DF_type_node, V2DF_type_node, NULL_TREE);
22589 tree v16qi_ftype_v16qi_v16qi
22590 = build_function_type_list (V16QI_type_node,
22591 V16QI_type_node, V16QI_type_node, NULL_TREE);
22592 tree v8hi_ftype_v8hi_v8hi
22593 = build_function_type_list (V8HI_type_node,
22594 V8HI_type_node, V8HI_type_node, NULL_TREE);
22595 tree v4si_ftype_v4si_v4si
22596 = build_function_type_list (V4SI_type_node,
22597 V4SI_type_node, V4SI_type_node, NULL_TREE);
22598 tree v2di_ftype_v2di_v2di
22599 = build_function_type_list (V2DI_type_node,
22600 V2DI_type_node, V2DI_type_node, NULL_TREE);
22601 tree v2di_ftype_v2df_v2df
22602 = build_function_type_list (V2DI_type_node,
22603 V2DF_type_node, V2DF_type_node, NULL_TREE);
22604 tree v2df_ftype_v2df
22605 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22606 tree v2di_ftype_v2di_int
22607 = build_function_type_list (V2DI_type_node,
22608 V2DI_type_node, integer_type_node, NULL_TREE);
22609 tree v2di_ftype_v2di_v2di_int
22610 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22611 V2DI_type_node, integer_type_node, NULL_TREE);
22612 tree v4si_ftype_v4si_int
22613 = build_function_type_list (V4SI_type_node,
22614 V4SI_type_node, integer_type_node, NULL_TREE);
22615 tree v8hi_ftype_v8hi_int
22616 = build_function_type_list (V8HI_type_node,
22617 V8HI_type_node, integer_type_node, NULL_TREE);
22618 tree v4si_ftype_v8hi_v8hi
22619 = build_function_type_list (V4SI_type_node,
22620 V8HI_type_node, V8HI_type_node, NULL_TREE);
22621 tree v1di_ftype_v8qi_v8qi
22622 = build_function_type_list (V1DI_type_node,
22623 V8QI_type_node, V8QI_type_node, NULL_TREE);
22624 tree v1di_ftype_v2si_v2si
22625 = build_function_type_list (V1DI_type_node,
22626 V2SI_type_node, V2SI_type_node, NULL_TREE);
22627 tree v2di_ftype_v16qi_v16qi
22628 = build_function_type_list (V2DI_type_node,
22629 V16QI_type_node, V16QI_type_node, NULL_TREE);
22630 tree v2di_ftype_v4si_v4si
22631 = build_function_type_list (V2DI_type_node,
22632 V4SI_type_node, V4SI_type_node, NULL_TREE);
22633 tree int_ftype_v16qi
22634 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22635 tree v16qi_ftype_pcchar
22636 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22637 tree void_ftype_pchar_v16qi
22638 = build_function_type_list (void_type_node,
22639 pchar_type_node, V16QI_type_node, NULL_TREE);
22641 tree v2di_ftype_v2di_unsigned_unsigned
22642 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22643 unsigned_type_node, unsigned_type_node,
22645 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22646 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22647 unsigned_type_node, unsigned_type_node,
22649 tree v2di_ftype_v2di_v16qi
22650 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22652 tree v2df_ftype_v2df_v2df_v2df
22653 = build_function_type_list (V2DF_type_node,
22654 V2DF_type_node, V2DF_type_node,
22655 V2DF_type_node, NULL_TREE);
22656 tree v4sf_ftype_v4sf_v4sf_v4sf
22657 = build_function_type_list (V4SF_type_node,
22658 V4SF_type_node, V4SF_type_node,
22659 V4SF_type_node, NULL_TREE);
22660 tree v8hi_ftype_v16qi
22661 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22663 tree v4si_ftype_v16qi
22664 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22666 tree v2di_ftype_v16qi
22667 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22669 tree v4si_ftype_v8hi
22670 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22672 tree v2di_ftype_v8hi
22673 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22675 tree v2di_ftype_v4si
22676 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22678 tree v2di_ftype_pv2di
22679 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22681 tree v16qi_ftype_v16qi_v16qi_int
22682 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22683 V16QI_type_node, integer_type_node,
22685 tree v16qi_ftype_v16qi_v16qi_v16qi
22686 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22687 V16QI_type_node, V16QI_type_node,
22689 tree v8hi_ftype_v8hi_v8hi_int
22690 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22691 V8HI_type_node, integer_type_node,
22693 tree v4si_ftype_v4si_v4si_int
22694 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22695 V4SI_type_node, integer_type_node,
22697 tree int_ftype_v2di_v2di
22698 = build_function_type_list (integer_type_node,
22699 V2DI_type_node, V2DI_type_node,
22701 tree int_ftype_v16qi_int_v16qi_int_int
22702 = build_function_type_list (integer_type_node,
22709 tree v16qi_ftype_v16qi_int_v16qi_int_int
22710 = build_function_type_list (V16QI_type_node,
22717 tree int_ftype_v16qi_v16qi_int
22718 = build_function_type_list (integer_type_node,
22724 /* SSE5 instructions */
22725 tree v2di_ftype_v2di_v2di_v2di
22726 = build_function_type_list (V2DI_type_node,
22732 tree v4si_ftype_v4si_v4si_v4si
22733 = build_function_type_list (V4SI_type_node,
22739 tree v4si_ftype_v4si_v4si_v2di
22740 = build_function_type_list (V4SI_type_node,
22746 tree v8hi_ftype_v8hi_v8hi_v8hi
22747 = build_function_type_list (V8HI_type_node,
22753 tree v8hi_ftype_v8hi_v8hi_v4si
22754 = build_function_type_list (V8HI_type_node,
22760 tree v2df_ftype_v2df_v2df_v16qi
22761 = build_function_type_list (V2DF_type_node,
22767 tree v4sf_ftype_v4sf_v4sf_v16qi
22768 = build_function_type_list (V4SF_type_node,
22774 tree v2di_ftype_v2di_si
22775 = build_function_type_list (V2DI_type_node,
22780 tree v4si_ftype_v4si_si
22781 = build_function_type_list (V4SI_type_node,
22786 tree v8hi_ftype_v8hi_si
22787 = build_function_type_list (V8HI_type_node,
22792 tree v16qi_ftype_v16qi_si
22793 = build_function_type_list (V16QI_type_node,
22797 tree v4sf_ftype_v4hi
22798 = build_function_type_list (V4SF_type_node,
22802 tree v4hi_ftype_v4sf
22803 = build_function_type_list (V4HI_type_node,
22807 tree v2di_ftype_v2di
22808 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22810 tree v16qi_ftype_v8hi_v8hi
22811 = build_function_type_list (V16QI_type_node,
22812 V8HI_type_node, V8HI_type_node,
22814 tree v8hi_ftype_v4si_v4si
22815 = build_function_type_list (V8HI_type_node,
22816 V4SI_type_node, V4SI_type_node,
22818 tree v8hi_ftype_v16qi_v16qi
22819 = build_function_type_list (V8HI_type_node,
22820 V16QI_type_node, V16QI_type_node,
22822 tree v4hi_ftype_v8qi_v8qi
22823 = build_function_type_list (V4HI_type_node,
22824 V8QI_type_node, V8QI_type_node,
22826 tree unsigned_ftype_unsigned_uchar
22827 = build_function_type_list (unsigned_type_node,
22828 unsigned_type_node,
22829 unsigned_char_type_node,
22831 tree unsigned_ftype_unsigned_ushort
22832 = build_function_type_list (unsigned_type_node,
22833 unsigned_type_node,
22834 short_unsigned_type_node,
22836 tree unsigned_ftype_unsigned_unsigned
22837 = build_function_type_list (unsigned_type_node,
22838 unsigned_type_node,
22839 unsigned_type_node,
22841 tree uint64_ftype_uint64_uint64
22842 = build_function_type_list (long_long_unsigned_type_node,
22843 long_long_unsigned_type_node,
22844 long_long_unsigned_type_node,
22846 tree float_ftype_float
22847 = build_function_type_list (float_type_node,
22852 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22854 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22856 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22858 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22860 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22862 tree v8sf_ftype_v8sf
22863 = build_function_type_list (V8SF_type_node,
22866 tree v8si_ftype_v8sf
22867 = build_function_type_list (V8SI_type_node,
22870 tree v8sf_ftype_v8si
22871 = build_function_type_list (V8SF_type_node,
22874 tree v4si_ftype_v4df
22875 = build_function_type_list (V4SI_type_node,
22878 tree v4df_ftype_v4df
22879 = build_function_type_list (V4DF_type_node,
22882 tree v4df_ftype_v4si
22883 = build_function_type_list (V4DF_type_node,
22886 tree v4df_ftype_v4sf
22887 = build_function_type_list (V4DF_type_node,
22890 tree v4sf_ftype_v4df
22891 = build_function_type_list (V4SF_type_node,
22894 tree v8sf_ftype_v8sf_v8sf
22895 = build_function_type_list (V8SF_type_node,
22896 V8SF_type_node, V8SF_type_node,
22898 tree v4df_ftype_v4df_v4df
22899 = build_function_type_list (V4DF_type_node,
22900 V4DF_type_node, V4DF_type_node,
22902 tree v8sf_ftype_v8sf_int
22903 = build_function_type_list (V8SF_type_node,
22904 V8SF_type_node, integer_type_node,
22906 tree v4si_ftype_v8si_int
22907 = build_function_type_list (V4SI_type_node,
22908 V8SI_type_node, integer_type_node,
22910 tree v4df_ftype_v4df_int
22911 = build_function_type_list (V4DF_type_node,
22912 V4DF_type_node, integer_type_node,
22914 tree v4sf_ftype_v8sf_int
22915 = build_function_type_list (V4SF_type_node,
22916 V8SF_type_node, integer_type_node,
22918 tree v2df_ftype_v4df_int
22919 = build_function_type_list (V2DF_type_node,
22920 V4DF_type_node, integer_type_node,
22922 tree v8sf_ftype_v8sf_v8sf_int
22923 = build_function_type_list (V8SF_type_node,
22924 V8SF_type_node, V8SF_type_node,
22927 tree v8sf_ftype_v8sf_v8sf_v8sf
22928 = build_function_type_list (V8SF_type_node,
22929 V8SF_type_node, V8SF_type_node,
22932 tree v4df_ftype_v4df_v4df_v4df
22933 = build_function_type_list (V4DF_type_node,
22934 V4DF_type_node, V4DF_type_node,
22937 tree v8si_ftype_v8si_v8si_int
22938 = build_function_type_list (V8SI_type_node,
22939 V8SI_type_node, V8SI_type_node,
22942 tree v4df_ftype_v4df_v4df_int
22943 = build_function_type_list (V4DF_type_node,
22944 V4DF_type_node, V4DF_type_node,
22947 tree v8sf_ftype_pcfloat
22948 = build_function_type_list (V8SF_type_node,
22951 tree v4df_ftype_pcdouble
22952 = build_function_type_list (V4DF_type_node,
22953 pcdouble_type_node,
22955 tree pcv4sf_type_node
22956 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22957 tree pcv2df_type_node
22958 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22959 tree v8sf_ftype_pcv4sf
22960 = build_function_type_list (V8SF_type_node,
22963 tree v4df_ftype_pcv2df
22964 = build_function_type_list (V4DF_type_node,
22967 tree v32qi_ftype_pcchar
22968 = build_function_type_list (V32QI_type_node,
22971 tree void_ftype_pchar_v32qi
22972 = build_function_type_list (void_type_node,
22973 pchar_type_node, V32QI_type_node,
22975 tree v8si_ftype_v8si_v4si_int
22976 = build_function_type_list (V8SI_type_node,
22977 V8SI_type_node, V4SI_type_node,
22980 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22981 tree void_ftype_pv4di_v4di
22982 = build_function_type_list (void_type_node,
22983 pv4di_type_node, V4DI_type_node,
22985 tree v8sf_ftype_v8sf_v4sf_int
22986 = build_function_type_list (V8SF_type_node,
22987 V8SF_type_node, V4SF_type_node,
22990 tree v4df_ftype_v4df_v2df_int
22991 = build_function_type_list (V4DF_type_node,
22992 V4DF_type_node, V2DF_type_node,
22995 tree void_ftype_pfloat_v8sf
22996 = build_function_type_list (void_type_node,
22997 pfloat_type_node, V8SF_type_node,
22999 tree void_ftype_pdouble_v4df
23000 = build_function_type_list (void_type_node,
23001 pdouble_type_node, V4DF_type_node,
23003 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
23004 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
23005 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
23006 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
23007 tree pcv8sf_type_node
23008 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
23009 tree pcv4df_type_node
23010 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
23011 tree v8sf_ftype_pcv8sf_v8sf
23012 = build_function_type_list (V8SF_type_node,
23013 pcv8sf_type_node, V8SF_type_node,
23015 tree v4df_ftype_pcv4df_v4df
23016 = build_function_type_list (V4DF_type_node,
23017 pcv4df_type_node, V4DF_type_node,
23019 tree v4sf_ftype_pcv4sf_v4sf
23020 = build_function_type_list (V4SF_type_node,
23021 pcv4sf_type_node, V4SF_type_node,
23023 tree v2df_ftype_pcv2df_v2df
23024 = build_function_type_list (V2DF_type_node,
23025 pcv2df_type_node, V2DF_type_node,
23027 tree void_ftype_pv8sf_v8sf_v8sf
23028 = build_function_type_list (void_type_node,
23029 pv8sf_type_node, V8SF_type_node,
23032 tree void_ftype_pv4df_v4df_v4df
23033 = build_function_type_list (void_type_node,
23034 pv4df_type_node, V4DF_type_node,
23037 tree void_ftype_pv4sf_v4sf_v4sf
23038 = build_function_type_list (void_type_node,
23039 pv4sf_type_node, V4SF_type_node,
23042 tree void_ftype_pv2df_v2df_v2df
23043 = build_function_type_list (void_type_node,
23044 pv2df_type_node, V2DF_type_node,
23047 tree v4df_ftype_v2df
23048 = build_function_type_list (V4DF_type_node,
23051 tree v8sf_ftype_v4sf
23052 = build_function_type_list (V8SF_type_node,
23055 tree v8si_ftype_v4si
23056 = build_function_type_list (V8SI_type_node,
23059 tree v2df_ftype_v4df
23060 = build_function_type_list (V2DF_type_node,
23063 tree v4sf_ftype_v8sf
23064 = build_function_type_list (V4SF_type_node,
23067 tree v4si_ftype_v8si
23068 = build_function_type_list (V4SI_type_node,
23071 tree int_ftype_v4df
23072 = build_function_type_list (integer_type_node,
23075 tree int_ftype_v8sf
23076 = build_function_type_list (integer_type_node,
23079 tree int_ftype_v8sf_v8sf
23080 = build_function_type_list (integer_type_node,
23081 V8SF_type_node, V8SF_type_node,
23083 tree int_ftype_v4di_v4di
23084 = build_function_type_list (integer_type_node,
23085 V4DI_type_node, V4DI_type_node,
23087 tree int_ftype_v4df_v4df
23088 = build_function_type_list (integer_type_node,
23089 V4DF_type_node, V4DF_type_node,
23091 tree v8sf_ftype_v8sf_v8si
23092 = build_function_type_list (V8SF_type_node,
23093 V8SF_type_node, V8SI_type_node,
23095 tree v4df_ftype_v4df_v4di
23096 = build_function_type_list (V4DF_type_node,
23097 V4DF_type_node, V4DI_type_node,
23099 tree v4sf_ftype_v4sf_v4si
23100 = build_function_type_list (V4SF_type_node,
23101 V4SF_type_node, V4SI_type_node, NULL_TREE);
23102 tree v2df_ftype_v2df_v2di
23103 = build_function_type_list (V2DF_type_node,
23104 V2DF_type_node, V2DI_type_node, NULL_TREE);
23108 /* Add all special builtins with variable number of operands. */
23109 for (i = 0, d = bdesc_special_args;
23110 i < ARRAY_SIZE (bdesc_special_args);
23118 switch ((enum ix86_special_builtin_type) d->flag)
23120 case VOID_FTYPE_VOID:
23121 type = void_ftype_void;
23123 case V32QI_FTYPE_PCCHAR:
23124 type = v32qi_ftype_pcchar;
23126 case V16QI_FTYPE_PCCHAR:
23127 type = v16qi_ftype_pcchar;
23129 case V8SF_FTYPE_PCV4SF:
23130 type = v8sf_ftype_pcv4sf;
23132 case V8SF_FTYPE_PCFLOAT:
23133 type = v8sf_ftype_pcfloat;
23135 case V4DF_FTYPE_PCV2DF:
23136 type = v4df_ftype_pcv2df;
23138 case V4DF_FTYPE_PCDOUBLE:
23139 type = v4df_ftype_pcdouble;
23141 case V4SF_FTYPE_PCFLOAT:
23142 type = v4sf_ftype_pcfloat;
23144 case V2DI_FTYPE_PV2DI:
23145 type = v2di_ftype_pv2di;
23147 case V2DF_FTYPE_PCDOUBLE:
23148 type = v2df_ftype_pcdouble;
23150 case V8SF_FTYPE_PCV8SF_V8SF:
23151 type = v8sf_ftype_pcv8sf_v8sf;
23153 case V4DF_FTYPE_PCV4DF_V4DF:
23154 type = v4df_ftype_pcv4df_v4df;
23156 case V4SF_FTYPE_V4SF_PCV2SF:
23157 type = v4sf_ftype_v4sf_pcv2sf;
23159 case V4SF_FTYPE_PCV4SF_V4SF:
23160 type = v4sf_ftype_pcv4sf_v4sf;
23162 case V2DF_FTYPE_V2DF_PCDOUBLE:
23163 type = v2df_ftype_v2df_pcdouble;
23165 case V2DF_FTYPE_PCV2DF_V2DF:
23166 type = v2df_ftype_pcv2df_v2df;
23168 case VOID_FTYPE_PV2SF_V4SF:
23169 type = void_ftype_pv2sf_v4sf;
23171 case VOID_FTYPE_PV4DI_V4DI:
23172 type = void_ftype_pv4di_v4di;
23174 case VOID_FTYPE_PV2DI_V2DI:
23175 type = void_ftype_pv2di_v2di;
23177 case VOID_FTYPE_PCHAR_V32QI:
23178 type = void_ftype_pchar_v32qi;
23180 case VOID_FTYPE_PCHAR_V16QI:
23181 type = void_ftype_pchar_v16qi;
23183 case VOID_FTYPE_PFLOAT_V8SF:
23184 type = void_ftype_pfloat_v8sf;
23186 case VOID_FTYPE_PFLOAT_V4SF:
23187 type = void_ftype_pfloat_v4sf;
23189 case VOID_FTYPE_PDOUBLE_V4DF:
23190 type = void_ftype_pdouble_v4df;
23192 case VOID_FTYPE_PDOUBLE_V2DF:
23193 type = void_ftype_pdouble_v2df;
23195 case VOID_FTYPE_PDI_DI:
23196 type = void_ftype_pdi_di;
23198 case VOID_FTYPE_PINT_INT:
23199 type = void_ftype_pint_int;
23201 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23202 type = void_ftype_pv8sf_v8sf_v8sf;
23204 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23205 type = void_ftype_pv4df_v4df_v4df;
23207 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23208 type = void_ftype_pv4sf_v4sf_v4sf;
23210 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23211 type = void_ftype_pv2df_v2df_v2df;
23214 gcc_unreachable ();
23217 def_builtin (d->mask, d->name, type, d->code);
23220 /* Add all builtins with variable number of operands. */
23221 for (i = 0, d = bdesc_args;
23222 i < ARRAY_SIZE (bdesc_args);
23230 switch ((enum ix86_builtin_type) d->flag)
23232 case FLOAT_FTYPE_FLOAT:
23233 type = float_ftype_float;
23235 case INT_FTYPE_V8SF_V8SF_PTEST:
23236 type = int_ftype_v8sf_v8sf;
23238 case INT_FTYPE_V4DI_V4DI_PTEST:
23239 type = int_ftype_v4di_v4di;
23241 case INT_FTYPE_V4DF_V4DF_PTEST:
23242 type = int_ftype_v4df_v4df;
23244 case INT_FTYPE_V4SF_V4SF_PTEST:
23245 type = int_ftype_v4sf_v4sf;
23247 case INT_FTYPE_V2DI_V2DI_PTEST:
23248 type = int_ftype_v2di_v2di;
23250 case INT_FTYPE_V2DF_V2DF_PTEST:
23251 type = int_ftype_v2df_v2df;
23253 case INT64_FTYPE_V4SF:
23254 type = int64_ftype_v4sf;
23256 case INT64_FTYPE_V2DF:
23257 type = int64_ftype_v2df;
23259 case INT_FTYPE_V16QI:
23260 type = int_ftype_v16qi;
23262 case INT_FTYPE_V8QI:
23263 type = int_ftype_v8qi;
23265 case INT_FTYPE_V8SF:
23266 type = int_ftype_v8sf;
23268 case INT_FTYPE_V4DF:
23269 type = int_ftype_v4df;
23271 case INT_FTYPE_V4SF:
23272 type = int_ftype_v4sf;
23274 case INT_FTYPE_V2DF:
23275 type = int_ftype_v2df;
23277 case V16QI_FTYPE_V16QI:
23278 type = v16qi_ftype_v16qi;
23280 case V8SI_FTYPE_V8SF:
23281 type = v8si_ftype_v8sf;
23283 case V8SI_FTYPE_V4SI:
23284 type = v8si_ftype_v4si;
23286 case V8HI_FTYPE_V8HI:
23287 type = v8hi_ftype_v8hi;
23289 case V8HI_FTYPE_V16QI:
23290 type = v8hi_ftype_v16qi;
23292 case V8QI_FTYPE_V8QI:
23293 type = v8qi_ftype_v8qi;
23295 case V8SF_FTYPE_V8SF:
23296 type = v8sf_ftype_v8sf;
23298 case V8SF_FTYPE_V8SI:
23299 type = v8sf_ftype_v8si;
23301 case V8SF_FTYPE_V4SF:
23302 type = v8sf_ftype_v4sf;
23304 case V4SI_FTYPE_V4DF:
23305 type = v4si_ftype_v4df;
23307 case V4SI_FTYPE_V4SI:
23308 type = v4si_ftype_v4si;
23310 case V4SI_FTYPE_V16QI:
23311 type = v4si_ftype_v16qi;
23313 case V4SI_FTYPE_V8SI:
23314 type = v4si_ftype_v8si;
23316 case V4SI_FTYPE_V8HI:
23317 type = v4si_ftype_v8hi;
23319 case V4SI_FTYPE_V4SF:
23320 type = v4si_ftype_v4sf;
23322 case V4SI_FTYPE_V2DF:
23323 type = v4si_ftype_v2df;
23325 case V4HI_FTYPE_V4HI:
23326 type = v4hi_ftype_v4hi;
23328 case V4DF_FTYPE_V4DF:
23329 type = v4df_ftype_v4df;
23331 case V4DF_FTYPE_V4SI:
23332 type = v4df_ftype_v4si;
23334 case V4DF_FTYPE_V4SF:
23335 type = v4df_ftype_v4sf;
23337 case V4DF_FTYPE_V2DF:
23338 type = v4df_ftype_v2df;
23340 case V4SF_FTYPE_V4SF:
23341 case V4SF_FTYPE_V4SF_VEC_MERGE:
23342 type = v4sf_ftype_v4sf;
23344 case V4SF_FTYPE_V8SF:
23345 type = v4sf_ftype_v8sf;
23347 case V4SF_FTYPE_V4SI:
23348 type = v4sf_ftype_v4si;
23350 case V4SF_FTYPE_V4DF:
23351 type = v4sf_ftype_v4df;
23353 case V4SF_FTYPE_V2DF:
23354 type = v4sf_ftype_v2df;
23356 case V2DI_FTYPE_V2DI:
23357 type = v2di_ftype_v2di;
23359 case V2DI_FTYPE_V16QI:
23360 type = v2di_ftype_v16qi;
23362 case V2DI_FTYPE_V8HI:
23363 type = v2di_ftype_v8hi;
23365 case V2DI_FTYPE_V4SI:
23366 type = v2di_ftype_v4si;
23368 case V2SI_FTYPE_V2SI:
23369 type = v2si_ftype_v2si;
23371 case V2SI_FTYPE_V4SF:
23372 type = v2si_ftype_v4sf;
23374 case V2SI_FTYPE_V2DF:
23375 type = v2si_ftype_v2df;
23377 case V2SI_FTYPE_V2SF:
23378 type = v2si_ftype_v2sf;
23380 case V2DF_FTYPE_V4DF:
23381 type = v2df_ftype_v4df;
23383 case V2DF_FTYPE_V4SF:
23384 type = v2df_ftype_v4sf;
23386 case V2DF_FTYPE_V2DF:
23387 case V2DF_FTYPE_V2DF_VEC_MERGE:
23388 type = v2df_ftype_v2df;
23390 case V2DF_FTYPE_V2SI:
23391 type = v2df_ftype_v2si;
23393 case V2DF_FTYPE_V4SI:
23394 type = v2df_ftype_v4si;
23396 case V2SF_FTYPE_V2SF:
23397 type = v2sf_ftype_v2sf;
23399 case V2SF_FTYPE_V2SI:
23400 type = v2sf_ftype_v2si;
23402 case V16QI_FTYPE_V16QI_V16QI:
23403 type = v16qi_ftype_v16qi_v16qi;
23405 case V16QI_FTYPE_V8HI_V8HI:
23406 type = v16qi_ftype_v8hi_v8hi;
23408 case V8QI_FTYPE_V8QI_V8QI:
23409 type = v8qi_ftype_v8qi_v8qi;
23411 case V8QI_FTYPE_V4HI_V4HI:
23412 type = v8qi_ftype_v4hi_v4hi;
23414 case V8HI_FTYPE_V8HI_V8HI:
23415 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23416 type = v8hi_ftype_v8hi_v8hi;
23418 case V8HI_FTYPE_V16QI_V16QI:
23419 type = v8hi_ftype_v16qi_v16qi;
23421 case V8HI_FTYPE_V4SI_V4SI:
23422 type = v8hi_ftype_v4si_v4si;
23424 case V8HI_FTYPE_V8HI_SI_COUNT:
23425 type = v8hi_ftype_v8hi_int;
23427 case V8SF_FTYPE_V8SF_V8SF:
23428 type = v8sf_ftype_v8sf_v8sf;
23430 case V8SF_FTYPE_V8SF_V8SI:
23431 type = v8sf_ftype_v8sf_v8si;
23433 case V4SI_FTYPE_V4SI_V4SI:
23434 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23435 type = v4si_ftype_v4si_v4si;
23437 case V4SI_FTYPE_V8HI_V8HI:
23438 type = v4si_ftype_v8hi_v8hi;
23440 case V4SI_FTYPE_V4SF_V4SF:
23441 type = v4si_ftype_v4sf_v4sf;
23443 case V4SI_FTYPE_V2DF_V2DF:
23444 type = v4si_ftype_v2df_v2df;
23446 case V4SI_FTYPE_V4SI_SI_COUNT:
23447 type = v4si_ftype_v4si_int;
23449 case V4HI_FTYPE_V4HI_V4HI:
23450 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23451 type = v4hi_ftype_v4hi_v4hi;
23453 case V4HI_FTYPE_V8QI_V8QI:
23454 type = v4hi_ftype_v8qi_v8qi;
23456 case V4HI_FTYPE_V2SI_V2SI:
23457 type = v4hi_ftype_v2si_v2si;
23459 case V4HI_FTYPE_V4HI_SI_COUNT:
23460 type = v4hi_ftype_v4hi_int;
23462 case V4DF_FTYPE_V4DF_V4DF:
23463 type = v4df_ftype_v4df_v4df;
23465 case V4DF_FTYPE_V4DF_V4DI:
23466 type = v4df_ftype_v4df_v4di;
23468 case V4SF_FTYPE_V4SF_V4SF:
23469 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23470 type = v4sf_ftype_v4sf_v4sf;
23472 case V4SF_FTYPE_V4SF_V4SI:
23473 type = v4sf_ftype_v4sf_v4si;
23475 case V4SF_FTYPE_V4SF_V2SI:
23476 type = v4sf_ftype_v4sf_v2si;
23478 case V4SF_FTYPE_V4SF_V2DF:
23479 type = v4sf_ftype_v4sf_v2df;
23481 case V4SF_FTYPE_V4SF_DI:
23482 type = v4sf_ftype_v4sf_int64;
23484 case V4SF_FTYPE_V4SF_SI:
23485 type = v4sf_ftype_v4sf_int;
23487 case V2DI_FTYPE_V2DI_V2DI:
23488 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23489 type = v2di_ftype_v2di_v2di;
23491 case V2DI_FTYPE_V16QI_V16QI:
23492 type = v2di_ftype_v16qi_v16qi;
23494 case V2DI_FTYPE_V4SI_V4SI:
23495 type = v2di_ftype_v4si_v4si;
23497 case V2DI_FTYPE_V2DI_V16QI:
23498 type = v2di_ftype_v2di_v16qi;
23500 case V2DI_FTYPE_V2DF_V2DF:
23501 type = v2di_ftype_v2df_v2df;
23503 case V2DI_FTYPE_V2DI_SI_COUNT:
23504 type = v2di_ftype_v2di_int;
23506 case V2SI_FTYPE_V2SI_V2SI:
23507 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23508 type = v2si_ftype_v2si_v2si;
23510 case V2SI_FTYPE_V4HI_V4HI:
23511 type = v2si_ftype_v4hi_v4hi;
23513 case V2SI_FTYPE_V2SF_V2SF:
23514 type = v2si_ftype_v2sf_v2sf;
23516 case V2SI_FTYPE_V2SI_SI_COUNT:
23517 type = v2si_ftype_v2si_int;
23519 case V2DF_FTYPE_V2DF_V2DF:
23520 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23521 type = v2df_ftype_v2df_v2df;
23523 case V2DF_FTYPE_V2DF_V4SF:
23524 type = v2df_ftype_v2df_v4sf;
23526 case V2DF_FTYPE_V2DF_V2DI:
23527 type = v2df_ftype_v2df_v2di;
23529 case V2DF_FTYPE_V2DF_DI:
23530 type = v2df_ftype_v2df_int64;
23532 case V2DF_FTYPE_V2DF_SI:
23533 type = v2df_ftype_v2df_int;
23535 case V2SF_FTYPE_V2SF_V2SF:
23536 type = v2sf_ftype_v2sf_v2sf;
23538 case V1DI_FTYPE_V1DI_V1DI:
23539 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23540 type = v1di_ftype_v1di_v1di;
23542 case V1DI_FTYPE_V8QI_V8QI:
23543 type = v1di_ftype_v8qi_v8qi;
23545 case V1DI_FTYPE_V2SI_V2SI:
23546 type = v1di_ftype_v2si_v2si;
23548 case V1DI_FTYPE_V1DI_SI_COUNT:
23549 type = v1di_ftype_v1di_int;
23551 case UINT64_FTYPE_UINT64_UINT64:
23552 type = uint64_ftype_uint64_uint64;
23554 case UINT_FTYPE_UINT_UINT:
23555 type = unsigned_ftype_unsigned_unsigned;
23557 case UINT_FTYPE_UINT_USHORT:
23558 type = unsigned_ftype_unsigned_ushort;
23560 case UINT_FTYPE_UINT_UCHAR:
23561 type = unsigned_ftype_unsigned_uchar;
23563 case V8HI_FTYPE_V8HI_INT:
23564 type = v8hi_ftype_v8hi_int;
23566 case V8SF_FTYPE_V8SF_INT:
23567 type = v8sf_ftype_v8sf_int;
23569 case V4SI_FTYPE_V4SI_INT:
23570 type = v4si_ftype_v4si_int;
23572 case V4SI_FTYPE_V8SI_INT:
23573 type = v4si_ftype_v8si_int;
23575 case V4HI_FTYPE_V4HI_INT:
23576 type = v4hi_ftype_v4hi_int;
23578 case V4DF_FTYPE_V4DF_INT:
23579 type = v4df_ftype_v4df_int;
23581 case V4SF_FTYPE_V4SF_INT:
23582 type = v4sf_ftype_v4sf_int;
23584 case V4SF_FTYPE_V8SF_INT:
23585 type = v4sf_ftype_v8sf_int;
23587 case V2DI_FTYPE_V2DI_INT:
23588 case V2DI2TI_FTYPE_V2DI_INT:
23589 type = v2di_ftype_v2di_int;
23591 case V2DF_FTYPE_V2DF_INT:
23592 type = v2df_ftype_v2df_int;
23594 case V2DF_FTYPE_V4DF_INT:
23595 type = v2df_ftype_v4df_int;
23597 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23598 type = v16qi_ftype_v16qi_v16qi_v16qi;
23600 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23601 type = v8sf_ftype_v8sf_v8sf_v8sf;
23603 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23604 type = v4df_ftype_v4df_v4df_v4df;
23606 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23607 type = v4sf_ftype_v4sf_v4sf_v4sf;
23609 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23610 type = v2df_ftype_v2df_v2df_v2df;
23612 case V16QI_FTYPE_V16QI_V16QI_INT:
23613 type = v16qi_ftype_v16qi_v16qi_int;
23615 case V8SI_FTYPE_V8SI_V8SI_INT:
23616 type = v8si_ftype_v8si_v8si_int;
23618 case V8SI_FTYPE_V8SI_V4SI_INT:
23619 type = v8si_ftype_v8si_v4si_int;
23621 case V8HI_FTYPE_V8HI_V8HI_INT:
23622 type = v8hi_ftype_v8hi_v8hi_int;
23624 case V8SF_FTYPE_V8SF_V8SF_INT:
23625 type = v8sf_ftype_v8sf_v8sf_int;
23627 case V8SF_FTYPE_V8SF_V4SF_INT:
23628 type = v8sf_ftype_v8sf_v4sf_int;
23630 case V4SI_FTYPE_V4SI_V4SI_INT:
23631 type = v4si_ftype_v4si_v4si_int;
23633 case V4DF_FTYPE_V4DF_V4DF_INT:
23634 type = v4df_ftype_v4df_v4df_int;
23636 case V4DF_FTYPE_V4DF_V2DF_INT:
23637 type = v4df_ftype_v4df_v2df_int;
23639 case V4SF_FTYPE_V4SF_V4SF_INT:
23640 type = v4sf_ftype_v4sf_v4sf_int;
23642 case V2DI_FTYPE_V2DI_V2DI_INT:
23643 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23644 type = v2di_ftype_v2di_v2di_int;
23646 case V2DF_FTYPE_V2DF_V2DF_INT:
23647 type = v2df_ftype_v2df_v2df_int;
23649 case V2DI_FTYPE_V2DI_UINT_UINT:
23650 type = v2di_ftype_v2di_unsigned_unsigned;
23652 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23653 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23655 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23656 type = v1di_ftype_v1di_v1di_int;
23659 gcc_unreachable ();
23662 def_builtin_const (d->mask, d->name, type, d->code);
23665 /* pcmpestr[im] insns. */
23666 for (i = 0, d = bdesc_pcmpestr;
23667 i < ARRAY_SIZE (bdesc_pcmpestr);
23670 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23671 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23673 ftype = int_ftype_v16qi_int_v16qi_int_int;
23674 def_builtin_const (d->mask, d->name, ftype, d->code);
23677 /* pcmpistr[im] insns. */
23678 for (i = 0, d = bdesc_pcmpistr;
23679 i < ARRAY_SIZE (bdesc_pcmpistr);
23682 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23683 ftype = v16qi_ftype_v16qi_v16qi_int;
23685 ftype = int_ftype_v16qi_v16qi_int;
23686 def_builtin_const (d->mask, d->name, ftype, d->code);
23689 /* comi/ucomi insns. */
23690 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23691 if (d->mask == OPTION_MASK_ISA_SSE2)
23692 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23694 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23697 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23698 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23700 /* SSE or 3DNow!A */
23701 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23704 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23706 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23707 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23710 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23711 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23714 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23715 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23716 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23717 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23718 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23719 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23722 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23725 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23726 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23728 /* Access to the vec_init patterns. */
23729 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23730 integer_type_node, NULL_TREE);
23731 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23733 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23734 short_integer_type_node,
23735 short_integer_type_node,
23736 short_integer_type_node, NULL_TREE);
23737 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23739 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23740 char_type_node, char_type_node,
23741 char_type_node, char_type_node,
23742 char_type_node, char_type_node,
23743 char_type_node, NULL_TREE);
23744 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23746 /* Access to the vec_extract patterns. */
23747 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23748 integer_type_node, NULL_TREE);
23749 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23751 ftype = build_function_type_list (long_long_integer_type_node,
23752 V2DI_type_node, integer_type_node,
23754 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23756 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23757 integer_type_node, NULL_TREE);
23758 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23760 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23761 integer_type_node, NULL_TREE);
23762 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23764 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23765 integer_type_node, NULL_TREE);
23766 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23768 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23769 integer_type_node, NULL_TREE);
23770 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23772 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23773 integer_type_node, NULL_TREE);
23774 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23776 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23777 integer_type_node, NULL_TREE);
23778 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23780 /* Access to the vec_set patterns. */
23781 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23783 integer_type_node, NULL_TREE);
23784 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23786 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23788 integer_type_node, NULL_TREE);
23789 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23791 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23793 integer_type_node, NULL_TREE);
23794 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23796 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23798 integer_type_node, NULL_TREE);
23799 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23801 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23803 integer_type_node, NULL_TREE);
23804 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23806 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23808 integer_type_node, NULL_TREE);
23809 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23811 /* Add SSE5 multi-arg argument instructions */
23812 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23814 tree mtype = NULL_TREE;
23819 switch ((enum multi_arg_type)d->flag)
23821 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23822 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23823 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23824 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23825 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23826 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23827 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23828 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23829 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23830 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23831 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23832 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23833 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23834 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23835 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23836 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23837 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23838 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23839 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23840 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23841 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23842 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23843 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23844 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23845 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23846 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23847 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23848 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23849 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23850 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23851 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23852 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23853 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23854 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23855 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23856 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23857 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23858 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23859 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23860 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23861 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23862 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23863 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23864 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23865 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23866 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23867 case MULTI_ARG_UNKNOWN:
23869 gcc_unreachable ();
23873 def_builtin_const (d->mask, d->name, mtype, d->code);
23877 /* Internal method for ix86_init_builtins. */
/* Register the __builtin_ms_va_{start,end,copy} and
   __builtin_sysv_va_{start,end,copy} builtins.  Each group is tagged
   with the matching "ms_abi" / "sysv_abi" function attribute so callers
   get the va_list flavor of the requested ABI rather than the current
   default one.  NOTE(review): this view of the file is elided; some
   lines (e.g. the "static void" return-type line and a few assignment
   left-hand sides) are not visible here.  */
23880 ix86_init_builtins_va_builtins_abi (void)
23882 tree ms_va_ref, sysv_va_ref;
23883 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23884 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23885 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23886 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists used below to mark each builtin with its ABI.  */
23890 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23891 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* Reference types for the two va_list flavors: the va_* builtins take
   their va_list argument by reference.  */
23892 ms_va_ref = build_reference_type (ms_va_list_type_node);
23894 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Function types: va_start variants are varargs; va_end takes a single
   reference; va_copy takes a destination reference plus a source
   va_list value.  */
23897 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23898 fnvoid_va_start_ms =
23899 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23900 fnvoid_va_end_sysv =
23901 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23902 fnvoid_va_start_sysv =
23903 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23905 fnvoid_va_copy_ms =
23906 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23908 fnvoid_va_copy_sysv =
23909 build_function_type_list (void_type_node, sysv_va_ref,
23910 sysv_va_ref, NULL_TREE);
/* Register all six builtins.  BUILT_IN_VA_{START,END,COPY} codes let
   the middle end treat them like the generic va_* builtins; the
   attribute list selects the ABI-specific lowering.  */
23912 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23913 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23914 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23915 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23916 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23917 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23918 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23919 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23920 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23921 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23922 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23923 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level builtin initialization for the i386 target: registers the
   extended float types (__float80, __float128), the TFmode support
   builtins used by libgcc, then delegates to the MMX/SSE and
   ABI-specific va_list initializers.  NOTE(review): elided view — the
   return-type line, braces and a few argument lines are not visible.  */
23927 ix86_init_builtins (void)
23929 tree float128_type_node = make_node (REAL_TYPE);
23932 /* The __float80 type. */
/* If long double is already XFmode, just alias __float80 to it ...  */
23933 if (TYPE_MODE (long_double_type_node) == XFmode)
23934 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23938 /* The __float80 type. */
/* ... otherwise build a distinct 80-bit REAL_TYPE and register it.  */
23939 tree float80_type_node = make_node (REAL_TYPE);
23941 TYPE_PRECISION (float80_type_node) = 80;
23942 layout_type (float80_type_node);
23943 (*lang_hooks.types.register_builtin_type) (float80_type_node,
23947 /* The __float128 type. */
23948 TYPE_PRECISION (float128_type_node) = 128;
23949 layout_type (float128_type_node);
23950 (*lang_hooks.types.register_builtin_type) (float128_type_node,
23953 /* TFmode support builtins. */
/* __builtin_infq / __builtin_huge_valq: niladic functions returning
   __float128 values; decls are cached in the ix86_builtins table.  */
23954 ftype = build_function_type (float128_type_node, void_list_node);
23955 decl = add_builtin_function ("__builtin_infq", ftype,
23956 IX86_BUILTIN_INFQ, BUILT_IN_MD,
23958 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23960 decl = add_builtin_function ("__builtin_huge_valq", ftype,
23961 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
23963 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
23965 /* We will expand them to normal call if SSE2 isn't available since
23966 they are used by libgcc. */
/* __builtin_fabsq maps onto the libgcc routine __fabstf2; marked
   TREE_READONLY since it has no side effects.  */
23967 ftype = build_function_type_list (float128_type_node,
23968 float128_type_node,
23970 decl = add_builtin_function ("__builtin_fabsq", ftype,
23971 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23972 "__fabstf2", NULL_TREE);
23973 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
23974 TREE_READONLY (decl) = 1;
/* __builtin_copysignq maps onto libgcc's __copysigntf3.  */
23976 ftype = build_function_type_list (float128_type_node,
23977 float128_type_node,
23978 float128_type_node,
23980 decl = add_builtin_function ("__builtin_copysignq", ftype,
23981 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23982 "__copysigntf3", NULL_TREE);
23983 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23984 TREE_READONLY (decl) = 1;
/* Register the vector-instruction builtins and the ms/sysv va_list
   builtins.  */
23986 ix86_init_mmx_sse_builtins ();
23988 ix86_init_builtins_va_builtins_abi ();
23991 /* Errors in the source file can cause expand_expr to return const0_rtx
23992 where we expect a vector. To avoid crashing, use one of the vector
23993 clear instructions. */
/* Replace a scalar const0_rtx by an all-zero vector constant of MODE,
   so downstream expanders that require a vector operand don't crash on
   erroneous input.  Non-zero operands are returned unchanged
   (NOTE(review): the "return x;" line is elided from this view).  */
23995 safe_vector_operand (rtx x, enum machine_mode mode)
23997 if (x == const0_rtx)
23998 x = CONST0_RTX (mode);
24002 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin EXP using insn pattern ICODE, emitting
   the result into TARGET (or a fresh register of the insn's output mode
   when TARGET is unsuitable).  Operand modes are taken from the insn's
   operand table; operands failing the insn predicates are copied into
   registers first.  */
24005 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
24008 tree arg0 = CALL_EXPR_ARG (exp, 0);
24009 tree arg1 = CALL_EXPR_ARG (exp, 1);
24010 rtx op0 = expand_normal (arg0);
24011 rtx op1 = expand_normal (arg1);
24012 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24013 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24014 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector (error recovery).  */
24016 if (VECTOR_MODE_P (mode0))
24017 op0 = safe_vector_operand (op0, mode0);
24018 if (VECTOR_MODE_P (mode1))
24019 op1 = safe_vector_operand (op1, mode1);
24021 if (optimize || !target
24022 || GET_MODE (target) != tmode
24023 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24024 target = gen_reg_rtx (tmode);
/* Insn wants TImode but the builtin supplied an SImode value: load it
   into a V4SI register with sse2_loadd and use its TImode lowpart.  */
24026 if (GET_MODE (op1) == SImode && mode1 == TImode)
24028 rtx x = gen_reg_rtx (V4SImode);
24029 emit_insn (gen_sse2_loadd (x, op1));
24030 op1 = gen_lowpart (TImode, x);
/* Force predicate-failing operands into registers of the right mode.  */
24033 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
24034 op0 = copy_to_mode_reg (mode0, op0);
24035 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
24036 op1 = copy_to_mode_reg (mode1, op1);
/* Generate and emit the insn (emission lines elided in this view).  */
24038 pat = GEN_FCN (icode) (target, op0, op1);
24047 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expand an SSE5 multi-argument builtin EXP with insn pattern ICODE.
   M_TYPE classifies the builtin's shape (argument count, whether the
   last argument must be an immediate, whether the insn embeds a
   comparison), and SUB_CODE supplies the rtx comparison/operation code
   for the comparison and TF forms.  Result goes to TARGET or a fresh
   register of the insn's output mode.  */
24050 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
24051 enum multi_arg_type m_type,
24052 enum rtx_code sub_code)
24057 bool comparison_p = false;
24059 bool last_arg_constant = false;
24060 int num_memory = 0;
24063 enum machine_mode mode;
24066 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: the case groups below set nargs (elided lines) plus
   the comparison_p / last_arg_constant / tf_p flags.  */
24070 case MULTI_ARG_3_SF:
24071 case MULTI_ARG_3_DF:
24072 case MULTI_ARG_3_DI:
24073 case MULTI_ARG_3_SI:
24074 case MULTI_ARG_3_SI_DI:
24075 case MULTI_ARG_3_HI:
24076 case MULTI_ARG_3_HI_SI:
24077 case MULTI_ARG_3_QI:
24078 case MULTI_ARG_3_PERMPS:
24079 case MULTI_ARG_3_PERMPD:
24083 case MULTI_ARG_2_SF:
24084 case MULTI_ARG_2_DF:
24085 case MULTI_ARG_2_DI:
24086 case MULTI_ARG_2_SI:
24087 case MULTI_ARG_2_HI:
24088 case MULTI_ARG_2_QI:
/* Two-operand forms whose second operand is an immediate.  */
24092 case MULTI_ARG_2_DI_IMM:
24093 case MULTI_ARG_2_SI_IMM:
24094 case MULTI_ARG_2_HI_IMM:
24095 case MULTI_ARG_2_QI_IMM:
24097 last_arg_constant = true;
24100 case MULTI_ARG_1_SF:
24101 case MULTI_ARG_1_DF:
24102 case MULTI_ARG_1_DI:
24103 case MULTI_ARG_1_SI:
24104 case MULTI_ARG_1_HI:
24105 case MULTI_ARG_1_QI:
24106 case MULTI_ARG_1_SI_DI:
24107 case MULTI_ARG_1_HI_DI:
24108 case MULTI_ARG_1_HI_SI:
24109 case MULTI_ARG_1_QI_DI:
24110 case MULTI_ARG_1_QI_SI:
24111 case MULTI_ARG_1_QI_HI:
24112 case MULTI_ARG_1_PH2PS:
24113 case MULTI_ARG_1_PS2PH:
/* Comparison forms: the insn pattern carries an embedded comparison
   rtx built from SUB_CODE (see the cmp_op construction below).  */
24117 case MULTI_ARG_2_SF_CMP:
24118 case MULTI_ARG_2_DF_CMP:
24119 case MULTI_ARG_2_DI_CMP:
24120 case MULTI_ARG_2_SI_CMP:
24121 case MULTI_ARG_2_HI_CMP:
24122 case MULTI_ARG_2_QI_CMP:
24124 comparison_p = true;
24127 case MULTI_ARG_2_SF_TF:
24128 case MULTI_ARG_2_DF_TF:
24129 case MULTI_ARG_2_DI_TF:
24130 case MULTI_ARG_2_SI_TF:
24131 case MULTI_ARG_2_HI_TF:
24132 case MULTI_ARG_2_QI_TF:
24137 case MULTI_ARG_UNKNOWN:
24139 gcc_unreachable ();
24142 if (optimize || !target
24143 || GET_MODE (target) != tmode
24144 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24145 target = gen_reg_rtx (tmode);
24147 gcc_assert (nargs <= 4);
/* Expand each call argument and legitimize it for its insn operand.
   For comparison insns operand 1 is the comparison rtx, so argument I
   maps to insn operand I+adjust+1.  */
24149 for (i = 0; i < nargs; i++)
24151 tree arg = CALL_EXPR_ARG (exp, i);
24152 rtx op = expand_normal (arg);
24153 int adjust = (comparison_p) ? 1 : 0;
24154 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24156 if (last_arg_constant && i == nargs-1)
24158 if (!CONST_INT_P (op))
/* Diagnose a non-immediate where the insn demands one; the dummy
   register return keeps expansion going after the error.  */
24160 error ("last argument must be an immediate");
24161 return gen_reg_rtx (tmode);
24166 if (VECTOR_MODE_P (mode))
24167 op = safe_vector_operand (op, mode);
24169 /* If we aren't optimizing, only allow one memory operand to be
24171 if (memory_operand (op, mode))
24174 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24177 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24179 op = force_reg (mode, op);
24183 args[i].mode = mode;
/* Emit the insn in the shape matching nargs/comparison_p/tf_p
   (surrounding switch lines elided in this view).  */
24189 pat = GEN_FCN (icode) (target, args[0].op);
24194 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24195 GEN_INT ((int)sub_code));
24196 else if (! comparison_p)
24197 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
/* Comparison form: build the comparison rtx from SUB_CODE and pass it
   as the insn's second operand.  */
24200 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24204 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24209 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24213 gcc_unreachable ();
24223 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24224 insns with vec_merge. */
/* Expand a one-argument builtin EXP whose insn pattern ICODE merges the
   scalar result back into the source vector: both the unary operand and
   the merge operand (op1) come from the same expanded argument.  */
24227 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24231 tree arg0 = CALL_EXPR_ARG (exp, 0);
24232 rtx op1, op0 = expand_normal (arg0);
24233 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24234 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24236 if (optimize || !target
24237 || GET_MODE (target) != tmode
24238 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24239 target = gen_reg_rtx (tmode);
24241 if (VECTOR_MODE_P (mode0))
24242 op0 = safe_vector_operand (op0, mode0);
24244 if ((optimize && !register_operand (op0, mode0))
24245 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24246 op0 = copy_to_mode_reg (mode0, op0);
/* op1 is the vec_merge operand (assignment "op1 = op0;" elided from
   this view); legitimize it against insn operand 2's predicate.  */
24249 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24250 op1 = copy_to_mode_reg (mode0, op1);
24252 pat = GEN_FCN (icode) (target, op0, op1);
24259 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE compare builtin described by D.  The insn takes both
   operands plus a comparison rtx (op2) built from D->comparison; SWAP
   requests exchanging the operands for comparisons only available in
   one direction.  */
24262 ix86_expand_sse_compare (const struct builtin_description *d,
24263 tree exp, rtx target, bool swap)
24266 tree arg0 = CALL_EXPR_ARG (exp, 0);
24267 tree arg1 = CALL_EXPR_ARG (exp, 1);
24268 rtx op0 = expand_normal (arg0);
24269 rtx op1 = expand_normal (arg1);
24271 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24272 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24273 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24274 enum rtx_code comparison = d->comparison;
24276 if (VECTOR_MODE_P (mode0))
24277 op0 = safe_vector_operand (op0, mode0);
24278 if (VECTOR_MODE_P (mode1))
24279 op1 = safe_vector_operand (op1, mode1);
24281 /* Swap operands if we have a comparison that isn't available in
/* Copy op1 through a fresh register as part of the swap (the actual
   exchange lines are elided from this view).  */
24285 rtx tmp = gen_reg_rtx (mode1);
24286 emit_move_insn (tmp, op1);
24291 if (optimize || !target
24292 || GET_MODE (target) != tmode
24293 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24294 target = gen_reg_rtx (tmode);
24296 if ((optimize && !register_operand (op0, mode0))
24297 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24298 op0 = copy_to_mode_reg (mode0, op0);
24299 if ((optimize && !register_operand (op1, mode1))
24300 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24301 op1 = copy_to_mode_reg (mode1, op1);
/* Operand 3 of the insn is the comparison rtx itself.  */
24303 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24304 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24311 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comis/ucomis builtin described by D: emit the compare insn,
   then materialize the flags result as a 0/1 value.  A fresh SImode
   register is zeroed and its QImode low part is set from the condition
   code via STRICT_LOW_PART; the SImode register is returned.  */
24314 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24318 tree arg0 = CALL_EXPR_ARG (exp, 0);
24319 tree arg1 = CALL_EXPR_ARG (exp, 1);
24320 rtx op0 = expand_normal (arg0);
24321 rtx op1 = expand_normal (arg1);
24322 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24323 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24324 enum rtx_code comparison = d->comparison;
24326 if (VECTOR_MODE_P (mode0))
24327 op0 = safe_vector_operand (op0, mode0);
24328 if (VECTOR_MODE_P (mode1))
24329 op1 = safe_vector_operand (op1, mode1);
24331 /* Swap operands if we have a comparison that isn't available in
24333 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Zero the SImode result first so only the low byte is written by the
   setcc-style store below.  */
24340 target = gen_reg_rtx (SImode);
24341 emit_move_insn (target, const0_rtx);
24342 target = gen_rtx_SUBREG (QImode, target, 0);
24344 if ((optimize && !register_operand (op0, mode0))
24345 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24346 op0 = copy_to_mode_reg (mode0, op0);
24347 if ((optimize && !register_operand (op1, mode1))
24348 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24349 op1 = copy_to_mode_reg (mode1, op1);
24351 pat = GEN_FCN (d->icode) (op0, op1);
/* Store the comparison outcome into the low byte of the zeroed
   register without clobbering the upper bits.  */
24355 emit_insn (gen_rtx_SET (VOIDmode,
24356 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24357 gen_rtx_fmt_ee (comparison, QImode,
24361 return SUBREG_REG (target);
24364 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a ptest-style builtin described by D: emit the test insn on
   the two vector operands, then read the resulting flag (D->comparison)
   into the low byte of a zeroed SImode register, which is returned.
   Same flags-to-int scheme as ix86_expand_sse_comi.  */
24367 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24371 tree arg0 = CALL_EXPR_ARG (exp, 0);
24372 tree arg1 = CALL_EXPR_ARG (exp, 1);
24373 rtx op0 = expand_normal (arg0);
24374 rtx op1 = expand_normal (arg1);
24375 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24376 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24377 enum rtx_code comparison = d->comparison;
24379 if (VECTOR_MODE_P (mode0))
24380 op0 = safe_vector_operand (op0, mode0);
24381 if (VECTOR_MODE_P (mode1))
24382 op1 = safe_vector_operand (op1, mode1);
/* Zero an SImode result and expose its low byte for the setcc store.  */
24384 target = gen_reg_rtx (SImode);
24385 emit_move_insn (target, const0_rtx);
24386 target = gen_rtx_SUBREG (QImode, target, 0);
24388 if ((optimize && !register_operand (op0, mode0))
24389 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24390 op0 = copy_to_mode_reg (mode0, op0);
24391 if ((optimize && !register_operand (op1, mode1))
24392 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24393 op1 = copy_to_mode_reg (mode1, op1);
24395 pat = GEN_FCN (d->icode) (op0, op1);
/* Low-byte store of the flag value; upper SImode bits stay zero.  */
24399 emit_insn (gen_rtx_SET (VOIDmode,
24400 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24401 gen_rtx_fmt_ee (comparison, QImode,
24405 return SUBREG_REG (target);
24408 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expand a pcmpestri/pcmpestrm builtin described by D.  The builtin
   takes five arguments (two vectors, their explicit lengths, and an
   8-bit immediate mode operand).  The underlying insn has two outputs;
   depending on d->code the index output, the mask output, or a flag bit
   (selected via d->flag) is the builtin's result, with the unused
   outputs going to scratch registers.  */
24411 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24412 tree exp, rtx target)
24415 tree arg0 = CALL_EXPR_ARG (exp, 0);
24416 tree arg1 = CALL_EXPR_ARG (exp, 1);
24417 tree arg2 = CALL_EXPR_ARG (exp, 2);
24418 tree arg3 = CALL_EXPR_ARG (exp, 3);
24419 tree arg4 = CALL_EXPR_ARG (exp, 4);
24420 rtx scratch0, scratch1;
24421 rtx op0 = expand_normal (arg0);
24422 rtx op1 = expand_normal (arg1);
24423 rtx op2 = expand_normal (arg2);
24424 rtx op3 = expand_normal (arg3);
24425 rtx op4 = expand_normal (arg4);
24426 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* Operand modes: [0]/[1] are the two outputs, [2]/[4] the vector
   inputs, [3]/[5] the integer lengths, [6] the immediate.  */
24428 tmode0 = insn_data[d->icode].operand[0].mode;
24429 tmode1 = insn_data[d->icode].operand[1].mode;
24430 modev2 = insn_data[d->icode].operand[2].mode;
24431 modei3 = insn_data[d->icode].operand[3].mode;
24432 modev4 = insn_data[d->icode].operand[4].mode;
24433 modei5 = insn_data[d->icode].operand[5].mode;
24434 modeimm = insn_data[d->icode].operand[6].mode;
24436 if (VECTOR_MODE_P (modev2))
24437 op0 = safe_vector_operand (op0, modev2);
24438 if (VECTOR_MODE_P (modev4))
24439 op2 = safe_vector_operand (op2, modev4);
24441 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24442 op0 = copy_to_mode_reg (modev2, op0);
24443 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24444 op1 = copy_to_mode_reg (modei3, op1);
24445 if ((optimize && !register_operand (op2, modev4))
24446 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24447 op2 = copy_to_mode_reg (modev4, op2);
24448 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24449 op3 = copy_to_mode_reg (modei5, op3);
24451 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24453 error ("the fifth argument must be a 8-bit immediate");
/* pcmpestri: the ECX index output (operand 0) is the result.  */
24457 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24459 if (optimize || !target
24460 || GET_MODE (target) != tmode0
24461 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24462 target = gen_reg_rtx (tmode0);
24464 scratch1 = gen_reg_rtx (tmode1);
24466 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* pcmpestrm: the XMM0 mask output (operand 1) is the result.  */
24468 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24470 if (optimize || !target
24471 || GET_MODE (target) != tmode1
24472 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24473 target = gen_reg_rtx (tmode1);
24475 scratch0 = gen_reg_rtx (tmode0);
24477 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-extracting variants: both insn outputs are scratch; the result
   is a condition-code bit identified by d->flag.  */
24481 gcc_assert (d->flag);
24483 scratch0 = gen_reg_rtx (tmode0);
24484 scratch1 = gen_reg_rtx (tmode1);
24486 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Turn the flag register given by d->flag into a 0/1 SImode value via
   a zeroed register and a QImode STRICT_LOW_PART setcc store.  */
24496 target = gen_reg_rtx (SImode);
24497 emit_move_insn (target, const0_rtx);
24498 target = gen_rtx_SUBREG (QImode, target, 0);
24501 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24502 gen_rtx_fmt_ee (EQ, QImode,
24503 gen_rtx_REG ((enum machine_mode) d->flag,
24506 return SUBREG_REG (target);
24513 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expand a pcmpistri/pcmpistrm builtin described by D.  Like the
   pcmpestr expander but for the implicit-length forms: three arguments
   (two vectors and an 8-bit immediate).  The insn has two outputs; the
   index, the mask, or a flag bit (d->flag) is returned depending on
   d->code, unused outputs going to scratch registers.  */
24516 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24517 tree exp, rtx target)
24520 tree arg0 = CALL_EXPR_ARG (exp, 0);
24521 tree arg1 = CALL_EXPR_ARG (exp, 1);
24522 tree arg2 = CALL_EXPR_ARG (exp, 2);
24523 rtx scratch0, scratch1;
24524 rtx op0 = expand_normal (arg0);
24525 rtx op1 = expand_normal (arg1);
24526 rtx op2 = expand_normal (arg2);
24527 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Operand modes: [0]/[1] outputs, [2]/[3] vector inputs, [4] imm.  */
24529 tmode0 = insn_data[d->icode].operand[0].mode;
24530 tmode1 = insn_data[d->icode].operand[1].mode;
24531 modev2 = insn_data[d->icode].operand[2].mode;
24532 modev3 = insn_data[d->icode].operand[3].mode;
24533 modeimm = insn_data[d->icode].operand[4].mode;
24535 if (VECTOR_MODE_P (modev2))
24536 op0 = safe_vector_operand (op0, modev2);
24537 if (VECTOR_MODE_P (modev3))
24538 op1 = safe_vector_operand (op1, modev3);
24540 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24541 op0 = copy_to_mode_reg (modev2, op0);
24542 if ((optimize && !register_operand (op1, modev3))
24543 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24544 op1 = copy_to_mode_reg (modev3, op1);
24546 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24548 error ("the third argument must be a 8-bit immediate");
/* pcmpistri: the index output (operand 0) is the result.  */
24552 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24554 if (optimize || !target
24555 || GET_MODE (target) != tmode0
24556 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24557 target = gen_reg_rtx (tmode0);
24559 scratch1 = gen_reg_rtx (tmode1);
24561 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* pcmpistrm: the mask output (operand 1) is the result.  */
24563 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24565 if (optimize || !target
24566 || GET_MODE (target) != tmode1
24567 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24568 target = gen_reg_rtx (tmode1);
24570 scratch0 = gen_reg_rtx (tmode0);
24572 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-extracting variants: both outputs are scratch and the result is
   a condition-code bit identified by d->flag.  */
24576 gcc_assert (d->flag);
24578 scratch0 = gen_reg_rtx (tmode0);
24579 scratch1 = gen_reg_rtx (tmode1);
24581 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Materialize the flag as 0/1: zero an SImode register, then setcc
   into its low byte through STRICT_LOW_PART.  */
24591 target = gen_reg_rtx (SImode);
24592 emit_move_insn (target, const0_rtx);
24593 target = gen_rtx_SUBREG (QImode, target, 0);
24596 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24597 gen_rtx_fmt_ee (EQ, QImode,
24598 gen_rtx_REG ((enum machine_mode) d->flag,
24601 return SUBREG_REG (target);
24607 /* Subroutine of ix86_expand_builtin to take care of insns with
24608 variable number of operands. */
/* Generic expander for builtins described by D.  The d->flag field
   encodes the function-type signature; the big switch below maps it to
   an argument count (nargs), how many trailing arguments must be
   immediates (nargs_constant), whether the last argument is a shift
   count (last_arg_count), and whether the result needs a mode-punned
   subreg (rmode != tmode).  Comparison-typed signatures are forwarded
   to ix86_expand_sse_compare, ptest ones to ix86_expand_sse_ptest.  */
24611 ix86_expand_args_builtin (const struct builtin_description *d,
24612 tree exp, rtx target)
24614 rtx pat, real_target;
24615 unsigned int i, nargs;
24616 unsigned int nargs_constant = 0;
24617 int num_memory = 0;
24621 enum machine_mode mode;
24623 bool last_arg_count = false;
24624 enum insn_code icode = d->icode;
24625 const struct insn_data *insn_p = &insn_data[icode];
24626 enum machine_mode tmode = insn_p->operand[0].mode;
24627 enum machine_mode rmode = VOIDmode;
24629 enum rtx_code comparison = d->comparison;
24631 switch ((enum ix86_builtin_type) d->flag)
/* PTEST signatures are handled wholesale by the ptest expander.  */
24633 case INT_FTYPE_V8SF_V8SF_PTEST:
24634 case INT_FTYPE_V4DI_V4DI_PTEST:
24635 case INT_FTYPE_V4DF_V4DF_PTEST:
24636 case INT_FTYPE_V4SF_V4SF_PTEST:
24637 case INT_FTYPE_V2DI_V2DI_PTEST:
24638 case INT_FTYPE_V2DF_V2DF_PTEST:
24639 return ix86_expand_sse_ptest (d, exp, target);
/* One-argument signatures.  */
24640 case FLOAT128_FTYPE_FLOAT128:
24641 case FLOAT_FTYPE_FLOAT:
24642 case INT64_FTYPE_V4SF:
24643 case INT64_FTYPE_V2DF:
24644 case INT_FTYPE_V16QI:
24645 case INT_FTYPE_V8QI:
24646 case INT_FTYPE_V8SF:
24647 case INT_FTYPE_V4DF:
24648 case INT_FTYPE_V4SF:
24649 case INT_FTYPE_V2DF:
24650 case V16QI_FTYPE_V16QI:
24651 case V8SI_FTYPE_V8SF:
24652 case V8SI_FTYPE_V4SI:
24653 case V8HI_FTYPE_V8HI:
24654 case V8HI_FTYPE_V16QI:
24655 case V8QI_FTYPE_V8QI:
24656 case V8SF_FTYPE_V8SF:
24657 case V8SF_FTYPE_V8SI:
24658 case V8SF_FTYPE_V4SF:
24659 case V4SI_FTYPE_V4SI:
24660 case V4SI_FTYPE_V16QI:
24661 case V4SI_FTYPE_V4SF:
24662 case V4SI_FTYPE_V8SI:
24663 case V4SI_FTYPE_V8HI:
24664 case V4SI_FTYPE_V4DF:
24665 case V4SI_FTYPE_V2DF:
24666 case V4HI_FTYPE_V4HI:
24667 case V4DF_FTYPE_V4DF:
24668 case V4DF_FTYPE_V4SI:
24669 case V4DF_FTYPE_V4SF:
24670 case V4DF_FTYPE_V2DF:
24671 case V4SF_FTYPE_V4SF:
24672 case V4SF_FTYPE_V4SI:
24673 case V4SF_FTYPE_V8SF:
24674 case V4SF_FTYPE_V4DF:
24675 case V4SF_FTYPE_V2DF:
24676 case V2DI_FTYPE_V2DI:
24677 case V2DI_FTYPE_V16QI:
24678 case V2DI_FTYPE_V8HI:
24679 case V2DI_FTYPE_V4SI:
24680 case V2DF_FTYPE_V2DF:
24681 case V2DF_FTYPE_V4SI:
24682 case V2DF_FTYPE_V4DF:
24683 case V2DF_FTYPE_V4SF:
24684 case V2DF_FTYPE_V2SI:
24685 case V2SI_FTYPE_V2SI:
24686 case V2SI_FTYPE_V4SF:
24687 case V2SI_FTYPE_V2SF:
24688 case V2SI_FTYPE_V2DF:
24689 case V2SF_FTYPE_V2SF:
24690 case V2SF_FTYPE_V2SI:
/* Scalar unops with vec_merge get their dedicated expander.  */
24693 case V4SF_FTYPE_V4SF_VEC_MERGE:
24694 case V2DF_FTYPE_V2DF_VEC_MERGE:
24695 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Two-argument signatures; plain binops short-circuit to the binop
   expander when no comparison is attached.  */
24696 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24697 case V16QI_FTYPE_V16QI_V16QI:
24698 case V16QI_FTYPE_V8HI_V8HI:
24699 case V8QI_FTYPE_V8QI_V8QI:
24700 case V8QI_FTYPE_V4HI_V4HI:
24701 case V8HI_FTYPE_V8HI_V8HI:
24702 case V8HI_FTYPE_V16QI_V16QI:
24703 case V8HI_FTYPE_V4SI_V4SI:
24704 case V8SF_FTYPE_V8SF_V8SF:
24705 case V8SF_FTYPE_V8SF_V8SI:
24706 case V4SI_FTYPE_V4SI_V4SI:
24707 case V4SI_FTYPE_V8HI_V8HI:
24708 case V4SI_FTYPE_V4SF_V4SF:
24709 case V4SI_FTYPE_V2DF_V2DF:
24710 case V4HI_FTYPE_V4HI_V4HI:
24711 case V4HI_FTYPE_V8QI_V8QI:
24712 case V4HI_FTYPE_V2SI_V2SI:
24713 case V4DF_FTYPE_V4DF_V4DF:
24714 case V4DF_FTYPE_V4DF_V4DI:
24715 case V4SF_FTYPE_V4SF_V4SF:
24716 case V4SF_FTYPE_V4SF_V4SI:
24717 case V4SF_FTYPE_V4SF_V2SI:
24718 case V4SF_FTYPE_V4SF_V2DF:
24719 case V4SF_FTYPE_V4SF_DI:
24720 case V4SF_FTYPE_V4SF_SI:
24721 case V2DI_FTYPE_V2DI_V2DI:
24722 case V2DI_FTYPE_V16QI_V16QI:
24723 case V2DI_FTYPE_V4SI_V4SI:
24724 case V2DI_FTYPE_V2DI_V16QI:
24725 case V2DI_FTYPE_V2DF_V2DF:
24726 case V2SI_FTYPE_V2SI_V2SI:
24727 case V2SI_FTYPE_V4HI_V4HI:
24728 case V2SI_FTYPE_V2SF_V2SF:
24729 case V2DF_FTYPE_V2DF_V2DF:
24730 case V2DF_FTYPE_V2DF_V4SF:
24731 case V2DF_FTYPE_V2DF_V2DI:
24732 case V2DF_FTYPE_V2DF_DI:
24733 case V2DF_FTYPE_V2DF_SI:
24734 case V2SF_FTYPE_V2SF_V2SF:
24735 case V1DI_FTYPE_V1DI_V1DI:
24736 case V1DI_FTYPE_V8QI_V8QI:
24737 case V1DI_FTYPE_V2SI_V2SI:
24738 if (comparison == UNKNOWN)
24739 return ix86_expand_binop_builtin (icode, exp, target);
/* SWAP-tagged comparisons: operands must be exchanged.  */
24742 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24743 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24744 gcc_assert (comparison != UNKNOWN);
/* Shift/rotate signatures whose last argument is a count.  */
24748 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24749 case V8HI_FTYPE_V8HI_SI_COUNT:
24750 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24751 case V4SI_FTYPE_V4SI_SI_COUNT:
24752 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24753 case V4HI_FTYPE_V4HI_SI_COUNT:
24754 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24755 case V2DI_FTYPE_V2DI_SI_COUNT:
24756 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24757 case V2SI_FTYPE_V2SI_SI_COUNT:
24758 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24759 case V1DI_FTYPE_V1DI_SI_COUNT:
24761 last_arg_count = true;
24763 case UINT64_FTYPE_UINT64_UINT64:
24764 case UINT_FTYPE_UINT_UINT:
24765 case UINT_FTYPE_UINT_USHORT:
24766 case UINT_FTYPE_UINT_UCHAR:
/* "2TI" signatures produce the result through a mode-punned subreg
   (rmode set in elided lines); the trailing INT is an immediate.  */
24769 case V2DI2TI_FTYPE_V2DI_INT:
24772 nargs_constant = 1;
24774 case V8HI_FTYPE_V8HI_INT:
24775 case V8SF_FTYPE_V8SF_INT:
24776 case V4SI_FTYPE_V4SI_INT:
24777 case V4SI_FTYPE_V8SI_INT:
24778 case V4HI_FTYPE_V4HI_INT:
24779 case V4DF_FTYPE_V4DF_INT:
24780 case V4SF_FTYPE_V4SF_INT:
24781 case V4SF_FTYPE_V8SF_INT:
24782 case V2DI_FTYPE_V2DI_INT:
24783 case V2DF_FTYPE_V2DF_INT:
24784 case V2DF_FTYPE_V4DF_INT:
24786 nargs_constant = 1;
/* Three-argument signatures (blend/permute-style).  */
24788 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24789 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24790 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24791 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24792 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24795 case V16QI_FTYPE_V16QI_V16QI_INT:
24796 case V8HI_FTYPE_V8HI_V8HI_INT:
24797 case V8SI_FTYPE_V8SI_V8SI_INT:
24798 case V8SI_FTYPE_V8SI_V4SI_INT:
24799 case V8SF_FTYPE_V8SF_V8SF_INT:
24800 case V8SF_FTYPE_V8SF_V4SF_INT:
24801 case V4SI_FTYPE_V4SI_V4SI_INT:
24802 case V4DF_FTYPE_V4DF_V4DF_INT:
24803 case V4DF_FTYPE_V4DF_V2DF_INT:
24804 case V4SF_FTYPE_V4SF_V4SF_INT:
24805 case V2DI_FTYPE_V2DI_V2DI_INT:
24806 case V2DF_FTYPE_V2DF_V2DF_INT:
24808 nargs_constant = 1;
24810 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24813 nargs_constant = 1;
24815 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24818 nargs_constant = 1;
/* Signatures with two trailing immediates (pcmp[ei]str-style).  */
24820 case V2DI_FTYPE_V2DI_UINT_UINT:
24822 nargs_constant = 2;
24824 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24826 nargs_constant = 2;
24829 gcc_unreachable ();
24832 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Comparison-typed builtins are always binary and handled by the
   SSE-compare expander.  */
24834 if (comparison != UNKNOWN)
24836 gcc_assert (nargs == 2);
24837 return ix86_expand_sse_compare (d, exp, target, swap);
/* When rmode differs from the insn's output mode, allocate the result
   in rmode and hand the insn a punned subreg as its output.  */
24840 if (rmode == VOIDmode || rmode == tmode)
24844 || GET_MODE (target) != tmode
24845 || ! (*insn_p->operand[0].predicate) (target, tmode))
24846 target = gen_reg_rtx (tmode);
24847 real_target = target;
24851 target = gen_reg_rtx (rmode);
24852 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
/* Expand and legitimize each argument against its insn operand.  */
24855 for (i = 0; i < nargs; i++)
24857 tree arg = CALL_EXPR_ARG (exp, i);
24858 rtx op = expand_normal (arg);
24859 enum machine_mode mode = insn_p->operand[i + 1].mode;
24860 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24862 if (last_arg_count && (i + 1) == nargs)
24864 /* SIMD shift insns take either an 8-bit immediate or
24865 register as count. But builtin functions take int as
24866 count. If count doesn't match, we put it in register. */
24869 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24870 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24871 op = copy_to_reg (op);
/* Trailing immediate arguments: diagnose out-of-range or
   non-constant values with an insn-specific message.  */
24874 else if ((nargs - i) <= nargs_constant)
24879 case CODE_FOR_sse4_1_roundpd:
24880 case CODE_FOR_sse4_1_roundps:
24881 case CODE_FOR_sse4_1_roundsd:
24882 case CODE_FOR_sse4_1_roundss:
24883 case CODE_FOR_sse4_1_blendps:
24884 case CODE_FOR_avx_blendpd256:
24885 case CODE_FOR_avx_vpermilv4df:
24886 case CODE_FOR_avx_roundpd256:
24887 case CODE_FOR_avx_roundps256:
24888 error ("the last argument must be a 4-bit immediate")
24891 case CODE_FOR_sse4_1_blendpd:
24892 case CODE_FOR_avx_vpermilv2df:
24893 error ("the last argument must be a 2-bit immediate");
24896 case CODE_FOR_avx_vextractf128v4df:
24897 case CODE_FOR_avx_vextractf128v8sf:
24898 case CODE_FOR_avx_vextractf128v8si:
24899 case CODE_FOR_avx_vinsertf128v4df:
24900 case CODE_FOR_avx_vinsertf128v8sf:
24901 case CODE_FOR_avx_vinsertf128v8si:
24902 error ("the last argument must be a 1-bit immediate");
24905 case CODE_FOR_avx_cmpsdv2df3:
24906 case CODE_FOR_avx_cmpssv4sf3:
24907 case CODE_FOR_avx_cmppdv2df3:
24908 case CODE_FOR_avx_cmppsv4sf3:
24909 case CODE_FOR_avx_cmppdv4df3:
24910 case CODE_FOR_avx_cmppsv8sf3:
24911 error ("the last argument must be a 5-bit immediate");
/* Default: generic 8-bit-immediate diagnostics, distinguishing
   the last from the next-to-last constant argument.  */
24915 switch (nargs_constant)
24918 if ((nargs - i) == nargs_constant)
24920 error ("the next to last argument must be an 8-bit immediate");
24924 error ("the last argument must be an 8-bit immediate");
24927 gcc_unreachable ();
24934 if (VECTOR_MODE_P (mode))
24935 op = safe_vector_operand (op, mode);
24937 /* If we aren't optimizing, only allow one memory operand to
24939 if (memory_operand (op, mode))
24942 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24944 if (optimize || !match || num_memory > 1)
24945 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy into a register and take a punned subreg.  */
24949 op = copy_to_reg (op);
24950 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24955 args[i].mode = mode;
/* Emit the insn with the arity selected above (surrounding switch
   lines elided in this view).  */
24961 pat = GEN_FCN (icode) (real_target, args[0].op);
24964 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24967 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24971 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24972 args[2].op, args[3].op);
24975 gcc_unreachable ();
24985 /* Subroutine of ix86_expand_builtin to take care of special insns
24986 with variable number of operands. */
/* NOTE(review): elided listing -- gaps in the embedded 25xxx numbering mean
   that braces, breaks and some statements of this function are not visible
   here.  Do not infer control flow across the gaps.  */
24989 ix86_expand_special_args_builtin (const struct builtin_description *d,
24990 tree exp, rtx target)
24994 unsigned int i, nargs, arg_adjust, memory;
24998 enum machine_mode mode;
25000 enum insn_code icode = d->icode;
25001 bool last_arg_constant = false;
25002 const struct insn_data *insn_p = &insn_data[icode];
25003 enum machine_mode tmode = insn_p->operand[0].mode;
/* KLASS distinguishes builtins that load from memory into TARGET from those
   that store a register operand to memory.  */
25004 enum { load, store } klass;
/* Classify the builtin by its function-type tag; this fixes NARGS, KLASS and
   which operand (if any) is the memory operand.  */
25006 switch ((enum ix86_special_builtin_type) d->flag)
25008 case VOID_FTYPE_VOID:
25009 emit_insn (GEN_FCN (icode) (target));
25011 case V2DI_FTYPE_PV2DI:
25012 case V32QI_FTYPE_PCCHAR:
25013 case V16QI_FTYPE_PCCHAR:
25014 case V8SF_FTYPE_PCV4SF:
25015 case V8SF_FTYPE_PCFLOAT:
25016 case V4SF_FTYPE_PCFLOAT:
25017 case V4DF_FTYPE_PCV2DF:
25018 case V4DF_FTYPE_PCDOUBLE:
25019 case V2DF_FTYPE_PCDOUBLE:
25024 case VOID_FTYPE_PV2SF_V4SF:
25025 case VOID_FTYPE_PV4DI_V4DI:
25026 case VOID_FTYPE_PV2DI_V2DI:
25027 case VOID_FTYPE_PCHAR_V32QI:
25028 case VOID_FTYPE_PCHAR_V16QI:
25029 case VOID_FTYPE_PFLOAT_V8SF:
25030 case VOID_FTYPE_PFLOAT_V4SF:
25031 case VOID_FTYPE_PDOUBLE_V4DF:
25032 case VOID_FTYPE_PDOUBLE_V2DF:
25033 case VOID_FTYPE_PDI_DI:
25034 case VOID_FTYPE_PINT_INT:
25037 /* Reserve memory operand for target. */
/* MEMORY == ARRAY_SIZE (args) is a sentinel meaning "the memory operand is
   the target itself", so no args[i] slot is the memory operand.  */
25038 memory = ARRAY_SIZE (args);
25040 case V4SF_FTYPE_V4SF_PCV2SF:
25041 case V2DF_FTYPE_V2DF_PCDOUBLE:
25046 case V8SF_FTYPE_PCV8SF_V8SF:
25047 case V4DF_FTYPE_PCV4DF_V4DF:
25048 case V4SF_FTYPE_PCV4SF_V4SF:
25049 case V2DF_FTYPE_PCV2DF_V2DF:
25054 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25055 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25056 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25057 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25060 /* Reserve memory operand for target. */
25061 memory = ARRAY_SIZE (args);
25064 gcc_unreachable ();
25067 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores the first call argument is the destination address; wrap it in
   a MEM of the insn's output mode and use it as the (asserted-null) target.  */
25069 if (klass == store)
25071 arg = CALL_EXPR_ARG (exp, 0);
25072 op = expand_normal (arg);
25073 gcc_assert (target == 0);
25074 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25082 || GET_MODE (target) != tmode
25083 || ! (*insn_p->operand[0].predicate) (target, tmode))
25084 target = gen_reg_rtx (tmode);
/* Expand each remaining call argument into the mode the insn pattern
   expects for operand i + 1.  */
25087 for (i = 0; i < nargs; i++)
25089 enum machine_mode mode = insn_p->operand[i + 1].mode;
25092 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25093 op = expand_normal (arg);
25094 match = (*insn_p->operand[i + 1].predicate) (op, mode);
25096 if (last_arg_constant && (i + 1) == nargs)
25102 error ("the last argument must be an 8-bit immediate");
25110 /* This must be the memory operand. */
25111 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25112 gcc_assert (GET_MODE (op) == mode
25113 || GET_MODE (op) == VOIDmode);
25117 /* This must be register. */
25118 if (VECTOR_MODE_P (mode))
25119 op = safe_vector_operand (op, mode);
25121 gcc_assert (GET_MODE (op) == mode
25122 || GET_MODE (op) == VOIDmode);
25123 op = copy_to_mode_reg (mode, op);
25128 args[i].mode = mode;
25134 pat = GEN_FCN (icode) (target, args[0].op);
25137 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25140 gcc_unreachable ();
/* Stores have no value; loads return the (possibly fresh) TARGET.  */
25146 return klass == store ? 0 : target;
25149 /* Return the integer constant in ARG. Constrain it to be in the range
25150 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): elided listing -- the return statements of this function are
   not visible here.  */
25153 get_element_number (tree vec_type, tree arg)
25155 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* host_integerp (arg, 1) requires a constant that fits unsigned HOST_WIDE_INT;
   the comma expression then bounds-checks the extracted value.  */
25157 if (!host_integerp (arg, 1)
25158 || (elt = tree_low_cst (arg, 1), elt > max))
25160 error ("selector must be an integer constant in the range 0..%wi", max);
25167 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25168 ix86_expand_vector_init. We DO have language-level syntax for this, in
25169 the form of (type){ init-list }. Except that since we can't place emms
25170 instructions from inside the compiler, we can't allow the use of MMX
25171 registers unless the user explicitly asks for it. So we do *not* define
25172 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25173 we have builtins invoked by mmintrin.h that gives us license to emit
25174 these sorts of instructions. */
25177 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25179 enum machine_mode tmode = TYPE_MODE (type);
25180 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25181 int i, n_elt = GET_MODE_NUNITS (tmode);
25182 rtvec v = rtvec_alloc (n_elt);
/* The builtin call must supply exactly one scalar argument per vector lane.  */
25184 gcc_assert (VECTOR_MODE_P (tmode));
25185 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each argument and narrow it to the element mode.  */
25187 for (i = 0; i < n_elt; ++i)
25189 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25190 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25193 if (!target || !register_operand (target, tmode))
25194 target = gen_reg_rtx (tmode);
25196 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25200 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25201 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25202 had a language-level syntax for referencing vector elements. */
25205 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25207 enum machine_mode tmode, mode0;
25212 arg0 = CALL_EXPR_ARG (exp, 0);
25213 arg1 = CALL_EXPR_ARG (exp, 1);
25215 op0 = expand_normal (arg0);
/* ARG1 must be a compile-time constant lane selector; get_element_number
   range-checks it against the vector type of ARG0.  */
25216 elt = get_element_number (TREE_TYPE (arg0), arg1);
25218 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25219 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25220 gcc_assert (VECTOR_MODE_P (mode0));
25222 op0 = force_reg (mode0, op0);
25224 if (optimize || !target || !register_operand (target, tmode))
25225 target = gen_reg_rtx (tmode);
25227 ix86_expand_vector_extract (true, target, op0, elt);
25232 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25233 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25234 a language-level syntax for referencing vector elements. */
25237 ix86_expand_vec_set_builtin (tree exp)
25239 enum machine_mode tmode, mode1;
25240 tree arg0, arg1, arg2;
25242 rtx op0, op1, target;
25244 arg0 = CALL_EXPR_ARG (exp, 0);
25245 arg1 = CALL_EXPR_ARG (exp, 1);
25246 arg2 = CALL_EXPR_ARG (exp, 2);
25248 tmode = TYPE_MODE (TREE_TYPE (arg0));
25249 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25250 gcc_assert (VECTOR_MODE_P (tmode));
25252 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25253 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* ARG2 is the constant lane index, range-checked against ARG0's type.  */
25254 elt = get_element_number (TREE_TYPE (arg0), arg2);
25256 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25257 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25259 op0 = force_reg (tmode, op0);
25260 op1 = force_reg (mode1, op1);
25262 /* OP0 is the source of these builtin functions and shouldn't be
25263 modified. Create a copy, use it and return it as target. */
25264 target = gen_reg_rtx (tmode);
25265 emit_move_insn (target, op0);
25266 ix86_expand_vector_set (true, target, op1, elt);
25271 /* Expand an expression EXP that calls a built-in function,
25272 with result going to TARGET if that's convenient
25273 (and in mode MODE if that's convenient).
25274 SUBTARGET may be used as the target for computing one of EXP's operands.
25275 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): elided listing -- breaks, returns and the enclosing switch
   statement around the IX86_BUILTIN_* cases are partly missing here.  */
25278 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25279 enum machine_mode mode ATTRIBUTE_UNUSED,
25280 int ignore ATTRIBUTE_UNUSED)
25282 const struct builtin_description *d;
25284 enum insn_code icode;
25285 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25286 tree arg0, arg1, arg2;
25287 rtx op0, op1, op2, pat;
25288 enum machine_mode mode0, mode1, mode2;
25289 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25291 /* Determine whether the builtin function is available under the current ISA.
25292 Originally the builtin was not created if it wasn't applicable to the
25293 current ISA based on the command line switches. With function specific
25294 options, we need to check in the context of the function making the call
25295 whether it is supported. */
25296 if (ix86_builtins_isa[fcode].isa
25297 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
/* Reconstruct a human-readable option string (e.g. "-msse4.1") naming the
   ISA the builtin needs, for the diagnostic below.  */
25299 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25300 NULL, NULL, false);
25303 error ("%qE needs unknown isa option", fndecl);
25306 gcc_assert (opts != NULL);
25307 error ("%qE needs isa option %s", fndecl, opts);
/* Builtins with irregular operand handling are expanded case by case;
   everything else falls through to the table-driven loops at the bottom.  */
25315 case IX86_BUILTIN_MASKMOVQ:
25316 case IX86_BUILTIN_MASKMOVDQU:
25317 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25318 ? CODE_FOR_mmx_maskmovq
25319 : CODE_FOR_sse2_maskmovdqu);
25320 /* Note the arg order is different from the operand order. */
25321 arg1 = CALL_EXPR_ARG (exp, 0);
25322 arg2 = CALL_EXPR_ARG (exp, 1);
25323 arg0 = CALL_EXPR_ARG (exp, 2);
25324 op0 = expand_normal (arg0);
25325 op1 = expand_normal (arg1);
25326 op2 = expand_normal (arg2);
25327 mode0 = insn_data[icode].operand[0].mode;
25328 mode1 = insn_data[icode].operand[1].mode;
25329 mode2 = insn_data[icode].operand[2].mode;
/* The destination is an implicit memory operand addressed by OP0.  */
25331 op0 = force_reg (Pmode, op0);
25332 op0 = gen_rtx_MEM (mode1, op0);
25334 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25335 op0 = copy_to_mode_reg (mode0, op0);
25336 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25337 op1 = copy_to_mode_reg (mode1, op1);
25338 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25339 op2 = copy_to_mode_reg (mode2, op2);
25340 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR/STMXCSR transfer the MXCSR control register through a stack slot.  */
25346 case IX86_BUILTIN_LDMXCSR:
25347 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25348 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25349 emit_move_insn (target, op0);
25350 emit_insn (gen_sse_ldmxcsr (target));
25353 case IX86_BUILTIN_STMXCSR:
25354 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25355 emit_insn (gen_sse_stmxcsr (target));
25356 return copy_to_mode_reg (SImode, target);
25358 case IX86_BUILTIN_CLFLUSH:
25359 arg0 = CALL_EXPR_ARG (exp, 0);
25360 op0 = expand_normal (arg0);
25361 icode = CODE_FOR_sse2_clflush;
25362 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25363 op0 = copy_to_mode_reg (Pmode, op0);
25365 emit_insn (gen_sse2_clflush (op0));
25368 case IX86_BUILTIN_MONITOR:
25369 arg0 = CALL_EXPR_ARG (exp, 0);
25370 arg1 = CALL_EXPR_ARG (exp, 1);
25371 arg2 = CALL_EXPR_ARG (exp, 2);
25372 op0 = expand_normal (arg0);
25373 op1 = expand_normal (arg1);
25374 op2 = expand_normal (arg2);
25376 op0 = copy_to_mode_reg (Pmode, op0);
25378 op1 = copy_to_mode_reg (SImode, op1);
25380 op2 = copy_to_mode_reg (SImode, op2);
/* ix86_gen_monitor abstracts over the 32-/64-bit monitor patterns.  */
25381 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25384 case IX86_BUILTIN_MWAIT:
25385 arg0 = CALL_EXPR_ARG (exp, 0);
25386 arg1 = CALL_EXPR_ARG (exp, 1);
25387 op0 = expand_normal (arg0);
25388 op1 = expand_normal (arg1);
25390 op0 = copy_to_mode_reg (SImode, op0);
25392 op1 = copy_to_mode_reg (SImode, op1);
25393 emit_insn (gen_sse3_mwait (op0, op1));
25396 case IX86_BUILTIN_VEC_INIT_V2SI:
25397 case IX86_BUILTIN_VEC_INIT_V4HI:
25398 case IX86_BUILTIN_VEC_INIT_V8QI:
25399 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25401 case IX86_BUILTIN_VEC_EXT_V2DF:
25402 case IX86_BUILTIN_VEC_EXT_V2DI:
25403 case IX86_BUILTIN_VEC_EXT_V4SF:
25404 case IX86_BUILTIN_VEC_EXT_V4SI:
25405 case IX86_BUILTIN_VEC_EXT_V8HI:
25406 case IX86_BUILTIN_VEC_EXT_V2SI:
25407 case IX86_BUILTIN_VEC_EXT_V4HI:
25408 case IX86_BUILTIN_VEC_EXT_V16QI:
25409 return ix86_expand_vec_ext_builtin (exp, target);
25411 case IX86_BUILTIN_VEC_SET_V2DI:
25412 case IX86_BUILTIN_VEC_SET_V4SF:
25413 case IX86_BUILTIN_VEC_SET_V4SI:
25414 case IX86_BUILTIN_VEC_SET_V8HI:
25415 case IX86_BUILTIN_VEC_SET_V4HI:
25416 case IX86_BUILTIN_VEC_SET_V16QI:
25417 return ix86_expand_vec_set_builtin (exp);
25419 case IX86_BUILTIN_INFQ:
25420 case IX86_BUILTIN_HUGE_VALQ:
25422 REAL_VALUE_TYPE inf;
/* Materialize the __float128 infinity via the constant pool.  */
25426 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25428 tmp = validize_mem (force_const_mem (mode, tmp));
25431 target = gen_reg_rtx (mode);
25433 emit_move_insn (target, tmp);
/* Table-driven expansion: scan each builtin-description table for FCODE and
   dispatch to the matching generic expander.  */
25441 for (i = 0, d = bdesc_special_args;
25442 i < ARRAY_SIZE (bdesc_special_args);
25444 if (d->code == fcode)
25445 return ix86_expand_special_args_builtin (d, exp, target);
25447 for (i = 0, d = bdesc_args;
25448 i < ARRAY_SIZE (bdesc_args);
25450 if (d->code == fcode)
25453 case IX86_BUILTIN_FABSQ:
25454 case IX86_BUILTIN_COPYSIGNQ:
25456 /* Emit a normal call if SSE2 isn't available. */
25457 return expand_call (exp, target, ignore);
25459 return ix86_expand_args_builtin (d, exp, target);
25462 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25463 if (d->code == fcode)
25464 return ix86_expand_sse_comi (d, exp, target);
25466 for (i = 0, d = bdesc_pcmpestr;
25467 i < ARRAY_SIZE (bdesc_pcmpestr);
25469 if (d->code == fcode)
25470 return ix86_expand_sse_pcmpestr (d, exp, target);
25472 for (i = 0, d = bdesc_pcmpistr;
25473 i < ARRAY_SIZE (bdesc_pcmpistr);
25475 if (d->code == fcode)
25476 return ix86_expand_sse_pcmpistr (d, exp, target);
25478 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25479 if (d->code == fcode)
25480 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25481 (enum multi_arg_type)d->flag,
/* Every valid FCODE must have been handled above.  */
25484 gcc_unreachable ();
25487 /* Returns a function decl for a vectorized version of the builtin function
25488 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25489 if it is not available. */
25492 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
/* Both the input and output types must be vectors for a vectorized variant
   to make sense.  */
25495 enum machine_mode in_mode, out_mode;
25498 if (TREE_CODE (type_out) != VECTOR_TYPE
25499 || TREE_CODE (type_in) != VECTOR_TYPE)
25502 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25503 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25504 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25505 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Match the scalar builtin against the SSE vector widths it maps to:
   2 x double or 4 x float.  */
25509 case BUILT_IN_SQRT:
25510 if (out_mode == DFmode && out_n == 2
25511 && in_mode == DFmode && in_n == 2)
25512 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25515 case BUILT_IN_SQRTF:
25516 if (out_mode == SFmode && out_n == 4
25517 && in_mode == SFmode && in_n == 4)
25518 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25521 case BUILT_IN_LRINT:
25522 if (out_mode == SImode && out_n == 4
25523 && in_mode == DFmode && in_n == 2)
25524 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25527 case BUILT_IN_LRINTF:
25528 if (out_mode == SImode && out_n == 4
25529 && in_mode == SFmode && in_n == 4)
25530 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25537 /* Dispatch to a handler for a vectorization library. */
25538 if (ix86_veclib_handler)
25539 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
25545 /* Handler for an SVML-style interface to
25546 a library with vectorized intrinsics. */
/* NOTE(review): elided listing -- the declaration of NAME and several return
   statements of this function are not visible here.  */
25549 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25552 tree fntype, new_fndecl, args;
25555 enum machine_mode el_mode, in_mode;
25558 /* The SVML is suitable for unsafe math only. */
25559 if (!flag_unsafe_math_optimizations)
25562 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25563 n = TYPE_VECTOR_SUBPARTS (type_out);
25564 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25565 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25566 if (el_mode != in_mode
/* Double-precision math builtins vectorize as 2 x DFmode ...  */
25574 case BUILT_IN_LOG10:
25576 case BUILT_IN_TANH:
25578 case BUILT_IN_ATAN:
25579 case BUILT_IN_ATAN2:
25580 case BUILT_IN_ATANH:
25581 case BUILT_IN_CBRT:
25582 case BUILT_IN_SINH:
25584 case BUILT_IN_ASINH:
25585 case BUILT_IN_ASIN:
25586 case BUILT_IN_COSH:
25588 case BUILT_IN_ACOSH:
25589 case BUILT_IN_ACOS:
25590 if (el_mode != DFmode || n != 2)
/* ... and single-precision ones as 4 x SFmode.  */
25594 case BUILT_IN_EXPF:
25595 case BUILT_IN_LOGF:
25596 case BUILT_IN_LOG10F:
25597 case BUILT_IN_POWF:
25598 case BUILT_IN_TANHF:
25599 case BUILT_IN_TANF:
25600 case BUILT_IN_ATANF:
25601 case BUILT_IN_ATAN2F:
25602 case BUILT_IN_ATANHF:
25603 case BUILT_IN_CBRTF:
25604 case BUILT_IN_SINHF:
25605 case BUILT_IN_SINF:
25606 case BUILT_IN_ASINHF:
25607 case BUILT_IN_ASINF:
25608 case BUILT_IN_COSHF:
25609 case BUILT_IN_COSF:
25610 case BUILT_IN_ACOSHF:
25611 case BUILT_IN_ACOSF:
25612 if (el_mode != SFmode || n != 4)
/* Derive the SVML entry point name from the scalar builtin's name:
   "__builtin_" (10 chars) is stripped and the vmls/vmld prefix plus the
   vector width suffix are added; log/logf are irregular ("Ln").  */
25620 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25622 if (fn == BUILT_IN_LOGF)
25623 strcpy (name, "vmlsLn4");
25624 else if (fn == BUILT_IN_LOG)
25625 strcpy (name, "vmldLn2");
25628 sprintf (name, "vmls%s", bname+10);
25629 name[strlen (name)-1] = '4';
25632 sprintf (name, "vmld%s2", bname+10);
25634 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a one- or two-argument
   vector function type below.  */
25638 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25639 args = TREE_CHAIN (args))
25643 fntype = build_function_type_list (type_out, type_in, NULL);
25645 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25647 /* Build a function declaration for the vectorized function. */
25648 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25649 TREE_PUBLIC (new_fndecl) = 1;
25650 DECL_EXTERNAL (new_fndecl) = 1;
25651 DECL_IS_NOVOPS (new_fndecl) = 1;
25652 TREE_READONLY (new_fndecl) = 1;
25657 /* Handler for an ACML-style interface to
25658 a library with vectorized intrinsics. */
25661 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Name template: the ".." is patched with the scalar builtin's name at
   offset 7 below (e.g. "__vrd2_sin").  */
25663 char name[20] = "__vr.._";
25664 tree fntype, new_fndecl, args;
25667 enum machine_mode el_mode, in_mode;
25670 /* The ACML is 64bits only and suitable for unsafe math only as
25671 it does not correctly support parts of IEEE with the required
25672 precision such as denormals. */
25674 || !flag_unsafe_math_optimizations)
25677 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25678 n = TYPE_VECTOR_SUBPARTS (type_out);
25679 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25680 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25681 if (el_mode != in_mode
/* Supported double-precision entry points ...  */
25691 case BUILT_IN_LOG2:
25692 case BUILT_IN_LOG10:
25695 if (el_mode != DFmode
/* ... and single-precision ones.  */
25700 case BUILT_IN_SINF:
25701 case BUILT_IN_COSF:
25702 case BUILT_IN_EXPF:
25703 case BUILT_IN_POWF:
25704 case BUILT_IN_LOGF:
25705 case BUILT_IN_LOG2F:
25706 case BUILT_IN_LOG10F:
25709 if (el_mode != SFmode
/* Strip the "__builtin_" prefix (10 chars) and splice the remainder into
   the "__vr.._" template.  */
25718 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25719 sprintf (name + 7, "%s", bname+10);
25722 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25723 args = TREE_CHAIN (args))
25727 fntype = build_function_type_list (type_out, type_in, NULL);
25729 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25731 /* Build a function declaration for the vectorized function. */
25732 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25733 TREE_PUBLIC (new_fndecl) = 1;
25734 DECL_EXTERNAL (new_fndecl) = 1;
25735 DECL_IS_NOVOPS (new_fndecl) = 1;
25736 TREE_READONLY (new_fndecl) = 1;
25742 /* Returns a decl of a function that implements conversion of an integer vector
25743 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25744 side of the conversion.
25745 Return NULL_TREE if it is not available. */
25748 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25750 if (TREE_CODE (type) != VECTOR_TYPE
25751 /* There are only conversions from/to signed integers. */
25752 || TYPE_UNSIGNED (TREE_TYPE (type)))
/* int -> float direction (CODE cases elided in this listing).  */
25758 switch (TYPE_MODE (type))
25761 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int truncating direction.  */
25766 case FIX_TRUNC_EXPR:
25767 switch (TYPE_MODE (type))
25770 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25780 /* Returns a code for a target-specific builtin that implements
25781 reciprocal of the function, or NULL_TREE if not available. */
25784 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25785 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations (rsqrt) are only valid under fast-math style
   flags and when not optimizing for size.  */
25787 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25788 && flag_finite_math_only && !flag_trapping_math
25789 && flag_unsafe_math_optimizations))
25793 /* Machine dependent builtins. */
25796 /* Vectorized version of sqrt to rsqrt conversion. */
25797 case IX86_BUILTIN_SQRTPS_NR:
25798 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25804 /* Normal builtins. */
25807 /* Sqrt to rsqrt conversion. */
25808 case BUILT_IN_SQRTF:
25809 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25816 /* Store OPERAND to the memory after reload is completed. This means
25817 that we can't easily use assign_stack_local. */
25819 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25823 gcc_assert (reload_completed);
/* With a red zone (64-bit non-MS ABI) we can store below the stack pointer
   without adjusting it.  */
25824 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25826 result = gen_rtx_MEM (mode,
25827 gen_rtx_PLUS (Pmode,
25829 GEN_INT (-RED_ZONE_SIZE)));
25830 emit_move_insn (result, operand);
/* Otherwise on 64-bit, push the value via a PRE_DEC of the stack pointer.  */
25832 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25838 operand = gen_lowpart (DImode, operand);
25842 gen_rtx_SET (VOIDmode,
25843 gen_rtx_MEM (DImode,
25844 gen_rtx_PRE_DEC (DImode,
25845 stack_pointer_rtx)),
25849 gcc_unreachable ();
25851 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit path: DImode values are split into two SImode pushes.  */
25860 split_di (&operand, 1, operands, operands + 1);
25862 gen_rtx_SET (VOIDmode,
25863 gen_rtx_MEM (SImode,
25864 gen_rtx_PRE_DEC (Pmode,
25865 stack_pointer_rtx)),
25868 gen_rtx_SET (VOIDmode,
25869 gen_rtx_MEM (SImode,
25870 gen_rtx_PRE_DEC (Pmode,
25871 stack_pointer_rtx)),
25876 /* Store HImodes as SImodes. */
25877 operand = gen_lowpart (SImode, operand);
25881 gen_rtx_SET (VOIDmode,
25882 gen_rtx_MEM (GET_MODE (operand),
25883 gen_rtx_PRE_DEC (SImode,
25884 stack_pointer_rtx)),
25888 gcc_unreachable ();
25890 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25895 /* Free operand from the memory. */
25897 ix86_free_from_memory (enum machine_mode mode)
/* Only needed when ix86_force_to_memory actually pushed (no red zone);
   this mirrors the condition used there.  */
25899 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25903 if (mode == DImode || TARGET_64BIT)
25907 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25908 to pop or add instruction if registers are available. */
25909 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25910 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25915 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25916 QImode must go into class Q_REGS.
25917 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25918 movdf to do mem-to-mem moves through integer regs. */
25920 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25922 enum machine_mode mode = GET_MODE (x);
25924 /* We're only allowed to return a subclass of CLASS. Many of the
25925 following checks fail for NO_REGS, so eliminate that early. */
25926 if (regclass == NO_REGS)
25929 /* All classes can load zeros. */
25930 if (x == CONST0_RTX (mode))
25933 /* Force constants into memory if we are loading a (nonzero) constant into
25934 an MMX or SSE register. This is because there are no MMX/SSE instructions
25935 to load from a constant. */
25937 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25940 /* Prefer SSE regs only, if we can use them for math. */
25941 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25942 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25944 /* Floating-point constants need more complex checks. */
25945 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25947 /* General regs can load everything. */
25948 if (reg_class_subset_p (regclass, GENERAL_REGS))
25951 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25952 zero above. We only want to wind up preferring 80387 registers if
25953 we plan on doing computation with them. */
25955 && standard_80387_constant_p (x))
25957 /* Limit class to non-sse. */
25958 if (regclass == FLOAT_SSE_REGS)
25960 if (regclass == FP_TOP_SSE_REGS)
25962 if (regclass == FP_SECOND_SSE_REGS)
25963 return FP_SECOND_REG;
25964 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25971 /* Generally when we see PLUS here, it's the function invariant
25972 (plus soft-fp const_int). Which can only be computed into general
25974 if (GET_CODE (x) == PLUS)
25975 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25977 /* QImode constants are easy to load, but non-constant QImode data
25978 must go into Q_REGS. */
25979 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25981 if (reg_class_subset_p (regclass, Q_REGS))
25983 if (reg_class_subset_p (Q_REGS, regclass))
25991 /* Discourage putting floating-point values in SSE registers unless
25992 SSE math is being used, and likewise for the 387 registers. */
25994 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25996 enum machine_mode mode = GET_MODE (x);
25998 /* Restrict the output reload class to the register bank that we are doing
25999 math on. If we would like not to return a subset of CLASS, reject this
26000 alternative: if reload cannot do this, it will still use its choice. */
26001 mode = GET_MODE (x);
26002 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26003 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* For x87 modes, narrow mixed x87/SSE classes down to the x87 side.  */
26005 if (X87_FLOAT_MODE_P (mode))
26007 if (regclass == FP_TOP_SSE_REGS)
26009 else if (regclass == FP_SECOND_SSE_REGS)
26010 return FP_SECOND_REG;
26012 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Target hook: return a class of intermediate registers needed (if any) to
   reload X of MODE into/out of RCLASS.  Elided listing: the return
   statements are not visible here.  */
26018 static enum reg_class
26019 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
26020 enum machine_mode mode,
26021 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26023 /* QImode spills from non-QI registers require
26024 intermediate register on 32bit targets. */
26025 if (!in_p && mode == QImode && !TARGET_64BIT
26026 && (rclass == GENERAL_REGS
26027 || rclass == LEGACY_REGS
26028 || rclass == INDEX_REGS))
/* Resolve hard register number through SUBREGs/pseudos.  */
26037 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26038 regno = true_regnum (x);
26040 /* Return Q_REGS if the operand is in memory. */
26048 /* If we are copying between general and FP registers, we need a memory
26049 location. The same is true for SSE and MMX registers.
26051 To optimize register_move_cost performance, allow inline variant.
26053 The macro can't work reliably when one of the CLASSES is class containing
26054 registers from multiple units (SSE, MMX, integer). We avoid this by never
26055 combining those units in single alternative in the machine description.
26056 Ensure that this constraint holds to avoid unexpected surprises.
26058 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26059 enforce these sanity checks. */
26062 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26063 enum machine_mode mode, int strict)
/* Sanity check: neither class may straddle register units; with STRICT this
   is a hard assert, otherwise we just answer conservatively.  */
26065 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26066 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26067 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26068 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26069 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26070 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26072 gcc_assert (!strict);
26076 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26079 /* ??? This is a lie. We do have moves between mmx/general, and for
26080 mmx/sse2. But by saying we need secondary memory we discourage the
26081 register allocator from using the mmx registers unless needed. */
26082 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26085 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26087 /* SSE1 doesn't have any direct moves from other classes. */
26091 /* If the target says that inter-unit moves are more expensive
26092 than moving through memory, then don't generate them. */
26093 if (!TARGET_INTER_UNIT_MOVES)
26096 /* Between SSE and general, we have moves no larger than word size. */
26097 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed, used where the
   macro form is referenced by address.  */
26105 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26106 enum machine_mode mode, int strict)
26108 return inline_secondary_memory_needed (class1, class2, mode, strict);
26111 /* Return true if the registers in CLASS cannot represent the change from
26112 modes FROM to TO. */
26115 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26116 enum reg_class regclass)
26121 /* x87 registers can't do subreg at all, as all values are reformatted
26122 to extended precision. */
26123 if (MAYBE_FLOAT_CLASS_P (regclass))
26126 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26128 /* Vector registers do not support QI or HImode loads. If we don't
26129 disallow a change to these modes, reload will assume it's ok to
26130 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26131 the vec_dupv4hi pattern. */
26132 if (GET_MODE_SIZE (from) < 4)
26135 /* Vector registers do not support subreg with nonzero offsets, which
26136 are otherwise valid for integer registers. Since we can't see
26137 whether we have a nonzero offset from here, prohibit all
26138 nonparadoxical subregs changing size. */
26139 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26146 /* Return the cost of moving data of mode M between a
26147 register and memory. A value of 2 is the default; this cost is
26148 relative to those in `REGISTER_MOVE_COST'.
26150 This function is used extensively by register_move_cost that is used to
26151 build tables at startup. Make it inline in this case.
26152 When IN is 2, return maximum of in and out move cost.
26154 If moving between registers and memory is more expensive than
26155 between two registers, you should define this macro to express the
26158 Model also increased moving costs of QImode registers in non
26162 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* x87 registers: index the fp_load/fp_store cost tables (index computation
   elided in this listing).  */
26166 if (FLOAT_CLASS_P (regclass))
26184 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26185 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE registers: cost table indexed by operand size.  */
26187 if (SSE_CLASS_P (regclass))
26190 switch (GET_MODE_SIZE (mode))
26205 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26206 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX registers: likewise.  */
26208 if (MMX_CLASS_P (regclass))
26211 switch (GET_MODE_SIZE (mode))
26223 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26224 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers, by operand size; byte ops in non-Q regs are modeled
   via the more expensive movzbl path.  */
26226 switch (GET_MODE_SIZE (mode))
26229 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26232 return ix86_cost->int_store[0];
26233 if (TARGET_PARTIAL_REG_DEPENDENCY
26234 && optimize_function_for_speed_p (cfun))
26235 cost = ix86_cost->movzbl_load;
26237 cost = ix86_cost->int_load[0];
26239 return MAX (cost, ix86_cost->int_store[0]);
26245 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26247 return ix86_cost->movzbl_load;
26249 return ix86_cost->int_store[0] + 4;
26254 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26255 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26257 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26258 if (mode == TFmode)
26261 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26263 cost = ix86_cost->int_load[2];
26265 cost = ix86_cost->int_store[2];
26266 return (cost * (((int) GET_MODE_SIZE (mode)
26267 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper for MEMORY_MOVE_COST; see inline_memory_move_cost.  */
26272 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26274 return inline_memory_move_cost (mode, regclass, in);
26278 /* Return the cost of moving data from a register in class CLASS1 to
26279 one in class CLASS2.
26281 It is not required that the cost always equal 2 when FROM is the same as TO;
26282 on some machines it is expensive to move between registers if they are not
26283 general registers. */
26286 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26287 enum reg_class class2)
26289 /* In case we require secondary memory, compute cost of the store followed
26290 by load. In order to avoid bad register allocation choices, we need
26291 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26293 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN == 2 asks inline_memory_move_cost for max(load, store).  */
26297 cost += inline_memory_move_cost (mode, class1, 2);
26298 cost += inline_memory_move_cost (mode, class2, 2);
26300 /* In case of copying from general_purpose_register we may emit multiple
26301 stores followed by single load causing memory size mismatch stall.
26302 Count this as arbitrarily high cost of 20. */
26303 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26306 /* In the case of FP/MMX moves, the registers actually overlap, and we
26307 have to switch modes in order to treat them differently. */
26308 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26309 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26315 /* Moves between SSE/MMX and integer unit are expensive. */
26316 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26317 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26319 /* ??? By keeping returned value relatively high, we limit the number
26320 of moves between integer and MMX/SSE registers for all targets.
26321 Additionally, high value prevents problem with x86_modes_tieable_p(),
26322 where integer modes in MMX/SSE registers are not tieable
26323 because of missing QImode and HImode moves to, from or between
26324 MMX/SSE registers. */
26325 return MAX (8, ix86_cost->mmxsse_to_integer);
26327 if (MAYBE_FLOAT_CLASS_P (class1))
26328 return ix86_cost->fp_move;
26329 if (MAYBE_SSE_CLASS_P (class1))
26330 return ix86_cost->sse_move;
26331 if (MAYBE_MMX_CLASS_P (class1))
26332 return ix86_cost->mmx_move;
26336 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* Implements the HARD_REGNO_MODE_OK target macro for i386.  */
26339 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26341 /* Flags and only flags can only hold CCmode values. */
26342 if (CC_REGNO_P (regno))
26343 return GET_MODE_CLASS (mode) == MODE_CC;
/* Non-flag registers never hold CC/RANDOM/PARTIAL_INT modes.  */
26344 if (GET_MODE_CLASS (mode) == MODE_CC
26345 || GET_MODE_CLASS (mode) == MODE_RANDOM
26346 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26348 if (FP_REGNO_P (regno))
26349 return VALID_FP_MODE_P (mode);
26350 if (SSE_REGNO_P (regno))
26352 /* We implement the move patterns for all vector modes into and
26353 out of SSE registers, even when no operation instructions
26354 are available. OImode move is available only when AVX is
26356 return ((TARGET_AVX && mode == OImode)
26357 || VALID_AVX256_REG_MODE (mode)
26358 || VALID_SSE_REG_MODE (mode)
26359 || VALID_SSE2_REG_MODE (mode)
26360 || VALID_MMX_REG_MODE (mode)
26361 || VALID_MMX_REG_MODE_3DNOW (mode));
26363 if (MMX_REGNO_P (regno))
26365 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26366 so if the register is available at all, then we can move data of
26367 the given mode into or out of it. */
26368 return (VALID_MMX_REG_MODE (mode)
26369 || VALID_MMX_REG_MODE_3DNOW (mode));
/* Remaining cases: general-purpose registers.  */
26372 if (mode == QImode)
26374 /* Take care for QImode values - they can be in non-QI regs,
26375 but then they do cause partial register stalls. */
26376 if (regno <= BX_REG || TARGET_64BIT)
26378 if (!TARGET_PARTIAL_REG_STALL)
/* During reload a QImode value may already live in a non-QI reg;
   allow it there so reload can finish.  */
26380 return reload_in_progress || reload_completed;
26382 /* We handle both integer and floats in the general purpose registers. */
26383 else if (VALID_INT_MODE_P (mode))
26385 else if (VALID_FP_MODE_P (mode))
26387 else if (VALID_DFP_MODE_P (mode))
26389 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26390 on to use that value in smaller contexts, this can easily force a
26391 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26392 supporting DImode, allow it. */
26393 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26399 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26400 tieable integer mode. */
26403 ix86_tieable_integer_mode_p (enum machine_mode mode)
/* HI/QI-sized modes tie only when partial-register stalls are not a
   concern (or on 64-bit where they are cheap).  */
26412 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
/* DImode (and wider via the elided cases) ties only on 64-bit.  */
26415 return TARGET_64BIT;
26422 /* Return true if MODE1 is accessible in a register that can hold MODE2
26423 without copying. That is, all register classes that can hold MODE2
26424 can also hold MODE1. */
/* Implements the MODES_TIEABLE_P target macro for i386.  */
26427 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26429 if (mode1 == mode2)
26432 if (ix86_tieable_integer_mode_p (mode1)
26433 && ix86_tieable_integer_mode_p (mode2))
26436 /* MODE2 being XFmode implies fp stack or general regs, which means we
26437 can tie any smaller floating point modes to it. Note that we do not
26438 tie this with TFmode. */
26439 if (mode2 == XFmode)
26440 return mode1 == SFmode || mode1 == DFmode;
26442 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26443 that we can tie it with SFmode. */
26444 if (mode2 == DFmode)
26445 return mode1 == SFmode;
26447 /* If MODE2 is only appropriate for an SSE register, then tie with
26448 any other mode acceptable to SSE registers. */
26449 if (GET_MODE_SIZE (mode2) == 16
26450 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26451 return (GET_MODE_SIZE (mode1) == 16
26452 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26454 /* If MODE2 is appropriate for an MMX register, then tie
26455 with any other mode acceptable to MMX registers. */
26456 if (GET_MODE_SIZE (mode2) == 8
26457 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26458 return (GET_MODE_SIZE (mode1) == 8
26459 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26464 /* Compute a (partial) cost for rtx X. Return true if the complete
26465 cost has been computed, and false if subexpressions should be
26466 scanned. In either case, *TOTAL contains the cost result. */
/* Implements the TARGET_RTX_COSTS hook.  Costs come from the active
   tuning table (ix86_cost) when optimizing for speed, or from
   ix86_size_cost when optimizing for size.  */
26469 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26471 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26472 enum machine_mode mode = GET_MODE (x);
26473 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* Constant cases: cheap immediates vs. ones needing extra insns.  */
26481 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26483 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
/* NOTE(review): "!GET_CODE (x) != LABEL_REF" below compares the logical
   negation (always 0 here, since rtx codes are nonzero) against the
   enum value LABEL_REF, so the subexpression is always true; the intent
   was almost certainly "GET_CODE (x) != LABEL_REF".  This matches a
   long-standing upstream bug that was later fixed — confirm against a
   current GCC tree before changing.  */
26485 else if (flag_pic && SYMBOLIC_CONST (x)
26487 || (!GET_CODE (x) != LABEL_REF
26488 && (GET_CODE (x) != SYMBOL_REF
26489 || !SYMBOL_REF_LOCAL_P (x)))))
26496 if (mode == VOIDmode)
/* FP constants loadable by fldz/fld1 etc. are nearly free.  */
26499 switch (standard_80387_constant_p (x))
26504 default: /* Other constants */
26509 /* Start with (MEM (SYMBOL_REF)), since that's where
26510 it'll probably end up. Add a penalty for size. */
26511 *total = (COSTS_N_INSNS (1)
26512 + (flag_pic != 0 && !TARGET_64BIT)
26513 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26519 /* The zero extensions is often completely free on x86_64, so make
26520 it as cheap as possible. */
26521 if (TARGET_64BIT && mode == DImode
26522 && GET_MODE (XEXP (x, 0)) == SImode)
26524 else if (TARGET_ZERO_EXTEND_WITH_AND)
26525 *total = cost->add;
26527 *total = cost->movzx;
26531 *total = cost->movsx;
/* Shift costs: small constant left shifts may be done with LEA.  */
26535 if (CONST_INT_P (XEXP (x, 1))
26536 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26538 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26541 *total = cost->add;
26544 if ((value == 2 || value == 3)
26545 && cost->lea <= cost->shift_const)
26547 *total = cost->lea;
/* 64-bit shifts on a 32-bit target need a multi-insn sequence.  */
26557 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26559 if (CONST_INT_P (XEXP (x, 1)))
26561 if (INTVAL (XEXP (x, 1)) > 32)
26562 *total = cost->shift_const + COSTS_N_INSNS (2);
26564 *total = cost->shift_const * 2;
26568 if (GET_CODE (XEXP (x, 1)) == AND)
26569 *total = cost->shift_var * 2;
26571 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26576 if (CONST_INT_P (XEXP (x, 1)))
26577 *total = cost->shift_const;
26579 *total = cost->shift_var;
/* Multiplication: FP cases first, then integer with widening detection.  */
26584 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26586 /* ??? SSE scalar cost should be used here. */
26587 *total = cost->fmul;
26590 else if (X87_FLOAT_MODE_P (mode))
26592 *total = cost->fmul;
26595 else if (FLOAT_MODE_P (mode))
26597 /* ??? SSE vector cost should be used here. */
26598 *total = cost->fmul;
26603 rtx op0 = XEXP (x, 0);
26604 rtx op1 = XEXP (x, 1);
/* nbits counts the set bits of a constant multiplier, approximating
   per-bit multiply cost.  */
26606 if (CONST_INT_P (XEXP (x, 1)))
26608 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26609 for (nbits = 0; value != 0; value &= value - 1)
26613 /* This is arbitrary. */
26616 /* Compute costs correctly for widening multiplication. */
26617 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26618 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26619 == GET_MODE_SIZE (mode))
26621 int is_mulwiden = 0;
26622 enum machine_mode inner_mode = GET_MODE (op0);
26624 if (GET_CODE (op0) == GET_CODE (op1))
26625 is_mulwiden = 1, op1 = XEXP (op1, 0);
26626 else if (CONST_INT_P (op1))
26628 if (GET_CODE (op0) == SIGN_EXTEND)
26629 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26632 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
/* For a recognized widening multiply, cost it in the narrower mode.  */
26636 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26639 *total = (cost->mult_init[MODE_INDEX (mode)]
26640 + nbits * cost->mult_bit
26641 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
/* Division.  */
26650 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26651 /* ??? SSE cost should be used here. */
26652 *total = cost->fdiv;
26653 else if (X87_FLOAT_MODE_P (mode))
26654 *total = cost->fdiv;
26655 else if (FLOAT_MODE_P (mode))
26656 /* ??? SSE vector cost should be used here. */
26657 *total = cost->fdiv;
26659 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize address-like shapes that a single LEA can compute.  */
26663 if (GET_MODE_CLASS (mode) == MODE_INT
26664 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26666 if (GET_CODE (XEXP (x, 0)) == PLUS
26667 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26668 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26669 && CONSTANT_P (XEXP (x, 1)))
26671 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26672 if (val == 2 || val == 4 || val == 8)
26674 *total = cost->lea;
26675 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26676 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26677 outer_code, speed);
26678 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26682 else if (GET_CODE (XEXP (x, 0)) == MULT
26683 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26685 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26686 if (val == 2 || val == 4 || val == 8)
26688 *total = cost->lea;
26689 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26690 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26694 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26696 *total = cost->lea;
26697 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26698 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26699 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
/* Addition/subtraction in FP modes.  */
26706 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26708 /* ??? SSE cost should be used here. */
26709 *total = cost->fadd;
26712 else if (X87_FLOAT_MODE_P (mode))
26714 *total = cost->fadd;
26717 else if (FLOAT_MODE_P (mode))
26719 /* ??? SSE vector cost should be used here. */
26720 *total = cost->fadd;
/* Logical ops on DImode need two SImode insns on 32-bit targets;
   the shift doubles subexpression cost when the operand mode differs.  */
26728 if (!TARGET_64BIT && mode == DImode)
26730 *total = (cost->add * 2
26731 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26732 << (GET_MODE (XEXP (x, 0)) != DImode))
26733 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26734 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* Negation.  */
26740 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26742 /* ??? SSE cost should be used here. */
26743 *total = cost->fchs;
26746 else if (X87_FLOAT_MODE_P (mode))
26748 *total = cost->fchs;
26751 else if (FLOAT_MODE_P (mode))
26753 /* ??? SSE vector cost should be used here. */
26754 *total = cost->fchs;
26760 if (!TARGET_64BIT && mode == DImode)
26761 *total = cost->add * 2;
26763 *total = cost->add;
/* COMPARE of a single extracted bit against zero maps to test[bwl].  */
26767 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26768 && XEXP (XEXP (x, 0), 1) == const1_rtx
26769 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26770 && XEXP (x, 1) == const0_rtx)
26772 /* This kind of construct is implemented using test[bwl].
26773 Treat it as if we had an AND. */
26774 *total = (cost->add
26775 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26776 + rtx_cost (const1_rtx, outer_code, speed));
26782 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS.  */
26787 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26788 /* ??? SSE cost should be used here. */
26789 *total = cost->fabs;
26790 else if (X87_FLOAT_MODE_P (mode))
26791 *total = cost->fabs;
26792 else if (FLOAT_MODE_P (mode))
26793 /* ??? SSE vector cost should be used here. */
26794 *total = cost->fabs;
/* SQRT.  */
26798 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26799 /* ??? SSE cost should be used here. */
26800 *total = cost->fsqrt;
26801 else if (X87_FLOAT_MODE_P (mode))
26802 *total = cost->fsqrt;
26803 else if (FLOAT_MODE_P (mode))
26804 /* ??? SSE vector cost should be used here. */
26805 *total = cost->fsqrt;
/* Thread-pointer UNSPEC is a single cheap move.  */
26809 if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique local labels for Mach-O stubs.  */
26820 static int current_machopic_label_num;
26822 /* Given a symbol name and its associated stub, write out the
26823 definition of the stub. */
/* Darwin (Mach-O) only: emits the lazy-binding stub, the binder
   fragment, and the lazy symbol pointer for SYMB.  32-bit only.  */
26826 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26828 unsigned int length;
26829 char *binder_name, *symbol_name, lazy_ptr_name[32];
26830 int label = ++current_machopic_label_num;
26832 /* For 64-bit we shouldn't get here. */
26833 gcc_assert (!TARGET_64BIT);
26835 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26836 symb = (*targetm.strip_name_encoding) (symb);
26838 length = strlen (stub);
26839 binder_name = XALLOCAVEC (char, length + 32);
26840 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26842 length = strlen (symb);
26843 symbol_name = XALLOCAVEC (char, length + 32);
26844 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26846 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the PIC or non-PIC stub section (condition line elided in
   this listing; presumably MACHOPIC_ATT_STUB / flag_pic — confirm).  */
26849 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26851 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26853 fprintf (file, "%s:\n", stub);
26854 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC variant: materialize PC in %eax, then jump through the lazy ptr.  */
26858 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26859 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26860 fprintf (file, "\tjmp\t*%%edx\n");
26863 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Binder fragment: pushes the lazy pointer and enters dyld.  */
26865 fprintf (file, "%s:\n", binder_name);
26869 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26870 fprintf (file, "\tpushl\t%%eax\n");
26873 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26875 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy symbol pointer, initially pointing at the binder.  */
26877 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26878 fprintf (file, "%s:\n", lazy_ptr_name);
26879 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26880 fprintf (file, "\t.long %s\n", binder_name);
/* TARGET_ASM_FILE_END hook for Darwin/x86: delegates to the generic
   Darwin end-of-file handling.  */
26884 darwin_x86_file_end (void)
26886 darwin_file_end ();
26889 #endif /* TARGET_MACHO */
26891 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: caller-saved GPRs first, then callee-saved
   GPRs, then x87/SSE/MMX in an order depending on TARGET_SSE_MATH.  */
26894 x86_order_regs_for_local_alloc (void)
26899 /* First allocate the local general purpose registers. */
26900 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26901 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26902 reg_alloc_order [pos++] = i;
26904 /* Global general purpose registers. */
26905 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26906 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26907 reg_alloc_order [pos++] = i;
26909 /* x87 registers come first in case we are doing FP math
/* ...using x87 (continuation of comment elided in this listing).  */
26911 if (!TARGET_SSE_MATH)
26912 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26913 reg_alloc_order [pos++] = i;
26915 /* SSE registers. */
26916 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26917 reg_alloc_order [pos++] = i;
26918 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26919 reg_alloc_order [pos++] = i;
26921 /* x87 registers. */
26922 if (TARGET_SSE_MATH)
26923 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26924 reg_alloc_order [pos++] = i;
26926 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26927 reg_alloc_order [pos++] = i;
26929 /* Initialize the rest of array as we do not allocate some registers
26931 while (pos < FIRST_PSEUDO_REGISTER)
26932 reg_alloc_order [pos++] = 0;
26935 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26936 struct attribute_spec.handler. */
/* Validates placement (function/method/field/type decls only), rejects
   use on 32-bit targets, and diagnoses ms_abi + sysv_abi conflicts.  */
26938 ix86_handle_abi_attribute (tree *node, tree name,
26939 tree args ATTRIBUTE_UNUSED,
26940 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26942 if (TREE_CODE (*node) != FUNCTION_TYPE
26943 && TREE_CODE (*node) != METHOD_TYPE
26944 && TREE_CODE (*node) != FIELD_DECL
26945 && TREE_CODE (*node) != TYPE_DECL)
26947 warning (OPT_Wattributes, "%qE attribute only applies to functions",
26949 *no_add_attrs = true;
/* The !TARGET_64BIT guard for this warning is elided in the listing.  */
26954 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
26956 *no_add_attrs = true;
26960 /* Can combine regparm with all attributes but fastcall. */
26961 if (is_attribute_p ("ms_abi", name))
26963 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26965 error ("ms_abi and sysv_abi attributes are not compatible");
26970 else if (is_attribute_p ("sysv_abi", name))
26972 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26974 error ("ms_abi and sysv_abi attributes are not compatible");
26983 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26984 struct attribute_spec.handler. */
/* Accepts the attribute only on record/union types and rejects mixing
   ms_struct with gcc_struct on the same type.  */
26986 ix86_handle_struct_attribute (tree *node, tree name,
26987 tree args ATTRIBUTE_UNUSED,
26988 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* For decls, find the underlying type to check; for TYPE_DECL use its
   TREE_TYPE.  */
26991 if (DECL_P (*node))
26993 if (TREE_CODE (*node) == TYPE_DECL)
26994 type = &TREE_TYPE (*node);
26999 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27000 || TREE_CODE (*type) == UNION_TYPE)))
27002 warning (OPT_Wattributes, "%qE attribute ignored",
27004 *no_add_attrs = true;
27007 else if ((is_attribute_p ("ms_struct", name)
27008 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27009 || ((is_attribute_p ("gcc_struct", name)
27010 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27012 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27014 *no_add_attrs = true;
/* TARGET_MS_BITFIELD_LAYOUT_P hook: use MS bitfield layout when the
   target default says so and "gcc_struct" does not override it, or when
   the type carries an explicit "ms_struct" attribute.  */
27021 ix86_ms_bitfield_layout_p (const_tree record_type)
27023 return (TARGET_MS_BITFIELD_LAYOUT &&
27024 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27025 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
27028 /* Returns an expression indicating where the this parameter is
27029 located on entry to the FUNCTION. */
/* Used by the thunk emitter: returns either a hard REG or a stack MEM
   for `this', accounting for ABI, regparm and fastcall.  AGGR is set
   when the return value is passed by hidden reference, which shifts
   `this' to the second slot.  */
27032 x86_this_parameter (tree function)
27034 tree type = TREE_TYPE (function);
27035 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27040 const int *parm_regs;
/* 64-bit: `this' is in the first (or second, if AGGR) integer
   parameter register of the function's ABI.  */
27042 if (ix86_function_type_abi (type) == MS_ABI)
27043 parm_regs = x86_64_ms_abi_int_parameter_registers;
27045 parm_regs = x86_64_int_parameter_registers;
27046 return gen_rtx_REG (DImode, parm_regs[aggr]);
27049 nregs = ix86_function_regparm (type, function);
27051 if (nregs > 0 && !stdarg_p (type))
27055 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27056 regno = aggr ? DX_REG : CX_REG;
/* Spilled case: `this' lives on the stack just above the return
   address.  */
27064 return gen_rtx_MEM (SImode,
27065 plus_constant (stack_pointer_rtx, 4));
27068 return gen_rtx_REG (SImode, regno);
27071 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27074 /* Determine whether x86_output_mi_thunk can succeed. */
/* TARGET_ASM_CAN_OUTPUT_MI_THUNK hook: 64-bit always works; 32-bit
   needs a scratch register free of regparm/PIC demands.  */
27077 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27078 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27079 HOST_WIDE_INT vcall_offset, const_tree function)
27081 /* 64-bit can handle anything. */
27085 /* For 32-bit, everything's fine if we have one free register. */
27086 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27089 /* Need a free register for vcall_offset. */
27093 /* Need a free register for GOT references. */
27094 if (flag_pic && !(*targetm.binds_local_p) (function))
27097 /* Otherwise ok. */
27101 /* Output the assembler code for a thunk function. THUNK_DECL is the
27102 declaration for the thunk function itself, FUNCTION is the decl for
27103 the target function. DELTA is an immediate constant offset to be
27104 added to THIS. If VCALL_OFFSET is nonzero, the word at
27105 *(*this + vcall_offset) should be added to THIS. */
/* TARGET_ASM_OUTPUT_MI_THUNK hook: emits raw assembly (no RTL) that
   adjusts `this' and tail-jumps to FUNCTION.  */
27108 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
27109 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27110 HOST_WIDE_INT vcall_offset, tree function)
27113 rtx this_param = x86_this_parameter (function);
27116 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27117 pull it in now and let DELTA benefit. */
27118 if (REG_P (this_param))
27119 this_reg = this_param;
27120 else if (vcall_offset)
27122 /* Put the this parameter into %eax. */
27123 xops[0] = this_param;
27124 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27125 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27128 this_reg = NULL_RTX;
27130 /* Adjust the this parameter by a fixed constant. */
27133 xops[0] = GEN_INT (delta);
27134 xops[1] = this_reg ? this_reg : this_param;
/* 64-bit: a delta outside the signed-32 immediate range must be
   loaded into a scratch (%r10) first.  */
27137 if (!x86_64_general_operand (xops[0], DImode))
27139 tmp = gen_rtx_REG (DImode, R10_REG);
27141 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27143 xops[1] = this_param;
27145 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27148 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27151 /* Adjust the this parameter by a value stored in the vtable. */
/* Choose a scratch: %r10 on 64-bit; on 32-bit %ecx, unless fastcall
   already uses it, then %eax.  */
27155 tmp = gen_rtx_REG (DImode, R10_REG);
27158 int tmp_regno = CX_REG;
27159 if (lookup_attribute ("fastcall",
27160 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27161 tmp_regno = AX_REG;
27162 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer (*this) into the scratch.  */
27165 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27167 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27169 /* Adjust the this parameter. */
27170 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27171 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27173 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27174 xops[0] = GEN_INT (vcall_offset);
27176 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27177 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27179 xops[1] = this_reg;
27180 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27183 /* If necessary, drop THIS back to its stack slot. */
27184 if (this_reg && this_reg != this_param)
27186 xops[0] = this_reg;
27187 xops[1] = this_param;
27188 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
/* Finally, emit the tail jump to FUNCTION, direct or via GOT/stub.  */
27191 xops[0] = XEXP (DECL_RTL (function), 0);
27194 if (!flag_pic || (*targetm.binds_local_p) (function))
27195 output_asm_insn ("jmp\t%P0", xops);
27196 /* All thunks should be in the same object as their target,
27197 and thus binds_local_p should be true. */
27198 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27199 gcc_unreachable ();
27202 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27203 tmp = gen_rtx_CONST (Pmode, tmp);
27204 tmp = gen_rtx_MEM (QImode, tmp);
27206 output_asm_insn ("jmp\t%A0", xops);
/* 32-bit paths below: direct jump, Darwin stub, or GOT-indirect.  */
27211 if (!flag_pic || (*targetm.binds_local_p) (function))
27212 output_asm_insn ("jmp\t%P0", xops);
27217 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27218 tmp = (gen_rtx_SYMBOL_REF
27220 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27221 tmp = gen_rtx_MEM (QImode, tmp);
27223 output_asm_insn ("jmp\t%0", xops);
27226 #endif /* TARGET_MACHO */
/* 32-bit PIC: set up the GOT base in %ecx and jump through the GOT.  */
27228 tmp = gen_rtx_REG (SImode, CX_REG);
27229 output_set_got (tmp, NULL_RTX);
27232 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27233 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: standard prologue plus i386-specific
   directives (.version, __fltused, Intel-syntax switch).  */
27239 x86_file_start (void)
27241 default_file_start ();
27243 darwin_file_start ();
27245 if (X86_FILE_START_VERSION_DIRECTIVE)
27246 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27247 if (X86_FILE_START_FLTUSED)
27248 fputs ("\t.global\t__fltused\n", asm_out_file);
27249 if (ix86_asm_dialect == ASM_INTEL)
27250 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: on 32-bit without -malign-double, cap the
   alignment of double/integer-class fields at 32 bits (the traditional
   i386 ABI layout).  */
27254 x86_field_alignment (tree field, int computed)
27256 enum machine_mode mode;
27257 tree type = TREE_TYPE (field);
/* 64-bit and -malign-double keep the natural alignment (early-out;
   the return for this branch is elided in the listing).  */
27259 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27261 mode = TYPE_MODE (strip_array_types (type));
27262 if (mode == DFmode || mode == DCmode
27263 || GET_MODE_CLASS (mode) == MODE_INT
27264 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27265 return MIN (32, computed);
27269 /* Output assembler code to FILE to increment profiler label # LABELNO
27270 for profiling a function entry. */
/* FUNCTION_PROFILER worker: emits the mcount call sequence; the exact
   form depends on 64-bit vs 32-bit and on -fpic.  */
27272 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* 64-bit path.  */
27276 #ifndef NO_PROFILE_COUNTERS
27277 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27280 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27281 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27283 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC path: counter address and mcount both go via the GOT.  */
27287 #ifndef NO_PROFILE_COUNTERS
27288 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27289 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27291 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC path.  */
27295 #ifndef NO_PROFILE_COUNTERS
27296 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27297 PROFILE_COUNT_REGISTER);
27299 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27303 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27304 /* We don't have exact information about the insn sizes, but we may assume
27305 quite safely that we are informed about all 1 byte insns and memory
27306 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound (in bytes) on the encoded size of
   INSN, used by ix86_avoid_jump_mispredicts.  */
27310 min_insn_size (rtx insn)
27314 if (!INSN_P (insn) || !active_insn_p (insn))
27317 /* Discard alignments we've emit and jump instructions. */
27318 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27319 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27321 if (JUMP_TABLE_DATA_P(insn))
27324 /* Important case - calls are always 5 bytes.
27325 It is common to have many calls in the row. */
/* (The CALL_P guard for this case is elided in the listing.)  */
27327 && symbolic_reference_mentioned_p (PATTERN (insn))
27328 && !SIBLING_CALL_P (insn))
27330 if (get_attr_length (insn) <= 1)
27333 /* For normal instructions we may rely on the sizes of addresses
27334 and the presence of symbol to require 4 bytes of encoding.
27335 This is not the case for jumps where references are PC relative. */
27336 if (!JUMP_P (insn))
27338 l = get_attr_length_address (insn);
27339 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27348 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* ...window; pad so no 16-byte window holds 4 jumps (continuation of
   the comment is elided in this listing).  */
27352 ix86_avoid_jump_mispredicts (void)
27354 rtx insn, start = get_insns ();
27355 int nbytes = 0, njumps = 0;
27358 /* Look for all minimal intervals of instructions containing 4 jumps.
27359 The intervals are bounded by START and INSN. NBYTES is the total
27360 size of instructions in the interval including INSN and not including
27361 START. When the NBYTES is smaller than 16 bytes, it is possible
27362 that the end of START and INSN ends up in the same 16byte page.
27364 The smallest offset in the page INSN can start is the case where START
27365 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
27366 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
27368 for (insn = start; insn; insn = NEXT_INSN (insn))
27372 if (LABEL_P (insn))
27374 int align = label_to_alignment (insn);
27375 int max_skip = label_to_max_skip (insn);
27379 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27380 already in the current 16 byte page, because otherwise
27381 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27382 bytes to reach 16 byte boundary. */
27384 || (align <= 3 && max_skip != (1 << align) - 1))
27387 fprintf (dump_file, "Label %i with max_skip %i\n",
27388 INSN_UID (insn), max_skip);
/* Shrink the window from the left until the alignment could not put
   START and INSN in the same 16-byte page.  NOTE(review): this
   window-advance loop is duplicated below; a shared helper would
   avoid the two copies drifting apart.  */
27391 while (nbytes + max_skip >= 16)
27393 start = NEXT_INSN (start);
27394 if ((JUMP_P (start)
27395 && GET_CODE (PATTERN (start)) != ADDR_VEC
27396 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27398 njumps--, isjump = 1;
27401 nbytes -= min_insn_size (start);
27407 min_size = min_insn_size (insn);
27408 nbytes += min_size;
27410 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27411 INSN_UID (insn), min_size);
/* Count real jumps (not dispatch tables) entering the window.  */
27413 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27414 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Second copy of the window-advance loop (see NOTE above).  */
27422 start = NEXT_INSN (start);
27423 if ((JUMP_P (start)
27424 && GET_CODE (PATTERN (start)) != ADDR_VEC
27425 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27427 njumps--, isjump = 1;
27430 nbytes -= min_insn_size (start);
27432 gcc_assert (njumps >= 0);
27434 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27435 INSN_UID (start), INSN_UID (insn), nbytes);
/* 3 earlier jumps + this jump in under 16 bytes: pad before INSN so it
   starts in the next 16-byte window.  */
27437 if (njumps == 3 && isjump && nbytes < 16)
27439 int padsize = 15 - nbytes + min_insn_size (insn);
27442 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27443 INSN_UID (insn), padsize);
27444 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27450 /* AMD Athlon works faster
27451 when RET is not destination of conditional jump or directly preceded
27452 by other jump instruction. We avoid the penalty by inserting NOP just
27453 before the RET instructions in such cases. */
27455 ix86_pad_returns (void)
/* Walk every predecessor edge of the exit block looking for RETs that
   need the longer encoding.  */
27460 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27462 basic_block bb = e->src;
27463 rtx ret = BB_END (bb);
27465 bool replace = false;
27467 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27468 || optimize_bb_for_size_p (bb))
27470 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27471 if (active_insn_p (prev) || LABEL_P (prev))
/* RET preceded directly by a label: replace if any predecessor
   reaches it by a jump (non-fallthru edge).  */
27473 if (prev && LABEL_P (prev))
27478 FOR_EACH_EDGE (e, ei, bb->preds)
27479 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27480 && !(e->flags & EDGE_FALLTHRU))
/* RET directly after a conditional jump also pays the penalty.  */
27485 prev = prev_active_insn (ret);
27487 && ((JUMP_P (prev) && any_condjump_p (prev))
27490 /* Empty functions get branch mispredict even when the jump destination
27491 is not visible to us. */
27492 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27497 emit_insn_before (gen_return_internal_long (), ret);
27503 /* Implement machine specific optimizations. We implement padding of returns
27504 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* TARGET_MACHINE_DEPENDENT_REORG hook; both passes are speed-only.  */
27508 if (optimize && optimize_function_for_speed_p (cfun))
27510 if (TARGET_PAD_RETURNS)
27511 ix86_pad_returns ();
27512 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27513 if (TARGET_FOUR_JUMP_LIMIT)
27514 ix86_avoid_jump_mispredicts ();
27519 /* Return nonzero when QImode register that must be represented via REX prefix
/* ...is mentioned (rest of comment elided).  Scans the cached operands
   of INSN for a register above %ebx, i.e. one whose byte form needs a
   REX prefix.  */
27522 x86_extended_QIreg_mentioned_p (rtx insn)
27525 extract_insn_cached (insn);
27526 for (i = 0; i < recog_data.n_operands; i++)
27527 if (REG_P (recog_data.operand[i])
27528 && REGNO (recog_data.operand[i]) > BX_REG)
27533 /* Return nonzero when P points to register encoded via REX prefix.
27534 Called via for_each_rtx. */
27536 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27538 unsigned int regno;
/* (The REG_P guard before this is elided in the listing.)  */
27541 regno = REGNO (*p);
27542 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27545 /* Return true when INSN mentions register that must be encoded using REX
/* ...prefix (rest of comment elided).  Walks the pattern (or the bare
   rtx if INSN is not an insn) with extended_reg_mentioned_1.  */
27548 x86_extended_reg_mentioned_p (rtx insn)
27550 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27551 extended_reg_mentioned_1, NULL);
27554 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27555 optabs would emit if we didn't have TFmode patterns. */
/* operands[0] = FP destination, operands[1] = unsigned integer source.
   Non-negative inputs convert directly; negative (i.e. high-bit-set)
   inputs are halved with the low bit folded in, converted, then
   doubled — the classic unsigned-to-float expansion.  */
27558 x86_emit_floatuns (rtx operands[2])
27560 rtx neglab, donelab, i0, i1, f0, in, out;
27561 enum machine_mode mode, inmode;
27563 inmode = GET_MODE (operands[1]);
27564 gcc_assert (inmode == SImode || inmode == DImode);
27567 in = force_reg (inmode, operands[1]);
27568 mode = GET_MODE (out);
27569 neglab = gen_label_rtx ();
27570 donelab = gen_label_rtx ();
27571 f0 = gen_reg_rtx (mode);
/* Fast path: value is non-negative when viewed as signed.  */
27573 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27575 expand_float (out, in, 0);
27577 emit_jump_insn (gen_jump (donelab));
27580 emit_label (neglab);
/* Slow path: (in >> 1) | (in & 1), convert, then double.  */
27582 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27584 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27586 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27588 expand_float (f0, i0, 0);
27590 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27592 emit_label (donelab);
27595 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27596    with all elements equal to VAR.  Return true if successful. */
/* NOTE(review): this listing is elided — the switch over MODE, its case
   labels, the `return true/false` statements and several braces are not
   visible; the fragments below are the bodies of individual cases.  */
27599 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27600 rtx target, rtx val)
27602 enum machine_mode hmode, smode, wsmode, wvmode;
      /* Direct VEC_DUPLICATE when the target supports a broadcast pattern.  */
27617 val = force_reg (GET_MODE_INNER (mode), val);
27618 x = gen_rtx_VEC_DUPLICATE (mode, val);
27619 emit_insn (gen_rtx_SET (VOIDmode, target, x));
      /* Presumably the V4HI/MMX case: duplicate a truncated SImode value
	 — TODO confirm against the elided case label.  */
27625 if (TARGET_SSE || TARGET_3DNOW_A)
27627 val = gen_lowpart (SImode, val);
27628 x = gen_rtx_TRUNCATE (HImode, val);
27629 x = gen_rtx_VEC_DUPLICATE (mode, x);
27630 emit_insn (gen_rtx_SET (VOIDmode, target, x));
      /* V8HImode broadcast via punpcklwd + pshufd.  */
27652 /* Extend HImode to SImode using a paradoxical SUBREG. */
27653 tmp1 = gen_reg_rtx (SImode);
27654 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27655 /* Insert the SImode value as low element of V4SImode vector. */
27656 tmp2 = gen_reg_rtx (V4SImode);
27657 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27658 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27659 CONST0_RTX (V4SImode),
27661 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27662 /* Cast the V4SImode vector back to a V8HImode vector. */
27663 tmp1 = gen_reg_rtx (V8HImode);
27664 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27665 /* Duplicate the low short through the whole low SImode word. */
27666 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27667 /* Cast the V8HImode vector back to a V4SImode vector. */
27668 tmp2 = gen_reg_rtx (V4SImode);
27669 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27670 /* Replicate the low element of the V4SImode vector. */
27671 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27672 /* Cast the V2SImode back to V8HImode, and store in target. */
27673 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
      /* V16QImode broadcast: same idea, but punpcklbw twice to spread
	 the byte through a full SImode word before pshufd.  */
27684 /* Extend QImode to SImode using a paradoxical SUBREG. */
27685 tmp1 = gen_reg_rtx (SImode);
27686 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27687 /* Insert the SImode value as low element of V4SImode vector. */
27688 tmp2 = gen_reg_rtx (V4SImode);
27689 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27690 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27691 CONST0_RTX (V4SImode),
27693 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27694 /* Cast the V4SImode vector back to a V16QImode vector. */
27695 tmp1 = gen_reg_rtx (V16QImode);
27696 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27697 /* Duplicate the low byte through the whole low SImode word. */
27698 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27699 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27700 /* Cast the V16QImode vector back to a V4SImode vector. */
27701 tmp2 = gen_reg_rtx (V4SImode);
27702 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27703 /* Replicate the low element of the V4SImode vector. */
27704 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27705 /* Cast the V2SImode back to V16QImode, and store in target. */
27706 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
      /* Generic widening path: pack two copies of VAL into the next wider
	 scalar mode, then broadcast in the corresponding wider vector mode.  */
27714 /* Replicate the value once into the next wider mode and recurse. */
27715 val = convert_modes (wsmode, smode, val, true);
27716 x = expand_simple_binop (wsmode, ASHIFT, val,
27717 GEN_INT (GET_MODE_BITSIZE (smode)),
27718 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27719 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27721 x = gen_reg_rtx (wvmode);
27722 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27723 gcc_unreachable ();
27724 emit_move_insn (target, gen_lowpart (mode, x));
      /* 256-bit case: broadcast into a half-width vector, then concatenate
	 two copies of it.  */
27747 rtx tmp = gen_reg_rtx (hmode);
27748 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27749 emit_insn (gen_rtx_SET (VOIDmode, target,
27750 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27759 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27760    whose ONE_VAR element is VAR, and other elements are zero.  Return true
/* NOTE(review): this listing is elided — the switch over MODE, its case
   labels and the `return` statements are not visible; the fragments below
   belong to separate cases of that switch.  */
27764 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27765 rtx target, rtx var, int one_var)
27767 enum machine_mode vsimode;
27770 bool use_vector_set = false;
      /* Decide, per mode and ISA level, whether a zero + vector-set
	 sequence is the cheapest expansion.  */
27775 /* For SSE4.1, we normally use vector set.  But if the second
27776    element is zero and inter-unit moves are OK, we use movq
27778 use_vector_set = (TARGET_64BIT
27780 && !(TARGET_INTER_UNIT_MOVES
27786 use_vector_set = TARGET_SSE4_1;
27789 use_vector_set = TARGET_SSE2;
27792 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27799 use_vector_set = TARGET_AVX;
27802 /* Use ix86_expand_vector_set in 64bit mode only. */
27803 use_vector_set = TARGET_AVX && TARGET_64BIT;
27809 if (use_vector_set)
      /* Zero the whole vector, then overwrite element ONE_VAR.  */
27811 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27812 var = force_reg (GET_MODE_INNER (mode), var);
27813 ix86_expand_vector_set (mmx_ok, target, var, one_var);
      /* Two-element vectors: VAR concatenated with a zero element.  */
27829 var = force_reg (GET_MODE_INNER (mode), var);
27830 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27831 emit_insn (gen_rtx_SET (VOIDmode, target, x));
      /* Four-element case: build {VAR,0,0,0} in a pseudo, then shuffle VAR
	 into position ONE_VAR.  A fresh pseudo is used when TARGET is a
	 hard register so the shuffle sources stay allocatable.  */
27836 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27837 new_target = gen_reg_rtx (mode);
27839 new_target = target;
27840 var = force_reg (GET_MODE_INNER (mode), var);
27841 x = gen_rtx_VEC_DUPLICATE (mode, var);
27842 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27843 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27846 /* We need to shuffle the value to the correct position, so
27847    create a new pseudo to store the intermediate result. */
27849 /* With SSE2, we can use the integer shuffle insns. */
27850 if (mode != V4SFmode && TARGET_SSE2)
27852 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27854 GEN_INT (one_var == 1 ? 0 : 1),
27855 GEN_INT (one_var == 2 ? 0 : 1),
27856 GEN_INT (one_var == 3 ? 0 : 1)));
27857 if (target != new_target)
27858 emit_move_insn (target, new_target);
27862 /* Otherwise convert the intermediate result to V4SFmode and
27863    use the SSE1 shuffle instructions. */
27864 if (mode != V4SFmode)
27866 tmp = gen_reg_rtx (V4SFmode);
27867 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27872 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27874 GEN_INT (one_var == 1 ? 0 : 1),
27875 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27876 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27878 if (mode != V4SFmode)
27879 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27880 else if (tmp != target)
27881 emit_move_insn (target, tmp);
27883 else if (target != new_target)
27884 emit_move_insn (target, new_target);
      /* Narrow-element (HI/QI) vectors: zero-extend VAR to SImode and
	 recurse on the SImode vector of the same total width.  */
27889 vsimode = V4SImode;
27895 vsimode = V2SImode;
27901 /* Zero extend the variable element to SImode and recurse. */
27902 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27904 x = gen_reg_rtx (vsimode);
27905 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27907 gcc_unreachable ();
27909 emit_move_insn (target, gen_lowpart (mode, x));
27917 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27918    consisting of the values in VALS.  It is known that all elements
27919    except ONE_VAR are constants.  Return true if successful. */
/* NOTE(review): elided listing — the switch over MODE, its case labels
   and the `return true/false` statements are not visible here.  */
27922 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27923 rtx target, rtx vals, int one_var)
27925 rtx var = XVECEXP (vals, 0, one_var);
27926 enum machine_mode wmode;
      /* CONST_VEC is VALS with the variable slot replaced by zero, so it
	 can be loaded from the constant pool and patched afterwards.  */
27929 const_vec = copy_rtx (vals);
27930 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27931 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27939 /* For the two element vectors, it's just as easy to use
27940    the general case. */
27944 /* Use ix86_expand_vector_set in 64bit mode only. */
27967 /* There's no way to set one QImode entry easily.  Combine
27968    the variable value with its adjacent constant value, and
27969    promote to an HImode set. */
27970 x = XVECEXP (vals, 0, one_var ^ 1);
      /* Branch below depends on whether ONE_VAR is the high or low byte
	 of its HImode pair (condition line elided).  */
27973 var = convert_modes (HImode, QImode, var, true);
27974 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27975 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27976 x = GEN_INT (INTVAL (x) & 0xff);
27980 var = convert_modes (HImode, QImode, var, true);
27981 x = gen_int_mode (INTVAL (x) << 8, HImode);
27983 if (x != const0_rtx)
27984 var = expand_simple_binop (HImode, IOR, var, x, var,
27985 1, OPTAB_LIB_WIDEN);
      /* Do the set in the HImode-element view of the vector.  */
27987 x = gen_reg_rtx (wmode);
27988 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27989 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27991 emit_move_insn (target, gen_lowpart (mode, x));
      /* Default: load the constant vector, then overwrite ONE_VAR.  */
27998 emit_move_insn (target, const_vec);
27999 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28003 /* A subroutine of ix86_expand_vector_init_general.  Use vector
28004    concatenate to handle the most general case: all values variable,
28005    and none identical. */
/* NOTE(review): elided listing — the switch on N, the CMODE/HMODE
   selection switches and several loop headers are not visible here.  */
28008 ix86_expand_vector_init_concat (enum machine_mode mode,
28009 rtx target, rtx *ops, int n)
28011 enum machine_mode cmode, hmode = VOIDmode;
28012 rtx first[8], second[4];
28052 gcc_unreachable ();
      /* N == 2: a single VEC_CONCAT of the two (register) operands.  */
28055 if (!register_operand (ops[1], cmode))
28056 ops[1] = force_reg (cmode, ops[1]);
28057 if (!register_operand (ops[0], cmode))
28058 ops[0] = force_reg (cmode, ops[0]);
28059 emit_insn (gen_rtx_SET (VOIDmode, target,
28060 gen_rtx_VEC_CONCAT (mode, ops[0],
28080 gcc_unreachable ();
28096 gcc_unreachable ();
      /* N >= 4: pair up operands into half-width vectors, then recurse on
	 the halves until a single concat remains.  */
28101 /* FIXME: We process inputs backward to help RA.  PR 36222. */
28104 for (; i > 0; i -= 2, j--)
28106 first[j] = gen_reg_rtx (cmode);
28107 v = gen_rtvec (2, ops[i - 1], ops[i]);
28108 ix86_expand_vector_init (false, first[j],
28109 gen_rtx_PARALLEL (cmode, v));
28115 gcc_assert (hmode != VOIDmode);
28116 for (i = j = 0; i < n; i += 2, j++)
28118 second[j] = gen_reg_rtx (hmode);
28119 ix86_expand_vector_init_concat (hmode, second [j],
28123 ix86_expand_vector_init_concat (mode, target, second, n);
28126 ix86_expand_vector_init_concat (mode, target, first, n);
28130 gcc_unreachable ();
28134 /* A subroutine of ix86_expand_vector_init_general.  Use vector
28135    interleave to handle the most general case: all values variable,
28136    and none identical. */
/* NOTE(review): elided listing — the switch over MODE (V8HI/V16QI case
   labels) and some loop/brace lines are not visible here.  */
28139 ix86_expand_vector_init_interleave (enum machine_mode mode,
28140 rtx target, rtx *ops, int n)
28142 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28145 rtx (*gen_load_even) (rtx, rtx, rtx);
28146 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28147 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
      /* V8HImode configuration.  */
28152 gen_load_even = gen_vec_setv8hi;
28153 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28154 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28155 inner_mode = HImode;
28156 first_imode = V4SImode;
28157 second_imode = V2DImode;
28158 third_imode = VOIDmode;
      /* V16QImode configuration.  */
28161 gen_load_even = gen_vec_setv16qi;
28162 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28163 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28164 inner_mode = QImode;
28165 first_imode = V8HImode;
28166 second_imode = V4SImode;
28167 third_imode = V2DImode;
28170 gcc_unreachable ();
      /* Load operand pairs (odd element via SImode move, even element via
	 vec_set) into N vectors, then merge them by interleaving lows.  */
28173 for (i = 0; i < n; i++)
28175 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
28176 op0 = gen_reg_rtx (SImode);
28177 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28179 /* Insert the SImode value as low element of V4SImode vector. */
28180 op1 = gen_reg_rtx (V4SImode);
28181 op0 = gen_rtx_VEC_MERGE (V4SImode,
28182 gen_rtx_VEC_DUPLICATE (V4SImode,
28184 CONST0_RTX (V4SImode),
28186 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28188 /* Cast the V4SImode vector back to a vector in orignal mode. */
28189 op0 = gen_reg_rtx (mode);
28190 emit_move_insn (op0, gen_lowpart (mode, op1));
28192 /* Load even elements into the second positon. */
28193 emit_insn ((*gen_load_even) (op0,
28194 force_reg (inner_mode,
28198 /* Cast vector to FIRST_IMODE vector. */
28199 ops[i] = gen_reg_rtx (first_imode);
28200 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28203 /* Interleave low FIRST_IMODE vectors. */
28204 for (i = j = 0; i < n; i += 2, j++)
28206 op0 = gen_reg_rtx (first_imode);
28207 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28209 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28210 ops[j] = gen_reg_rtx (second_imode);
28211 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28214 /* Interleave low SECOND_IMODE vectors. */
28215 switch (second_imode)
28218 for (i = j = 0; i < n / 2; i += 2, j++)
28220 op0 = gen_reg_rtx (second_imode);
28221 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28224 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28226 ops[j] = gen_reg_rtx (third_imode);
28227 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
      /* Fall through to the V2DI step with the narrowed operand set.  */
28229 second_imode = V2DImode;
28230 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28234 op0 = gen_reg_rtx (second_imode);
28235 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28238 /* Cast the SECOND_IMODE vector back to a vector on original
28240 emit_insn (gen_rtx_SET (VOIDmode, target,
28241 gen_lowpart (mode, op0)));
28245 gcc_unreachable ();
28249 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
28250    all values variable, and none identical. */
/* NOTE(review): elided listing — the switch over MODE, its case labels,
   `break`s and the word-building else-branch header are not visible.  */
28253 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28254 rtx target, rtx vals)
28256 rtx ops[32], op0, op1;
28257 enum machine_mode half_mode = VOIDmode;
28264 if (!mmx_ok && !TARGET_SSE)
      /* Wide-element modes: build by recursive concatenation.  */
28276 n = GET_MODE_NUNITS (mode);
28277 for (i = 0; i < n; i++)
28278 ops[i] = XVECEXP (vals, 0, i);
28279 ix86_expand_vector_init_concat (mode, target, ops, n);
      /* 256-bit QI/HI vectors: interleave each 128-bit half, then concat.  */
28283 half_mode = V16QImode;
28287 half_mode = V8HImode;
28291 n = GET_MODE_NUNITS (mode);
28292 for (i = 0; i < n; i++)
28293 ops[i] = XVECEXP (vals, 0, i);
28294 op0 = gen_reg_rtx (half_mode);
28295 op1 = gen_reg_rtx (half_mode);
28296 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28298 ix86_expand_vector_init_interleave (half_mode, op1,
28299 &ops [n >> 1], n >> 2);
28300 emit_insn (gen_rtx_SET (VOIDmode, target,
28301 gen_rtx_VEC_CONCAT (mode, op0, op1)));
      /* 128-bit QI/HI vectors need SSE4.1 plus direct GPR->SSE moves to
	 use the interleave strategy; otherwise fall through below.  */
28305 if (!TARGET_SSE4_1)
28313 /* Don't use ix86_expand_vector_init_interleave if we can't
28314    move from GPR to SSE register directly.  */
28315 if (!TARGET_INTER_UNIT_MOVES)
28318 n = GET_MODE_NUNITS (mode);
28319 for (i = 0; i < n; i++)
28320 ops[i] = XVECEXP (vals, 0, i);
28321 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28329 gcc_unreachable ();
      /* Fallback: assemble the vector one machine word at a time in GPRs,
	 shifting/or-ing elements together, then move the words over.  */
28333 int i, j, n_elts, n_words, n_elt_per_word;
28334 enum machine_mode inner_mode;
28335 rtx words[4], shift;
28337 inner_mode = GET_MODE_INNER (mode);
28338 n_elts = GET_MODE_NUNITS (mode);
28339 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28340 n_elt_per_word = n_elts / n_words;
28341 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28343 for (i = 0; i < n_words; ++i)
28345 rtx word = NULL_RTX;
28347 for (j = 0; j < n_elt_per_word; ++j)
      /* Elements are accumulated high-to-low so the final word has
	 element 0 in the least-significant position.  */
28349 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28350 elt = convert_modes (word_mode, inner_mode, elt, true);
28356 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28357 word, 1, OPTAB_LIB_WIDEN);
28358 word = expand_simple_binop (word_mode, IOR, word, elt,
28359 word, 1, OPTAB_LIB_WIDEN);
28367 emit_move_insn (target, gen_lowpart (mode, words[0]));
28368 else if (n_words == 2)
28370 rtx tmp = gen_reg_rtx (mode);
      /* Clobber TMP first so the two partial word stores are not seen
	 as uses of an uninitialized register.  */
28371 emit_clobber (tmp);
28372 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28373 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28374 emit_move_insn (target, tmp);
28376 else if (n_words == 4)
28378 rtx tmp = gen_reg_rtx (V4SImode);
28379 gcc_assert (word_mode == SImode);
28380 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28381 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28382 emit_move_insn (target, gen_lowpart (mode, tmp));
28385 gcc_unreachable ();
28389 /* Initialize vector TARGET via VALS.  Suppress the use of MMX
28390    instructions unless MMX_OK is true. */
/* NOTE(review): elided listing — some `return` statements and braces
   between the strategy attempts are not visible here.  */
28393 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28395 enum machine_mode mode = GET_MODE (target);
28396 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28397 int n_elts = GET_MODE_NUNITS (mode);
28398 int n_var = 0, one_var = -1;
28399 bool all_same = true, all_const_zero = true;
      /* Classify VALS: count non-constant elements, remember the last
	 such index, and track all-equal / all-zero properties.  */
28403 for (i = 0; i < n_elts; ++i)
28405 x = XVECEXP (vals, 0, i);
28406 if (!(CONST_INT_P (x)
28407 || GET_CODE (x) == CONST_DOUBLE
28408 || GET_CODE (x) == CONST_FIXED))
28409 n_var++, one_var = i;
28410 else if (x != CONST0_RTX (inner_mode))
28411 all_const_zero = false;
28412 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28416 /* Constants are best loaded from the constant pool. */
28419 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28423 /* If all values are identical, broadcast the value. */
28425 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28426 XVECEXP (vals, 0, 0)))
28429 /* Values where only one field is non-constant are best loaded from
28430    the pool and overwritten via move later. */
28434 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28435 XVECEXP (vals, 0, one_var),
28439 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
      /* Last resort: the fully general expansion.  */
28443 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET, choosing the best
   instruction sequence for the vector mode and enabled ISA extensions.
   Falls back to a store/patch/reload through a stack temporary when no
   register sequence applies.
   NOTE(review): elided listing — the switch over MODE, most case labels
   and several `break`/`return` lines are not visible here.  */
28447 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28449 enum machine_mode mode = GET_MODE (target);
28450 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28451 enum machine_mode half_mode;
28452 bool use_vec_merge = false;
      /* AVX 256-bit handling: tables of lo/hi half extract and insert
	 generators, indexed by [mode-row][high-half].  */
28454 static rtx (*gen_extract[6][2]) (rtx, rtx)
28456 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28457 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28458 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28459 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28460 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28461 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28463 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28465 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28466 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28467 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28468 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28469 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28470 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
      /* Two-element vector: extract the untouched element and rebuild the
	 vector as a VEC_CONCAT in the right order.  */
28480 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28481 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28483 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28485 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28486 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28492 use_vec_merge = TARGET_SSE4_1;
28500 /* For the two element vectors, we implement a VEC_CONCAT with
28501    the extraction of the other element. */
28503 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28504 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28507 op0 = val, op1 = tmp;
28509 op0 = tmp, op1 = val;
28511 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28512 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28517 use_vec_merge = TARGET_SSE4_1;
28524 use_vec_merge = true;
      /* V4SFmode, ELT == 1: unpack then shufps to splice VAL in.  */
28528 /* tmp = target = A B C D */
28529 tmp = copy_to_reg (target);
28530 /* target = A A B B */
28531 emit_insn (gen_sse_unpcklps (target, target, target));
28532 /* target = X A B B */
28533 ix86_expand_vector_set (false, target, val, 0);
28534 /* target = A X C D */
28535 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28536 GEN_INT (1), GEN_INT (0),
28537 GEN_INT (2+4), GEN_INT (3+4)));
      /* V4SFmode, ELT == 2.  */
28541 /* tmp = target = A B C D */
28542 tmp = copy_to_reg (target);
28543 /* tmp = X B C D */
28544 ix86_expand_vector_set (false, tmp, val, 0);
28545 /* target = A B X D */
28546 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28547 GEN_INT (0), GEN_INT (1),
28548 GEN_INT (0+4), GEN_INT (3+4)));
      /* V4SFmode, ELT == 3 (comment at 28545/28556 reads "A B X D" but the
	 shuffle selects the new value into the last slot).  */
28552 /* tmp = target = A B C D */
28553 tmp = copy_to_reg (target);
28554 /* tmp = X B C D */
28555 ix86_expand_vector_set (false, tmp, val, 0);
28556 /* target = A B X D */
28557 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28558 GEN_INT (0), GEN_INT (1),
28559 GEN_INT (2+4), GEN_INT (0+4)));
28563 gcc_unreachable ();
28568 use_vec_merge = TARGET_SSE4_1;
28572 /* Element 0 handled by vec_merge below. */
28575 use_vec_merge = true;
28581 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28582    store into element 0, then shuffle them back. */
28586 order[0] = GEN_INT (elt);
28587 order[1] = const1_rtx;
28588 order[2] = const2_rtx;
28589 order[3] = GEN_INT (3);
28590 order[elt] = const0_rtx;
28592 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28593 order[1], order[2], order[3]));
28595 ix86_expand_vector_set (false, target, val, 0);
      /* The same permutation is its own inverse, restoring order.  */
28597 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28598 order[1], order[2], order[3]));
28602 /* For SSE1, we have to reuse the V4SF code. */
28603 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28604 gen_lowpart (SFmode, val), elt);
28609 use_vec_merge = TARGET_SSE2;
28612 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28616 use_vec_merge = TARGET_SSE4_1;
      /* 256-bit modes: pick the half-vector mode...  */
28623 half_mode = V16QImode;
28629 half_mode = V8HImode;
28635 half_mode = V4SImode;
28641 half_mode = V2DImode;
28647 half_mode = V4SFmode;
28653 half_mode = V2DFmode;
      /* ...then extract the containing 128-bit half, set the element
	 there, and insert the half back.  */
28659 /* Compute offset. */
28663 gcc_assert (i <= 1);
28665 /* Extract the half. */
28666 tmp = gen_reg_rtx (half_mode);
28667 emit_insn ((*gen_extract[j][i]) (tmp, target));
28669 /* Put val in tmp at elt. */
28670 ix86_expand_vector_set (false, tmp, val, elt);
28673 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
      /* Shared VEC_MERGE emission for all use_vec_merge cases.  */
28682 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28683 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28684 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      /* Memory fallback: spill, patch the element in memory, reload.  */
28688 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28690 emit_move_insn (mem, target);
28692 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28693 emit_move_insn (tmp, val);
28695 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET, using VEC_SELECT
   where a pattern exists, mode-specific shuffles to bring the element to
   slot 0 otherwise, and a stack-temporary load as last resort.
   NOTE(review): elided listing — the switch over MODE, most case labels
   and `break`/`return` lines are not visible here.  */
28700 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28702 enum machine_mode mode = GET_MODE (vec);
28703 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28704 bool use_vec_extr = false;
28717 use_vec_extr = true;
28721 use_vec_extr = TARGET_SSE4_1;
      /* V4SFmode, ELT in the high part: shuffle desired element to
	 position 0, then extract from there.  */
28733 tmp = gen_reg_rtx (mode);
28734 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28735 GEN_INT (elt), GEN_INT (elt),
28736 GEN_INT (elt+4), GEN_INT (elt+4)));
28740 tmp = gen_reg_rtx (mode);
28741 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28745 gcc_unreachable ();
28748 use_vec_extr = true;
28753 use_vec_extr = TARGET_SSE4_1;
      /* V4SImode with SSE2: pshufd the element to slot 0 first.  */
28767 tmp = gen_reg_rtx (mode);
28768 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28769 GEN_INT (elt), GEN_INT (elt),
28770 GEN_INT (elt), GEN_INT (elt)));
28774 tmp = gen_reg_rtx (mode);
28775 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28779 gcc_unreachable ();
28782 use_vec_extr = true;
28787 /* For SSE1, we have to reuse the V4SF code. */
28788 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28789 gen_lowpart (V4SFmode, vec), elt);
28795 use_vec_extr = TARGET_SSE2;
28798 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28802 use_vec_extr = TARGET_SSE4_1;
28806 /* ??? Could extract the appropriate HImode element and shift. */
      /* Shared VEC_SELECT emission for all use_vec_extr cases.  */
28813 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28814 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28816 /* Let the rtl optimizers know about the zero extension performed. */
28817 if (inner_mode == QImode || inner_mode == HImode)
28819 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28820 target = gen_lowpart (SImode, target);
28823 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      /* Memory fallback: spill VEC and load the one element.  */
28827 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28829 emit_move_insn (mem, vec);
28831 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28832 emit_move_insn (target, tmp);
28836 /* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
28837    pattern to reduce; DEST is the destination; IN is the input vector. */
/* Reduce {a,b,c,d} to FN(FN(a,c), FN(b,d)) then fold the remaining pair:
   movhlps brings the high pair down, one FN combines halves, shufps
   broadcasts element 1, and a final FN yields the scalar-in-all-lanes
   result in DEST.  */
28840 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28842 rtx tmp1, tmp2, tmp3;
28844 tmp1 = gen_reg_rtx (V4SFmode);
28845 tmp2 = gen_reg_rtx (V4SFmode);
28846 tmp3 = gen_reg_rtx (V4SFmode);
28848 emit_insn (gen_sse_movhlps (tmp1, in, in));
28849 emit_insn (fn (tmp2, tmp1, in));
28851 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28852 GEN_INT (1), GEN_INT (1),
28853 GEN_INT (1+4), GEN_INT (1+4)));
28854 emit_insn (fn (dest, tmp2, tmp3));
28857 /* Target hook for scalar_mode_supported_p. */
/* Accept decimal float modes and TFmode in addition to the default set.
   NOTE(review): the return expressions for the first two branches are
   elided in this listing.  */
28859 ix86_scalar_mode_supported_p (enum machine_mode mode)
28861 if (DECIMAL_FLOAT_MODE_P (mode))
28863 else if (mode == TFmode)
28866 return default_scalar_mode_supported_p (mode);
28869 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when any enabled vector ISA (SSE, SSE2,
   AVX-256, MMX, 3DNow!) can hold it in registers.  The `return true`
   lines after each test are elided in this listing.  */
28871 ix86_vector_mode_supported_p (enum machine_mode mode)
28873 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28875 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28877 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28879 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28881 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28886 /* Target hook for c_mode_for_suffix. */
/* Map a literal-suffix character to a machine mode.
   NOTE(review): the entire function body (the suffix dispatch and
   returns) is elided in this listing.  */
28887 static enum machine_mode
28888 ix86_c_mode_for_suffix (char suffix)
28898 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28900    We do this in the new i386 backend to maintain source compatibility
28901    with the old cc0-based compiler. */
/* Implicitly add "flags" and "fpsr" to every asm's clobber list.
   NOTE(review): the third parameter, the initial CLOBBERS chaining and
   the return are elided in this listing.  */
28904 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28905 tree inputs ATTRIBUTE_UNUSED,
28908 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28910 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28915 /* Implements target vector targetm.asm.encode_section_info.  This
28916    is not used by netware. */
/* After the default encoding, mark static/external variables that live
   in the large data section so addressing code knows they may need a
   far (64-bit) address.  */
28918 static void ATTRIBUTE_UNUSED
28919 ix86_encode_section_info (tree decl, rtx rtl, int first)
28921 default_encode_section_info (decl, rtl, first);
28923 if (TREE_CODE (decl) == VAR_DECL
28924 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28925 && ix86_in_large_data_p (decl))
28926 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28929 /* Worker function for REVERSE_CONDITION. */
/* FP compare modes must use the maybe-unordered reversal so NaN operands
   keep the correct branch sense; integer CC modes use the plain one.  */
28932 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28934 return (mode != CCFPmode && mode != CCFPUmode
28935 ? reverse_condition (code)
28936 : reverse_condition_maybe_unordered (code));
28939 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Return the assembler template for an x87 reg/mem move, choosing a
   popping store (fstp) when operand 1 dies in this insn, ffreep when the
   destination is already %st(0), and a store+reload pair for XFmode
   memory destinations (no non-popping XFmode store exists).
   NOTE(review): the final register-to-register template and a fall-back
   return are elided in this listing.  */
28943 output_387_reg_move (rtx insn, rtx *operands)
28945 if (REG_P (operands[0]))
28947 if (REG_P (operands[1])
28948 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28950 if (REGNO (operands[0]) == FIRST_STACK_REG)
28951 return output_387_ffreep (operands, 0);
28952 return "fstp\t%y0";
28954 if (STACK_TOP_P (operands[0]))
28955 return "fld%Z1\t%y1";
28958 else if (MEM_P (operands[0]))
28960 gcc_assert (REG_P (operands[1]));
28961 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28962 return "fstp%Z0\t%y0";
28965 /* There is no non-popping store to memory for XFmode.
28966    So if we need one, follow the store with a load. */
28967 if (GET_MODE (operands[0]) == XFmode)
28968 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
28970 return "fst%Z0\t%y0";
28977 /* Output code to perform a conditional jump to LABEL, if C2 flag in
28978    FP status register is set. */
/* Read the FPU status word with fnstsw, then either transfer it to
   EFLAGS with sahf (when available/profitable) and branch on UNORDERED,
   or test bit 0x04 directly and branch on NE.  The branch is predicted
   mostly not-taken (10%).  */
28981 ix86_emit_fp_unordered_jump (rtx label)
28983 rtx reg = gen_reg_rtx (HImode);
28986 emit_insn (gen_x86_fnstsw_1 (reg));
28988 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
28990 emit_insn (gen_x86_sahf_1 (reg));
28992 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28993 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
28997 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28999 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29000 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29003 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29004 gen_rtx_LABEL_REF (VOIDmode, label),
29006 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29008 emit_jump_insn (temp);
29009 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29012 /* Output code to perform a log1p XFmode calculation. */
/* log1p(x) = log(1+x).  For |x| < 1 - sqrt(2)/2 use the accurate
   fyl2xp1 instruction (valid only for small |x|); otherwise add 1 and
   use fyl2x.  Both paths scale by ln(2) loaded via fldln2.  */
29014 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29016 rtx label1 = gen_label_rtx ();
29017 rtx label2 = gen_label_rtx ();
29019 rtx tmp = gen_reg_rtx (XFmode);
29020 rtx tmp2 = gen_reg_rtx (XFmode);
29023 emit_insn (gen_absxf2 (tmp, op1));
29024 test = gen_rtx_GE (VOIDmode, tmp,
29025 CONST_DOUBLE_FROM_REAL_VALUE (
      /* Threshold is 1 - sqrt(2)/2, fyl2xp1's documented accuracy bound.  */
29026 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29028 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29030 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29031 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29032 emit_jump (label2);
29034 emit_label (label1);
29035 emit_move_insn (tmp, CONST1_RTX (XFmode));
29036 emit_insn (gen_addxf3 (tmp, op1, tmp));
29037 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29038 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29040 emit_label (label2);
29043 /* Output code to perform a Newton-Rhapson approximation of a single precision
29044    floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* One Newton-Raphson refinement of the hardware rcpps/rcpss estimate:
   a/b ~= a * x0 * (2 - b*x0) where x0 = rcp(b).  MODE may be scalar
   SFmode or a single-float vector mode.  */
29046 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29048 rtx x0, x1, e0, e1, two;
29050 x0 = gen_reg_rtx (mode);
29051 e0 = gen_reg_rtx (mode);
29052 e1 = gen_reg_rtx (mode);
29053 x1 = gen_reg_rtx (mode);
29055 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29057 if (VECTOR_MODE_P (mode))
29058 two = ix86_build_const_vector (SFmode, true, two);
29060 two = force_reg (mode, two);
29062 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
29064 /* x0 = rcp(b) estimate */
29065 emit_insn (gen_rtx_SET (VOIDmode, x0,
29066 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
      /* e0 = b * x0 */
29069 emit_insn (gen_rtx_SET (VOIDmode, e0,
29070 gen_rtx_MULT (mode, x0, b)));
      /* e1 = 2.0 - e0 */
29072 emit_insn (gen_rtx_SET (VOIDmode, e1,
29073 gen_rtx_MINUS (mode, two, e0)));
      /* x1 = x0 * e1 (refined reciprocal) */
29075 emit_insn (gen_rtx_SET (VOIDmode, x1,
29076 gen_rtx_MULT (mode, x0, e1)));
      /* res = a * x1 */
29078 emit_insn (gen_rtx_SET (VOIDmode, res,
29079 gen_rtx_MULT (mode, a, x1)));
29082 /* Output code to perform a Newton-Rhapson approximation of a
29083    single precision floating point [reciprocal] square root. */
/* One Newton-Raphson refinement of the rsqrtps/rsqrtss estimate.
   The third parameter (recip flag, selecting sqrt vs rsqrt and whether
   the zero-input masking is applied) is elided in this listing.  */
29085 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29088 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29091 x0 = gen_reg_rtx (mode);
29092 e0 = gen_reg_rtx (mode);
29093 e1 = gen_reg_rtx (mode);
29094 e2 = gen_reg_rtx (mode);
29095 e3 = gen_reg_rtx (mode);
      /* Build the constants -3.0 and -0.5 (vector-splatted if needed).  */
29097 real_from_integer (&r, VOIDmode, -3, -1, 0);
29098 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29100 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29101 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29103 if (VECTOR_MODE_P (mode))
29105 mthree = ix86_build_const_vector (SFmode, true, mthree);
29106 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29109 /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29110    rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29112 /* x0 = rsqrt(a) estimate */
29113 emit_insn (gen_rtx_SET (VOIDmode, x0,
29114 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29117 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
29122 zero = gen_reg_rtx (mode);
29123 mask = gen_reg_rtx (mode);
29125 zero = force_reg (mode, CONST0_RTX(mode));
      /* mask = (a != 0) all-ones/all-zeros per lane; AND zeroes x0 where
	 a is zero so 0 * inf never occurs.  */
29126 emit_insn (gen_rtx_SET (VOIDmode, mask,
29127 gen_rtx_NE (mode, zero, a)));
29129 emit_insn (gen_rtx_SET (VOIDmode, x0,
29130 gen_rtx_AND (mode, x0, mask)));
      /* e0 = x0 * a;  e1 = e0 * x0  (= a * x0^2) */
29134 emit_insn (gen_rtx_SET (VOIDmode, e0,
29135 gen_rtx_MULT (mode, x0, a)));
29137 emit_insn (gen_rtx_SET (VOIDmode, e1,
29138 gen_rtx_MULT (mode, e0, x0)));
      /* e2 = e1 - 3.0 (added as e1 + (-3.0)) */
29141 mthree = force_reg (mode, mthree);
29142 emit_insn (gen_rtx_SET (VOIDmode, e2,
29143 gen_rtx_PLUS (mode, e1, mthree)));
29145 mhalf = force_reg (mode, mhalf);
      /* The elided recip test selects which factor gets the -0.5 scale.  */
29147 /* e3 = -.5 * x0 */
29148 emit_insn (gen_rtx_SET (VOIDmode, e3,
29149 gen_rtx_MULT (mode, x0, mhalf)));
29151 /* e3 = -.5 * e0 */
29152 emit_insn (gen_rtx_SET (VOIDmode, e3,
29153 gen_rtx_MULT (mode, e0, mhalf)));
29154 /* ret = e2 * e3 */
29155 emit_insn (gen_rtx_SET (VOIDmode, res,
29156 gen_rtx_MULT (mode, e2, e3)));
29159 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
/* Emit ".eh_frame" sections with the "@unwind" marker required by
   Binutils 2.15 on Solaris; delegate everything else to the default
   ELF implementation.  The guarding condition's first clause is elided
   in this listing.  */
29161 static void ATTRIBUTE_UNUSED
29162 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29165 /* With Binutils 2.15, the "@unwind" marker must be specified on
29166    every occurrence of the ".eh_frame" section, not just the first
29169 && strcmp (name, ".eh_frame") == 0)
29171 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29172 flags & SECTION_WRITE ? "aw" : "a");
29175 default_elf_asm_named_section (name, flags, decl);
29178 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* Itanium C++ ABI vendor mangling: __float128 -> "g", long double /
   __float80 -> "e"; NULL for anything not handled here.  The switch
   case labels and return statements are elided in this listing.  */
29180 static const char *
29181 ix86_mangle_type (const_tree type)
29183 type = TYPE_MAIN_VARIANT (type);
29185 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29186 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29189 switch (TYPE_MODE (type))
29192 /* __float128 is "g". */
29195 /* "long double" or __float80 is "e". */
29202 /* For 32-bit code we can save PIC register setup by using
29203    __stack_chk_fail_local hidden function instead of calling
29204    __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
29205    register, so it is better to call __stack_chk_fail directly. */
/* Target hook: choose the stack-protector failure call sequence.  */
29208 ix86_stack_protect_fail (void)
29210 return TARGET_64BIT
29211 ? default_external_stack_protect_fail ()
29212 : default_hidden_stack_protect_fail ();
29215 /* Select a format to encode pointers in exception handling data. CODE
29216 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29217 true if the symbol may be affected by dynamic relocations.
29219 ??? All x86 object file formats are capable of representing this.
29220 After all, the relocation needed is the same as for the call insn.
29221 Whether or not a particular assembler allows us to enter such, I
29222 guess we'll have to see. */
29224 asm_preferred_eh_data_format (int code, int global)
/* NOTE(review): the enclosing PIC/64-bit condition guarding this branch is
   elided in this listing; sdata8 is the pessimistic default for it. */
29228 int type = DW_EH_PE_sdata8;
29230 || ix86_cmodel == CM_SMALL_PIC
29231 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29232 type = DW_EH_PE_sdata4;
/* PIC: pc-relative, indirected through the GOT for global symbols. */
29233 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC small/medium code models can use 4-byte absolute encodings. */
29235 if (ix86_cmodel == CM_SMALL
29236 || (ix86_cmodel == CM_MEDIUM && code))
29237 return DW_EH_PE_udata4;
29238 return DW_EH_PE_absptr;
29241 /* Expand copysign from SIGN to the positive value ABS_VALUE
29242 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* RESULT = ABS_VALUE | (SIGN & signbit-mask).  ABS_VALUE must already be
   non-negative for the IOR to implement copysign. */
29245 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29247 enum machine_mode mode = GET_MODE (sign);
29248 rtx sgn = gen_reg_rtx (mode);
/* No caller-supplied mask: build the sign-bit mask ourselves. */
29249 if (mask == NULL_RTX)
29251 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29252 if (!VECTOR_MODE_P (mode))
29254 /* We need to generate a scalar mode mask in this case. */
29255 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29256 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29257 mask = gen_reg_rtx (mode);
29258 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* Caller-supplied masks are inverted (they mask the sign bit OUT), so
   complement before ANDing with SIGN.  (The else-branch brace is elided.) */
29262 mask = gen_rtx_NOT (mode, mask);
29263 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29264 gen_rtx_AND (mode, mask, sign)));
29265 emit_insn (gen_rtx_SET (VOIDmode, result,
29266 gen_rtx_IOR (mode, abs_value, sgn)));
29269 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29270 mask for masking out the sign-bit is stored in *SMASK, if that is
/* fabs via AND with the complement-of-signbit mask; returns the new reg. */
29273 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29275 enum machine_mode mode = GET_MODE (op0);
29278 xa = gen_reg_rtx (mode);
/* 'true' asks for the inverted mask (all bits except the sign bit set). */
29279 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29280 if (!VECTOR_MODE_P (mode))
29282 /* We need to generate a scalar mode mask in this case. */
29283 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29284 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29285 mask = gen_reg_rtx (mode);
29286 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29288 emit_insn (gen_rtx_SET (VOIDmode, xa,
29289 gen_rtx_AND (mode, op0, mask)));
29297 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29298 swapping the operands if SWAP_OPERANDS is true. The expanded
29299 code is a forward jump to a newly created label in case the
29300 comparison is true. The generated label rtx is returned. */
29302 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29303 bool swap_operands)
29314 label = gen_label_rtx ();
/* CCFPUmode: unordered FP compare, so UN* codes (e.g. UNLE) work with NaNs. */
29315 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29316 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29317 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29318 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29319 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29320 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29321 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Caller emits the label itself and is responsible for LABEL_NUSES. */
29322 JUMP_LABEL (tmp) = label;
29327 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29328 using comparison code CODE. Operands are swapped for the comparison if
29329 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29331 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29332 bool swap_operands)
29334 enum machine_mode mode = GET_MODE (op0);
29335 rtx mask = gen_reg_rtx (mode);
/* cmpsd for DFmode, cmpss for SFmode; mask is all-ones/all-zeros per lane. */
29339 if (mode == DFmode)
29345 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29346 gen_rtx_fmt_ee (code, mode, op0, op1)));
29348 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29349 gen_rtx_fmt_ee (code, mode, op0, op1)));
29354 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29355 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29357 ix86_gen_TWO52 (enum machine_mode mode)
29359 REAL_VALUE_TYPE TWO52r;
/* 2**52 for double, 2**23 for float: the magnitude above which every
   representable value is already an integer. */
29362 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29363 TWO52 = const_double_from_real_value (TWO52r, mode);
29364 TWO52 = force_reg (mode, TWO52);
29369 /* Expand SSE sequence for computing lround from OP1 storing
29372 ix86_expand_lround (rtx op0, rtx op1)
29374 /* C code for the stuff we're doing below:
29375 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29378 enum machine_mode mode = GET_MODE (op1);
29379 const struct real_format *fmt;
29380 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29383 /* load nextafter (0.5, 0.0) */
/* pred_half = 0.5 - 2**(-p-1): the largest value strictly below 0.5, so
   adding it never bumps a value just under .5 up to the next integer. */
29384 fmt = REAL_MODE_FORMAT (mode);
29385 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29386 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29388 /* adj = copysign (0.5, op1) */
29389 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29390 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29392 /* adj = op1 + adj */
29393 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29395 /* op0 = (imode)adj */
/* Final truncating fix conversion produces the rounded integer result. */
29396 expand_fix (op0, adj, 0);
29399 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
/* Expands lfloor (DO_FLOOR) or lceil (!DO_FLOOR): convert, then adjust by
   one if the round-to-nearest conversion landed on the wrong side. */
29402 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29404 /* C code for the stuff we're doing below (for do_floor):
29406 xi -= (double)xi > op1 ? 1 : 0;
29409 enum machine_mode fmode = GET_MODE (op1);
29410 enum machine_mode imode = GET_MODE (op0);
29411 rtx ireg, freg, label, tmp;
29413 /* reg = (long)op1 */
29414 ireg = gen_reg_rtx (imode);
29415 expand_fix (ireg, op1, 0);
29417 /* freg = (double)reg */
29418 freg = gen_reg_rtx (fmode);
29419 expand_float (freg, ireg, 0);
29421 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* UNLE with operands possibly swapped: skip the adjustment when the
   converted value is already on the correct side (NaN also skips). */
29422 label = ix86_expand_sse_compare_and_jump (UNLE,
29423 freg, op1, !do_floor);
29424 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29425 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29426 emit_move_insn (ireg, tmp);
29428 emit_label (label);
29429 LABEL_NUSES (label) = 1;
29431 emit_move_insn (op0, ireg);
29434 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29435 result in OPERAND0. */
29437 ix86_expand_rint (rtx operand0, rtx operand1)
29439 /* C code for the stuff we're doing below:
29440 xa = fabs (operand1);
29441 if (!isless (xa, 2**52))
29443 xa = xa + 2**52 - 2**52;
29444 return copysign (xa, operand1);
29446 enum machine_mode mode = GET_MODE (operand0);
29447 rtx res, xa, label, TWO52, mask;
29449 res = gen_reg_rtx (mode);
29450 emit_move_insn (res, operand1);
29452 /* xa = abs (operand1) */
29453 xa = ix86_expand_sse_fabs (res, &mask);
29455 /* if (!isless (xa, TWO52)) goto label; */
/* Values >= 2**52 (2**23 for float) are already integral; skip rounding.
   NaN input also takes the jump (UNLE is unordered-or-<=). */
29456 TWO52 = ix86_gen_TWO52 (mode);
29457 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding and subtracting 2**mantissa-bits forces rounding to integer in
   the current rounding mode. */
29459 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29460 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign so -0.0 and negatives round correctly. */
29462 ix86_sse_copysign_to_positive (res, xa, res, mask);
29464 emit_label (label);
29465 LABEL_NUSES (label) = 1;
29467 emit_move_insn (operand0, res);
29470 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* 32-bit-safe variant: avoids DImode fix conversions by using the
   TWO52 add/subtract trick plus a +-1 compensation step. */
29473 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29475 /* C code for the stuff we expand below.
29476 double xa = fabs (x), x2;
29477 if (!isless (xa, TWO52))
29479 xa = xa + TWO52 - TWO52;
29480 x2 = copysign (xa, x);
29489 enum machine_mode mode = GET_MODE (operand0);
29490 rtx xa, TWO52, tmp, label, one, res, mask;
29492 TWO52 = ix86_gen_TWO52 (mode);
29494 /* Temporary for holding the result, initialized to the input
29495 operand to ease control flow. */
29496 res = gen_reg_rtx (mode);
29497 emit_move_insn (res, operand1);
29499 /* xa = abs (operand1) */
29500 xa = ix86_expand_sse_fabs (res, &mask);
29502 /* if (!isless (xa, TWO52)) goto label; */
29503 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29505 /* xa = xa + TWO52 - TWO52; */
29506 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29507 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29509 /* xa = copysign (xa, operand1) */
29510 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29512 /* generate 1.0 or -1.0 */
/* floor subtracts 1 when rounded-to-nearest overshot; ceil adds 1 (done
   here by always subtracting, with -1.0 as the unit). */
29513 one = force_reg (mode,
29514 const_double_from_real_value (do_floor
29515 ? dconst1 : dconstm1, mode));
29517 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29518 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29519 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29520 gen_rtx_AND (mode, one, tmp)));
29521 /* We always need to subtract here to preserve signed zero. */
29522 tmp = expand_simple_binop (mode, MINUS,
29523 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29524 emit_move_insn (res, tmp);
29526 emit_label (label);
29527 LABEL_NUSES (label) = 1;
29529 emit_move_insn (operand0, res);
29532 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* Variant relying on integer fix/float conversions (cvttsd2siq needs
   64-bit targets for DFmode; SImode is used for SFmode). */
29535 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29537 /* C code for the stuff we expand below.
29538 double xa = fabs (x), x2;
29539 if (!isless (xa, TWO52))
29541 x2 = (double)(long)x;
29548 if (HONOR_SIGNED_ZEROS (mode))
29549 return copysign (x2, x);
29552 enum machine_mode mode = GET_MODE (operand0);
29553 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29555 TWO52 = ix86_gen_TWO52 (mode);
29557 /* Temporary for holding the result, initialized to the input
29558 operand to ease control flow. */
29559 res = gen_reg_rtx (mode);
29560 emit_move_insn (res, operand1);
29562 /* xa = abs (operand1) */
29563 xa = ix86_expand_sse_fabs (res, &mask);
29565 /* if (!isless (xa, TWO52)) goto label; */
29566 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29568 /* xa = (double)(long)x */
29569 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29570 expand_fix (xi, res, 0);
29571 expand_float (xa, xi, 0);
29574 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29576 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
/* Truncation may land on the wrong side for floor (negative x) or
   ceil (positive x); add/subtract 1 where the mask is set. */
29577 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29578 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29579 gen_rtx_AND (mode, one, tmp)));
29580 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29581 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29582 emit_move_insn (res, tmp);
29584 if (HONOR_SIGNED_ZEROS (mode))
29585 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29587 emit_label (label);
29588 LABEL_NUSES (label) = 1;
29590 emit_move_insn (operand0, res);
29593 /* Expand SSE sequence for computing round from OPERAND1 storing
29594 into OPERAND0. Sequence that works without relying on DImode truncation
29595 via cvttsd2siq that is only available on 64bit targets. */
29597 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29599 /* C code for the stuff we expand below.
29600 double xa = fabs (x), xa2, x2;
29601 if (!isless (xa, TWO52))
29603 Using the absolute value and copying back sign makes
29604 -0.0 -> -0.0 correct.
29605 xa2 = xa + TWO52 - TWO52;
29610 else if (dxa > 0.5)
29612 x2 = copysign (xa2, x);
29615 enum machine_mode mode = GET_MODE (operand0);
29616 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29618 TWO52 = ix86_gen_TWO52 (mode);
29620 /* Temporary for holding the result, initialized to the input
29621 operand to ease control flow. */
29622 res = gen_reg_rtx (mode);
29623 emit_move_insn (res, operand1);
29625 /* xa = abs (operand1) */
29626 xa = ix86_expand_sse_fabs (res, &mask);
29628 /* if (!isless (xa, TWO52)) goto label; */
29629 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29631 /* xa2 = xa + TWO52 - TWO52; */
29632 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29633 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29635 /* dxa = xa2 - xa; */
/* dxa is the (signed) rounding error of the nearest-integer step; its
   sign/magnitude tells whether we rounded up or down by more than .5. */
29636 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29638 /* generate 0.5, 1.0 and -0.5 */
29639 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29640 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29641 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
/* NOTE(review): the gen_reg_rtx result below is dead -- tmp is
   immediately overwritten by ix86_expand_sse_compare_mask.  Harmless,
   but the allocation could be removed. */
29645 tmp = gen_reg_rtx (mode);
29646 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29647 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29648 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29649 gen_rtx_AND (mode, one, tmp)));
29650 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29651 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
/* Implemented as -0.5 >= dxa via UNGE with swapped operand order. */
29652 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29653 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29654 gen_rtx_AND (mode, one, tmp)));
29655 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29657 /* res = copysign (xa2, operand1) */
29658 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29660 emit_label (label);
29661 LABEL_NUSES (label) = 1;
29663 emit_move_insn (operand0, res);
29666 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29669 ix86_expand_trunc (rtx operand0, rtx operand1)
29671 /* C code for SSE variant we expand below.
29672 double xa = fabs (x), x2;
29673 if (!isless (xa, TWO52))
29675 x2 = (double)(long)x;
29676 if (HONOR_SIGNED_ZEROS (mode))
29677 return copysign (x2, x);
29680 enum machine_mode mode = GET_MODE (operand0);
29681 rtx xa, xi, TWO52, label, res, mask;
29683 TWO52 = ix86_gen_TWO52 (mode);
29685 /* Temporary for holding the result, initialized to the input
29686 operand to ease control flow. */
29687 res = gen_reg_rtx (mode);
29688 emit_move_insn (res, operand1);
29690 /* xa = abs (operand1) */
29691 xa = ix86_expand_sse_fabs (res, &mask);
29693 /* if (!isless (xa, TWO52)) goto label; */
29694 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29696 /* x = (double)(long)x */
/* cvttsd2si(q)/cvttss2si truncate toward zero -- exactly trunc's
   semantics, so no compensation step is needed here. */
29697 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29698 expand_fix (xi, res, 0);
29699 expand_float (res, xi, 0);
29701 if (HONOR_SIGNED_ZEROS (mode))
29702 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29704 emit_label (label);
29705 LABEL_NUSES (label) = 1;
29707 emit_move_insn (operand0, res);
29710 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* 32-bit-safe trunc: uses the TWO52 trick plus compensation instead of
   the DImode truncating conversion unavailable on 32-bit targets. */
29713 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29715 enum machine_mode mode = GET_MODE (operand0);
29716 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29718 /* C code for SSE variant we expand below.
29719 double xa = fabs (x), x2;
29720 if (!isless (xa, TWO52))
29722 xa2 = xa + TWO52 - TWO52;
29726 x2 = copysign (xa2, x);
29730 TWO52 = ix86_gen_TWO52 (mode);
29732 /* Temporary for holding the result, initialized to the input
29733 operand to ease control flow. */
29734 res = gen_reg_rtx (mode);
29735 emit_move_insn (res, operand1);
29737 /* xa = abs (operand1) */
29738 xa = ix86_expand_sse_fabs (res, &smask);
29740 /* if (!isless (xa, TWO52)) goto label; */
29741 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29743 /* res = xa + TWO52 - TWO52; */
29744 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29745 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29746 emit_move_insn (res, tmp);
29749 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29751 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
/* Round-to-nearest may have rounded up past the true truncation; back
   off by one where the mask is set. */
29752 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29753 emit_insn (gen_rtx_SET (VOIDmode, mask,
29754 gen_rtx_AND (mode, mask, one)));
29755 tmp = expand_simple_binop (mode, MINUS,
29756 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29757 emit_move_insn (res, tmp);
29759 /* res = copysign (res, operand1) */
29760 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29762 emit_label (label);
29763 LABEL_NUSES (label) = 1;
29765 emit_move_insn (operand0, res);
29768 /* Expand SSE sequence for computing round from OPERAND1 storing
29771 ix86_expand_round (rtx operand0, rtx operand1)
29773 /* C code for the stuff we're doing below:
29774 double xa = fabs (x);
29775 if (!isless (xa, TWO52))
29777 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29778 return copysign (xa, x);
29780 enum machine_mode mode = GET_MODE (operand0);
29781 rtx res, TWO52, xa, label, xi, half, mask;
29782 const struct real_format *fmt;
29783 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29785 /* Temporary for holding the result, initialized to the input
29786 operand to ease control flow. */
29787 res = gen_reg_rtx (mode);
29788 emit_move_insn (res, operand1);
29790 TWO52 = ix86_gen_TWO52 (mode);
29791 xa = ix86_expand_sse_fabs (res, &mask);
29792 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29794 /* load nextafter (0.5, 0.0) */
/* As in ix86_expand_lround: 0.5 - 2**(-p-1) avoids spuriously rounding
   values just below .5 up to the next integer. */
29795 fmt = REAL_MODE_FORMAT (mode);
29796 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29797 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29799 /* xa = xa + 0.5 */
29800 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29801 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29803 /* xa = (double)(int64_t)xa */
29804 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29805 expand_fix (xi, xa, 0);
29806 expand_float (xa, xi, 0);
29808 /* res = copysign (xa, operand1) */
29809 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29811 emit_label (label);
29812 LABEL_NUSES (label) = 1;
29814 emit_move_insn (operand0, res);
29818 /* Validate whether a SSE5 instruction is valid or not.
29819 OPERANDS is the array of operands.
29820 NUM is the number of operands.
29821 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29822 NUM_MEMORY is the maximum number of memory operands to accept.
29823 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
29826 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29827 bool uses_oc0, int num_memory, bool commutative)
29833 /* Count the number of memory arguments */
29836 for (i = 0; i < num; i++)
29838 enum machine_mode mode = GET_MODE (operands[i]);
29839 if (register_operand (operands[i], mode))
29842 else if (memory_operand (operands[i], mode))
29844 mem_mask |= (1 << i);
29850 rtx pattern = PATTERN (insn);
29852 /* allow 0 for pcmov */
29853 if (GET_CODE (pattern) != SET
29854 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29856 || operands[i] != CONST0_RTX (mode))
29861 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29862 a memory operation. */
29863 if (num_memory < 0)
29865 num_memory = -num_memory;
29866 if ((mem_mask & (1 << (num-1))) != 0)
29868 mem_mask &= ~(1 << (num-1));
29873 /* If there were no memory operations, allow the insn */
29877 /* Do not allow the destination register to be a memory operand. */
29878 else if (mem_mask & (1 << 0))
29881 /* If there are too many memory operations, disallow the instruction. While
29882 the hardware only allows 1 memory reference, before register allocation
29883 for some insns, we allow two memory operations sometimes in order to allow
29884 code like the following to be optimized:
29886 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29888 or similar cases that are vectorized into using the fmaddss
29890 else if (mem_count > num_memory)
29893 /* Don't allow more than one memory operation if not optimizing. */
29894 else if (mem_count > 1 && !optimize)
29897 else if (num == 4 && mem_count == 1)
29899 /* formats (destination is the first argument), example fmaddss:
29900 xmm1, xmm1, xmm2, xmm3/mem
29901 xmm1, xmm1, xmm2/mem, xmm3
29902 xmm1, xmm2, xmm3/mem, xmm1
29903 xmm1, xmm2/mem, xmm3, xmm1 */
29905 return ((mem_mask == (1 << 1))
29906 || (mem_mask == (1 << 2))
29907 || (mem_mask == (1 << 3)));
29909 /* format, example pmacsdd:
29910 xmm1, xmm2, xmm3/mem, xmm1 */
29912 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29914 return (mem_mask == (1 << 2));
29917 else if (num == 4 && num_memory == 2)
29919 /* If there are two memory operations, we can load one of the memory ops
29920 into the destination register. This is for optimizing the
29921 multiply/add ops, which the combiner has optimized both the multiply
29922 and the add insns to have a memory operation. We have to be careful
29923 that the destination doesn't overlap with the inputs. */
29924 rtx op0 = operands[0];
29926 if (reg_mentioned_p (op0, operands[1])
29927 || reg_mentioned_p (op0, operands[2])
29928 || reg_mentioned_p (op0, operands[3]))
29931 /* formats (destination is the first argument), example fmaddss:
29932 xmm1, xmm1, xmm2, xmm3/mem
29933 xmm1, xmm1, xmm2/mem, xmm3
29934 xmm1, xmm2, xmm3/mem, xmm1
29935 xmm1, xmm2/mem, xmm3, xmm1
29937 For the oc0 case, we will load either operands[1] or operands[3] into
29938 operands[0], so any combination of 2 memory operands is ok. */
29942 /* format, example pmacsdd:
29943 xmm1, xmm2, xmm3/mem, xmm1
29945 For the integer multiply/add instructions be more restrictive and
29946 require operands[2] and operands[3] to be the memory operands. */
/* Fixed: the second disjunct previously lacked "mem_mask ==", making it a
   nonzero constant and the whole test unconditionally true, so any
   two-memory operand combination was accepted for the commutative case. */
29948 return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
29950 return (mem_mask == ((1 << 2) | (1 << 3)));
29953 else if (num == 3 && num_memory == 1)
29955 /* formats, example protb:
29956 xmm1, xmm2, xmm3/mem
29957 xmm1, xmm2/mem, xmm3 */
29959 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29961 /* format, example comeq:
29962 xmm1, xmm2, xmm3/mem */
29964 return (mem_mask == (1 << 2));
/* All valid operand shapes were handled above. */
29968 gcc_unreachable ();
29974 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
29975 hardware will allow by using the destination register to load one of the
29976 memory operations. Presently this is used by the multiply/add routines to
29977 allow 2 memory references. */
29980 ix86_expand_sse5_multiple_memory (rtx operands[],
29982 enum machine_mode mode)
29984 rtx op0 = operands[0];
/* Sanity checks: op0 must be a register that does not overlap any input,
   otherwise the load below would clobber a source operand. */
29986 || memory_operand (op0, mode)
29987 || reg_mentioned_p (op0, operands[1])
29988 || reg_mentioned_p (op0, operands[2])
29989 || reg_mentioned_p (op0, operands[3]))
29990 gcc_unreachable ();
29992 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29993 the destination register. */
29994 if (memory_operand (operands[1], mode))
29996 emit_move_insn (op0, operands[1]);
29999 else if (memory_operand (operands[3], mode))
30001 emit_move_insn (op0, operands[3]);
/* Caller guaranteed one of operands[1]/operands[3] is a memory operand. */
30005 gcc_unreachable ();
30011 /* Table of valid machine attributes. */
/* Terminated by the all-NULL sentinel entry; scanned by the attribute
   machinery via TARGET_ATTRIBUTE_TABLE (defined further below). */
30012 static const struct attribute_spec ix86_attribute_table[] =
30014 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30015 /* Stdcall attribute says callee is responsible for popping arguments
30016 if they are not variable. */
30017 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30018 /* Fastcall attribute says callee is responsible for popping arguments
30019 if they are not variable. */
30020 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30021 /* Cdecl attribute says the callee is a normal C declaration */
30022 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30023 /* Regparm attribute specifies how many integer arguments are to be
30024 passed in registers. */
30025 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30026 /* Sseregparm attribute says we are using x86_64 calling conventions
30027 for FP arguments. */
30028 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30029 /* force_align_arg_pointer says this function realigns the stack at entry. */
30030 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30031 false, true, true, ix86_handle_cconv_attribute },
30032 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30033 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30034 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30035 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30037 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30038 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30039 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30040 SUBTARGET_ATTRIBUTE_TABLE,
30042 /* ms_abi and sysv_abi calling convention function attributes. */
30043 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30044 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30046 { NULL, 0, 0, false, false, false, NULL }
30049 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* Returns the cost adjustment for the vectorizer's runtime alignment test;
   the taken-branch case (elided here) yields a negative credit. */
30051 x86_builtin_vectorization_cost (bool runtime_test)
30053 /* If the branch of the runtime test is taken - i.e. - the vectorized
30054 version is skipped - this incurs a misprediction cost (because the
30055 vectorized version is expected to be the fall-through). So we subtract
30056 the latency of a mispredicted branch from the costs that are incurred
30057 when the vectorized version is executed.
30059 TODO: The values in individual target tables have to be tuned or new
30060 fields may be needed. For eg. on K8, the default branch path is the
30061 not-taken path. If the taken path is predicted correctly, the minimum
30062 penalty of going down the taken-path is 1 cycle. If the taken-path is
30063 not predicted correctly, then the minimum penalty is 10 cycles. */
30067 return (-(ix86_cost->cond_taken_branch_cost));
30073 /* This function returns the calling abi specific va_list type node.
30074 It returns the FNDECL specific va_list type. */
30077 ix86_fn_abi_va_list (tree fndecl)
/* NOTE(review): the condition guarding this early return (presumably the
   non-64-bit case) is elided in this listing. */
30080 return va_list_type_node;
30081 gcc_assert (fndecl != NULL_TREE);
/* 64-bit: choose between the MS and System V va_list builtin types. */
30083 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
30084 return ms_va_list_type_node;
30086 return sysv_va_list_type_node;
30089 /* Returns the canonical va_list type specified by TYPE. If there
30090 is no valid TYPE provided, it return NULL_TREE. */
30093 ix86_canonical_va_list_type (tree type)
30097 /* Resolve references and pointers to va_list type. */
30098 if (INDIRECT_REF_P (type))
30099 type = TREE_TYPE (type);
30100 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
30101 type = TREE_TYPE (type);
/* NOTE(review): the same unwrap-and-compare sequence is repeated below for
   va_list_type_node, sysv_va_list_type_node and ms_va_list_type_node --
   a candidate for extraction into a helper. */
30105 wtype = va_list_type_node;
30106 gcc_assert (wtype != NULL_TREE);
30108 if (TREE_CODE (wtype) == ARRAY_TYPE)
30110 /* If va_list is an array type, the argument may have decayed
30111 to a pointer type, e.g. by being passed to another function.
30112 In that case, unwrap both types so that we can compare the
30113 underlying records. */
30114 if (TREE_CODE (htype) == ARRAY_TYPE
30115 || POINTER_TYPE_P (htype))
30117 wtype = TREE_TYPE (wtype);
30118 htype = TREE_TYPE (htype);
30121 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30122 return va_list_type_node;
30123 wtype = sysv_va_list_type_node;
30124 gcc_assert (wtype != NULL_TREE);
30126 if (TREE_CODE (wtype) == ARRAY_TYPE)
30128 /* If va_list is an array type, the argument may have decayed
30129 to a pointer type, e.g. by being passed to another function.
30130 In that case, unwrap both types so that we can compare the
30131 underlying records. */
30132 if (TREE_CODE (htype) == ARRAY_TYPE
30133 || POINTER_TYPE_P (htype))
30135 wtype = TREE_TYPE (wtype);
30136 htype = TREE_TYPE (htype);
30139 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30140 return sysv_va_list_type_node;
30141 wtype = ms_va_list_type_node;
30142 gcc_assert (wtype != NULL_TREE);
30144 if (TREE_CODE (wtype) == ARRAY_TYPE)
30146 /* If va_list is an array type, the argument may have decayed
30147 to a pointer type, e.g. by being passed to another function.
30148 In that case, unwrap both types so that we can compare the
30149 underlying records. */
30150 if (TREE_CODE (htype) == ARRAY_TYPE
30151 || POINTER_TYPE_P (htype))
30153 wtype = TREE_TYPE (wtype);
30154 htype = TREE_TYPE (htype);
30157 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30158 return ms_va_list_type_node;
/* Nothing matched: defer to the target-independent canonicalization. */
30161 return std_canonical_va_list_type (type);
30164 /* Iterate through the target-specific builtin types for va_list.
30165 IDX denotes the iterator, *PTREE is set to the result type of
30166 the va_list builtin, and *PNAME to its internal type.
30167 Returns zero if there is no element for this index, otherwise
30168 IDX should be increased upon the next call.
30169 Note, do not iterate a base builtin's name like __builtin_va_list.
30170 Used from c_common_nodes_and_builtins. */
30173 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* Index 0 -> MS va_list, index 1 -> SysV va_list; the switch/case framing
   and return statements are elided in this listing. */
30179 *ptree = ms_va_list_type_node;
30180 *pname = "__builtin_ms_va_list";
30183 *ptree = sysv_va_list_type_node;
30184 *pname = "__builtin_sysv_va_list";
30192 /* Initialize the GCC target structure. */
30193 #undef TARGET_RETURN_IN_MEMORY
30194 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30196 #undef TARGET_LEGITIMIZE_ADDRESS
30197 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
30199 #undef TARGET_ATTRIBUTE_TABLE
30200 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30201 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30202 # undef TARGET_MERGE_DECL_ATTRIBUTES
30203 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30206 #undef TARGET_COMP_TYPE_ATTRIBUTES
30207 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
30209 #undef TARGET_INIT_BUILTINS
30210 #define TARGET_INIT_BUILTINS ix86_init_builtins
30211 #undef TARGET_EXPAND_BUILTIN
30212 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30214 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30215 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30216 ix86_builtin_vectorized_function
30218 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30219 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30221 #undef TARGET_BUILTIN_RECIPROCAL
30222 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30224 #undef TARGET_ASM_FUNCTION_EPILOGUE
30225 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30227 #undef TARGET_ENCODE_SECTION_INFO
30228 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30229 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30231 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30234 #undef TARGET_ASM_OPEN_PAREN
30235 #define TARGET_ASM_OPEN_PAREN ""
30236 #undef TARGET_ASM_CLOSE_PAREN
30237 #define TARGET_ASM_CLOSE_PAREN ""
30239 #undef TARGET_ASM_ALIGNED_HI_OP
30240 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30241 #undef TARGET_ASM_ALIGNED_SI_OP
30242 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30244 #undef TARGET_ASM_ALIGNED_DI_OP
30245 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
30248 #undef TARGET_ASM_UNALIGNED_HI_OP
30249 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30250 #undef TARGET_ASM_UNALIGNED_SI_OP
30251 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30252 #undef TARGET_ASM_UNALIGNED_DI_OP
30253 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
30255 #undef TARGET_SCHED_ADJUST_COST
30256 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30257 #undef TARGET_SCHED_ISSUE_RATE
30258 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30259 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30260 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30261 ia32_multipass_dfa_lookahead
30263 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30264 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
/* Thread-local storage is supported on this target.  */
30267 #undef TARGET_HAVE_TLS
30268 #define TARGET_HAVE_TLS true
/* Constant-pool handling: which constants must not be forced into
   memory, and whether constants are emitted in object blocks.  */
30270 #undef TARGET_CANNOT_FORCE_CONST_MEM
30271 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30272 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30273 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
/* Undo PIC/TLS address legitimization for debug/dwarf purposes.  */
30275 #undef TARGET_DELEGITIMIZE_ADDRESS
30276 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
/* MS-compatible bitfield layout predicate.  */
30278 #undef TARGET_MS_BITFIELD_LAYOUT_P
30279 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* Symbol locality: NOTE(review): the darwin_binds_local_p define is
   presumably inside an #if TARGET_MACHO block whose guard and #endif
   are not visible in this extract; the PE variant below overrides it
   when dllimport attributes are in use.  */
30282 #undef TARGET_BINDS_LOCAL_P
30283 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30285 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30286 #undef TARGET_BINDS_LOCAL_P
30287 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
/* Direct assembly output of C++ "MI thunks" (this-adjusting stubs).  */
30290 #undef TARGET_ASM_OUTPUT_MI_THUNK
30291 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30292 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30293 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
/* Boilerplate emitted at the start of the assembly file.  */
30295 #undef TARGET_ASM_FILE_START
30296 #define TARGET_ASM_FILE_START x86_file_start
/* Default target flag bits.  NOTE(review): the first operand line of
   this OR expression (original line 30300, likely "(TARGET_DEFAULT")
   is not visible in this extract.  */
30298 #undef TARGET_DEFAULT_TARGET_FLAGS
30299 #define TARGET_DEFAULT_TARGET_FLAGS \
30301 | TARGET_SUBTARGET_DEFAULT \
30302 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
/* Command-line option processing.  */
30304 #undef TARGET_HANDLE_OPTION
30305 #define TARGET_HANDLE_OPTION ix86_handle_option
/* RTX and address cost estimation for the optimizers.  */
30307 #undef TARGET_RTX_COSTS
30308 #define TARGET_RTX_COSTS ix86_rtx_costs
30309 #undef TARGET_ADDRESS_COST
30310 #define TARGET_ADDRESS_COST ix86_address_cost
/* Condition-code register description and CC mode compatibility.  */
30312 #undef TARGET_FIXED_CONDITION_CODE_REGS
30313 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30314 #undef TARGET_CC_MODES_COMPATIBLE
30315 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
/* Machine-dependent reorg pass run late in compilation.  */
30317 #undef TARGET_MACHINE_DEPENDENT_REORG
30318 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
/* Frame value saved by __builtin_setjmp.  */
30320 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30321 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
/* Variadic-argument (va_list) machinery.  */
30323 #undef TARGET_BUILD_BUILTIN_VA_LIST
30324 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30326 #undef TARGET_FN_ABI_VA_LIST
30327 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30329 #undef TARGET_CANONICAL_VA_LIST_TYPE
30330 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30332 #undef TARGET_EXPAND_BUILTIN_VA_START
30333 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
/* Registers implicitly clobbered by asm statements.  */
30335 #undef TARGET_MD_ASM_CLOBBERS
30336 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
/* Calling-convention hooks: argument promotion/passing, aggregate
   returns, incoming varargs, and stack/frame pointer handling.  */
30338 #undef TARGET_PROMOTE_PROTOTYPES
30339 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30340 #undef TARGET_STRUCT_VALUE_RTX
30341 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30342 #undef TARGET_SETUP_INCOMING_VARARGS
30343 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30344 #undef TARGET_MUST_PASS_IN_STACK
30345 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30346 #undef TARGET_PASS_BY_REFERENCE
30347 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30348 #undef TARGET_INTERNAL_ARG_POINTER
30349 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
/* Stack realignment support (DRAP = dynamic realign argument pointer).  */
30350 #undef TARGET_UPDATE_STACK_BOUNDARY
30351 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30352 #undef TARGET_GET_DRAP_RTX
30353 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
30354 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
30355 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
30356 #undef TARGET_STRICT_ARGUMENT_NAMING
30357 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
/* Gimplification of va_arg expressions.  */
30359 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
30360 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
/* Which scalar and vector machine modes this target supports, and
   modes selected by literal suffixes (e.g. __float128-style).  */
30362 #undef TARGET_SCALAR_MODE_SUPPORTED_P
30363 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30365 #undef TARGET_VECTOR_MODE_SUPPORTED_P
30366 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30368 #undef TARGET_C_MODE_FOR_SUFFIX
30369 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
/* DWARF DTP-relative (TLS) relocation output.  */
30372 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30373 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
/* Optional subtarget hook for inserting extra attributes.
   NOTE(review): the matching #endif is not visible in this extract.  */
30376 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30377 #undef TARGET_INSERT_ATTRIBUTES
30378 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
/* C++ name mangling for target-specific types.  */
30381 #undef TARGET_MANGLE_TYPE
30382 #define TARGET_MANGLE_TYPE ix86_mangle_type
/* Call emitted when a stack-protector check fails.  */
30384 #undef TARGET_STACK_PROTECT_FAIL
30385 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
/* Where function return values live.  */
30387 #undef TARGET_FUNCTION_VALUE
30388 #define TARGET_FUNCTION_VALUE ix86_function_value
/* Secondary reload classes for awkward register moves.  */
30390 #undef TARGET_SECONDARY_RELOAD
30391 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
/* Vectorization cost model.  */
30393 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30394 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
/* Per-function target option switching (attribute target / #pragma):
   notice function changes, validate/save/restore/print/compare the
   target-specific option state, and decide inlinability across
   differing option sets.  */
30396 #undef TARGET_SET_CURRENT_FUNCTION
30397 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30399 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30400 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30402 #undef TARGET_OPTION_SAVE
30403 #define TARGET_OPTION_SAVE ix86_function_specific_save
30405 #undef TARGET_OPTION_RESTORE
30406 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30408 #undef TARGET_OPTION_PRINT
30409 #define TARGET_OPTION_PRINT ix86_function_specific_print
30411 #undef TARGET_OPTION_CAN_INLINE_P
30412 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
/* Hook run at the start of RTL expansion (here: possible ABI switch).  */
30414 #undef TARGET_EXPAND_TO_RTL_HOOK
30415 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
/* Address legitimacy predicate.  */
30417 #undef TARGET_LEGITIMATE_ADDRESS_P
30418 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
/* The single vtable through which the target-independent parts of the
   compiler reach all of the hook implementations defined above.  */
30420 struct gcc_target targetm = TARGET_INITIALIZER;
/* Machine-generated garbage-collector root tables for this file
   (produced by gengtype).  */
30422 #include "gt-i386.h"