/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
54 #include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Default stack-probe limit; -1 disables the limit unless the target
   configuration headers provided their own value.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, anything else -> 4
   (the "other" slot of the five-entry cost arrays).
   NOTE(review): the trailing ": 4)" arm was missing in the damaged copy
   of this file and has been restored -- verify against history.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop descriptor for the 64-bit half of the tables of
   32-bit-only processors: always fall back to a library call.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
79 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
/* Processor costs (relative to an add) */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
/* Pentium 4 instruction cost table.  Values are relative to an add
   (COSTS_N_INSNS); field order follows struct processor_costs.  */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* memcpy algorithm table: {max block size, algorithm} pairs, -1 = unbounded;
   first row is 32-bit, second 64-bit (DUMMY_STRINGOP_ALGS = libcall only).  */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
/* memset algorithm table, same layout as the memcpy table above.  */
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
/* Nocona (64-bit Pentium 4 class) instruction cost table; field order
   follows struct processor_costs.  */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* memcpy algorithm table: {max block size, algorithm} pairs, -1 = unbounded;
   first row is 32-bit, second 64-bit.  */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
/* memset algorithm table, same layout as the memcpy table above.  */
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
/* Core 2 instruction cost table; field order follows struct
   processor_costs.  */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
995 2, /* cost of moving MMX register */
996 {6, 6}, /* cost of loading MMX registers
997 in SImode and DImode */
998 {4, 4}, /* cost of storing MMX registers
999 in SImode and DImode */
1000 2, /* cost of moving SSE register */
1001 {6, 6, 6}, /* cost of loading SSE registers
1002 in SImode, DImode and TImode */
1003 {4, 4, 4}, /* cost of storing SSE registers
1004 in SImode, DImode and TImode */
1005 2, /* MMX or SSE register to integer */
1006 32, /* size of l1 cache. */
1007 2048, /* size of l2 cache. */
1008 128, /* size of prefetch block */
1009 8, /* number of parallel prefetches */
1010 3, /* Branch cost */
1011 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1012 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1013 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1014 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1015 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1016 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* memcpy algorithm table: {max block size, algorithm} pairs, -1 = unbounded;
   first row is 32-bit, second 64-bit.  */
1017 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1018 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* memset algorithm table, same layout as the memcpy table above.  */
1020 {{libcall, {{8, loop}, {15, unrolled_loop},
1021 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022 {libcall, {{24, loop}, {32, unrolled_loop},
1023 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024 1, /* scalar_stmt_cost. */
1025 1, /* scalar load_cost. */
1026 1, /* scalar_store_cost. */
1027 1, /* vec_stmt_cost. */
1028 1, /* vec_to_scalar_cost. */
1029 1, /* scalar_to_vec_cost. */
1030 1, /* vec_align_load_cost. */
1031 2, /* vec_unalign_load_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1037 /* Generic64 should produce code tuned for Nocona and K8. */
/* Field order follows struct processor_costs; values are relative to an
   add (COSTS_N_INSNS).  */
1039 struct processor_costs generic64_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 /* On all chips taken into consideration lea is 2 cycles and more. With
1042 this cost however our current implementation of synth_mult results in
1043 use of unnecessary temporary registers causing regression on several
1044 SPECfp benchmarks. */
1045 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1046 COSTS_N_INSNS (1), /* variable shift costs */
1047 COSTS_N_INSNS (1), /* constant shift costs */
1048 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1049 COSTS_N_INSNS (4), /* HI */
1050 COSTS_N_INSNS (3), /* SI */
1051 COSTS_N_INSNS (4), /* DI */
1052 COSTS_N_INSNS (2)}, /* other */
1053 0, /* cost of multiply per each bit set */
1054 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1055 COSTS_N_INSNS (26), /* HI */
1056 COSTS_N_INSNS (42), /* SI */
1057 COSTS_N_INSNS (74), /* DI */
1058 COSTS_N_INSNS (74)}, /* other */
1059 COSTS_N_INSNS (1), /* cost of movsx */
1060 COSTS_N_INSNS (1), /* cost of movzx */
1061 8, /* "large" insn */
1062 17, /* MOVE_RATIO */
1063 4, /* cost for loading QImode using movzbl */
1064 {4, 4, 4}, /* cost of loading integer registers
1065 in QImode, HImode and SImode.
1066 Relative to reg-reg move (2). */
1067 {4, 4, 4}, /* cost of storing integer registers */
1068 4, /* cost of reg,reg fld/fst */
1069 {12, 12, 12}, /* cost of loading fp registers
1070 in SFmode, DFmode and XFmode */
1071 {6, 6, 8}, /* cost of storing fp registers
1072 in SFmode, DFmode and XFmode */
1073 2, /* cost of moving MMX register */
1074 {8, 8}, /* cost of loading MMX registers
1075 in SImode and DImode */
1076 {8, 8}, /* cost of storing MMX registers
1077 in SImode and DImode */
1078 2, /* cost of moving SSE register */
1079 {8, 8, 8}, /* cost of loading SSE registers
1080 in SImode, DImode and TImode */
1081 {8, 8, 8}, /* cost of storing SSE registers
1082 in SImode, DImode and TImode */
1083 5, /* MMX or SSE register to integer */
1084 32, /* size of l1 cache. */
1085 512, /* size of l2 cache. */
1086 64, /* size of prefetch block */
1087 6, /* number of parallel prefetches */
1088 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1089 is increased to perhaps more appropriate value of 5. */
1090 3, /* Branch cost */
1091 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy algorithm table: 32-bit row is a dummy (libcall only) because
   generic64 is 64-bit only; the 64-bit row carries the real entries.  */
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* memset algorithm table, same layout as the memcpy table above.  */
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 1, /* scalar_stmt_cost. */
1102 1, /* scalar load_cost. */
1103 1, /* scalar_store_cost. */
1104 1, /* vec_stmt_cost. */
1105 1, /* vec_to_scalar_cost. */
1106 1, /* scalar_to_vec_cost. */
1107 1, /* vec_align_load_cost. */
1108 2, /* vec_unalign_load_cost. */
1109 1, /* vec_store_cost. */
1110 3, /* cond_taken_branch_cost. */
1111 1, /* cond_not_taken_branch_cost. */
1114 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* Field order follows struct processor_costs; values are relative to an
   add (COSTS_N_INSNS).  */
1116 struct processor_costs generic32_cost = {
1117 COSTS_N_INSNS (1), /* cost of an add instruction */
1118 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1119 COSTS_N_INSNS (1), /* variable shift costs */
1120 COSTS_N_INSNS (1), /* constant shift costs */
1121 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1122 COSTS_N_INSNS (4), /* HI */
1123 COSTS_N_INSNS (3), /* SI */
1124 COSTS_N_INSNS (4), /* DI */
1125 COSTS_N_INSNS (2)}, /* other */
1126 0, /* cost of multiply per each bit set */
1127 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1128 COSTS_N_INSNS (26), /* HI */
1129 COSTS_N_INSNS (42), /* SI */
1130 COSTS_N_INSNS (74), /* DI */
1131 COSTS_N_INSNS (74)}, /* other */
1132 COSTS_N_INSNS (1), /* cost of movsx */
1133 COSTS_N_INSNS (1), /* cost of movzx */
1134 8, /* "large" insn */
1135 17, /* MOVE_RATIO */
1136 4, /* cost for loading QImode using movzbl */
1137 {4, 4, 4}, /* cost of loading integer registers
1138 in QImode, HImode and SImode.
1139 Relative to reg-reg move (2). */
1140 {4, 4, 4}, /* cost of storing integer registers */
1141 4, /* cost of reg,reg fld/fst */
1142 {12, 12, 12}, /* cost of loading fp registers
1143 in SFmode, DFmode and XFmode */
1144 {6, 6, 8}, /* cost of storing fp registers
1145 in SFmode, DFmode and XFmode */
1146 2, /* cost of moving MMX register */
1147 {8, 8}, /* cost of loading MMX registers
1148 in SImode and DImode */
1149 {8, 8}, /* cost of storing MMX registers
1150 in SImode and DImode */
1151 2, /* cost of moving SSE register */
1152 {8, 8, 8}, /* cost of loading SSE registers
1153 in SImode, DImode and TImode */
1154 {8, 8, 8}, /* cost of storing SSE registers
1155 in SImode, DImode and TImode */
1156 5, /* MMX or SSE register to integer */
1157 32, /* size of l1 cache. */
1158 256, /* size of l2 cache. */
1159 64, /* size of prefetch block */
1160 6, /* number of parallel prefetches */
1161 3, /* Branch cost */
1162 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy algorithm table: real entries in the 32-bit row; 64-bit row is a
   dummy (libcall only) because generic32 is 32-bit only.  */
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
/* memset algorithm table, same layout as the memcpy table above.  */
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 1, /* scalar_stmt_cost. */
1173 1, /* scalar load_cost. */
1174 1, /* scalar_store_cost. */
1175 1, /* vec_stmt_cost. */
1176 1, /* vec_to_scalar_cost. */
1177 1, /* scalar_to_vec_cost. */
1178 1, /* vec_align_load_cost. */
1179 2, /* vec_unalign_load_cost. */
1180 1, /* vec_store_cost. */
1181 3, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
/* Cost table currently in effect; initialized for Pentium and presumably
   repointed when the target CPU is selected — TODO confirm in option
   handling.  */
1185 const struct processor_costs *ix86_cost = &pentium_cost;
1187 /* Processor feature/optimization bitmasks.  One bit per processor;
   combined masks below group related CPUs. */
1188 #define m_386 (1<<PROCESSOR_I386)
1189 #define m_486 (1<<PROCESSOR_I486)
1190 #define m_PENT (1<<PROCESSOR_PENTIUM)
1191 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1192 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1193 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1194 #define m_CORE2 (1<<PROCESSOR_CORE2)
1196 #define m_GEODE (1<<PROCESSOR_GEODE)
1197 #define m_K6 (1<<PROCESSOR_K6)
1198 #define m_K6_GEODE (m_K6 | m_GEODE)
1199 #define m_K8 (1<<PROCESSOR_K8)
1200 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1201 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1202 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1203 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1205 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1206 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1208 /* Generic instruction choice should be common subset of supported CPUs
1209 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1210 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1212 /* Feature tests against the various tunings. */
1213 unsigned char ix86_tune_features[X86_TUNE_LAST];
1215 /* Feature tests against the various tunings used to create ix86_tune_features
1216 based on the processor mask. */
/* Indexed by X86_TUNE_*; each entry is a mask of the m_* processor bits
   for which the tuning is enabled (~ masks enable all but the listed
   processors).  */
1217 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1218 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1219 negatively, so enabling for Generic64 seems like good code size
1220 tradeoff. We can't enable it for 32bit generic because it does not
1221 work well with PPro base chips. */
1222 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1224 /* X86_TUNE_PUSH_MEMORY */
1225 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1226 | m_NOCONA | m_CORE2 | m_GENERIC,
1228 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1231 /* X86_TUNE_USE_BIT_TEST */
1234 /* X86_TUNE_UNROLL_STRLEN */
1235 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1237 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1238 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1240 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1241 on simulation result. But after P4 was made, no performance benefit
1242 was observed with branch hints. It also increases the code size.
1243 As a result, icc never generates branch hints. */
1246 /* X86_TUNE_DOUBLE_WITH_ADD */
1249 /* X86_TUNE_USE_SAHF */
1250 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1251 | m_NOCONA | m_CORE2 | m_GENERIC,
1253 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1254 partial dependencies. */
1255 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1256 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1258 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1259 register stalls on Generic32 compilation setting as well. However
1260 in current implementation the partial register stalls are not eliminated
1261 very well - they can be introduced via subregs synthesized by combine
1262 and can happen in caller/callee saving sequences. Because this option
1263 pays back little on PPro based chips and is in conflict with partial reg
1264 dependencies used by Athlon/P4 based chips, it is better to leave it off
1265 for generic32 for now. */
1268 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1269 m_CORE2 | m_GENERIC,
1271 /* X86_TUNE_USE_HIMODE_FIOP */
1272 m_386 | m_486 | m_K6_GEODE,
1274 /* X86_TUNE_USE_SIMODE_FIOP */
1275 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1277 /* X86_TUNE_USE_MOV0 */
1280 /* X86_TUNE_USE_CLTD */
1281 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1283 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1286 /* X86_TUNE_SPLIT_LONG_MOVES */
1289 /* X86_TUNE_READ_MODIFY_WRITE */
1292 /* X86_TUNE_READ_MODIFY */
1295 /* X86_TUNE_PROMOTE_QIMODE */
1296 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1297 | m_GENERIC /* | m_PENT4 ? */,
1299 /* X86_TUNE_FAST_PREFIX */
1300 ~(m_PENT | m_486 | m_386),
1302 /* X86_TUNE_SINGLE_STRINGOP */
1303 m_386 | m_PENT4 | m_NOCONA,
1305 /* X86_TUNE_QIMODE_MATH */
1308 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1309 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1310 might be considered for Generic32 if our scheme for avoiding partial
1311 stalls was more effective. */
1314 /* X86_TUNE_PROMOTE_QI_REGS */
1317 /* X86_TUNE_PROMOTE_HI_REGS */
1320 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1321 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1323 /* X86_TUNE_ADD_ESP_8 */
1324 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1325 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1327 /* X86_TUNE_SUB_ESP_4 */
1328 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1330 /* X86_TUNE_SUB_ESP_8 */
1331 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1332 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1334 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1335 for DFmode copies */
1336 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1337 | m_GENERIC | m_GEODE),
1339 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1340 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1342 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1343 conflict here in between PPro/Pentium4 based chips that thread 128bit
1344 SSE registers as single units versus K8 based chips that divide SSE
1345 registers to two 64bit halves. This knob promotes all store destinations
1346 to be 128bit to allow register renaming on 128bit SSE units, but usually
1347 results in one extra microop on 64bit SSE units. Experimental results
1348 shows that disabling this option on P4 brings over 20% SPECfp regression,
1349 while enabling it on K8 brings roughly 2.4% regression that can be partly
1350 masked by careful scheduling of moves. */
1351 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1353 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1356 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1357 are resolved on SSE register parts instead of whole registers, so we may
1358 maintain just lower part of scalar values in proper format leaving the
1359 upper part undefined. */
1362 /* X86_TUNE_SSE_TYPELESS_STORES */
1365 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1366 m_PPRO | m_PENT4 | m_NOCONA,
1368 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1369 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1371 /* X86_TUNE_PROLOGUE_USING_MOVE */
1372 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1374 /* X86_TUNE_EPILOGUE_USING_MOVE */
1375 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1377 /* X86_TUNE_SHIFT1 */
1380 /* X86_TUNE_USE_FFREEP */
1383 /* X86_TUNE_INTER_UNIT_MOVES */
1384 ~(m_AMD_MULTIPLE | m_GENERIC),
1386 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1389 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1390 than 4 branch instructions in the 16 byte window. */
1391 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1393 /* X86_TUNE_SCHEDULE */
1394 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1396 /* X86_TUNE_USE_BT */
1397 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1399 /* X86_TUNE_USE_INCDEC */
1400 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1402 /* X86_TUNE_PAD_RETURNS */
1403 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1405 /* X86_TUNE_EXT_80387_CONSTANTS */
1406 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1408 /* X86_TUNE_SHORTEN_X87_SSE */
1411 /* X86_TUNE_AVOID_VECTOR_DECODE */
1414 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1415 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1418 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1419 vector path on AMD machines. */
1420 m_K8 | m_GENERIC64 | m_AMDFAM10,
1422 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1424 m_K8 | m_GENERIC64 | m_AMDFAM10,
1426 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1430 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1431 but one byte longer. */
1434 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1435 operand that cannot be represented using a modRM byte. The XOR
1436 replacement is long decoded, so this split helps here as well. */
1439 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1440 from integer to FP. */
1443 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1444 with a subsequent conditional jump instruction into a single
1445 compare-and-branch uop. */
1449 /* Feature tests against the various architecture variations. */
1450 unsigned char ix86_arch_features[X86_ARCH_LAST];
1452 /* Feature tests against the various architecture variations, used to create
1453 ix86_arch_features based on the processor mask. */
/* Indexed by X86_ARCH_*; each entry is a mask of m_* processor bits.  */
1454 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1455 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1456 ~(m_386 | m_486 | m_PENT | m_K6),
1458 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1461 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1464 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1467 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Processors for which outgoing arguments should be accumulated rather
   than pushed.  */
1471 static const unsigned int x86_accumulate_outgoing_args
1472 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1474 static const unsigned int x86_arch_always_fancy_math_387
1475 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1476 | m_NOCONA | m_CORE2 | m_GENERIC;
/* String-operation algorithm forced by the user; no_stringop means use the
   per-processor tables above.  */
1478 static enum stringop_alg stringop_alg = no_stringop;
1480 /* In case the average insn count for single function invocation is
1481 lower than this constant, emit fast (but longer) prologue and
1483 #define FAST_PROLOGUE_INSN_COUNT 20
1485 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1486 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1487 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1488 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1490 /* Array of the smallest class containing reg number REGNO, indexed by
1491 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1493 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1495 /* ax, dx, cx, bx */
1496 AREG, DREG, CREG, BREG,
1497 /* si, di, bp, sp */
1498 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* FP stack registers st(0)..st(7) */
1500 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1501 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1504 /* flags, fpsr, fpcr, frame */
1505 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* SSE registers */
1507 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* MMX registers */
1510 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
/* REX integer registers r8-r15 */
1513 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1514 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1515 /* SSE REX registers */
1516 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1520 /* The "default" register map used in 32bit mode. */
1522 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1524 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1525 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1526 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1527 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1528 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1529 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1530 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* x86-64 SysV ABI argument registers, as gcc register numbers, in the
   order arguments are assigned.  */
1533 static int const x86_64_int_parameter_registers[6] =
1535 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1536 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* x86-64 Microsoft ABI argument registers (only four).  */
1539 static int const x86_64_ms_abi_int_parameter_registers[4] =
1541 2 /*RCX*/, 1 /*RDX*/,
1542 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1545 static int const x86_64_int_return_registers[4] =
1547 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1550 /* The "default" register map used in 64bit mode. */
1551 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1553 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1554 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1555 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1556 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1557 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1558 8,9,10,11,12,13,14,15, /* extended integer registers */
1559 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1562 /* Define the register numbers to be used in Dwarf debugging information.
1563 The SVR4 reference port C compiler uses the following register numbers
1564 in its Dwarf output code:
1565 0 for %eax (gcc regno = 0)
1566 1 for %ecx (gcc regno = 2)
1567 2 for %edx (gcc regno = 1)
1568 3 for %ebx (gcc regno = 3)
1569 4 for %esp (gcc regno = 7)
1570 5 for %ebp (gcc regno = 6)
1571 6 for %esi (gcc regno = 4)
1572 7 for %edi (gcc regno = 5)
1573 The following three DWARF register numbers are never generated by
1574 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1575 believes these numbers have these meanings.
1576 8 for %eip (no gcc equivalent)
1577 9 for %eflags (gcc regno = 17)
1578 10 for %trapno (no gcc equivalent)
1579 It is not at all clear how we should number the FP stack registers
1580 for the x86 architecture. If the version of SDB on x86/svr4 were
1581 a bit less brain dead with respect to floating-point then we would
1582 have a precedent to follow with respect to DWARF register numbers
1583 for x86 FP registers, but the SDB on x86/svr4 is so completely
1584 broken with respect to FP registers that it is hardly worth thinking
1585 of it as something to strive for compatibility with.
1586 The version of x86/svr4 SDB I have at the moment does (partially)
1587 seem to believe that DWARF register number 11 is associated with
1588 the x86 register %st(0), but that's about all. Higher DWARF
1589 register numbers don't seem to be associated with anything in
1590 particular, and even for DWARF regno 11, SDB only seems to under-
1591 stand that it should say that a variable lives in %st(0) (when
1592 asked via an `=' command) if we said it was in DWARF regno 11,
1593 but SDB still prints garbage when asked for the value of the
1594 variable in question (via a `/' command).
1595 (Also note that the labels SDB prints for various FP stack regs
1596 when doing an `x' command are all wrong.)
1597 Note that these problems generally don't affect the native SVR4
1598 C compiler because it doesn't allow the use of -O with -g and
1599 because when it is *not* optimizing, it allocates a memory
1600 location for each floating-point variable, and the memory
1601 location is what gets described in the DWARF AT_location
1602 attribute for the variable in question.
1603 Regardless of the severe mental illness of the x86/svr4 SDB, we
1604 do something sensible here and we use the following DWARF
1605 register numbers. Note that these are all stack-top-relative
1607 11 for %st(0) (gcc regno = 8)
1608 12 for %st(1) (gcc regno = 9)
1609 13 for %st(2) (gcc regno = 10)
1610 14 for %st(3) (gcc regno = 11)
1611 15 for %st(4) (gcc regno = 12)
1612 16 for %st(5) (gcc regno = 13)
1613 17 for %st(6) (gcc regno = 14)
1614 18 for %st(7) (gcc regno = 15)
/* Map gcc register number -> SVR4 DWARF register number per the scheme
   described above; -1 means no DWARF number.  */
1616 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1618 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1619 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1620 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1621 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1622 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1623 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1624 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1627 /* Test and compare insns in i386.md store the information needed to
1628 generate branch and scc insns here. */
1630 rtx ix86_compare_op0 = NULL_RTX;
1631 rtx ix86_compare_op1 = NULL_RTX;
1632 rtx ix86_compare_emitted = NULL_RTX;
1634 /* Size of the register save area. */
1635 #define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
1637 /* Define the structure for the machine field in struct function. */
1639 struct stack_local_entry GTY(())
1641 unsigned short mode;
1644 struct stack_local_entry *next;
1647 /* Structure describing stack frame layout.
1648 Stack grows downward:
1654 saved frame pointer if frame_pointer_needed
1655 <- HARD_FRAME_POINTER
1660 [va_arg registers] (
1661 > to_allocate <- FRAME_POINTER
1671 HOST_WIDE_INT frame;
1673 int outgoing_arguments_size;
1676 HOST_WIDE_INT to_allocate;
1677 /* The offsets relative to ARG_POINTER. */
1678 HOST_WIDE_INT frame_pointer_offset;
1679 HOST_WIDE_INT hard_frame_pointer_offset;
1680 HOST_WIDE_INT stack_pointer_offset;
1682 /* When save_regs_using_mov is set, emit prologue using
1683 move instead of push instructions. */
1684 bool save_regs_using_mov;
/* Code model option. */
enum cmodel ix86_cmodel;

/* Assembler dialect (AT&T by default; switched to Intel by -masm=intel,
   see override_options).  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;

/* Which TLS access sequences to emit; GNU dialect by default.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for. */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for. */
enum processor_type ix86_tune;

/* Which instruction set architecture to use. */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP. */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";

/* Generator functions for insn patterns; presumably selected once per
   word size (SImode vs DImode patterns) — confirm where they are
   assigned (not visible in this chunk).  */
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_pop1) (rtx);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);

/* Preferred alignment for stack boundary in bits. */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   the command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits. */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits. */
unsigned int ix86_incoming_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Calling abi specific va_list type nodes. */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections. */
int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Fence to use after loop using movnt. */
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding). */
enum x86_64_reg_class
X86_64_INTEGER_CLASS,
X86_64_INTEGERSI_CLASS,
X86_64_COMPLEX_X87_CLASS,
/* Human-readable class names; presumably indexed by x86_64_reg_class
   for debug output — confirm the ordering against the full enum.  */
static const char * const x86_64_reg_class_name[] =
"no", "integer", "integerSI", "sse", "sseSF", "sseDF",
"sseup", "x87", "x87up", "cplx87", "no"
/* Maximum number of eightbyte classes a single argument can occupy.  */
#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc.... */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Set once ext_80387_constants_table has been filled in lazily.  */
static bool ext_80387_constants_init = 0;
/* Forward declarations for functions defined later in this file.  */
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
static void ix86_add_new_builtins (int);

/* Indices of the arch/tune/fpmath strings saved and restored with a
   function's target attribute (see ix86_function_specific_*).  */
enum ix86_function_specific_strings
IX86_FUNCTION_SPECIFIC_ARCH,
IX86_FUNCTION_SPECIFIC_TUNE,
IX86_FUNCTION_SPECIFIC_FPMATH,
IX86_FUNCTION_SPECIFIC_MAX

static char *ix86_target_string (int, int, const char *, const char *,
				 const char *, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
/* The svr4 ABI for the i386 says that records and unions are returned
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;

/* Bit flags that specify the ISA we are compiling for. */
int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;

/* A mask of ix86_isa_flags that includes bit X if X
   was set or cleared on the command line. */
static int ix86_isa_flags_explicit;
/* Define a set of ISAs which are available when a given ISA is
   enabled. MMX and SSE ISAs are handled separately. */

#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
#define OPTION_MASK_ISA_3DNOW_SET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)

/* Each SSE level implies all the levels below it.  */
#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
#define OPTION_MASK_ISA_SSE2_SET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
#define OPTION_MASK_ISA_SSE3_SET \
  (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
#define OPTION_MASK_ISA_SSSE3_SET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
#define OPTION_MASK_ISA_SSE4_1_SET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
#define OPTION_MASK_ISA_SSE4_2_SET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
#define OPTION_MASK_ISA_AVX_SET \
  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
#define OPTION_MASK_ISA_FMA_SET \
  (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)

/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
   as -msse4.2.  */
#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET

#define OPTION_MASK_ISA_SSE4A_SET \
  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
#define OPTION_MASK_ISA_SSE5_SET \
  (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)

/* AES and PCLMUL need SSE2 because they use xmm registers */
#define OPTION_MASK_ISA_AES_SET \
  (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
#define OPTION_MASK_ISA_PCLMUL_SET \
  (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)

/* Enabling ABM also enables POPCNT.  */
#define OPTION_MASK_ISA_ABM_SET \
  (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
/* Define a set of ISAs which aren't available when a given ISA is
   disabled. MMX and SSE ISAs are handled separately. */

#define OPTION_MASK_ISA_MMX_UNSET \
  (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
#define OPTION_MASK_ISA_3DNOW_UNSET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A

/* Disabling an SSE level disables everything built on top of it.  */
#define OPTION_MASK_ISA_SSE_UNSET \
  (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
#define OPTION_MASK_ISA_SSE2_UNSET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
#define OPTION_MASK_ISA_SSE3_UNSET \
  (OPTION_MASK_ISA_SSE3 \
   | OPTION_MASK_ISA_SSSE3_UNSET \
   | OPTION_MASK_ISA_SSE4A_UNSET )
#define OPTION_MASK_ISA_SSSE3_UNSET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE4_1_UNSET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
#define OPTION_MASK_ISA_SSE4_2_UNSET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
#define OPTION_MASK_ISA_AVX_UNSET \
  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA

/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
   as -mno-sse4.1 (which transitively drops SSE4.2 as well).  */
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET

#define OPTION_MASK_ISA_SSE4A_UNSET \
  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
#define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
/* Vectorization library interface and handlers. */
/* Hook used to look up a vectorized variant of a builtin; NULL when no
   vector math library is in use.  The SVML and ACML handlers below are
   the two available implementations.  */
tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
const struct processor_costs *cost; /* Processor costs */
const int align_loop; /* Default alignments. */
const int align_loop_max_skip;
const int align_jump;
const int align_jump_max_skip;
const int align_func;

/* One row per processor; row order presumably matches the
   processor_type enumeration (the table is indexed by ix86_tune
   below) — confirm against the enum in i386.h.  */
static const struct ptt processor_target_table[PROCESSOR_max] =
{&i386_cost, 4, 3, 4, 3, 4},
{&i486_cost, 16, 15, 16, 15, 16},
{&pentium_cost, 16, 7, 16, 7, 16},
{&pentiumpro_cost, 16, 15, 16, 10, 16},
{&geode_cost, 0, 0, 0, 0, 0},
{&k6_cost, 32, 7, 32, 7, 32},
{&athlon_cost, 16, 7, 16, 7, 16},
{&pentium4_cost, 0, 0, 0, 0, 0},
{&k8_cost, 16, 7, 16, 7, 16},
{&nocona_cost, 0, 0, 0, 0, 0},
{&core2_cost, 16, 10, 16, 10, 16},
{&generic32_cost, 16, 7, 16, 7, 16},
{&generic64_cost, 16, 10, 16, 10, 16},
{&amdfam10_cost, 32, 24, 32, 7, 32}

/* CPU name strings indexed by TARGET_CPU_DEFAULT_*; used to default
   ix86_tune_string in override_options.  */
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* Implement TARGET_HANDLE_OPTION.  Process ISA option CODE with value
   VALUE, updating ix86_isa_flags and recording in
   ix86_isa_flags_explicit which bits the user touched on the command
   line (so -march defaults never override explicit user choices).
   Each pair of statements below either enables an ISA together with
   everything it implies (|= *_SET) or disables it together with
   everything that depends on it (&= ~*_UNSET); the surrounding switch
   and case labels are elided in this copy of the file.  */
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
/* MMX on.  */
ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
/* MMX off (also drops 3dNOW, which builds on MMX).  */
ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
/* 3dNOW on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
/* SSE on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
/* SSE2 on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
/* SSE3 on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
/* SSSE3 on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
/* SSE4.1 on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
/* SSE4.2 on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
/* AVX on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
/* FMA on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
/* SSE4 (i.e. SSE4.2, see the *_SET macros) on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
/* SSE4A on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
/* SSE5 on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
/* ABM on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
/* POPCNT on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
/* SAHF on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
/* CX16 on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
/* AES on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
/* PCLMUL on/off.  */
ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  ISA/FLAGS are the option
   bitmasks, ARCH/TUNE/FPMATH the corresponding option strings (may be
   NULL), and ADD_NL_P requests line wrapping plus "(other ...)"
   diagnostics for bits not matched by the tables below. */
ix86_target_string (int isa, int flags, const char *arch, const char *tune,
		    const char *fpmath, bool add_nl_p)
struct ix86_target_opts
const char *option; /* option string */
int mask; /* isa mask options */
/* This table is ordered so that options like -msse5 or -msse4.2 that imply
   preceding options will match those first. */
static struct ix86_target_opts isa_opts[] =
{ "-m64", OPTION_MASK_ISA_64BIT },
{ "-msse5", OPTION_MASK_ISA_SSE5 },
{ "-msse4a", OPTION_MASK_ISA_SSE4A },
{ "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
{ "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
{ "-mssse3", OPTION_MASK_ISA_SSSE3 },
{ "-msse3", OPTION_MASK_ISA_SSE3 },
{ "-msse2", OPTION_MASK_ISA_SSE2 },
{ "-msse", OPTION_MASK_ISA_SSE },
{ "-m3dnow", OPTION_MASK_ISA_3DNOW },
{ "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
{ "-mmmx", OPTION_MASK_ISA_MMX },
{ "-mabm", OPTION_MASK_ISA_ABM },
{ "-mpopcnt", OPTION_MASK_ISA_POPCNT },
{ "-maes", OPTION_MASK_ISA_AES },
{ "-mpclmul", OPTION_MASK_ISA_PCLMUL },
/* Additional target flags (non-ISA -m options).  */
static struct ix86_target_opts flag_opts[] =
{ "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
{ "-m80387", MASK_80387 },
{ "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
{ "-malign-double", MASK_ALIGN_DOUBLE },
{ "-mcld", MASK_CLD },
{ "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
{ "-mieee-fp", MASK_IEEE_FP },
{ "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
{ "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
{ "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
{ "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
{ "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
{ "-mno-fused-madd", MASK_NO_FUSED_MADD },
{ "-mno-push-args", MASK_NO_PUSH_ARGS },
{ "-mno-red-zone", MASK_NO_RED_ZONE },
{ "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
{ "-mrecip", MASK_RECIP },
{ "-mrtd", MASK_RTD },
{ "-msseregparm", MASK_SSEREGPARM },
{ "-mstack-arg-probe", MASK_STACK_PROBE },
{ "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
/* Collected (prefix, value) string pairs; sized for both tables plus
   the fixed entries (-march/-mtune/-mfpmath and the two "other"
   diagnostics).  */
const char *opts[ (sizeof (isa_opts) / sizeof (isa_opts[0])
		   + sizeof (flag_opts) / sizeof (flag_opts[0])
char target_other[40];
memset (opts, '\0', sizeof (opts));
/* Add -march= option. */
opts[num][0] = "-march=";
opts[num++][1] = arch;
/* Add -mtune= option. */
opts[num][0] = "-mtune=";
opts[num++][1] = tune;
/* Pick out the options in isa options. */
for (i = 0; i < sizeof (isa_opts) / sizeof (isa_opts[0]); i++)
if ((isa & isa_opts[i].mask) != 0)
opts[num++][0] = isa_opts[i].option;
isa &= ~ isa_opts[i].mask;
/* Any ISA bits left over get reported as a raw hex diagnostic.  */
if (isa && add_nl_p)
opts[num++][0] = isa_other;
sprintf (isa_other, "(other isa: 0x%x)", isa);
/* Add flag options. */
for (i = 0; i < sizeof (flag_opts) / sizeof (flag_opts[0]); i++)
if ((flags & flag_opts[i].mask) != 0)
opts[num++][0] = flag_opts[i].option;
flags &= ~ flag_opts[i].mask;
if (flags && add_nl_p)
opts[num++][0] = target_other;
/* NOTE(review): this message is gated on 'flags' and describes "other
   flags", yet it prints 'isa' — almost certainly a copy-paste of the
   isa_other case above; it should print 'flags'.  */
sprintf (target_other, "(other flags: 0x%x)", isa);
/* Add -fpmath= option. */
opts[num][0] = "-mfpmath=";
opts[num++][1] = fpmath;
gcc_assert (num < sizeof (opts) / sizeof (opts[0]));
/* Size the string. */
sep_len = (add_nl_p) ? 3 : 1;
for (i = 0; i < num; i++)
for (j = 0; j < 2; j++)
len += strlen (opts[i][j]);
/* Build the string. */
ret = ptr = (char *) xmalloc (len);
for (i = 0; i < num; i++)
for (j = 0; j < 2; j++)
len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
/* Wrap lines at roughly 70 columns when newlines were requested.  */
if (add_nl_p && line_len + len2[0] + len2[1] > 70)
for (j = 0; j < 2; j++)
memcpy (ptr, opts[i][j], len2[j]);
line_len += len2[j];
gcc_assert (ret + len >= ptr);
/* Function that is callable from the debugger to print the current
   target options: it builds the description via ix86_target_string
   from the current ISA flags, target flags and arch/tune/fpmath
   strings, and writes it (or a "<no options>" placeholder) to stderr.
ix86_debug_options (void)
char *opts = ix86_target_string (ix86_isa_flags, target_flags,
ix86_arch_string, ix86_tune_string,
ix86_fpmath_string, true);
fprintf (stderr, "%s\n\n", opts);
fprintf (stderr, "<no options>\n\n");
2445 /* Sometimes certain combinations of command options do not make
2446 sense on a particular target machine. You can define a macro
2447 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2448 defined, is executed once just after all the command options have
2451 Don't use this macro to turn on various extra optimizations for
2452 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2455 override_options (bool main_args_p)
2458 unsigned int ix86_arch_mask, ix86_tune_mask;
2463 /* Comes from final.c -- no real reason to change it. */
2464 #define MAX_CODE_ALIGN 16
2472 PTA_PREFETCH_SSE = 1 << 4,
2474 PTA_3DNOW_A = 1 << 6,
2478 PTA_POPCNT = 1 << 10,
2480 PTA_SSE4A = 1 << 12,
2481 PTA_NO_SAHF = 1 << 13,
2482 PTA_SSE4_1 = 1 << 14,
2483 PTA_SSE4_2 = 1 << 15,
2486 PTA_PCLMUL = 1 << 18,
2493 const char *const name; /* processor name or nickname. */
2494 const enum processor_type processor;
2495 const unsigned /*enum pta_flags*/ flags;
2497 const processor_alias_table[] =
2499 {"i386", PROCESSOR_I386, 0},
2500 {"i486", PROCESSOR_I486, 0},
2501 {"i586", PROCESSOR_PENTIUM, 0},
2502 {"pentium", PROCESSOR_PENTIUM, 0},
2503 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2504 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2505 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2506 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2507 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2508 {"i686", PROCESSOR_PENTIUMPRO, 0},
2509 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2510 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2511 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2512 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2513 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2514 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2515 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2516 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2517 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2518 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2519 | PTA_CX16 | PTA_NO_SAHF)},
2520 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2521 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2524 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2525 |PTA_PREFETCH_SSE)},
2526 {"k6", PROCESSOR_K6, PTA_MMX},
2527 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2528 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2529 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2530 | PTA_PREFETCH_SSE)},
2531 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2532 | PTA_PREFETCH_SSE)},
2533 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2535 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2537 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2539 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2540 | PTA_MMX | PTA_SSE | PTA_SSE2
2542 {"k8", PROCESSOR_K8, (PTA_64BIT
2543 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2544 | PTA_SSE | PTA_SSE2
2546 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2547 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2548 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2550 {"opteron", PROCESSOR_K8, (PTA_64BIT
2551 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2552 | PTA_SSE | PTA_SSE2
2554 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2555 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2556 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2558 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2559 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2560 | PTA_SSE | PTA_SSE2
2562 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2563 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2564 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2566 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2567 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2568 | PTA_SSE | PTA_SSE2
2570 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2571 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2572 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2574 | PTA_CX16 | PTA_ABM)},
2575 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2576 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2577 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2579 | PTA_CX16 | PTA_ABM)},
2580 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2581 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2584 int const pta_size = ARRAY_SIZE (processor_alias_table);
2586 /* Set up prefix/suffix so the error messages refer to either the command
2587 line argument, or the attribute(target). */
2596 prefix = "option(\"";
2601 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2602 SUBTARGET_OVERRIDE_OPTIONS;
2605 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2606 SUBSUBTARGET_OVERRIDE_OPTIONS;
2609 /* -fPIC is the default for x86_64. */
2610 if (TARGET_MACHO && TARGET_64BIT)
2613 /* Set the default values for switches whose default depends on TARGET_64BIT
2614 in case they weren't overwritten by command line options. */
2617 /* Mach-O doesn't support omitting the frame pointer for now. */
2618 if (flag_omit_frame_pointer == 2)
2619 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2620 if (flag_asynchronous_unwind_tables == 2)
2621 flag_asynchronous_unwind_tables = 1;
2622 if (flag_pcc_struct_return == 2)
2623 flag_pcc_struct_return = 0;
2627 if (flag_omit_frame_pointer == 2)
2628 flag_omit_frame_pointer = 0;
2629 if (flag_asynchronous_unwind_tables == 2)
2630 flag_asynchronous_unwind_tables = 0;
2631 if (flag_pcc_struct_return == 2)
2632 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2635 /* Need to check -mtune=generic first. */
2636 if (ix86_tune_string)
2638 if (!strcmp (ix86_tune_string, "generic")
2639 || !strcmp (ix86_tune_string, "i686")
2640 /* As special support for cross compilers we read -mtune=native
2641 as -mtune=generic. With native compilers we won't see the
2642 -mtune=native, as it was changed by the driver. */
2643 || !strcmp (ix86_tune_string, "native"))
2646 ix86_tune_string = "generic64";
2648 ix86_tune_string = "generic32";
2650 /* If this call is for setting the option attribute, allow the
2651 generic32/generic64 that was previously set. */
2652 else if (!main_args_p
2653 && (!strcmp (ix86_tune_string, "generic32")
2654 || !strcmp (ix86_tune_string, "generic64")))
2656 else if (!strncmp (ix86_tune_string, "generic", 7))
2657 error ("bad value (%s) for %stune=%s %s",
2658 ix86_tune_string, prefix, suffix, sw);
2662 if (ix86_arch_string)
2663 ix86_tune_string = ix86_arch_string;
2664 if (!ix86_tune_string)
2666 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2667 ix86_tune_defaulted = 1;
2670 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2671 need to use a sensible tune option. */
2672 if (!strcmp (ix86_tune_string, "generic")
2673 || !strcmp (ix86_tune_string, "x86-64")
2674 || !strcmp (ix86_tune_string, "i686"))
2677 ix86_tune_string = "generic64";
2679 ix86_tune_string = "generic32";
2682 if (ix86_stringop_string)
2684 if (!strcmp (ix86_stringop_string, "rep_byte"))
2685 stringop_alg = rep_prefix_1_byte;
2686 else if (!strcmp (ix86_stringop_string, "libcall"))
2687 stringop_alg = libcall;
2688 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2689 stringop_alg = rep_prefix_4_byte;
2690 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2691 stringop_alg = rep_prefix_8_byte;
2692 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2693 stringop_alg = loop_1_byte;
2694 else if (!strcmp (ix86_stringop_string, "loop"))
2695 stringop_alg = loop;
2696 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2697 stringop_alg = unrolled_loop;
2699 error ("bad value (%s) for %sstringop-strategy=%s %s",
2700 ix86_stringop_string, prefix, suffix, sw);
2702 if (!strcmp (ix86_tune_string, "x86-64"))
2703 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2704 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2705 prefix, suffix, prefix, suffix, prefix, suffix);
2707 if (!ix86_arch_string)
2708 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2710 ix86_arch_specified = 1;
2712 if (!strcmp (ix86_arch_string, "generic"))
2713 error ("generic CPU can be used only for %stune=%s %s",
2714 prefix, suffix, sw);
2715 if (!strncmp (ix86_arch_string, "generic", 7))
2716 error ("bad value (%s) for %sarch=%s %s",
2717 ix86_arch_string, prefix, suffix, sw);
2719 if (ix86_cmodel_string != 0)
2721 if (!strcmp (ix86_cmodel_string, "small"))
2722 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2723 else if (!strcmp (ix86_cmodel_string, "medium"))
2724 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2725 else if (!strcmp (ix86_cmodel_string, "large"))
2726 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2728 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2729 else if (!strcmp (ix86_cmodel_string, "32"))
2730 ix86_cmodel = CM_32;
2731 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2732 ix86_cmodel = CM_KERNEL;
2734 error ("bad value (%s) for %scmodel=%s %s",
2735 ix86_cmodel_string, prefix, suffix, sw);
2739 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2740 use of rip-relative addressing. This eliminates fixups that
2741 would otherwise be needed if this object is to be placed in a
2742 DLL, and is essentially just as efficient as direct addressing. */
2743 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2744 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2745 else if (TARGET_64BIT)
2746 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2748 ix86_cmodel = CM_32;
2750 if (ix86_asm_string != 0)
2753 && !strcmp (ix86_asm_string, "intel"))
2754 ix86_asm_dialect = ASM_INTEL;
2755 else if (!strcmp (ix86_asm_string, "att"))
2756 ix86_asm_dialect = ASM_ATT;
2758 error ("bad value (%s) for %sasm=%s %s",
2759 ix86_asm_string, prefix, suffix, sw);
2761 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2762 error ("code model %qs not supported in the %s bit mode",
2763 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2764 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2765 sorry ("%i-bit mode not compiled in",
2766 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2768 for (i = 0; i < pta_size; i++)
2769 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2771 ix86_arch = processor_alias_table[i].processor;
2772 /* Default cpu tuning to the architecture. */
2773 ix86_tune = ix86_arch;
2775 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2776 error ("CPU you selected does not support x86-64 "
2779 if (processor_alias_table[i].flags & PTA_MMX
2780 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2781 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2782 if (processor_alias_table[i].flags & PTA_3DNOW
2783 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2784 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2785 if (processor_alias_table[i].flags & PTA_3DNOW_A
2786 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2787 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2788 if (processor_alias_table[i].flags & PTA_SSE
2789 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2790 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2791 if (processor_alias_table[i].flags & PTA_SSE2
2792 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2793 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2794 if (processor_alias_table[i].flags & PTA_SSE3
2795 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2796 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2797 if (processor_alias_table[i].flags & PTA_SSSE3
2798 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2799 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2800 if (processor_alias_table[i].flags & PTA_SSE4_1
2801 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2802 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2803 if (processor_alias_table[i].flags & PTA_SSE4_2
2804 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2805 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2806 if (processor_alias_table[i].flags & PTA_AVX
2807 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2808 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2809 if (processor_alias_table[i].flags & PTA_FMA
2810 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2811 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2812 if (processor_alias_table[i].flags & PTA_SSE4A
2813 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2814 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2815 if (processor_alias_table[i].flags & PTA_SSE5
2816 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2817 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2818 if (processor_alias_table[i].flags & PTA_ABM
2819 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2820 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2821 if (processor_alias_table[i].flags & PTA_CX16
2822 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2823 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2824 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2825 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2826 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2827 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2828 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2829 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2830 if (processor_alias_table[i].flags & PTA_AES
2831 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2832 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2833 if (processor_alias_table[i].flags & PTA_PCLMUL
2834 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2835 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2836 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2837 x86_prefetch_sse = true;
2843 error ("bad value (%s) for %sarch=%s %s",
2844 ix86_arch_string, prefix, suffix, sw);
2846 ix86_arch_mask = 1u << ix86_arch;
2847 for (i = 0; i < X86_ARCH_LAST; ++i)
2848 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2850 for (i = 0; i < pta_size; i++)
2851 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2853 ix86_tune = processor_alias_table[i].processor;
2854 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2856 if (ix86_tune_defaulted)
2858 ix86_tune_string = "x86-64";
2859 for (i = 0; i < pta_size; i++)
2860 if (! strcmp (ix86_tune_string,
2861 processor_alias_table[i].name))
2863 ix86_tune = processor_alias_table[i].processor;
2866 error ("CPU you selected does not support x86-64 "
2869 /* Intel CPUs have always interpreted SSE prefetch instructions as
2870 NOPs; so, we can enable SSE prefetch instructions even when
2871 -mtune (rather than -march) points us to a processor that has them.
2872 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2873 higher processors. */
2875 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2876 x86_prefetch_sse = true;
2880 error ("bad value (%s) for %stune=%s %s",
2881 ix86_tune_string, prefix, suffix, sw);
2883 ix86_tune_mask = 1u << ix86_tune;
2884 for (i = 0; i < X86_TUNE_LAST; ++i)
2885 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2888 ix86_cost = &ix86_size_cost;
2890 ix86_cost = processor_target_table[ix86_tune].cost;
2892 /* Arrange to set up i386_stack_locals for all functions. */
2893 init_machine_status = ix86_init_machine_status;
2895 /* Validate -mregparm= value. */
2896 if (ix86_regparm_string)
2899 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2900 i = atoi (ix86_regparm_string);
2901 if (i < 0 || i > REGPARM_MAX)
2902 error ("%sregparm=%d%s is not between 0 and %d",
2903 prefix, i, suffix, REGPARM_MAX);
2908 ix86_regparm = REGPARM_MAX;
2910 /* If the user has provided any of the -malign-* options,
2911 warn and use that value only if -falign-* is not set.
2912 Remove this code in GCC 3.2 or later. */
2913 if (ix86_align_loops_string)
2915 warning (0, "%salign-loops%s is obsolete, use %salign-loops%s",
2916 prefix, suffix, prefix, suffix);
2917 if (align_loops == 0)
2919 i = atoi (ix86_align_loops_string)
2920 if (i < 0 || i > MAX_CODE_ALIGN)
2921 error ("%salign-loops=%d%s is not between 0 and %d",
2922 prefix, i, suffix, MAX_CODE_ALIGN);
/* The option value is an exponent: -malign-loops=N means 2**N bytes.  */
2924 align_loops = 1 << i;
2928 if (ix86_align_jumps_string)
2930 warning (0, "%salign-jumps%s is obsolete, use %salign-jumps%s",
2931 prefix, suffix, prefix, suffix);
2932 if (align_jumps == 0)
2934 i = atoi (ix86_align_jumps_string);
2935 if (i < 0 || i > MAX_CODE_ALIGN)
/* NOTE(review): copy-paste defect — this diagnostic is reached for
   -malign-jumps but says "align-loops"; it should read
   "%salign-jumps=%d%s is not between 0 and %d".  */
2936 error ("%salign-loops=%d%s is not between 0 and %d",
2937 prefix, i, suffix, MAX_CODE_ALIGN);
2939 align_jumps = 1 << i;
2943 if (ix86_align_funcs_string)
2945 warning (0, "%salign-functions%s is obsolete, use %salign-functions%s",
2946 prefix, suffix, prefix, suffix);
2947 if (align_functions == 0)
2949 i = atoi (ix86_align_funcs_string);
2950 if (i < 0 || i > MAX_CODE_ALIGN)
/* NOTE(review): same copy-paste defect — should say "align-functions",
   not "align-loops".  */
2951 error ("%salign-loops=%d%s is not between 0 and %d",
2952 prefix, i, suffix, MAX_CODE_ALIGN);
2954 align_functions = 1 << i;
2958 /* Default align_* from the processor table. */
2959 if (align_loops == 0)
2961 align_loops = processor_target_table[ix86_tune].align_loop;
2962 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2964 if (align_jumps == 0)
2966 align_jumps = processor_target_table[ix86_tune].align_jump;
2967 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2969 if (align_functions == 0)
2971 align_functions = processor_target_table[ix86_tune].align_func;
2974 /* Validate -mbranch-cost= value, or provide default. */
2975 ix86_branch_cost = ix86_cost->branch_cost;
2976 if (ix86_branch_cost_string)
2978 i = atoi (ix86_branch_cost_string);
2980 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
2982 ix86_branch_cost = i;
2984 if (ix86_section_threshold_string)
2986 i = atoi (ix86_section_threshold_string);
2988 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
2990 ix86_section_threshold = i;
2993 if (ix86_tls_dialect_string)
2995 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2996 ix86_tls_dialect = TLS_DIALECT_GNU;
2997 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2998 ix86_tls_dialect = TLS_DIALECT_GNU2;
2999 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3000 ix86_tls_dialect = TLS_DIALECT_SUN;
3002 error ("bad value (%s) for %stls-dialect=%s %s",
3003 ix86_tls_dialect_string, prefix, suffix, sw);
3006 if (ix87_precision_string)
3008 i = atoi (ix87_precision_string);
3009 if (i != 32 && i != 64 && i != 80)
3010 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3015 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3017 /* Enable by default the SSE and MMX builtins. Do allow the user to
3018 explicitly disable any of these. In particular, disabling SSE and
3019 MMX for kernel code is extremely useful. */
3020 if (!ix86_arch_specified)
3022 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3023 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3026 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3030 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3032 if (!ix86_arch_specified)
3034 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3036 /* i386 ABI does not specify red zone. It still makes sense to use it
3037 when programmer takes care to stack from being destroyed. */
3038 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3039 target_flags |= MASK_NO_RED_ZONE;
3042 /* Keep nonleaf frame pointers. */
3043 if (flag_omit_frame_pointer)
3044 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3045 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3046 flag_omit_frame_pointer = 1;
3048 /* If we're doing fast math, we don't care about comparison order
3049 wrt NaNs. This lets us use a shorter comparison sequence. */
3050 if (flag_finite_math_only)
3051 target_flags &= ~MASK_IEEE_FP;
3053 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3054 since the insns won't need emulation. */
3055 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3056 target_flags &= ~MASK_NO_FANCY_MATH_387;
3058 /* Likewise, if the target doesn't have a 387, or we've specified
3059 software floating point, don't use 387 inline intrinsics. */
3061 target_flags |= MASK_NO_FANCY_MATH_387;
3063 /* Turn on MMX builtins for -msse. */
3066 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3067 x86_prefetch_sse = true;
3070 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3071 if (TARGET_SSE4_2 || TARGET_ABM)
3072 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3074 /* Validate -mpreferred-stack-boundary= value or default it to
3075 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3076 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3077 if (ix86_preferred_stack_boundary_string)
3079 i = atoi (ix86_preferred_stack_boundary_string);
3080 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3081 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3082 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3084 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3087 /* Set the default value for -mstackrealign. */
3088 if (ix86_force_align_arg_pointer == -1)
3089 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3091 /* Validate -mincoming-stack-boundary= value or default it to
3092 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3093 if (ix86_force_align_arg_pointer)
3094 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3096 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3097 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3098 if (ix86_incoming_stack_boundary_string)
3100 i = atoi (ix86_incoming_stack_boundary_string);
3101 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3102 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3103 i, TARGET_64BIT ? 4 : 2);
3106 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3107 ix86_incoming_stack_boundary
3108 = ix86_user_incoming_stack_boundary;
3112 /* Accept -msseregparm only if at least SSE support is enabled. */
3113 if (TARGET_SSEREGPARM
3115 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3117 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3118 if (ix86_fpmath_string != 0)
3120 if (! strcmp (ix86_fpmath_string, "387"))
3121 ix86_fpmath = FPMATH_387;
3122 else if (! strcmp (ix86_fpmath_string, "sse"))
3126 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3127 ix86_fpmath = FPMATH_387;
3130 ix86_fpmath = FPMATH_SSE;
3132 else if (! strcmp (ix86_fpmath_string, "387,sse")
3133 || ! strcmp (ix86_fpmath_string, "387+sse")
3134 || ! strcmp (ix86_fpmath_string, "sse,387")
3135 || ! strcmp (ix86_fpmath_string, "sse+387")
3136 || ! strcmp (ix86_fpmath_string, "both"))
3140 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3141 ix86_fpmath = FPMATH_387;
3143 else if (!TARGET_80387)
3145 warning (0, "387 instruction set disabled, using SSE arithmetics");
3146 ix86_fpmath = FPMATH_SSE;
3149 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3152 error ("bad value (%s) for %sfpmath=%s %s",
3153 ix86_fpmath_string, prefix, suffix, sw);
3156 /* If the i387 is disabled, then do not return values in it. */
3158 target_flags &= ~MASK_FLOAT_RETURNS;
3160 /* Use external vectorized library in vectorizing intrinsics. */
3161 if (ix86_veclibabi_string)
3163 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3164 ix86_veclib_handler = ix86_veclibabi_svml;
3165 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3166 ix86_veclib_handler = ix86_veclibabi_acml;
3168 error ("unknown vectorization library ABI type (%s) for "
3169 "%sveclibabi=%s %s", ix86_veclibabi_string,
3170 prefix, suffix, sw);
3173 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3174 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3176 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3178 /* ??? Unwind info is not correct around the CFG unless either a frame
3179 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3180 unwind info generation to be aware of the CFG and propagating states
3182 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3183 || flag_exceptions || flag_non_call_exceptions)
3184 && flag_omit_frame_pointer
3185 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3187 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3188 warning (0, "unwind tables currently require either a frame pointer "
3189 "or %saccumulate-outgoing-args%s for correctness",
3191 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3194 /* If stack probes are required, the space used for large function
3195 arguments on the stack must also be probed, so enable
3196 -maccumulate-outgoing-args so this happens in the prologue. */
3197 if (TARGET_STACK_PROBE
3198 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3200 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3201 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3202 "for correctness", prefix, suffix);
3203 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3206 /* For sane SSE instruction set generation we need fcomi instruction.
3207 It is safe to enable all CMOVE instructions. */
3211 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3214 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3215 p = strchr (internal_label_prefix, 'X');
3216 internal_label_prefix_len = p - internal_label_prefix;
3220 /* When scheduling description is not available, disable scheduler pass
3221 so it won't slow down the compilation and make x87 code slower. */
3222 if (!TARGET_SCHEDULE)
3223 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3225 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3226 set_param_value ("simultaneous-prefetches",
3227 ix86_cost->simultaneous_prefetches);
3228 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3229 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3230 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3231 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3232 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3233 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3235 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3236 can be optimized to ap = __builtin_next_arg (0). */
3238 targetm.expand_builtin_va_start = NULL;
3242 ix86_gen_leave = gen_leave_rex64;
3243 ix86_gen_pop1 = gen_popdi1;
3244 ix86_gen_add3 = gen_adddi3;
3245 ix86_gen_sub3 = gen_subdi3;
3246 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3247 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3248 ix86_gen_monitor = gen_sse3_monitor64;
3249 ix86_gen_andsp = gen_anddi3;
3253 ix86_gen_leave = gen_leave;
3254 ix86_gen_pop1 = gen_popsi1;
3255 ix86_gen_add3 = gen_addsi3;
3256 ix86_gen_sub3 = gen_subsi3;
3257 ix86_gen_sub3_carry = gen_subsi3_carry;
3258 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3259 ix86_gen_monitor = gen_sse3_monitor;
3260 ix86_gen_andsp = gen_andsi3;
3264 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3266 target_flags |= MASK_CLD & ~target_flags_explicit;
3269 /* Save the initial options in case the user does function specific options */
3271 target_option_default_node = target_option_current_node
3272 = build_target_option_node ();
3275 /* Save the current options */
/* Copy the current i386 target option state into PTR (used by the
   attribute((target(...))) machinery to snapshot per-function options).
   The IN_RANGE asserts guard against truncation, since the destination
   fields in struct cl_target_option appear to be byte-sized — TODO
   confirm against the struct declaration, which is not in view.  */
3278 ix86_function_specific_save (struct cl_target_option *ptr)
3280 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3281 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3282 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3283 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3285 ptr->arch = ix86_arch;
3286 ptr->tune = ix86_tune;
3287 ptr->fpmath = ix86_fpmath;
3288 ptr->branch_cost = ix86_branch_cost;
3289 ptr->tune_defaulted = ix86_tune_defaulted;
3290 ptr->arch_specified = ix86_arch_specified;
3291 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3292 ptr->target_flags_explicit = target_flags_explicit;
3295 /* Restore the current options */
/* Inverse of ix86_function_specific_save: copy the option state saved
   in PTR back into the global i386 option variables, and rebuild the
   cached per-arch / per-tune feature bitmaps only when the arch or tune
   actually changed (rebuilding is a loop over all feature indices).  */
3298 ix86_function_specific_restore (struct cl_target_option *ptr)
3300 enum processor_type old_tune = ix86_tune;
3301 enum processor_type old_arch = ix86_arch;
3302 unsigned int ix86_arch_mask, ix86_tune_mask;
3305 ix86_arch = ptr->arch;
3306 ix86_tune = ptr->tune;
3307 ix86_fpmath = ptr->fpmath;
3308 ix86_branch_cost = ptr->branch_cost;
3309 ix86_tune_defaulted = ptr->tune_defaulted;
3310 ix86_arch_specified = ptr->arch_specified;
3311 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3312 target_flags_explicit = ptr->target_flags_explicit;
3314 /* Recreate the arch feature tests if the arch changed */
3315 if (old_arch != ix86_arch)
3317 ix86_arch_mask = 1u << ix86_arch;
3318 for (i = 0; i < X86_ARCH_LAST; ++i)
3319 ix86_arch_features[i]
3320 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3323 /* Recreate the tune optimization tests */
3324 if (old_tune != ix86_tune)
3326 ix86_tune_mask = 1u << ix86_tune;
3327 for (i = 0; i < X86_TUNE_LAST; ++i)
3328 ix86_tune_features[i]
3329 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3333 /* Print the current options */
/* Dump the target options in PTR to FILE, indented by INDENT columns,
   for debugging.  The "%*s" with an empty string is the usual trick to
   emit INDENT spaces of indentation.  Frees the string returned by
   ix86_target_string, so that helper presumably returns heap-allocated
   memory owned by the caller — confirm against its definition.  */
3336 ix86_function_specific_print (FILE *file, int indent,
3337 struct cl_target_option *ptr)
3340 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3341 NULL, NULL, NULL, false);
3343 fprintf (file, "%*sarch = %d (%s)\n",
3346 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3347 ? cpu_names[ptr->arch]
3350 fprintf (file, "%*stune = %d (%s)\n",
3353 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3354 ? cpu_names[ptr->tune]
3357 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3358 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3359 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3360 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3364 fprintf (file, "%*s%s\n", indent, "", target_string);
3365 free (target_string);
3370 /* Inner function to process the attribute((target(...))), take an argument and
3371 set the current options from the argument. If we have a list, recursively go
/* ARGS is either a TREE_LIST (recursed over element by element) or a
   STRING_CST holding comma-separated option names such as
   "sse4.2,no-fancy-math-387,arch=core2".  Recognized string-valued
   options (arch=/tune=/fpmath=) are stored into P_STRINGS for the
   caller to apply; ISA and flag options are applied immediately.  */
3375 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Table-entry builders: option string, its length, handler category,
   option enum, and (for yes/no entries) the target_flags mask.  */
3380 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3381 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3382 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3383 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3398 enum ix86_opt_type type;
3403 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3404 IX86_ATTR_ISA ("abm", OPT_mabm),
3405 IX86_ATTR_ISA ("aes", OPT_maes),
3406 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3407 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3408 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3409 IX86_ATTR_ISA ("sse", OPT_msse),
3410 IX86_ATTR_ISA ("sse2", OPT_msse2),
3411 IX86_ATTR_ISA ("sse3", OPT_msse3),
3412 IX86_ATTR_ISA ("sse4", OPT_msse4),
3413 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3414 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3415 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3416 IX86_ATTR_ISA ("sse5", OPT_msse5),
3417 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3419 /* string options */
3420 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3421 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3422 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3425 IX86_ATTR_YES ("cld",
3429 IX86_ATTR_NO ("fancy-math-387",
3430 OPT_mfancy_math_387,
3431 MASK_NO_FANCY_MATH_387),
3433 IX86_ATTR_NO ("fused-madd",
3435 MASK_NO_FUSED_MADD),
3437 IX86_ATTR_YES ("ieee-fp",
3441 IX86_ATTR_YES ("inline-all-stringops",
3442 OPT_minline_all_stringops,
3443 MASK_INLINE_ALL_STRINGOPS),
3445 IX86_ATTR_YES ("inline-stringops-dynamically",
3446 OPT_minline_stringops_dynamically,
3447 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3449 IX86_ATTR_NO ("align-stringops",
3450 OPT_mno_align_stringops,
3451 MASK_NO_ALIGN_STRINGOPS),
3453 IX86_ATTR_YES ("recip",
3459 /* If this is a list, recurse to get the options. */
3460 if (TREE_CODE (args) == TREE_LIST)
3464 for (; args; args = TREE_CHAIN (args))
3465 if (TREE_VALUE (args)
3466 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3472 else if (TREE_CODE (args) != STRING_CST)
3475 /* Handle multiple arguments separated by commas. */
3476 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3478 while (next_optstr && *next_optstr != '\0')
3480 char *p = next_optstr;
3482 char *comma = strchr (next_optstr, ',');
3483 const char *opt_string;
3484 size_t len, opt_len;
3489 enum ix86_opt_type type = ix86_opt_unknown;
3495 len = comma - next_optstr;
3496 next_optstr = comma + 1;
3504 /* Recognize no-xxx. */
3505 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3514 /* Find the option. */
/* Linear scan of the table; first-character check is a cheap filter
   before the memcmp.  String-valued options ("arch=") only need a
   prefix match (len > opt_len); all others must match exactly.  */
3517 for (i = 0; i < sizeof (attrs) / sizeof (attrs[0]); i++)
3519 type = attrs[i].type;
3520 opt_len = attrs[i].len;
3521 if (ch == attrs[i].string[0]
3522 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3523 && memcmp (p, attrs[i].string, opt_len) == 0)
3526 mask = attrs[i].mask;
3527 opt_string = attrs[i].string;
3532 /* Process the option. */
3535 error ("attribute(target(\"%s\")) is unknown", orig_p);
3539 else if (type == ix86_opt_isa)
3540 ix86_handle_option (opt, p, opt_set_p);
3542 else if (type == ix86_opt_yes || type == ix86_opt_no)
3544 if (type == ix86_opt_no)
3545 opt_set_p = !opt_set_p;
3548 target_flags |= mask;
3550 target_flags &= ~mask;
3553 else if (type == ix86_opt_str)
3557 error ("option(\"%s\") was already specified", opt_string);
/* Ownership: xstrdup'd value is freed by the caller
   (ix86_valid_target_attribute_tree).  */
3561 p_strings[opt] = xstrdup (p + opt_len);
3571 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Parses ARGS via ix86_valid_target_attribute_inner_p, and if the result
   differs from the build-time defaults, temporarily installs the new
   arch/tune/fpmath strings, reruns override_options, registers any newly
   enabled builtins, and snapshots the state into a target-option node.
   The original global strings are restored before returning.  */
3574 ix86_valid_target_attribute_tree (tree args)
3576 const char *orig_arch_string = ix86_arch_string;
3577 const char *orig_tune_string = ix86_tune_string;
3578 const char *orig_fpmath_string = ix86_fpmath_string;
3579 int orig_tune_defaulted = ix86_tune_defaulted;
3580 int orig_arch_specified = ix86_arch_specified;
3581 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3584 struct cl_target_option *def
3585 = TREE_TARGET_OPTION (target_option_default_node);
3587 /* Process each of the options on the chain. */
3588 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3591 /* If the changed options are different from the default, rerun override_options,
3592 and then save the options away. The string options are attribute options,
3593 and will be undone when we copy the save structure. */
3594 if (ix86_isa_flags != def->ix86_isa_flags
3595 || target_flags != def->target_flags
3596 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3597 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3598 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3600 /* If we are using the default tune= or arch=, undo the string assigned,
3601 and use the default. */
3602 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3603 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3604 else if (!orig_arch_specified)
3605 ix86_arch_string = NULL;
3607 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3608 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3609 else if (orig_tune_defaulted)
3610 ix86_tune_string = NULL;
3612 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3613 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3614 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3615 else if (!TARGET_64BIT && TARGET_SSE)
3616 ix86_fpmath_string = "sse,387";
3618 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3619 override_options (false);
3621 /* Add any builtin functions with the new isa if any. */
3622 ix86_add_new_builtins (ix86_isa_flags);
3624 /* Save the current options unless we are validating options for
3626 t = build_target_option_node ();
3628 ix86_arch_string = orig_arch_string;
3629 ix86_tune_string = orig_tune_string;
3630 ix86_fpmath_string = orig_fpmath_string;
3632 /* Free up memory allocated to hold the strings */
3633 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
/* NOTE(review): the NULL guard is redundant — free (NULL) is a no-op.  */
3634 if (option_strings[i])
3635 free (option_strings[i]);
3641 /* Hook to validate attribute((target("string"))). */
/* Validates the target attribute on FNDECL.  Saves the current target
   option state, evaluates ARGS (which may also alter optimization
   flags), attaches the resulting target/optimization nodes to FNDECL,
   and restores both the target and optimization state afterwards so
   compilation of other functions is unaffected.  */
3644 ix86_valid_target_attribute_p (tree fndecl,
3645 tree ARG_UNUSED (name),
3647 int ARG_UNUSED (flags))
3649 struct cl_target_option cur_target;
3651 tree old_optimize = build_optimization_node ();
3652 tree new_target, new_optimize;
3653 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3655 /* If the function changed the optimization levels as well as setting target
3656 options, start with the optimizations specified. */
3657 if (func_optimize && func_optimize != old_optimize)
3658 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3660 /* The target attributes may also change some optimization flags, so update
3661 the optimization options if necessary. */
3662 cl_target_option_save (&cur_target);
3663 new_target = ix86_valid_target_attribute_tree (args);
3664 new_optimize = build_optimization_node ();
3671 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3673 if (old_optimize != new_optimize)
3674 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
3677 cl_target_option_restore (&cur_target);
3679 if (old_optimize != new_optimize)
3680 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3686 /* Hook to determine if one function can safely inline another. */
/* Compares the per-function target options of CALLER and CALLEE.
   Inlining is allowed when the callee's ISA flags are a subset of the
   caller's and the remaining option fields match exactly; a callee
   with options cannot be inlined into an attribute-less caller.  */
3689 ix86_can_inline_p (tree caller, tree callee)
3692 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3693 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3695 /* If callee has no option attributes, then it is ok to inline. */
3699 /* If caller has no option attributes, but callee does then it is not ok to
3701 else if (!caller_tree)
3706 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3707 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3709 /* Callee's isa options should be a subset of the caller's, i.e. a SSE5 function
3710 can inline a SSE2 function but a SSE2 function can't inline a SSE5
3712 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3713 != callee_opts->ix86_isa_flags)
3716 /* See if we have the same non-isa options. */
3717 else if (caller_opts->target_flags != callee_opts->target_flags)
3720 /* See if arch, tune, etc. are the same. */
3721 else if (caller_opts->arch != callee_opts->arch)
3724 else if (caller_opts->tune != callee_opts->tune)
3727 else if (caller_opts->fpmath != callee_opts->fpmath)
3730 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3741 /* Remember the last target of ix86_set_current_function. */
3742 static GTY(()) tree ix86_previous_fndecl;
3744 /* Establish appropriate back-end context for processing the function
3745 FNDECL. The argument might be NULL to indicate processing at top
3746 level, outside of any function scope. */
/* Restores FNDECL's saved target options (or the current defaults when
   FNDECL has none).  ix86_previous_fndecl caches the last decl so
   repeated calls for the same function are cheap no-ops.  */
3748 ix86_set_current_function (tree fndecl)
3750 /* Only change the context if the function changes. This hook is called
3751 several times in the course of compiling a function, and we don't want to
3752 slow things down too much or call target_reinit when it isn't safe. */
3753 if (fndecl && fndecl != ix86_previous_fndecl)
3755 tree old_tree = (ix86_previous_fndecl
3756 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3759 tree new_tree = (fndecl
3760 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3763 ix86_previous_fndecl = fndecl;
3764 if (old_tree == new_tree)
3769 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
3775 struct cl_target_option *def
3776 = TREE_TARGET_OPTION (target_option_current_node);
3778 cl_target_option_restore (def);
3785 /* Return true if this goes in large data/bss. */
/* Only relevant for the x86-64 medium code models; functions never
   qualify.  A variable qualifies if it was explicitly placed in
   .ldata/.lbss, or if its size is zero/unknown (incomplete type) or
   exceeds the -mlarge-data-threshold value.  */
3788 ix86_in_large_data_p (tree exp)
3790 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3793 /* Functions are never large data. */
3794 if (TREE_CODE (exp) == FUNCTION_DECL)
3797 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3799 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3800 if (strcmp (section, ".ldata") == 0
3801 || strcmp (section, ".lbss") == 0)
3807 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3809 /* If this is an incomplete type with size 0, then we can't put it
3810 in data because it might be too big when completed. */
3811 if (!size || size > ix86_section_threshold)
3818 /* Switch to the appropriate section for output of DECL.
3819 DECL is either a `VAR_DECL' node or a constant of some sort.
3820 RELOC indicates whether forming the initial value of DECL requires
3821 link-time relocations. */
3823 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
/* Medium-model large data is redirected to the ".l"-prefixed section
   variants (.ldata, .lbss, .lrodata, ...); everything else falls
   through to the generic ELF section selector.  */
3827 x86_64_elf_select_section (tree decl, int reloc,
3828 unsigned HOST_WIDE_INT align)
3830 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3831 && ix86_in_large_data_p (decl))
3833 const char *sname = NULL;
3834 unsigned int flags = SECTION_WRITE;
3835 switch (categorize_decl_for_section (decl, reloc))
3840 case SECCAT_DATA_REL:
3841 sname = ".ldata.rel";
3843 case SECCAT_DATA_REL_LOCAL:
3844 sname = ".ldata.rel.local";
3846 case SECCAT_DATA_REL_RO:
3847 sname = ".ldata.rel.ro";
3849 case SECCAT_DATA_REL_RO_LOCAL:
3850 sname = ".ldata.rel.ro.local";
3854 flags |= SECTION_BSS;
3857 case SECCAT_RODATA_MERGE_STR:
3858 case SECCAT_RODATA_MERGE_STR_INIT:
3859 case SECCAT_RODATA_MERGE_CONST:
3863 case SECCAT_SRODATA:
3870 /* We don't split these for medium model. Place them into
3871 default sections and hope for best. */
3873 case SECCAT_EMUTLS_VAR:
3874 case SECCAT_EMUTLS_TMPL:
3879 /* We might get called with string constants, but get_named_section
3880 doesn't like them as they are not DECLs. Also, we need to set
3881 flags in that case. */
3883 return get_section (sname, flags, NULL);
3884 return get_named_section (decl, sname, reloc);
3887 return default_elf_select_section (decl, reloc, align);
3890 /* Build up a unique section name, expressed as a
3891 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
3892 RELOC indicates whether the initial value of EXP requires
3893 link-time relocations. */
3895 static void ATTRIBUTE_UNUSED
/* Medium-model counterpart of default_unique_section: large data gets a
   per-decl section named <.gnu.linkonce><prefix>.<decl-name>, where the
   prefix is a large-data (".l*") variant chosen by section category.  */
3896 x86_64_elf_unique_section (tree decl, int reloc)
3898 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3899 && ix86_in_large_data_p (decl))
3901 const char *prefix = NULL;
3902 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
3903 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
3905 switch (categorize_decl_for_section (decl, reloc))
3908 case SECCAT_DATA_REL:
3909 case SECCAT_DATA_REL_LOCAL:
3910 case SECCAT_DATA_REL_RO:
3911 case SECCAT_DATA_REL_RO_LOCAL:
3912 prefix = one_only ? ".ld" : ".ldata";
3915 prefix = one_only ? ".lb" : ".lbss";
3918 case SECCAT_RODATA_MERGE_STR:
3919 case SECCAT_RODATA_MERGE_STR_INIT:
3920 case SECCAT_RODATA_MERGE_CONST:
3921 prefix = one_only ? ".lr" : ".lrodata";
3923 case SECCAT_SRODATA:
3930 /* We don't split these for medium model. Place them into
3931 default sections and hope for best. */
3933 case SECCAT_EMUTLS_VAR:
3934 prefix = targetm.emutls.var_section;
3936 case SECCAT_EMUTLS_TMPL:
3937 prefix = targetm.emutls.tmpl_section;
3942 const char *name, *linkonce;
3945 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
3946 name = targetm.strip_name_encoding (name);
3948 /* If we're using one_only, then there needs to be a .gnu.linkonce
3949 prefix to the section name. */
3950 linkonce = one_only ? ".gnu.linkonce" : "";
3952 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
3954 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
3958 default_unique_section (decl, reloc);
3961 #ifdef COMMON_ASM_OP
3962 /* This says how to output assembler code to declare an
3963 uninitialized external linkage data object.
3965 For medium model x86-64 we need to use .largecomm opcode for
/* Emits either ".largecomm" (medium model, object larger than the
   -mlarge-data-threshold) or the regular COMMON_ASM_OP, followed by
   "name,size,alignment-in-bytes".  */
3968 x86_elf_aligned_common (FILE *file,
3969 const char *name, unsigned HOST_WIDE_INT size,
3972 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3973 && size > (unsigned int)ix86_section_threshold)
3974 fprintf (file, ".largecomm\t");
3976 fprintf (file, "%s", COMMON_ASM_OP);
3977 assemble_name (file, name);
3978 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
3979 size, align / BITS_PER_UNIT);
3983 /* Utility function for targets to use in implementing
3984 ASM_OUTPUT_ALIGNED_BSS. */
/* Switches to .lbss for medium-model large objects, otherwise to the
   regular bss section, then emits alignment, the object's label, and
   SIZE bytes of skip (at least 1, so a zero-sized object still
   occupies a distinct address).  */
3987 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
3988 const char *name, unsigned HOST_WIDE_INT size,
3991 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3992 && size > (unsigned int)ix86_section_threshold)
3993 switch_to_section (get_named_section (decl, ".lbss", 0));
3995 switch_to_section (bss_section);
3996 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
3997 #ifdef ASM_DECLARE_OBJECT_NAME
3998 last_assemble_variable_decl = decl;
3999 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4001 /* Standard thing is just output label for the object. */
4002 ASM_OUTPUT_LABEL (file, name);
4003 #endif /* ASM_DECLARE_OBJECT_NAME */
4004 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set i386-specific optimization defaults for the given -O LEVEL.
   Values set to 2 here are sentinels meaning "not specified by the
   user"; override_options later replaces them with real defaults once
   TARGET_64BIT is known.  */
4008 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4010 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4011 make the problem with not enough registers even worse. */
4012 #ifdef INSN_SCHEDULING
4014 flag_schedule_insns = 0;
4018 /* The Darwin libraries never set errno, so we might as well
4019 avoid calling them when that's the only reason we would. */
4020 flag_errno_math = 0;
4022 /* The default values of these switches depend on the TARGET_64BIT
4023 that is not known at this moment. Mark these values with 2 and
4024 let the user override these. In case there is no command line option
4025 specifying them, we will set the defaults in override_options. */
4027 flag_omit_frame_pointer = 2;
4028 flag_pcc_struct_return = 2;
4029 flag_asynchronous_unwind_tables = 2;
4030 flag_vect_cost_model = 1;
4031 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4032 SUBTARGET_OPTIMIZATION_OPTIONS;
4036 /* Decide whether we can make a sibling call to a function. DECL is the
4037 declaration of the function being targeted by the call and EXP is the
4038 CALL_EXPR representing the call. */
4041 ix86_function_ok_for_sibcall (tree decl, tree exp)
4046 /* If we are generating position-independent code, we cannot sibcall
4047 optimize any indirect call, or a direct call to a global function,
4048 as the PLT requires %ebx be live. */
4049 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4056 func = TREE_TYPE (CALL_EXPR_FN (exp));
4057 if (POINTER_TYPE_P (func))
4058 func = TREE_TYPE (func);
4061 /* Check that the return value locations are the same. Like
4062 if we are returning floats on the 80387 register stack, we cannot
4063 make a sibcall from a function that doesn't return a float to a
4064 function that does or, conversely, from a function that does return
4065 a float to a function that doesn't; the necessary stack adjustment
4066 would not be executed. This is also the place we notice
4067 differences in the return value ABI. Note that it is ok for one
4068 of the functions to have void return type as long as the return
4069 value of the other is passed in a register. */
4070 a = ix86_function_value (TREE_TYPE (exp), func, false);
4071 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4073 if (STACK_REG_P (a) || STACK_REG_P (b))
4075 if (!rtx_equal_p (a, b))
4078 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4080 else if (!rtx_equal_p (a, b))
4083 /* If this call is indirect, we'll need to be able to use a call-clobbered
4084 register for the address of the target function. Make sure that all
4085 such registers are not used for passing parameters. */
4086 if (!decl && !TARGET_64BIT)
4090 /* We're looking at the CALL_EXPR, we need the type of the function. */
4091 type = CALL_EXPR_FN (exp); /* pointer expression */
4092 type = TREE_TYPE (type); /* pointer type */
4093 type = TREE_TYPE (type); /* function type */
/* With 3+ regparm arguments, EAX/EDX/ECX are all taken by parameters,
   leaving no call-clobbered register for the indirect target address.  */
4095 if (ix86_function_regparm (type, NULL) >= 3)
4097 /* ??? Need to count the actual number of registers to be used,
4098 not the possible number of registers. Fix later. */
4103 /* Dllimport'd functions are also called indirectly. */
4104 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4105 && decl && DECL_DLLIMPORT_P (decl)
4106 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4109 /* Otherwise okay. That also includes certain types of indirect calls. */
4113 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4114 calling convention attributes;
4115 arguments as in struct attribute_spec.handler. */
/* Validates placement of a calling-convention attribute and rejects
   mutually incompatible combinations.  NOTE(review): elided listing --
   the function signature's remaining parameters, 64-bit handling and the
   final return are not visible here.  */
4118 ix86_handle_cconv_attribute (tree *node, tree name,
4120 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on (pointers to) functions.  */
4123 if (TREE_CODE (*node) != FUNCTION_TYPE
4124 && TREE_CODE (*node) != METHOD_TYPE
4125 && TREE_CODE (*node) != FIELD_DECL
4126 && TREE_CODE (*node) != TYPE_DECL)
4128 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4129 IDENTIFIER_POINTER (name));
4130 *no_add_attrs = true;
4134 /* Can combine regparm with all attributes but fastcall. */
4135 if (is_attribute_p ("regparm", name))
4139 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4141 error ("fastcall and regparm attributes are not compatible");
/* regparm takes one argument: an integer constant no larger than
   REGPARM_MAX.  */
4144 cst = TREE_VALUE (args);
4145 if (TREE_CODE (cst) != INTEGER_CST)
4147 warning (OPT_Wattributes,
4148 "%qs attribute requires an integer constant argument",
4149 IDENTIFIER_POINTER (name));
4150 *no_add_attrs = true;
4152 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4154 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4155 IDENTIFIER_POINTER (name), REGPARM_MAX);
4156 *no_add_attrs = true;
4164 /* Do not warn when emulating the MS ABI. */
4165 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4166 warning (OPT_Wattributes, "%qs attribute ignored",
4167 IDENTIFIER_POINTER (name));
4168 *no_add_attrs = true;
4172 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4173 if (is_attribute_p ("fastcall", name))
4175 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4177 error ("fastcall and cdecl attributes are not compatible");
4179 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4181 error ("fastcall and stdcall attributes are not compatible");
4183 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4185 error ("fastcall and regparm attributes are not compatible");
4189 /* Can combine stdcall with fastcall (redundant), regparm and
4191 else if (is_attribute_p ("stdcall", name))
4193 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4195 error ("stdcall and cdecl attributes are not compatible");
4197 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4199 error ("stdcall and fastcall attributes are not compatible");
4203 /* Can combine cdecl with regparm and sseregparm. */
4204 else if (is_attribute_p ("cdecl", name))
4206 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4208 error ("stdcall and cdecl attributes are not compatible");
4210 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4212 error ("fastcall and cdecl attributes are not compatible");
4216 /* Can combine sseregparm with all attributes. */
4221 /* Return 0 if the attributes for two types are incompatible, 1 if they
4222 are compatible, and 2 if they are nearly compatible (which causes a
4223 warning to be generated). */
4226 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4228 /* Check for mismatch of non-default calling convention. */
/* With -mrtd the default convention is stdcall, so a "cdecl" attribute is
   the deviation worth comparing; without it, "stdcall" is.  */
4229 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Only function/method types carry calling-convention attributes.  */
4231 if (TREE_CODE (type1) != FUNCTION_TYPE
4232 && TREE_CODE (type1) != METHOD_TYPE)
4235 /* Check for mismatched fastcall/regparm types. */
/* The "!x != !y" idiom normalizes lookup_attribute results to booleans
   and compares presence/absence of the attribute.  */
4236 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4237 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4238 || (ix86_function_regparm (type1, NULL)
4239 != ix86_function_regparm (type2, NULL)))
4242 /* Check for mismatched sseregparm types. */
4243 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4244 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4247 /* Check for mismatched return types (cdecl vs stdcall). */
4248 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4249 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4255 /* Return the regparm value for a function with the indicated TYPE and DECL.
4256 DECL may be NULL when calling function indirectly
4257 or considering a libcall. */
/* NOTE(review): elided listing -- braces, returns and some declarations
   between the visible lines are not shown.  */
4260 ix86_function_regparm (const_tree type, const_tree decl)
4263 int regparm = ix86_regparm;
/* Static guard so the "nested functions" error is emitted only once per
   compilation.  */
4265 static bool error_issued;
4269 if (ix86_function_type_abi (type) == DEFAULT_ABI)
4271 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
/* An explicit regparm(N) attribute overrides the -mregparm default.  */
4274 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4278 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4280 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4282 /* We can't use regparm(3) for nested functions because
4283 these pass static chain pointer in %ecx register. */
4284 if (!error_issued && regparm == 3
4285 && decl_function_context (decl)
4286 && !DECL_NO_STATIC_CHAIN (decl))
4288 error ("nested functions are limited to 2 register parameters");
4289 error_issued = true;
/* fastcall implies exactly 2 register parameters (%ecx, %edx);
   presumably the elided line returns 2 here -- TODO confirm.  */
4297 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4300 /* Use register calling convention for local functions when possible. */
4301 if (decl && TREE_CODE (decl) == FUNCTION_DECL
4304 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4305 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4308 int local_regparm, globals = 0, regno;
4311 /* Make sure no regparm register is taken by a
4312 fixed register variable. */
4313 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4314 if (fixed_regs[local_regparm])
4317 /* We can't use regparm(3) for nested functions as these use
4318 static chain pointer in third argument. */
4319 if (local_regparm == 3
4320 && decl_function_context (decl)
4321 && !DECL_NO_STATIC_CHAIN (decl))
4324 /* If the function realigns its stackpointer, the prologue will
4325 clobber %ecx. If we've already generated code for the callee,
4326 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4327 scanning the attributes for the self-realigning property. */
4328 f = DECL_STRUCT_FUNCTION (decl);
4329 /* Since current internal arg pointer won't conflict with
4330 parameter passing regs, so no need to change stack
4331 realignment and adjust regparm number.
4333 Each fixed register usage increases register pressure,
4334 so less registers should be used for argument passing.
4335 This functionality can be overriden by an explicit
4337 for (regno = 0; regno <= DI_REG; regno++)
4338 if (fixed_regs[regno])
/* Reduce the local regparm count by the number of fixed (global
   register variable) registers found above.  */
4342 = globals < local_regparm ? local_regparm - globals : 0;
4344 if (local_regparm > regparm)
4345 regparm = local_regparm;
4352 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4353 DFmode (2) arguments in SSE registers for a function with the
4354 indicated TYPE and DECL. DECL may be NULL when calling function
4355 indirectly or considering a libcall. Otherwise return 0. */
4358 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* This routine is only meaningful for the 32-bit ABI; 64-bit always
   uses SSE registers for FP arguments.  */
4360 gcc_assert (!TARGET_64BIT);
4362 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4363 by the sseregparm attribute. */
4364 if (TARGET_SSEREGPARM
4365 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE hardware support is a hard error (elided
   condition presumably checks !TARGET_SSE -- TODO confirm).  */
4372 error ("Calling %qD with attribute sseregparm without "
4373 "SSE/SSE2 enabled", decl);
4375 error ("Calling %qT with attribute sseregparm without "
4376 "SSE/SSE2 enabled", type);
4384 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4385 (and DFmode for SSE2) arguments in SSE registers. */
4386 if (decl && TARGET_SSE_MATH && !profile_flag)
4388 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4389 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
/* 2 means both SFmode and DFmode qualify; 1 means SFmode only.  */
4391 return TARGET_SSE2 ? 2 : 1;
4397 /* Return true if EAX is live at the start of the function. Used by
4398 ix86_expand_prologue to determine if we need special help before
4399 calling allocate_stack_worker. */
4402 ix86_eax_live_at_start_p (void)
4404 /* Cheat. Don't bother working forward from ix86_function_regparm
4405 to the function type to whether an actual argument is located in
4406 eax. Instead just look at cfg info, which is still close enough
4407 to correct at this point. This gives false positives for broken
4408 functions that might use uninitialized data that happens to be
4409 allocated in eax, but who cares? */
/* Register 0 is AX; test its bit in the dataflow live-out set of the
   entry block.  */
4410 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4413 /* Value is the number of bytes of arguments automatically
4414 popped when returning from a subroutine call.
4415 FUNDECL is the declaration node of the function (as a tree),
4416 FUNTYPE is the data type of the function (as a tree),
4417 or for a library call it is an identifier node for the subroutine name.
4418 SIZE is the number of bytes of arguments passed on the stack.
4420 On the 80386, the RTD insn may be used to pop them if the number
4421 of args is fixed, but if the number is variable then the caller
4422 must pop them all. RTD can't be used for library calls now
4423 because the library is compiled with the Unix compiler.
4424 Use of RTD is a selectable option, since it is incompatible with
4425 standard Unix calling sequences. If the option is not selected,
4426 the caller must always pop the args.
4428 The attribute stdcall is equivalent to RTD on a per module basis. */
4431 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4435 /* None of the 64-bit ABIs pop arguments. */
/* Library calls (IDENTIFIER_NODE fundecl) never use RTD semantics.  */
4439 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4441 /* Cdecl functions override -mrtd, and never pop the stack. */
4442 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4444 /* Stdcall and fastcall functions will pop the stack if not
4446 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4447 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* RTD only applies when the callee has a fixed argument list.  */
4450 if (rtd && ! stdarg_p (funtype))
4454 /* Lose any fake structure return argument if it is passed on the stack. */
4455 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4456 && !KEEP_AGGREGATE_RETURN_POINTER)
4458 int nregs = ix86_function_regparm (funtype, fundecl);
/* The callee pops the hidden struct-return pointer (one word).  */
4460 return GET_MODE_SIZE (Pmode);
4466 /* Argument support functions. */
4468 /* Return true when register may be used to pass function parameters. */
4470 ix86_function_arg_regno_p (int regno)
4473 const int *parm_regs;
/* 32-bit paths: integer regparm registers, plus SSE/MMX registers when
   the corresponding ISA is enabled.  NOTE(review): the surrounding
   TARGET_64BIT branching is elided from this listing.  */
4478 return (regno < REGPARM_MAX
4479 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4481 return (regno < REGPARM_MAX
4482 || (TARGET_MMX && MMX_REGNO_P (regno)
4483 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4484 || (TARGET_SSE && SSE_REGNO_P (regno)
4485 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4490 if (SSE_REGNO_P (regno) && TARGET_SSE)
4495 if (TARGET_SSE && SSE_REGNO_P (regno)
4496 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4500 /* TODO: The function should depend on current function ABI but
4501 builtins.c would need updating then. Therefore we use the
4504 /* RAX is used as hidden argument to va_arg functions. */
4505 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
/* 64-bit: pick the integer parameter-register table for the default
   ABI and scan it for REGNO.  */
4508 if (DEFAULT_ABI == MS_ABI)
4509 parm_regs = x86_64_ms_abi_int_parameter_registers;
4511 parm_regs = x86_64_int_parameter_registers;
4512 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
4513 : X86_64_REGPARM_MAX); i++)
4514 if (regno == parm_regs[i])
4519 /* Return if we do not know how to pass TYPE solely in registers. */
4522 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
/* Defer to the generic test first (variable-sized or padded types).  */
4524 if (must_pass_in_stack_var_size_or_pad (mode, type))
4527 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4528 The layout_type routine is crafty and tries to trick us into passing
4529 currently unsupported vector types on the stack by using TImode. */
4530 return (!TARGET_64BIT && mode == TImode
4531 && type && TREE_CODE (type) != VECTOR_TYPE);
4534 /* It returns the size, in bytes, of the area reserved for arguments passed
4535 in registers for the function represented by fndecl dependent to the used
/* (Continuation elided in this listing; presumably "...ABI".)  */
4538 ix86_reg_parm_stack_space (const_tree fndecl)
4541 /* For libcalls it is possible that there is no fndecl at hand.
4542 Therefore assume for this case the default abi of the target. */
4544 call_abi = DEFAULT_ABI;
4546 call_abi = ix86_function_abi (fndecl);
4552 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
4555 ix86_function_type_abi (const_tree fntype)
4557 if (TARGET_64BIT && fntype != NULL)
/* The ms_abi / sysv_abi type attributes override the configured
   default ABI in either direction.  */
4560 if (DEFAULT_ABI == SYSV_ABI)
4561 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
4563 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
/* Return the calling ABI (SYSV_ABI or MS_ABI) for function decl FNDECL,
   derived from its type.  */
4571 ix86_function_abi (const_tree fndecl)
4575 return ix86_function_type_abi (TREE_TYPE (fndecl));
4578 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4581 ix86_cfun_abi (void)
/* Outside a function context, or in 32-bit mode, the cached per-function
   call_abi is not meaningful (elided branch presumably returns the
   default ABI -- TODO confirm).  */
4583 if (! cfun || ! TARGET_64BIT)
4585 return cfun->machine->call_abi;
4589 extern void init_regs (void);
4591 /* Implementation of call abi switching target hook. Specific to FNDECL
4592 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4594 To prevent redudant calls of costy function init_regs (), it checks not to
4595 reset register usage for default abi. */
4597 ix86_call_abi_override (const_tree fndecl)
4599 if (fndecl == NULL_TREE)
4600 cfun->machine->call_abi = DEFAULT_ABI;
4602 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
/* Under the 64-bit MS ABI, RSI and RDI are callee-saved; flip their
   call_used_regs entries only when they are currently wrong, to avoid
   unnecessary (costly) re-initialization.  */
4603 if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
4605 if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
4607 call_used_regs[4 /*RSI*/] = 0;
4608 call_used_regs[5 /*RDI*/] = 0;
/* Under the SysV 64-bit ABI, RSI and RDI are call-clobbered (they carry
   the first two integer arguments).  */
4612 else if (TARGET_64BIT)
4614 if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
4616 call_used_regs[4 /*RSI*/] = 1;
4617 call_used_regs[5 /*RDI*/] = 1;
4623 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4624 for a call to a function whose data type is FNTYPE.
4625 For a library call, FNTYPE is 0. */
4628 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4629 tree fntype, /* tree ptr for function decl */
4630 rtx libname, /* SYMBOL_REF of library name or 0 */
4633 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
/* Start from a zeroed state; every field below is an override.  */
4634 memset (cum, 0, sizeof (*cum));
4636 cum->call_abi = ix86_function_type_abi (fntype);
4637 /* Set up the number of registers to use for passing arguments. */
4638 cum->nregs = ix86_regparm;
/* If the callee's ABI differs from the compiler default, switch the
   integer/SSE register budgets to the other ABI's limits.  */
4641 if (cum->call_abi != DEFAULT_ABI)
4642 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
4647 cum->sse_nregs = SSE_REGPARM_MAX;
4650 if (cum->call_abi != DEFAULT_ABI)
4651 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4652 : X64_SSE_REGPARM_MAX;
4656 cum->mmx_nregs = MMX_REGPARM_MAX;
4657 cum->warn_avx = true;
4658 cum->warn_sse = true;
4659 cum->warn_mmx = true;
4661 /* Because type might mismatch in between caller and callee, we need to
4662 use actual type of function for local calls.
4663 FIXME: cgraph_analyze can be told to actually record if function uses
4664 va_start so for local functions maybe_vaarg can be made aggressive
4666 FIXME: once typesytem is fixed, we won't need this code anymore. */
4668 fntype = TREE_TYPE (fndecl);
/* A function is maybe-varargs when it is unprototyped or declared with
   an ellipsis.  */
4669 cum->maybe_vaarg = (fntype
4670 ? (!prototype_p (fntype) || stdarg_p (fntype))
4675 /* If there are variable arguments, then we won't pass anything
4676 in registers in 32-bit mode. */
4677 if (stdarg_p (fntype))
4688 /* Use ecx and edx registers if function has fastcall attribute,
4689 else look for regparm information. */
4692 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4698 cum->nregs = ix86_function_regparm (fntype, fndecl);
4701 /* Set up the number of SSE registers used for passing SFmode
4702 and DFmode arguments. Warn for mismatching ABI. */
4703 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4707 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4708 But in the case of vector types, it is some vector mode.
4710 When we have only some of our vector isa extensions enabled, then there
4711 are some modes for which vector_mode_supported_p is false. For these
4712 modes, the generic vector support in gcc will choose some non-vector mode
4713 in order to implement the type. By computing the natural mode, we'll
4714 select the proper ABI location for the operand and not depend on whatever
4715 the middle-end decides to do with these vector types. */
4717 static enum machine_mode
4718 type_natural_mode (const_tree type)
4720 enum machine_mode mode = TYPE_MODE (type);
/* Only reconstruct a mode for vector types the middle-end lowered to a
   non-vector mode.  */
4722 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4724 HOST_WIDE_INT size = int_size_in_bytes (type);
4725 if ((size == 8 || size == 16)
4726 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4727 && TYPE_VECTOR_SUBPARTS (type) > 1)
4729 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Start the search at the first vector mode of the right class
   (float vs integer element type).  */
4731 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4732 mode = MIN_MODE_VECTOR_FLOAT;
4734 mode = MIN_MODE_VECTOR_INT;
4736 /* Get the mode which has this inner mode and number of units. */
4737 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4738 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4739 && GET_MODE_INNER (mode) == innermode)
4749 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4750 this may not agree with the mode that the type system has chosen for the
4751 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4752 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4755 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4760 if (orig_mode != BLKmode)
4761 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap a single register in a one-entry PARALLEL at offset 0 so
   the value still has a well-defined location.  */
4764 tmp = gen_rtx_REG (mode, regno);
4765 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4766 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4772 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4773 of this code is to classify each 8bytes of incoming argument by the register
4774 class and assign registers accordingly. */
4776 /* Return the union class of CLASS1 and CLASS2.
4777 See the x86-64 PS ABI for details. */
4779 static enum x86_64_reg_class
4780 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4782 /* Rule #1: If both classes are equal, this is the resulting class. */
4783 if (class1 == class2)
4786 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4788 if (class1 == X86_64_NO_CLASS)
4790 if (class2 == X86_64_NO_CLASS)
4793 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4794 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4795 return X86_64_MEMORY_CLASS;
4797 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI merged with SSESF stays INTEGERSI (both are 32-bit wide).  */
4798 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4799 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4800 return X86_64_INTEGERSI_CLASS;
4801 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4802 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4803 return X86_64_INTEGER_CLASS;
4805 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4807 if (class1 == X86_64_X87_CLASS
4808 || class1 == X86_64_X87UP_CLASS
4809 || class1 == X86_64_COMPLEX_X87_CLASS
4810 || class2 == X86_64_X87_CLASS
4811 || class2 == X86_64_X87UP_CLASS
4812 || class2 == X86_64_COMPLEX_X87_CLASS)
4813 return X86_64_MEMORY_CLASS;
4815 /* Rule #6: Otherwise class SSE is used. */
4816 return X86_64_SSE_CLASS;
4819 /* Classify the argument of type TYPE and mode MODE.
4820 CLASSES will be filled by the register class used to pass each word
4821 of the operand. The number of words is returned. In case the parameter
4822 should be passed in memory, 0 is returned. As a special case for zero
4823 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4825 BIT_OFFSET is used internally for handling records and specifies offset
4826 of the offset in bits modulo 256 to avoid overflow cases.
4828 See the x86-64 PS ABI for details.
/* NOTE(review): this listing is elided -- many returns, braces and case
   labels between the visible lines are not shown.  */
4832 classify_argument (enum machine_mode mode, const_tree type,
4833 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4835 HOST_WIDE_INT bytes =
4836 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Number of 8-byte "eightbyte" words the value occupies, accounting for
   its starting bit offset within the current 64-bit word.  */
4837 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4839 /* Variable sized entities are always passed/returned in memory. */
4843 if (mode != VOIDmode
4844 && targetm.calls.must_pass_in_stack (mode, type))
4847 if (type && AGGREGATE_TYPE_P (type))
4851 enum x86_64_reg_class subclasses[MAX_CLASSES];
4853 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
4857 for (i = 0; i < words; i++)
4858 classes[i] = X86_64_NO_CLASS;
4860 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
4861 signalize memory class, so handle it as special case. */
4864 classes[0] = X86_64_NO_CLASS;
4868 /* Classify each field of record and merge classes. */
4869 switch (TREE_CODE (type))
4872 /* And now merge the fields of structure. */
4873 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4875 if (TREE_CODE (field) == FIELD_DECL)
4879 if (TREE_TYPE (field) == error_mark_node)
4882 /* Bitfields are always classified as integer. Handle them
4883 early, since later code would consider them to be
4884 misaligned integers. */
4885 if (DECL_BIT_FIELD (field))
/* Walk each eightbyte the bitfield overlaps and merge in
   INTEGER class.  */
4887 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4888 i < ((int_bit_position (field) + (bit_offset % 64))
4889 + tree_low_cst (DECL_SIZE (field), 0)
4892 merge_classes (X86_64_INTEGER_CLASS,
/* Non-bitfield: classify the field recursively at its own offset
   and merge the resulting subclasses in place.  */
4897 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4898 TREE_TYPE (field), subclasses,
4899 (int_bit_position (field)
4900 + bit_offset) % 256);
4903 for (i = 0; i < num; i++)
4906 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
4908 merge_classes (subclasses[i], classes[i + pos]);
4916 /* Arrays are handled as small records. */
4919 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
4920 TREE_TYPE (type), subclasses, bit_offset);
4924 /* The partial classes are now full classes. */
4925 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
4926 subclasses[0] = X86_64_SSE_CLASS;
4927 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
4928 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element classification across all words of the
   array.  */
4930 for (i = 0; i < words; i++)
4931 classes[i] = subclasses[i % num];
4936 case QUAL_UNION_TYPE:
4937 /* Unions are similar to RECORD_TYPE but offset is always 0.
4939 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4941 if (TREE_CODE (field) == FIELD_DECL)
4945 if (TREE_TYPE (field) == error_mark_node)
4948 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
4949 TREE_TYPE (field), subclasses,
4953 for (i = 0; i < num; i++)
4954 classes[i] = merge_classes (subclasses[i], classes[i]);
4963 /* Final merger cleanup. */
4964 for (i = 0; i < words; i++)
4966 /* If one class is MEMORY, everything should be passed in
4968 if (classes[i] == X86_64_MEMORY_CLASS)
4971 /* The X86_64_SSEUP_CLASS should be always preceded by
4972 X86_64_SSE_CLASS. */
4973 if (classes[i] == X86_64_SSEUP_CLASS
4974 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
4975 classes[i] = X86_64_SSE_CLASS;
4977 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
4978 if (classes[i] == X86_64_X87UP_CLASS
4979 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
4980 classes[i] = X86_64_SSE_CLASS;
4985 /* Compute alignment needed. We align all types to natural boundaries with
4986 exception of XFmode that is aligned to 64bits. */
4987 if (mode != VOIDmode && mode != BLKmode)
4989 int mode_alignment = GET_MODE_BITSIZE (mode);
4992 mode_alignment = 128;
4993 else if (mode == XCmode)
4994 mode_alignment = 256;
4995 if (COMPLEX_MODE_P (mode))
4996 mode_alignment /= 2;
4997 /* Misaligned fields are always returned in memory. */
4998 if (bit_offset % mode_alignment)
5002 /* for V1xx modes, just use the base mode */
5003 if (VECTOR_MODE_P (mode) && mode != V1DImode
5004 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5005 mode = GET_MODE_INNER (mode);
5007 /* Classification of atomic types. */
/* NOTE(review): the case labels selecting these scalar/vector modes are
   elided; each group below fills classes[] for one mode family.  */
5012 classes[0] = X86_64_SSE_CLASS;
5015 classes[0] = X86_64_SSE_CLASS;
5016 classes[1] = X86_64_SSEUP_CLASS;
/* Small integers fit in the low 32 bits -> INTEGERSI, else INTEGER.  */
5025 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5026 classes[0] = X86_64_INTEGERSI_CLASS;
5028 classes[0] = X86_64_INTEGER_CLASS;
5032 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5039 if (!(bit_offset % 64))
5040 classes[0] = X86_64_SSESF_CLASS;
5042 classes[0] = X86_64_SSE_CLASS;
5045 classes[0] = X86_64_SSEDF_CLASS;
5048 classes[0] = X86_64_X87_CLASS;
5049 classes[1] = X86_64_X87UP_CLASS;
5052 classes[0] = X86_64_SSE_CLASS;
5053 classes[1] = X86_64_SSEUP_CLASS;
5056 classes[0] = X86_64_SSE_CLASS;
5059 classes[0] = X86_64_SSEDF_CLASS;
5060 classes[1] = X86_64_SSEDF_CLASS;
5063 classes[0] = X86_64_COMPLEX_X87_CLASS;
5066 /* This modes is larger than 16 bytes. */
5074 classes[0] = X86_64_AVX_CLASS;
5082 classes[0] = X86_64_SSE_CLASS;
5083 classes[1] = X86_64_SSEUP_CLASS;
5090 classes[0] = X86_64_SSE_CLASS;
5096 gcc_assert (VECTOR_MODE_P (mode));
5101 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5103 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5104 classes[0] = X86_64_INTEGERSI_CLASS;
5106 classes[0] = X86_64_INTEGER_CLASS;
5107 classes[1] = X86_64_INTEGER_CLASS;
/* One word, or two when the value is wider than 8 bytes.  */
5108 return 1 + (bytes > 8);
5112 /* Examine the argument and return set number of register required in each
5113 class. Return 0 iff parameter should be passed in memory. */
/* On success fills *INT_NREGS and *SSE_NREGS with the count of integer
   and SSE registers needed (elided increments presumably update them in
   the INTEGER/SSE cases -- TODO confirm).  */
5115 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5116 int *int_nregs, int *sse_nregs)
5118 enum x86_64_reg_class regclass[MAX_CLASSES];
5119 int n = classify_argument (mode, type, regclass, 0);
5125 for (n--; n >= 0; n--)
5126 switch (regclass[n])
5128 case X86_64_INTEGER_CLASS:
5129 case X86_64_INTEGERSI_CLASS:
5132 case X86_64_AVX_CLASS:
5133 case X86_64_SSE_CLASS:
5134 case X86_64_SSESF_CLASS:
5135 case X86_64_SSEDF_CLASS:
5138 case X86_64_NO_CLASS:
5139 case X86_64_SSEUP_CLASS:
5141 case X86_64_X87_CLASS:
5142 case X86_64_X87UP_CLASS:
5146 case X86_64_COMPLEX_X87_CLASS:
/* x87 classes are only usable for return values, never arguments.  */
5147 return in_return ? 2 : 0;
5148 case X86_64_MEMORY_CLASS:
5154 /* Construct container for the argument used by GCC interface. See
5155 FUNCTION_ARG for the detailed description. */
/* Builds the rtx (REG or PARALLEL) describing where a value of MODE/TYPE
   lives given the remaining register budgets.  NOTE(review): elided
   listing -- returns of NULL (pass in memory) and several braces are not
   shown.  */
5158 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5159 const_tree type, int in_return, int nintregs, int nsseregs,
5160 const int *intreg, int sse_regno)
5162 /* The following variables hold the static issued_error state. */
5163 static bool issued_sse_arg_error;
5164 static bool issued_sse_ret_error;
5165 static bool issued_x87_ret_error;
5167 enum machine_mode tmpmode;
5169 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5170 enum x86_64_reg_class regclass[MAX_CLASSES];
5174 int needed_sseregs, needed_intregs;
5175 rtx exp[MAX_CLASSES];
5178 n = classify_argument (mode, type, regclass, 0);
5181 if (!examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of either kind -> pass in memory.  */
5184 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5187 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5188 some less clueful developer tries to use floating-point anyway. */
5189 if (needed_sseregs && !TARGET_SSE)
5193 if (!issued_sse_ret_error)
5195 error ("SSE register return with SSE disabled");
5196 issued_sse_ret_error = true;
5199 else if (!issued_sse_arg_error)
5201 error ("SSE register argument with SSE disabled");
5202 issued_sse_arg_error = true;
5207 /* Likewise, error if the ABI requires us to return values in the
5208 x87 registers and the user specified -mno-80387. */
5209 if (!TARGET_80387 && in_return)
5210 for (i = 0; i < n; i++)
5211 if (regclass[i] == X86_64_X87_CLASS
5212 || regclass[i] == X86_64_X87UP_CLASS
5213 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5215 if (!issued_x87_ret_error)
5217 error ("x87 register return with x87 disabled");
5218 issued_x87_ret_error = true;
5223 /* First construct simple cases. Avoid SCmode, since we want to use
5224 single register to pass this type. */
5225 if (n == 1 && mode != SCmode)
5226 switch (regclass[0])
5228 case X86_64_INTEGER_CLASS:
5229 case X86_64_INTEGERSI_CLASS:
5230 return gen_rtx_REG (mode, intreg[0]);
5231 case X86_64_AVX_CLASS:
5232 case X86_64_SSE_CLASS:
5233 case X86_64_SSESF_CLASS:
5234 case X86_64_SSEDF_CLASS:
5235 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
5236 case X86_64_X87_CLASS:
5237 case X86_64_COMPLEX_X87_CLASS:
5238 return gen_rtx_REG (mode, FIRST_STACK_REG);
5239 case X86_64_NO_CLASS:
5240 /* Zero sized array, struct or class. */
/* Two-word values that fit a single wide register pair.  */
5245 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5246 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5247 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5250 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5251 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5252 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5253 && regclass[1] == X86_64_INTEGER_CLASS
5254 && (mode == CDImode || mode == TImode || mode == TFmode)
5255 && intreg[0] + 1 == intreg[1])
5256 return gen_rtx_REG (mode, intreg[0]);
5258 /* Otherwise figure out the entries of the PARALLEL. */
5259 for (i = 0; i < n; i++)
5261 switch (regclass[i])
5263 case X86_64_NO_CLASS:
5265 case X86_64_INTEGER_CLASS:
5266 case X86_64_INTEGERSI_CLASS:
5267 /* Merge TImodes on aligned occasions here too. */
/* The last chunk may be narrower than a word; pick the smallest
   integer mode that covers the remaining bytes.  */
5268 if (i * 8 + 8 > bytes)
5269 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5270 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5274 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5275 if (tmpmode == BLKmode)
5277 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5278 gen_rtx_REG (tmpmode, *intreg),
5282 case X86_64_SSESF_CLASS:
5283 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5284 gen_rtx_REG (SFmode,
5285 SSE_REGNO (sse_regno)),
5289 case X86_64_SSEDF_CLASS:
5290 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5291 gen_rtx_REG (DFmode,
5292 SSE_REGNO (sse_regno)),
5296 case X86_64_SSE_CLASS:
/* SSE followed by SSEUP means a full 16-byte chunk (TImode).  */
5297 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
5301 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5302 gen_rtx_REG (tmpmode,
5303 SSE_REGNO (sse_regno)),
5305 if (tmpmode == TImode)
5314 /* Empty aligned struct, union or class. */
/* Assemble the collected EXPR_LISTs into the final PARALLEL.  */
5318 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5319 for (i = 0; i < nexps; i++)
5320 XVECEXP (ret, 0, i) = exp [i];
5324 /* Update the data in CUM to advance over an argument of mode MODE
5325 and data type TYPE. (TYPE is null for libcalls where that information
5326 may not be available.) */
/* 32-bit variant.  NOTE(review): elided listing -- the switch over MODE
   selecting the integer/SSE/MMX paths below is not shown.  */
5329 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5330 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer path: consume WORDS general registers.  */
5346 cum->words += words;
5347 cum->nregs -= words;
5348 cum->regno += words;
5350 if (cum->nregs <= 0)
5358 if (cum->float_in_sse < 2)
5361 if (cum->float_in_sse < 1)
/* SSE path: scalar (non-aggregate) values consume one SSE register.  */
5379 if (!type || !AGGREGATE_TYPE_P (type))
5381 cum->sse_words += words;
5382 cum->sse_nregs -= 1;
5383 cum->sse_regno += 1;
5384 if (cum->sse_nregs <= 0)
/* MMX path: same shape as the SSE path, for MMX-mode values.  */
5397 if (!type || !AGGREGATE_TYPE_P (type))
5399 cum->mmx_words += words;
5400 cum->mmx_nregs -= 1;
5401 cum->mmx_regno += 1;
5402 if (cum->mmx_nregs <= 0)
/* Advance CUM over one argument under the x86-64 SysV ABI.  Uses
   examine_argument to classify the argument into integer/SSE register
   needs; if it does not fit the remaining registers it is charged to
   stack words instead.  (Elided listing: some lines missing.)  */
5413 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5414 tree type, HOST_WIDE_INT words, int named)
5416 int int_nregs, sse_nregs;
5418 /* Unnamed 256bit vector mode parameters are passed on stack. */
5419 if (!named && VALID_AVX256_REG_MODE (mode))
/* Classification failed => argument is passed in memory.  */
5422 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5423 cum->words += words;
/* Argument fits entirely in the remaining registers: consume them.  */
5424 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5426 cum->nregs -= int_nregs;
5427 cum->sse_nregs -= sse_nregs;
5428 cum->regno += int_nregs;
5429 cum->sse_regno += sse_nregs;
/* Otherwise it spills to the stack.  */
5432 cum->words += words;
/* Advance CUM over one argument under the Microsoft x64 ABI.  Only
   1/2/4/8-byte values are passed directly; anything else must already
   have been converted to pass-by-reference (see ix86_pass_by_reference).
   (Elided listing: register bookkeeping lines not visible here.)  */
5436 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5437 HOST_WIDE_INT words)
5439 /* Otherwise, this should be passed indirect. */
5440 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8)
5442 cum->words += words;
/* Top-level FUNCTION_ARG_ADVANCE worker: compute the argument size in
   bytes/words, canonicalize the mode via type_natural_mode, then
   dispatch to the MS-x64, SysV-x64 or ia32 variant by ABI.  */
5451 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5452 tree type, int named)
5454 HOST_WIDE_INT bytes, words;
/* BLKmode arguments get their size from the type, others from the mode.  */
5456 if (mode == BLKmode)
5457 bytes = int_size_in_bytes (type)
5459 bytes = GET_MODE_SIZE (mode);
5460 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5463 mode = type_natural_mode (type);
5465 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5466 function_arg_advance_ms_64 (cum, bytes, words);
5467 else if (TARGET_64BIT)
5468 function_arg_advance_64 (cum, mode, type, words, named);
5470 function_arg_advance_32 (cum, mode, type, bytes, words);
5473 /* Define where to put the arguments to a function.
5474 Value is zero to push the argument on the stack,
5475 or a hard register in which to store the argument.
5477 MODE is the argument's machine mode.
5478 TYPE is the data type of the argument (as a tree).
5479 This is null for libcalls where that information may
5481 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5482 the preceding args and about the function being called.
5483 NAMED is nonzero if this argument is a named parameter
5484 (otherwise it is an extra parameter matching an ellipsis). */
/* ia32 variant.  Returns an integer register, an SSE/MMX (reg or
   parallel) rtx, or — in elided branches — NULL to mean "push on the
   stack".  NOTE(review): this listing is elided; the switch over modes
   that selects between the paths below is not fully visible.  */
5487 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5488 enum machine_mode orig_mode, tree type,
5489 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Once-per-compilation warning latches for vector args without the
   matching ISA enabled.  */
5491 static bool warnedavx, warnedsse, warnedmmx;
5493 /* Avoid the AL settings for the Unix64 ABI. */
5494 if (mode == VOIDmode)
/* Integer registers, if the whole argument still fits.  */
5510 if (words <= cum->nregs)
5512 int regno = cum->regno;
5514 /* Fastcall allocates the first two DWORD (SImode) or
5515 smaller arguments to ECX and EDX if it isn't an
5521 || (type && AGGREGATE_TYPE_P (type)))
5524 /* ECX not EAX is the first allocated register. */
5525 if (regno == AX_REG)
5528 return gen_rtx_REG (mode, regno);
5533 if (cum->float_in_sse < 2)
5536 if (cum->float_in_sse < 1)
5540 /* In 32bit, we pass TImode in xmm registers. */
5547 if (!type || !AGGREGATE_TYPE_P (type))
5549 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5552 warning (0, "SSE vector argument without SSE enabled "
5556 return gen_reg_or_parallel (mode, orig_mode,
5557 cum->sse_regno + FIRST_SSE_REG);
5562 /* In 32bit, we pass OImode in ymm registers. */
5569 if (!type || !AGGREGATE_TYPE_P (type))
5571 if (!TARGET_AVX && !warnedavx && cum->warn_avx)
5574 warning (0, "AVX vector argument without AVX enabled "
5578 return gen_reg_or_parallel (mode, orig_mode,
5579 cum->sse_regno + FIRST_SSE_REG);
/* MMX vector arguments in MMX registers.  */
5588 if (!type || !AGGREGATE_TYPE_P (type))
5590 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5593 warning (0, "MMX vector argument without MMX enabled "
5597 return gen_reg_or_parallel (mode, orig_mode,
5598 cum->mmx_regno + FIRST_MMX_REG);
/* SysV x86-64 variant of function_arg.  For the VOIDmode sentinel it
   returns the hidden AL value (number of SSE registers used by a
   varargs call); otherwise it builds the register/parallel rtx via
   construct_container.  (Elided listing: some lines missing.)  */
5607 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5608 enum machine_mode orig_mode, tree type, int named)
5610 static bool warnedavx;
5612 /* Handle a hidden AL argument containing number of registers
5613 for varargs x86-64 functions. */
5614 if (mode == VOIDmode)
5615 return GEN_INT (cum->maybe_vaarg
5616 ? (cum->sse_nregs < 0
5617 ? (cum->call_abi == DEFAULT_ABI
5619 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5620 : X64_SSE_REGPARM_MAX))
5635 /* In 64bit, we pass TImode in integer registers and OImode on
5637 if (!type || !AGGREGATE_TYPE_P (type))
5639 if (!TARGET_AVX && !warnedavx && cum->warn_avx)
5642 warning (0, "AVX vector argument without AVX enabled "
5647 /* Unnamed 256bit vector mode parameters are passed on stack. */
5653 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5655 &x86_64_int_parameter_registers [cum->regno],
/* Microsoft x64 variant of function_arg.  One register slot per
   argument position; SF/DF go in SSE registers, and unnamed floats are
   passed in BOTH the SSE and the shadowing integer register so the
   callee can spill either.  (Elided listing: some lines missing.)  */
5660 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5661 enum machine_mode orig_mode, int named,
5662 HOST_WIDE_INT bytes)
5666 /* Avoid the AL settings for the Unix64 ABI. */
5667 if (mode == VOIDmode)
5670 /* If we've run out of registers, it goes on the stack. */
5671 if (cum->nregs == 0)
5674 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5676 /* Only floating point modes are passed in anything but integer regs. */
5677 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5680 regno = cum->regno + FIRST_SSE_REG;
5685 /* Unnamed floating parameters are passed in both the
5686 SSE and integer registers. */
5687 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5688 t2 = gen_rtx_REG (mode, regno);
5689 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5690 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5691 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5694 /* Handle aggregated types passed in register. */
5695 if (orig_mode == BLKmode)
/* Small BLKmode aggregates are widened to SImode/DImode for the move.  */
5697 if (bytes > 0 && bytes <= 8)
5698 mode = (bytes > 4 ? DImode : SImode);
5699 if (mode == BLKmode)
5703 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Top-level FUNCTION_ARG worker: compute size, normalize vector types
   to their natural mode, then dispatch to the ABI-specific helper
   (MS-x64, SysV-x64 or ia32), mirroring function_arg_advance.  */
5707 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5708 tree type, int named)
5710 enum machine_mode mode = omode;
5711 HOST_WIDE_INT bytes, words;
5713 if (mode == BLKmode)
5714 bytes = int_size_in_bytes (type)
5716 bytes = GET_MODE_SIZE (mode);
5717 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5719 /* To simplify the code below, represent vector types with a vector mode
5720 even if MMX/SSE are not active. */
5721 if (type && TREE_CODE (type) == VECTOR_TYPE)
5722 mode = type_natural_mode (type);
5724 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5725 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5726 else if (TARGET_64BIT)
5727 return function_arg_64 (cum, mode, omode, type, named);
5729 return function_arg_32 (cum, mode, omode, type, bytes, words);
5732 /* A C expression that indicates when an argument must be passed by
5733 reference. If nonzero for an argument, a copy of that argument is
5734 made in memory and a pointer to the argument is passed instead of
5735 the argument itself. The pointer is passed in whatever way is
5736 appropriate for passing a pointer to that type. */
/* Under the MS x64 ABI only 1/2/4/8-byte values (and no arrays) are
   passed by value; everything else goes by reference.  Under the SysV
   64-bit ABI, variable-sized types (int_size_in_bytes == -1) go by
   reference.  (Elided listing: the return statements themselves are
   not visible.)  */
5739 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5740 enum machine_mode mode ATTRIBUTE_UNUSED,
5741 const_tree type, bool named ATTRIBUTE_UNUSED)
5743 /* See Windows x64 Software Convention. */
5744 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5746 int msize = (int) GET_MODE_SIZE (mode);
5749 /* Arrays are passed by reference. */
5750 if (TREE_CODE (type) == ARRAY_TYPE)
5753 if (AGGREGATE_TYPE_P (type))
5755 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
5756 are passed by reference. */
5757 msize = int_size_in_bytes (type)
5761 /* __m128 is passed by reference. */
5763 case 1: case 2: case 4: case 8:
5769 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
5775 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* Recursively scans aggregates/arrays for a 128-bit-aligned member
   (e.g. an SSE vector field).  (Elided listing: some branches and
   returns are not visible here.)  */
5778 contains_aligned_value_p (tree type)
5780 enum machine_mode mode = TYPE_MODE (type);
5781 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
5785 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
5787 if (TYPE_ALIGN (type) < 128)
5790 if (AGGREGATE_TYPE_P (type))
5792 /* Walk the aggregates recursively. */
5793 switch (TREE_CODE (type))
5797 case QUAL_UNION_TYPE:
5801 /* Walk all the structure fields. */
5802 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5804 if (TREE_CODE (field) == FIELD_DECL
5805 && contains_aligned_value_p (TREE_TYPE (field)))
5812 /* Just for use if some languages pass arrays by value. */
5813 if (contains_aligned_value_p (TREE_TYPE (type)))
5824 /* Gives the alignment boundary, in bits, of an argument with the
5825 specified mode and type. */
/* Result is clamped to [PARM_BOUNDARY, BIGGEST_ALIGNMENT].  On 32-bit
   targets everything except SSE modes, _Decimal128 and __float128 (and
   aggregates that contain a 128-bit-aligned value) is forced down to
   PARM_BOUNDARY.  (Elided listing: some lines missing.)  */
5828 ix86_function_arg_boundary (enum machine_mode mode, tree type)
5833 /* Since canonical type is used for call, we convert it to
5834 canonical type if needed. */
5835 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
5836 type = TYPE_CANONICAL (type);
5837 align = TYPE_ALIGN (type);
5840 align = GET_MODE_ALIGNMENT (mode);
5841 if (align < PARM_BOUNDARY)
5842 align = PARM_BOUNDARY;
5843 /* In 32bit, only _Decimal128 and __float128 are aligned to their
5844 natural boundaries. */
5845 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
5847 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
5848 make an exception for SSE modes since these require 128bit
5851 The handling here differs from field_alignment. ICC aligns MMX
5852 arguments to 4 byte boundaries, while structure fields are aligned
5853 to 8 byte boundaries. */
5856 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
5857 align = PARM_BOUNDARY;
5861 if (!contains_aligned_value_p (type))
5862 align = PARM_BOUNDARY;
5865 if (align > BIGGEST_ALIGNMENT)
5866 align = BIGGEST_ALIGNMENT;
5870 /* Return true if N is a possible register number of function value. */
/* Implements FUNCTION_VALUE_REGNO_P.  (Elided listing: the switch head
   and most cases are not visible here.)  */
5873 ix86_function_value_regno_p (int regno)
5880 case FIRST_FLOAT_REG:
5881 /* TODO: The function should depend on current function ABI but
5882 builtins.c would need updating then. Therefore we use the
/* MS x64 ABI never returns values in x87 %st(0).  */
5884 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
5886 return TARGET_FLOAT_RETURNS_IN_80387;
5892 if (TARGET_MACHO || TARGET_64BIT)
5900 /* Define how to find the value returned by a function.
5901 VALTYPE is the data type of the value (as a tree).
5902 If the precise function being called is known, FUNC is its FUNCTION_DECL;
5903 otherwise, FUNC is 0. */
/* ia32 return-value register selection: MMX for 8-byte vectors, XMM
   for TImode/16-byte vectors, %st(0) for x87 floats, otherwise %eax —
   with an override to %xmm0 for SF/DF under sseregparm/SSE math.  */
5906 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
5907 const_tree fntype, const_tree fn)
5911 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
5912 we normally prevent this case when mmx is not available. However
5913 some ABIs may require the result to be returned like DImode. */
5914 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5915 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
5917 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
5918 we prevent this case when sse is not available. However some ABIs
5919 may require the result to be returned like integer TImode. */
5920 else if (mode == TImode
5921 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
5922 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
5924 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
5925 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
5926 regno = FIRST_FLOAT_REG;
5928 /* Most things go in %eax. */
5931 /* Override FP return register with %xmm0 for local functions when
5932 SSE math is enabled or for functions with sseregparm attribute. */
5933 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
5935 int sse_level = ix86_function_sseregparm (fntype, fn, false);
5936 if ((sse_level >= 1 && mode == SFmode)
5937 || (sse_level == 2 && mode == DFmode))
5938 regno = FIRST_SSE_REG;
5941 return gen_rtx_REG (orig_mode, regno);
/* SysV x86-64 return-value selection.  Libcalls (no type node) get a
   direct register by mode; typed values are classified through
   construct_container.  (Elided listing: some branches missing.)  */
5945 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
5950 /* Handle libcalls, which don't provide a type node. */
5951 if (valtype == NULL)
5963 return gen_rtx_REG (mode, FIRST_SSE_REG);
5966 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
5970 return gen_rtx_REG (mode, AX_REG);
5974 ret = construct_container (mode, orig_mode, valtype, 1,
5975 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
5976 x86_64_int_return_registers, 0);
5978 /* For zero sized structures, construct_container returns NULL, but we
5979 need to keep rest of compiler happy by returning meaningful value. */
5981 ret = gen_rtx_REG (orig_mode, AX_REG);
/* MS x64 return-value selection: default %rax; 16-byte non-complex
   integer/vector values and SF/DF scalars return in %xmm0.
   (Elided listing: switch cases partially missing.)  */
5987 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
5989 unsigned int regno = AX_REG;
5993 switch (GET_MODE_SIZE (mode))
5996 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
5997 && !COMPLEX_MODE_P (mode))
5998 regno = FIRST_SSE_REG;
6002 if (mode == SFmode || mode == DFmode)
6003 regno = FIRST_SSE_REG;
6009 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value / ix86_libcall_value:
   resolve FNTYPE_OR_DECL (a decl or a type, possibly NULL) into FN and
   FNTYPE, then dispatch by ABI.  */
6013 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6014 enum machine_mode orig_mode, enum machine_mode mode)
6016 const_tree fn, fntype;
6019 if (fntype_or_decl && DECL_P (fntype_or_decl))
6020 fn = fntype_or_decl;
6021 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6023 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6024 return function_value_ms_64 (orig_mode, mode);
6025 else if (TARGET_64BIT)
6026 return function_value_64 (orig_mode, mode, valtype);
6028 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: normalize VALTYPE's mode and delegate to
   the common worker.  */
6032 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6033 bool outgoing ATTRIBUTE_UNUSED)
6035 enum machine_mode mode, orig_mode;
6037 orig_mode = TYPE_MODE (valtype);
6038 mode = type_natural_mode (valtype);
6039 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* LIBCALL_VALUE worker: no type or decl information is available, so
   pass NULLs and use MODE for both the original and natural mode.  */
6043 ix86_libcall_value (enum machine_mode mode)
6045 return ix86_function_value_1 (NULL, NULL, mode, mode);
6048 /* Return true iff type is returned in memory. */
/* ia32 rules.  Small MS-style aggregates (<= 8 bytes) and vectors that
   fit their return register stay in registers.  (Elided listing: some
   returns and size checks are not visible here.)  */
6050 static int ATTRIBUTE_UNUSED
6051 return_in_memory_32 (const_tree type, enum machine_mode mode)
6055 if (mode == BLKmode)
6058 size = int_size_in_bytes (type)
6060 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6063 if (VECTOR_MODE_P (mode) || mode == TImode)
6065 /* User-created vectors small enough to fit in EAX. */
6069 /* MMX/3dNow values are returned in MM0,
6070 except when it doesn't exist. */
6072 return (TARGET_MMX ? 0 : 1);
6074 /* SSE values are returned in XMM0, except when it doesn't exist. */
6076 return (TARGET_SSE ? 0 : 1);
/* SysV x86-64 rule: a value is returned in memory exactly when
   examine_argument cannot classify it into return registers.  */
6087 static int ATTRIBUTE_UNUSED
6088 return_in_memory_64 (const_tree type, enum machine_mode mode)
6090 int needed_intregs, needed_sseregs;
6091 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* MS x64 rule: __m128-style 16-byte values return in %xmm0; everything
   else must be exactly 1, 2, 4 or 8 bytes to return in a register.  */
6094 static int ATTRIBUTE_UNUSED
6095 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6097 HOST_WIDE_INT size = int_size_in_bytes (type)
6099 /* __m128 is returned in xmm0. */
6100 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6101 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6104 /* Otherwise, the size must be exactly in [1248]. */
6105 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: delegate to a subtarget macro when one
   is defined, otherwise dispatch on ABI.  */
6109 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6111 #ifdef SUBTARGET_RETURN_IN_MEMORY
6112 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6114 const enum machine_mode mode = type_natural_mode (type);
6116 if (TARGET_64BIT_MS_ABI)
6117 return return_in_memory_ms_64 (type, mode);
6118 else if (TARGET_64BIT)
6119 return return_in_memory_64 (type, mode);
6121 return return_in_memory_32 (type, mode);
6125 /* Return false iff TYPE is returned in memory. This version is used
6126 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6127 but differs notably in that when MMX is available, 8-byte vectors
6128 are returned in memory, rather than in MMX registers. */
/* (Elided listing: several size checks and the return statements are
   not visible here.)  */
6131 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6134 enum machine_mode mode = type_natural_mode (type);
6137 return return_in_memory_64 (type, mode);
6139 if (mode == BLKmode)
6142 size = int_size_in_bytes (type)
6144 if (VECTOR_MODE_P (mode))
6146 /* Return in memory only if MMX registers *are* available. This
6147 seems backwards, but it is consistent with the existing
6154 else if (mode == TImode)
6156 else if (mode == XFmode)
6162 /* When returning SSE vector types, we have a choice of either
6163 (1) being abi incompatible with a -march switch, or
6164 (2) generating an error.
6165 Given no good solution, I think the safest thing is one warning.
6166 The user won't be able to use -Werror, but....
6168 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6169 called in response to actually generating a caller or callee that
6170 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6171 via aggregate_value_p for general type probing from tree-ssa. */
/* On 32-bit targets, emit a once-only warning when an SSE (16-byte) or
   MMX (8-byte) vector return is used without the matching ISA enabled.
   (Elided listing: the returned rtx is not visible here.)  */
6174 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6176 static bool warnedsse, warnedmmx;
6178 if (!TARGET_64BIT && type)
6180 /* Look at the return type of the function, not the function type. */
6181 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6183 if (!TARGET_SSE && !warnedsse)
6186 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6189 warning (0, "SSE vector return without SSE enabled "
6194 if (!TARGET_MMX && !warnedmmx)
6196 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6199 warning (0, "MMX vector return without MMX enabled "
6209 /* Create the va_list data type. */
6211 /* Returns the calling convention specific va_list data type.
6212 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
/* For ia32 and MS_ABI the va_list is a plain char pointer; for the
   SysV 64-bit ABI it is the four-field __va_list_tag record wrapped in
   a one-element array.  */
6215 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6217 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6219 /* For i386 we use plain pointer to argument area. */
6220 if (!TARGET_64BIT || abi == MS_ABI)
6221 return build_pointer_type (char_type_node);
6223 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6224 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6226 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6227 unsigned_type_node);
6228 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6229 unsigned_type_node);
6230 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6232 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so the va_list usage pass can track
   which registers a function's va_arg actually reads.  */
6235 va_list_gpr_counter_field = f_gpr;
6236 va_list_fpr_counter_field = f_fpr;
6238 DECL_FIELD_CONTEXT (f_gpr) = record;
6239 DECL_FIELD_CONTEXT (f_fpr) = record;
6240 DECL_FIELD_CONTEXT (f_ovf) = record;
6241 DECL_FIELD_CONTEXT (f_sav) = record;
6243 TREE_CHAIN (record) = type_decl;
6244 TYPE_NAME (record) = type_decl;
6245 TYPE_FIELDS (record) = f_gpr;
6246 TREE_CHAIN (f_gpr) = f_fpr;
6247 TREE_CHAIN (f_fpr) = f_ovf;
6248 TREE_CHAIN (f_ovf) = f_sav;
6250 layout_type (record);
6252 /* The correct type is an array type of one element. */
6253 return build_array_type (record, build_index_type (size_zero_node));
6256 /* Setup the builtin va_list data type and for 64-bit the additional
6257 calling convention specific va_list data types. */
/* Builds the default-ABI va_list, then the "other" ABI's variant so
   both sysv_va_list_type_node and ms_va_list_type_node are populated.
   Non-record va_lists get a variant copy so each ABI has a distinct
   type node.  (Elided listing: some branch heads are missing.)  */
6260 ix86_build_builtin_va_list (void)
6262 tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
6264 /* Initialize abi specific va_list builtin types. */
6268 if (DEFAULT_ABI == MS_ABI)
6270 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6271 if (TREE_CODE (t) != RECORD_TYPE)
6272 t = build_variant_type_copy (t);
6273 sysv_va_list_type_node = t;
6278 if (TREE_CODE (t) != RECORD_TYPE)
6279 t = build_variant_type_copy (t);
6280 sysv_va_list_type_node = t;
6282 if (DEFAULT_ABI != MS_ABI)
6284 t = ix86_build_builtin_va_list_abi (MS_ABI);
6285 if (TREE_CODE (t) != RECORD_TYPE)
6286 t = build_variant_type_copy (t);
6287 ms_va_list_type_node = t;
6292 if (TREE_CODE (t) != RECORD_TYPE)
6293 t = build_variant_type_copy (t);
6294 ms_va_list_type_node = t;
6301 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* SysV x86-64: spill the unconsumed integer parameter registers into
   the register save area, then emit the sse_prologue_save computed-jump
   sequence that stores only the SSE registers actually used (their
   count arrives at runtime in AL).  (Elided listing: some lines,
   including loop bodies and declarations, are missing.)  */
6304 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6313 int regparm = ix86_regparm;
6315 if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
6316 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
/* Nothing to do if va_list never reads any saved registers.  */
6318 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
6321 /* Indicate to allocate space on the stack for varargs save area. */
6322 ix86_save_varrargs_registers = 1;
6324 save_area = frame_pointer_rtx;
6325 set = get_varargs_alias_set ();
/* Save the remaining unnamed integer argument registers.  */
6327 for (i = cum->regno;
6329 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6332 mem = gen_rtx_MEM (Pmode,
6333 plus_constant (save_area, i * UNITS_PER_WORD));
6334 MEM_NOTRAP_P (mem) = 1;
6335 set_mem_alias_set (mem, set);
6336 emit_move_insn (mem, gen_rtx_REG (Pmode,
6337 x86_64_int_parameter_registers[i]));
6340 if (cum->sse_nregs && cfun->va_list_fpr_size)
6342 /* Now emit code to save SSE registers. The AX parameter contains number
6343 of SSE parameter registers used to call this function. We use
6344 sse_prologue_save insn template that produces computed jump across
6345 SSE saves. We need some preparation work to get this working. */
6347 label = gen_label_rtx ();
6348 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6350 /* Compute address to jump to :
6351 label - eax*4 + nnamed_sse_arguments*4 Or
6352 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6353 tmp_reg = gen_reg_rtx (Pmode);
6354 nsse_reg = gen_reg_rtx (Pmode);
6355 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6356 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6357 gen_rtx_MULT (Pmode, nsse_reg,
6360 /* vmovaps is one byte longer than movaps. */
6362 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6363 gen_rtx_PLUS (Pmode, tmp_reg,
6369 gen_rtx_CONST (DImode,
6370 gen_rtx_PLUS (DImode,
6372 GEN_INT (cum->sse_regno
6373 * (TARGET_AVX ? 5 : 4)))));
6375 emit_move_insn (nsse_reg, label_ref);
6376 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6378 /* Compute address of memory block we save into. We always use pointer
6379 pointing 127 bytes after first byte to store - this is needed to keep
6380 instruction size limited by 4 bytes (5 bytes for AVX) with one
6381 byte displacement. */
6382 tmp_reg = gen_reg_rtx (Pmode);
6383 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6384 plus_constant (save_area,
6385 8 * X86_64_REGPARM_MAX + 127)));
6386 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6387 MEM_NOTRAP_P (mem) = 1;
6388 set_mem_alias_set (mem, set);
6389 set_mem_align (mem, BITS_PER_WORD);
6391 /* And finally do the dirty job! */
6392 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6393 GEN_INT (cum->sse_regno), label));
/* MS x64 varargs prologue: home every remaining named-register slot —
   plain stores of the integer parameter registers into the caller's
   shadow/argument area; no SSE computed-jump trick is needed.  */
6398 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6400 alias_set_type set = get_varargs_alias_set ();
6403 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6407 mem = gen_rtx_MEM (Pmode,
6408 plus_constant (virtual_incoming_args_rtx,
6409 i * UNITS_PER_WORD));
6410 MEM_NOTRAP_P (mem) = 1;
6411 set_mem_alias_set (mem, set);
6413 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6414 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: copy CUM, skip the last named
   argument for stdarg functions, then dispatch by ABI.  (Elided
   listing: the early-exit for 32-bit targets is not visible.)  */
6419 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6420 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6423 CUMULATIVE_ARGS next_cum;
6426 /* This argument doesn't appear to be used anymore. Which is good,
6427 because the old code here didn't suppress rtl generation. */
6428 gcc_assert (!no_rtl);
6433 fntype = TREE_TYPE (current_function_decl);
6435 /* For varargs, we do not want to skip the dummy va_dcl argument.
6436 For stdargs, we do want to skip the last named argument. */
6438 if (stdarg_p (fntype))
6439 function_arg_advance (&next_cum, mode, type, 1);
6441 if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
6442 setup_incoming_varargs_ms_64 (&next_cum);
6444 setup_incoming_varargs_64 (&next_cum);
6447 /* Checks if TYPE is of kind va_list char *. */
/* True when TYPE canonicalizes to the pointer-style va_list (MS ABI,
   or the generic va_list when MS_ABI is the default).  */
6450 is_va_list_char_pointer (tree type)
6454 /* For 32-bit it is always true. */
6457 canonic = ix86_canonical_va_list_type (type);
6458 return (canonic == ms_va_list_type_node
6459 || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
6462 /* Implement va_start. */
/* Pointer-style va_lists fall back to the generic expander.  SysV
   64-bit va_lists get their four fields initialized from the current
   function's argument bookkeeping: gp_offset = 8 * #gp regs used,
   fp_offset = 16 * #sse regs used past the gp save slots, the overflow
   area past the named stack args, and the register save area at the
   frame pointer.  */
6465 ix86_va_start (tree valist, rtx nextarg)
6467 HOST_WIDE_INT words, n_gpr, n_fpr;
6468 tree f_gpr, f_fpr, f_ovf, f_sav;
6469 tree gpr, fpr, ovf, sav, t;
6472 /* Only 64bit target needs something special. */
6473 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6475 std_expand_builtin_va_start (valist, nextarg);
/* Walk the __va_list_tag field chain: gp_offset, fp_offset,
   overflow_arg_area, reg_save_area.  */
6479 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6480 f_fpr = TREE_CHAIN (f_gpr);
6481 f_ovf = TREE_CHAIN (f_fpr);
6482 f_sav = TREE_CHAIN (f_ovf);
6484 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6485 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6486 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6487 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6488 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6490 /* Count number of gp and fp argument registers used. */
6491 words = crtl->args.info.words;
6492 n_gpr = crtl->args.info.regno;
6493 n_fpr = crtl->args.info.sse_regno;
6495 if (cfun->va_list_gpr_size)
6497 type = TREE_TYPE (gpr);
6498 t = build2 (MODIFY_EXPR, type,
6499 gpr, build_int_cst (type, n_gpr * 8));
6500 TREE_SIDE_EFFECTS (t) = 1;
6501 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6504 if (cfun->va_list_fpr_size)
6506 type = TREE_TYPE (fpr);
6507 t = build2 (MODIFY_EXPR, type, fpr,
6508 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6509 TREE_SIDE_EFFECTS (t) = 1;
6510 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6513 /* Find the overflow area. */
6514 type = TREE_TYPE (ovf);
6515 t = make_tree (type, crtl->args.internal_arg_pointer);
6517 t = build2 (POINTER_PLUS_EXPR, type, t,
6518 size_int (words * UNITS_PER_WORD));
6519 t = build2 (MODIFY_EXPR, type, ovf, t);
6520 TREE_SIDE_EFFECTS (t) = 1;
6521 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6523 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
6525 /* Find the register save area.
6526 Prologue of the function save it right above stack frame. */
6527 type = TREE_TYPE (sav);
6528 t = make_tree (type, frame_pointer_rtx);
6529 t = build2 (MODIFY_EXPR, type, sav, t);
6530 TREE_SIDE_EFFECTS (t) = 1;
6531 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6535 /* Implement va_arg. */
/* Gimplify one va_arg fetch for the SysV 64-bit ABI.  Emits: a bounds
   check against gp_offset/fp_offset; the in-register path that either
   points straight into the register save area or (for mixed/misaligned
   aggregates) copies piece-by-piece into a temporary; and the overflow
   (stack) path with on-stack alignment.  NOTE(review): this listing is
   elided — some declarations, branch heads and closing statements are
   not visible.  */
6538 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6541 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6542 tree f_gpr, f_fpr, f_ovf, f_sav;
6543 tree gpr, fpr, ovf, sav, t;
6545 tree lab_false, lab_over = NULL_TREE;
6550 enum machine_mode nat_mode;
6553 /* Only 64bit target needs something special. */
6554 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6555 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6557 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6558 f_fpr = TREE_CHAIN (f_gpr);
6559 f_ovf = TREE_CHAIN (f_fpr);
6560 f_sav = TREE_CHAIN (f_ovf);
6562 valist = build_va_arg_indirect_ref (valist);
6563 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6564 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6565 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6566 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Pass-by-reference types are fetched as a pointer and dereferenced
   at the end.  */
6568 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6570 type = build_pointer_type (type);
6571 size = int_size_in_bytes (type)
6572 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6574 nat_mode = type_natural_mode (type);
6583 /* Unnamed 256bit vector mode parameters are passed on stack. */
6584 if (ix86_cfun_abi () == SYSV_ABI)
6591 container = construct_container (nat_mode, TYPE_MODE (type),
6592 type, 0, X86_64_REGPARM_MAX,
6593 X86_64_SSE_REGPARM_MAX, intreg,
6598 /* Pull the value out of the saved registers. */
6600 addr = create_tmp_var (ptr_type_node, "addr");
6601 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6605 int needed_intregs, needed_sseregs;
6607 tree int_addr, sse_addr;
6609 lab_false = create_artificial_label ();
6610 lab_over = create_artificial_label ();
6612 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary is needed when the value cannot be read directly from
   the save area (over-aligned, or mixed register classes).  */
6614 need_temp = (!REG_P (container)
6615 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6616 || TYPE_ALIGN (type) > 128));
6618 /* In case we are passing structure, verify that it is consecutive block
6619 on the register save area. If not we need to do moves. */
6620 if (!need_temp && !REG_P (container))
6622 /* Verify that all registers are strictly consecutive */
6623 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6627 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6629 rtx slot = XVECEXP (container, 0, i);
6630 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6631 || INTVAL (XEXP (slot, 1)) != i * 16)
6639 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6641 rtx slot = XVECEXP (container, 0, i);
6642 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6643 || INTVAL (XEXP (slot, 1)) != i * 8)
6655 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6656 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6657 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6658 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6661 /* First ensure that we fit completely in registers. */
6664 t = build_int_cst (TREE_TYPE (gpr),
6665 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6666 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6667 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6668 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6669 gimplify_and_add (t, pre_p);
6673 t = build_int_cst (TREE_TYPE (fpr),
6674 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6675 + X86_64_REGPARM_MAX * 8);
6676 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6677 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6678 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6679 gimplify_and_add (t, pre_p);
6682 /* Compute index to start of area used for integer regs. */
6685 /* int_addr = gpr + sav; */
6686 t = fold_convert (sizetype, gpr);
6687 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6688 gimplify_assign (int_addr, t, pre_p);
6692 /* sse_addr = fpr + sav; */
6693 t = fold_convert (sizetype, fpr);
6694 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6695 gimplify_assign (sse_addr, t, pre_p);
/* need_temp path: copy each register piece from the save area into a
   stack temporary, then read the value from there.  */
6700 tree temp = create_tmp_var (type, "va_arg_tmp");
6703 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6704 gimplify_assign (addr, t, pre_p);
6706 for (i = 0; i < XVECLEN (container, 0); i++)
6708 rtx slot = XVECEXP (container, 0, i);
6709 rtx reg = XEXP (slot, 0);
6710 enum machine_mode mode = GET_MODE (reg);
6711 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6712 tree addr_type = build_pointer_type (piece_type);
6715 tree dest_addr, dest;
6717 if (SSE_REGNO_P (REGNO (reg)))
6719 src_addr = sse_addr;
6720 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
6724 src_addr = int_addr;
6725 src_offset = REGNO (reg) * 8;
6727 src_addr = fold_convert (addr_type, src_addr);
6728 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
6729 size_int (src_offset));
6730 src = build_va_arg_indirect_ref (src_addr);
6732 dest_addr = fold_convert (addr_type, addr);
6733 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
6734 size_int (INTVAL (XEXP (slot, 1))));
6735 dest = build_va_arg_indirect_ref (dest_addr);
6737 gimplify_assign (dest, src, pre_p);
/* Bump the gp/fp offsets past the registers just consumed.  */
6743 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
6744 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
6745 gimplify_assign (gpr, t, pre_p);
6750 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
6751 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
6752 gimplify_assign (fpr, t, pre_p);
6755 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
6757 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
6760 /* ... otherwise out of the overflow area. */
6762 /* When we align parameter on stack for caller, if the parameter
6763 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
6764 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
6765 here with caller. */
6766 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
6767 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
6768 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
6770 /* Care for on-stack alignment if needed. */
6771 if (arg_boundary <= 64
6772 || integer_zerop (TYPE_SIZE (type)))
/* Round ovf up to the argument's boundary: (ovf + align-1) & -align.  */
6776 HOST_WIDE_INT align = arg_boundary / 8;
6777 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
6778 size_int (align - 1));
6779 t = fold_convert (sizetype, t);
6780 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6782 t = fold_convert (TREE_TYPE (ovf), t);
6784 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
6785 gimplify_assign (addr, t, pre_p);
/* Advance the overflow pointer past the fetched value.  */
6787 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
6788 size_int (rsize * UNITS_PER_WORD));
6789 gimplify_assign (unshare_expr (ovf), t, pre_p);
6792 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
6794 ptrtype = build_pointer_type (type);
6795 addr = fold_convert (ptrtype, addr);
/* For pass-by-reference, dereference once more to reach the value.  */
6798 addr = build_va_arg_indirect_ref (addr);
6799 return build_va_arg_indirect_ref (addr);
6802 /* Return nonzero if OPNUM's MEM should be matched
6803 in movabs* patterns. */
/* NOTE(review): the return type line, opening brace and local declarations
   (set, mem) appear to be elided from this excerpt. */
6806 ix86_check_movabs (rtx insn, int opnum)
/* Extract the SET from INSN's pattern; for a PARALLEL the SET of
   interest is its first element. */
6810 set = PATTERN (insn);
6811 if (GET_CODE (set) == PARALLEL)
6812 set = XVECEXP (set, 0, 0);
6813 gcc_assert (GET_CODE (set) == SET);
/* Operand OPNUM selects SET_DEST (0) or SET_SRC (1); strip SUBREGs to
   reach the underlying MEM. */
6814 mem = XEXP (set, opnum);
6815 while (GET_CODE (mem) == SUBREG)
6816 mem = SUBREG_REG (mem);
6817 gcc_assert (MEM_P (mem));
/* Volatile memory is only acceptable while volatile_ok is set.  */
6818 return (volatile_ok || !MEM_VOLATILE_P (mem));
6821 /* Initialize the table of extra 80387 mathematical constants. */
6824 init_ext_80387_constants (void)
6826 static const char * cst[5] =
/* Decimal expansions of the constants loadable by dedicated x87
   instructions; index order matches the opcode noted on each line and
   the lookup in standard_80387_constant_p. */
6828 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
6829 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
6830 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
6831 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
6832 "3.1415926535897932385128089594061862044", /* 4: fldpi */
6836 for (i = 0; i < 5; i++)
6838 real_from_string (&ext_80387_constants_table[i], cst[i]);
6839 /* Ensure each constant is rounded to XFmode precision. */
6840 real_convert (&ext_80387_constants_table[i],
6841 XFmode, &ext_80387_constants_table[i]);
/* Flag the table as built so callers initialize it only once. */
6844 ext_80387_constants_init = 1;
6847 /* Return true if the constant is something that can be loaded with
6848 a special instruction. */
6851 standard_80387_constant_p (rtx x)
6853 enum machine_mode mode = GET_MODE (x);
/* Only x87 floating-point CONST_DOUBLEs qualify.  NOTE(review): the
   early-return statements after several of the conditions below appear
   elided from this excerpt. */
6857 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
/* 0.0 and 1.0 are loadable via fldz / fld1. */
6860 if (x == CONST0_RTX (mode))
6862 if (x == CONST1_RTX (mode))
6865 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6867 /* For XFmode constants, try to find a special 80387 instruction when
6868 optimizing for size or on those CPUs that benefit from them. */
6870 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
/* Lazily build the table of fldlg2/fldln2/fldl2e/fldl2t/fldpi values. */
6874 if (! ext_80387_constants_init)
6875 init_ext_80387_constants ();
6877 for (i = 0; i < 5; i++)
6878 if (real_identical (&r, &ext_80387_constants_table[i]))
6882 /* Load of the constant -0.0 or -1.0 will be split as
6883 fldz;fchs or fld1;fchs sequence. */
6884 if (real_isnegzero (&r))
6886 if (real_identical (&r, &dconstm1))
6892 /* Return the opcode of the special instruction to be used to load
/* NOTE(review): the remainder of this comment, the return type, and all
   of the switch cases (mapping standard_80387_constant_p results to
   mnemonic strings) appear elided from this excerpt. */
6896 standard_80387_constant_opcode (rtx x)
6898 switch (standard_80387_constant_p (x))
6922 /* Return the CONST_DOUBLE representing the 80387 constant that is
6923 loaded by the specified special instruction. The argument IDX
6924 matches the return value from standard_80387_constant_p. */
6927 standard_80387_constant_rtx (int idx)
/* Make sure the extended-constant table exists before indexing it.
   NOTE(review): the switch that maps IDX to table index `i` appears
   elided from this excerpt. */
6931 if (! ext_80387_constants_init)
6932 init_ext_80387_constants ();
6948 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
6952 /* Return 1 if mode is a valid mode for sse. */
/* NOTE(review): the return type and the entire switch over MODE appear
   elided from this excerpt. */
6954 standard_sse_mode_p (enum machine_mode mode)
6971 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
6972 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
6973 modes and AVX is enabled. */
6976 standard_sse_constant_p (rtx x)
6978 enum machine_mode mode = GET_MODE (x);
/* All-zero vectors can always be materialized (xorps and friends).
   NOTE(review): the `return 1;` style statements after these tests
   appear elided from this excerpt. */
6980 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
/* All-ones constants need pcmpeqd, so the result is gated on SSE2/AVX;
   negative values apparently mean "all ones but ISA not enabled". */
6982 if (vector_all_ones_operand (x, mode))
6984 if (standard_sse_mode_p (mode))
6985 return TARGET_SSE2 ? 2 : -2;
6986 else if (VALID_AVX256_REG_MODE (mode))
6987 return TARGET_AVX ? 3 : -3;
6993 /* Return the opcode of the special instruction to be used to load
/* NOTE(review): the rest of this comment, the return type, and several
   case labels / the gcc_unreachable default appear elided from this
   excerpt. */
6997 standard_sse_constant_opcode (rtx insn, rtx x)
6999 switch (standard_sse_constant_p (x))
/* Case 1: all-zero constant - emit a self-xor sized by the insn's
   mode attribute (AVX uses the three-operand v* forms). */
7002 switch (get_attr_mode (insn))
7005 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7007 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7009 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7011 return "vxorps\t%x0, %x0, %x0";
7013 return "vxorpd\t%x0, %x0, %x0";
7015 return "vpxor\t%x0, %x0, %x0";
/* Case 2: all-ones constant - compare a register with itself. */
7021 switch (get_attr_mode (insn))
7026 return "vpcmpeqd\t%0, %0, %0";
7032 return "pcmpeqd\t%0, %0";
7037 /* Returns 1 if OP contains a symbol reference */
7040 symbolic_reference_mentioned_p (rtx op)
/* Direct hit: OP itself is a symbol or label reference. */
7045 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
/* Otherwise walk OP's operands recursively: 'E' entries are rtx
   vectors, 'e' entries are single sub-expressions. */
7048 fmt = GET_RTX_FORMAT (GET_CODE (op));
7049 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7055 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7056 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7060 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7067 /* Return 1 if it is appropriate to emit `ret' instructions in the
7068 body of a function. Do this only if the epilogue is simple, needing a
7069 couple of insns. Prior to reloading, we can't tell how many registers
7070 must be saved, so return 0 then. Return 0 if there is no frame
7071 marker to de-allocate. */
7074 ix86_can_use_return_insn_p (void)
7076 struct ix86_frame frame;
/* Before reload the register-save set is unknown; with a frame pointer
   the epilogue is never a bare `ret'. */
7078 if (! reload_completed || frame_pointer_needed)
7081 /* Don't allow more than 32 pop, since that's all we can do
7082 with one instruction. */
7083 if (crtl->args.pops_args
7084 && crtl->args.size >= 32768)
/* A bare `ret' works only when nothing was allocated and no registers
   need restoring. */
7087 ix86_compute_frame_layout (&frame);
7088 return frame.to_allocate == 0 && frame.nregs == 0;
7091 /* Value should be nonzero if functions must have frame pointers.
7092 Zero means the frame pointer need not be set up (and parms may
7093 be accessed via the stack pointer) in functions that seem suitable. */
7096 ix86_frame_pointer_required (void)
7098 /* If we accessed previous frames, then the generated code expects
7099 to be able to access the saved ebp value in our frame. */
7100 if (cfun->machine->accesses_prev_frame)
7103 /* Several x86 os'es need a frame pointer for other reasons,
7104 usually pertaining to setjmp. */
7105 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7108 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7109 the frame pointer by default. Turn it back on now if we've not
7110 got a leaf function. */
/* NOTE(review): the `return 1;` statements after each condition, and the
   final `return 0;`, appear elided from this excerpt. */
7111 if (TARGET_OMIT_LEAF_FRAME_POINTER
7112 && (!current_function_is_leaf
7113 || ix86_current_function_calls_tls_descriptor))
7122 /* Record that the current function accesses previous call frames. */
/* Sets the per-function flag consulted by ix86_frame_pointer_required. */
7125 ix86_setup_frame_addresses (void)
7127 cfun->machine->accesses_prev_frame = 1;
/* USE_HIDDEN_LINKONCE selects emitting PC-thunks as hidden COMDAT
   functions when the assembler/target supports it (or on Mach-O);
   otherwise local labels are used.  NOTE(review): the #else / #endif
   lines appear elided from this excerpt. */
7130 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7131 # define USE_HIDDEN_LINKONCE 1
7133 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of register numbers whose get-PC thunk has been requested;
   consumed by ix86_file_end to emit the thunk bodies. */
7136 static int pic_labels_used;
7138 /* Fills in the label name that should be used for a pc thunk for
7139 the given register. */
7142 get_pc_thunk_name (char name[32], unsigned int regno)
/* PC thunks are a 32-bit PIC mechanism only. */
7144 gcc_assert (!TARGET_64BIT);
/* Linkonce thunks get a well-known global name shared across objects;
   otherwise use a compiler-local label keyed by the register number. */
7146 if (USE_HIDDEN_LINKONCE)
7147 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7149 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7153 /* This function generates code for -fpic that loads %ebx with
7154 the return address of the caller and then returns. */
/* Runs at end of assembly output: for every register whose PC thunk was
   requested (bits in pic_labels_used), emit the thunk body.
   NOTE(review): local declarations and several braces/else branches
   appear elided from this excerpt. */
7157 ix86_file_end (void)
7162 for (regno = 0; regno < 8; ++regno)
/* Skip registers for which no thunk was requested. */
7166 if (! ((pic_labels_used >> regno) & 1))
7169 get_pc_thunk_name (name, regno);
/* Mach-O branch: emit the thunk as a weak, private-extern coalesced
   text symbol. */
7174 switch_to_section (darwin_sections[text_coal_section]);
7175 fputs ("\t.weak_definition\t", asm_out_file);
7176 assemble_name (asm_out_file, name);
7177 fputs ("\n\t.private_extern\t", asm_out_file);
7178 assemble_name (asm_out_file, name);
7179 fputs ("\n", asm_out_file);
7180 ASM_OUTPUT_LABEL (asm_out_file, name);
/* ELF linkonce branch: build a public, static, one-only FUNCTION_DECL
   so the thunk lands in its own COMDAT section with hidden visibility. */
7184 if (USE_HIDDEN_LINKONCE)
7188 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7190 TREE_PUBLIC (decl) = 1;
7191 TREE_STATIC (decl) = 1;
7192 DECL_ONE_ONLY (decl) = 1;
7194 (*targetm.asm_out.unique_section) (decl, 0);
7195 switch_to_section (get_named_section (decl, NULL, 0));
7197 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7198 fputs ("\t.hidden\t", asm_out_file);
7199 assemble_name (asm_out_file, name);
7200 fputc ('\n', asm_out_file);
7201 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
/* Fallback: plain local label in the regular text section. */
7205 switch_to_section (text_section);
7206 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address (at the top of the stack) into
   the target register, then return. */
7209 xops[0] = gen_rtx_REG (Pmode, regno);
7210 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7211 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7212 output_asm_insn ("ret", xops);
/* Mark the object as not needing an executable stack where required. */
7215 if (NEED_INDICATE_EXEC_STACK)
7216 file_end_indicate_exec_stack ();
7219 /* Emit code for the SET_GOT patterns. */
/* Emits the assembly that loads the GOT/PIC base into DEST.
   NOTE(review): local declarations, return statements and several
   braces/else branches appear elided from this excerpt. */
7222 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
/* VxWorks RTP: the GOT pointer comes from the runtime's GOTT_BASE /
   GOTT_INDEX tables rather than a PC-relative computation. */
7228 if (TARGET_VXWORKS_RTP && flag_pic)
7230 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7231 xops[2] = gen_rtx_MEM (Pmode,
7232 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7233 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7235 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7236 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7237 an unadorned address. */
7238 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7239 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7240 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7244 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Without deep branch prediction (or non-PIC), get the PC via a
   call/pop sequence against a local label. */
7246 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7248 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ())
7251 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7253 output_asm_insn ("call\t%a2", xops);
7256 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7257 is what will be referenced by the Mach-O PIC subsystem. */
7259 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
7262 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7263 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7266 output_asm_insn ("pop%z0\t%0", xops);
/* Deep-branch-prediction path: call a per-register get-PC thunk
   instead of the call/pop pair, and record that the thunk is needed. */
7271 get_pc_thunk_name (name, REGNO (dest));
7272 pic_labels_used |= 1 << REGNO (dest);
7274 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7275 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7276 output_asm_insn ("call\t%X2", xops);
7277 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7278 is what will be referenced by the Mach-O PIC subsystem. */
7281 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
7283 targetm.asm_out.internal_label (asm_out_file, "L",
7284 CODE_LABEL_NUMBER (label));
/* Finally add the GOT symbol's offset to the PC value obtained above. */
7291 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7292 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7294 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7299 /* Generate an "push" pattern for input ARG. */
/* Builds (set (mem (pre_dec sp)) arg) - a stack push as RTL.
   NOTE(review): the function header (return type, name, parameter) and
   the gen_rtx_MEM wrapper line appear elided from this excerpt. */
7304 return gen_rtx_SET (VOIDmode,
7306 gen_rtx_PRE_DEC (Pmode,
7307 stack_pointer_rtx)),
7311 /* Return >= 0 if there is an unused call-clobbered register available
7312 for the entire function. */
7315 ix86_select_alt_pic_regnum (void)
/* Only leaf, unprofiled functions without TLS-descriptor calls can keep
   the PIC base in a scratch register for their whole body. */
7317 if (current_function_is_leaf && !crtl->profile
7318 && !ix86_current_function_calls_tls_descriptor)
7321 /* Can't use the same register for both PIC and DRAP. */
7323 drap = REGNO (crtl->drap_reg);
/* Scan candidate registers (2 = cx downward) for one never live. */
7326 for (i = 2; i >= 0; --i)
7327 if (i != drap && !df_regs_ever_live_p (i))
7331 return INVALID_REGNUM;
7334 /* Return 1 if we need to save REGNO. */
7336 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved whenever it is actually used - unless
   an alternate call-clobbered register can hold the PIC base instead. */
7338 if (pic_offset_table_rtx
7339 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7340 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7342 || crtl->calls_eh_return
7343 || crtl->uses_const_pool))
7345 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* EH return: the EH data registers are restored by the epilogue, so
   count them as saved when MAYBE_EH_RETURN is requested. */
7350 if (crtl->calls_eh_return && maybe_eh_return)
7355 unsigned test = EH_RETURN_DATA_REGNO (i);
7356 if (test == INVALID_REGNUM)
/* The DRAP register is saved/restored explicitly by the prologue. */
7364 && regno == REGNO (crtl->drap_reg))
/* Default rule: save call-saved, non-fixed registers that are live,
   except the frame pointer when it is already handled as such. */
7367 return (df_regs_ever_live_p (regno)
7368 && !call_used_regs[regno]
7369 && !fixed_regs[regno]
7370 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7373 /* Return number of registers to be saved on the stack. */
/* Counts the hard registers for which ix86_save_reg is true.
   NOTE(review): the counter declaration and increment appear elided
   from this excerpt. */
7376 ix86_nsaved_regs (void)
7381 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
7382 if (ix86_save_reg (regno, true))
7387 /* Given FROM and TO register numbers, say whether this elimination is
7388 allowed. If stack alignment is needed, we can only replace argument
7389 pointer with hard frame pointer, or replace frame pointer with stack
7390 pointer. Otherwise, frame pointer elimination is automatically
7391 handled and all other eliminations are valid. */
7394 ix86_can_eliminate (int from, int to)
/* With frame-pointer-based realignment only the two combinations listed
   in the comment above are legal. */
7396 if (stack_realign_fp)
7397 return ((from == ARG_POINTER_REGNUM
7398 && to == HARD_FRAME_POINTER_REGNUM)
7399 || (from == FRAME_POINTER_REGNUM
7400 && to == STACK_POINTER_REGNUM))
/* Otherwise, eliminating to the stack pointer is allowed exactly when
   no frame pointer is needed. */
7402 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7405 /* Return the offset between two registers, one to be eliminated, and the other
7406 its replacement, at the start of a routine. */
7409 ix86_initial_elimination_offset (int from, int to)
7411 struct ix86_frame frame;
7412 ix86_compute_frame_layout (&frame);
/* Offsets are read straight out of the computed frame layout for each
   legal FROM/TO pair. */
7414 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7415 return frame.hard_frame_pointer_offset;
7416 else if (from == FRAME_POINTER_REGNUM
7417 && to == HARD_FRAME_POINTER_REGNUM)
7418 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7421 gcc_assert (to == STACK_POINTER_REGNUM);
7423 if (from == ARG_POINTER_REGNUM)
7424 return frame.stack_pointer_offset;
7426 gcc_assert (from == FRAME_POINTER_REGNUM);
7427 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7431 /* Fill structure ix86_frame about frame of currently computed function. */
/* Computes all offsets/sizes (register save area, padding, va_arg area,
   outgoing args, red zone) that the prologue/epilogue expanders and the
   elimination-offset hook rely on.
   NOTE(review): braces and a few statements (and an apparent
   `if (getenv(...))`-style guard around the trailing dump) are elided
   from this excerpt. */
7434 ix86_compute_frame_layout (struct ix86_frame *frame)
7436 HOST_WIDE_INT total_size;
7437 unsigned int stack_alignment_needed;
7438 HOST_WIDE_INT offset;
7439 unsigned int preferred_alignment;
7440 HOST_WIDE_INT size = get_frame_size ();
7442 frame->nregs = ix86_nsaved_regs ();
/* Convert the alignment bookkeeping from bits to bytes. */
7445 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7446 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7448 gcc_assert (!size || stack_alignment_needed);
7449 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7450 gcc_assert (preferred_alignment <= stack_alignment_needed);
7452 /* During reload iteration the amount of registers saved can change.
7453 Recompute the value as needed. Do not recompute when amount of registers
7454 didn't change as reload does multiple calls to the function and does not
7455 expect the decision to change within single iteration. */
7456 if (!optimize_function_for_size_p (cfun)
7457 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7459 int count = frame->nregs;
7461 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7462 /* The fast prologue uses move instead of push to save registers. This
7463 is significantly longer, but also executes faster as modern hardware
7464 can execute the moves in parallel, but can't do that for push/pop.
7466 Be careful about choosing what prologue to emit: When function takes
7467 many instructions to execute we may use slow version as well as in
7468 case function is known to be outside hot spot (this is known with
7469 feedback only). Weight the size of function by number of registers
7470 to save as it is cheap to use one or two push instructions but very
7471 slow to use many of them. */
7473 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7474 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7475 || (flag_branch_probabilities
7476 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7477 cfun->machine->use_fast_prologue_epilogue = false;
7479 cfun->machine->use_fast_prologue_epilogue
7480 = !expensive_function_p (count);
7482 if (TARGET_PROLOGUE_USING_MOVE
7483 && cfun->machine->use_fast_prologue_epilogue)
7484 frame->save_regs_using_mov = true;
7486 frame->save_regs_using_mov = false;
7489 /* Skip return address and saved base pointer. */
7490 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7492 frame->hard_frame_pointer_offset = offset;
7494 /* Set offset to aligned because the realigned frame starts from
7496 if (stack_realign_fp)
7497 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7499 /* Register save area */
7500 offset += frame->nregs * UNITS_PER_WORD;
/* Reserve the x86-64 varargs register-save area when required. */
7503 if (ix86_save_varrargs_registers)
7505 offset += X86_64_VARARGS_SIZE;
7506 frame->va_arg_size = X86_64_VARARGS_SIZE;
7509 frame->va_arg_size = 0;
7511 /* Align start of frame for local function. */
7512 frame->padding1 = ((offset + stack_alignment_needed - 1)
7513 & -stack_alignment_needed) - offset;
7515 offset += frame->padding1;
7517 /* Frame pointer points here. */
7518 frame->frame_pointer_offset = offset;
7522 /* Add outgoing arguments area. Can be skipped if we eliminated
7523 all the function calls as dead code.
7524 Skipping is however impossible when function calls alloca. Alloca
7525 expander assumes that last crtl->outgoing_args_size
7526 of stack frame are unused. */
7527 if (ACCUMULATE_OUTGOING_ARGS
7528 && (!current_function_is_leaf || cfun->calls_alloca
7529 || ix86_current_function_calls_tls_descriptor))
7531 offset += crtl->outgoing_args_size;
7532 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7535 frame->outgoing_arguments_size = 0;
7537 /* Align stack boundary. Only needed if we're calling another function
7539 if (!current_function_is_leaf || cfun->calls_alloca
7540 || ix86_current_function_calls_tls_descriptor)
7541 frame->padding2 = ((offset + preferred_alignment - 1)
7542 & -preferred_alignment) - offset;
7544 frame->padding2 = 0;
7546 offset += frame->padding2;
7548 /* We've reached end of stack frame. */
7549 frame->stack_pointer_offset = offset;
7551 /* Size prologue needs to allocate. */
7552 frame->to_allocate =
7553 (size + frame->padding1 + frame->padding2
7554 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Tiny or huge frames fall back to push-based saving: pushes are
   cheaper for <=1 reg, and 64-bit displacements can't exceed 2GB. */
7556 if ((!frame->to_allocate && frame->nregs <= 1)
7557 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7558 frame->save_regs_using_mov = false;
/* Use the red zone (area below sp, safe from signals on SysV x86-64)
   when the whole frame fits and the function is a leaf. */
7560 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7561 && current_function_is_leaf
7562 && !ix86_current_function_calls_tls_descriptor)
7564 frame->red_zone_size = frame->to_allocate;
7565 if (frame->save_regs_using_mov)
7566 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7567 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7568 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7571 frame->red_zone_size = 0;
7572 frame->to_allocate -= frame->red_zone_size;
7573 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout. */
7575 fprintf (stderr, "\n");
7576 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7577 fprintf (stderr, "size: %ld\n", (long)size);
7578 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7579 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7580 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7581 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7582 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7583 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7584 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7585 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7586 (long)frame->hard_frame_pointer_offset);
7587 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7588 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7589 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7590 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7594 /* Emit code to save registers in the prologue. */
/* Emits a frame-related push for every register ix86_save_reg selects,
   scanning from the highest hard register downward. */
7597 ix86_emit_save_regs (void)
7602 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
7603 if (ix86_save_reg (regno, true))
7605 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7606 RTX_FRAME_RELATED_P (insn) = 1;
7610 /* Emit code to save registers using MOV insns. First register
7611 is restored from POINTER + OFFSET. */
7613 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
/* Store each to-be-saved register at successive word slots starting at
   POINTER + OFFSET; each store is marked frame-related for CFI. */
7618 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7619 if (ix86_save_reg (regno, true))
7621 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7623 gen_rtx_REG (Pmode, regno));
7624 RTX_FRAME_RELATED_P (insn) = 1;
7625 offset += UNITS_PER_WORD;
7629 /* Expand prologue or epilogue stack adjustment.
7630 The pattern exist to put a dependency on all ebp-based memory accesses.
7631 STYLE should be negative if instructions should be marked as frame related,
7632 zero if %r11 register is live and cannot be freely used and positive
/* NOTE(review): the tail of this comment, the 32-bit TARGET_64BIT test
   and some brace lines appear elided from this excerpt. */
7636 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
/* Simple case: the offset fits an immediate operand directly. */
7641 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7642 else if (x86_64_immediate_operand (offset, DImode))
7643 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7647 /* r11 is used by indirect sibcall return as well, set before the
7648 epilogue and used after the epilogue. ATM indirect sibcall
7649 shouldn't be used together with huge frame sizes in one
7650 function because of the frame_size check in sibcall.c. */
/* Huge 64-bit offsets: stage the constant in r11 first. */
7652 r11 = gen_rtx_REG (DImode, R11_REG);
7653 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
7655 RTX_FRAME_RELATED_P (insn) = 1;
7656 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
7660 RTX_FRAME_RELATED_P (insn) = 1;
7663 /* Find an available register to be used as dynamic realign argument
7664 pointer regsiter. Such a register will be written in prologue and
7665 used in begin of body, so it must not be
7666 1. parameter passing register.
7668 We reuse static-chain register if it is available. Otherwise, we
7669 use DI for i386 and R13 for x86-64. We chose R13 since it has
7672 Return: the regno of chosen register. */
/* NOTE(review): the TARGET_64BIT split, return statements and closing
   braces appear elided from this excerpt. */
7675 find_drap_reg (void)
7677 tree decl = cfun->decl;
7681 /* Use R13 for nested function or function need static chain.
7682 Since function with tail call may use any caller-saved
7683 registers in epilogue, DRAP must not use caller-saved
7684 register in such case. */
7685 if ((decl_function_context (decl)
7686 && !DECL_NO_STATIC_CHAIN (decl))
7687 || crtl->tail_call_emit)
7694 /* Use DI for nested function or function need static chain.
7695 Since function with tail call may use any caller-saved
7696 registers in epilogue, DRAP must not use caller-saved
7697 register in such case. */
7698 if ((decl_function_context (decl)
7699 && !DECL_NO_STATIC_CHAIN (decl))
7700 || crtl->tail_call_emit)
7703 /* Reuse static chain register if it isn't used for parameter
/* 32-bit: ECX is free for DRAP unless regparm/fastcall claims it. */
7705 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
7706 && !lookup_attribute ("fastcall",
7707 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
7714 /* Update incoming stack boundary and estimated stack alignment. */
7717 ix86_update_stack_boundary (void)
7719 /* Prefer the one specified at command line. */
7720 ix86_incoming_stack_boundary
7721 = (ix86_user_incoming_stack_boundary
7722 ? ix86_user_incoming_stack_boundary
7723 : ix86_default_incoming_stack_boundary);
7725 /* Incoming stack alignment can be changed on individual functions
7726 via force_align_arg_pointer attribute. We use the smallest
7727 incoming stack boundary. */
7728 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
7729 && lookup_attribute (ix86_force_align_arg_pointer_string,
7730 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7731 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
7733 /* Stack at entrance of main is aligned by runtime. We use the
7734 smallest incoming stack boundary. */
7735 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
7736 && DECL_NAME (current_function_decl)
7737 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7738 && DECL_FILE_SCOPE_P (current_function_decl))
7739 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7741 /* x86_64 vararg needs 16byte stack alignment for register save
/* NOTE(review): the condition's first clauses (64-bit + stdarg test)
   appear elided from this excerpt. */
7745 && crtl->stack_alignment_estimated < 128)
7746 crtl->stack_alignment_estimated = 128;
7749 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7750 needed or an rtx for DRAP otherwise. */
7753 ix86_get_drap_rtx (void)
/* Forced DRAP (or no outgoing-args accumulation) marks DRAP needed. */
7755 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
7756 crtl->need_drap = true;
7758 if (stack_realign_drap)
7760 /* Assign DRAP to vDRAP and returns vDRAP */
7761 unsigned int regno = find_drap_reg ();
/* NOTE(review): local declarations (arg_ptr, drap_vreg, seq, insn) and
   the start_sequence/end_sequence wrappers appear elided here. */
7766 arg_ptr = gen_rtx_REG (Pmode, regno);
7767 crtl->drap_reg = arg_ptr;
7770 drap_vreg = copy_to_reg (arg_ptr);
/* Emit the copy right after the function entry so the virtual DRAP is
   valid for the whole body. */
7774 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7775 RTX_FRAME_RELATED_P (insn) = 1;
7782 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
/* The incoming-arguments pointer is simply the standard virtual reg. */
7785 ix86_internal_arg_pointer (void)
7787 return virtual_incoming_args_rtx;
7790 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
7791 This is called from dwarf2out.c to emit call frame instructions
7792 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
7794 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
7796 rtx unspec = SET_SRC (pattern);
7797 gcc_assert (GET_CODE (unspec) == UNSPEC);
/* Dispatch on the UNSPEC code carried by the frame-related insn.
   NOTE(review): the switch statement line and break/default lines appear
   elided from this excerpt. */
7801 case UNSPEC_REG_SAVE:
7802 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
7803 SET_DEST (pattern));
7805 case UNSPEC_DEF_CFA:
7806 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
7807 INTVAL (XVECEXP (unspec, 0, 0)));
7814 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
7815 to be generated in correct form. */
7817 ix86_finalize_stack_realign_flags (void)
7819 /* Check if stack realign is really needed after reload, and
7820 stores result in cfun */
/* Effective incoming boundary: the larger of the parameter-area
   boundary and the configured incoming stack boundary. */
7821 unsigned int incoming_stack_boundary
7822 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7823 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
/* Realign is required when the incoming boundary falls short of what
   the function's stack slots need (leaf functions can use the weaker
   max_used_stack_slot_alignment bound). */
7824 unsigned int stack_realign = (incoming_stack_boundary
7825 < (current_function_is_leaf
7826 ? crtl->max_used_stack_slot_alignment
7827 : crtl->stack_alignment_needed));
/* Once finalized, the decision must not change; just re-assert it. */
7829 if (crtl->stack_realign_finalized)
7831 /* After stack_realign_needed is finalized, we can't no longer
7833 gcc_assert (crtl->stack_realign_needed == stack_realign);
7837 crtl->stack_realign_needed = stack_realign;
7838 crtl->stack_realign_finalized = true;
7842 /* Expand the prologue into a bunch of separate insns. */
7845 ix86_expand_prologue (void)
7849 struct ix86_frame frame;
7850 HOST_WIDE_INT allocate;
7852 ix86_finalize_stack_realign_flags ();
7854 /* DRAP should not coexist with stack_realign_fp */
7855 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
7857 ix86_compute_frame_layout (&frame);
7859 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
7860 of DRAP is needed and stack realignment is really needed after reload */
7861 if (crtl->drap_reg && crtl->stack_realign_needed)
7864 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7865 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
7866 ? 0 : UNITS_PER_WORD);
7868 gcc_assert (stack_realign_drap);
7870 /* Grab the argument pointer. */
7871 x = plus_constant (stack_pointer_rtx,
7872 (UNITS_PER_WORD + param_ptr_offset));
7875 /* Only need to push parameter pointer reg if it is caller
7877 if (!call_used_regs[REGNO (crtl->drap_reg)])
7879 /* Push arg pointer reg */
7880 insn = emit_insn (gen_push (y));
7881 RTX_FRAME_RELATED_P (insn) = 1;
7884 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
7885 RTX_FRAME_RELATED_P (insn) = 1;
7887 /* Align the stack. */
7888 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
7890 GEN_INT (-align_bytes)));
7891 RTX_FRAME_RELATED_P (insn) = 1;
7893 /* Replicate the return address on the stack so that return
7894 address can be reached via (argp - 1) slot. This is needed
7895 to implement macro RETURN_ADDR_RTX and intrinsic function
7896 expand_builtin_return_addr etc. */
7898 x = gen_frame_mem (Pmode,
7899 plus_constant (x, -UNITS_PER_WORD));
7900 insn = emit_insn (gen_push (x));
7901 RTX_FRAME_RELATED_P (insn) = 1;
7904 /* Note: AT&T enter does NOT have reversed args. Enter is probably
7905 slower on all targets. Also sdb doesn't like it. */
7907 if (frame_pointer_needed)
7909 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
7910 RTX_FRAME_RELATED_P (insn) = 1;
7912 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
7913 RTX_FRAME_RELATED_P (insn) = 1;
7916 if (stack_realign_fp)
7918 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
7919 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
7921 /* Align the stack. */
7922 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
7924 GEN_INT (-align_bytes)));
7925 RTX_FRAME_RELATED_P (insn) = 1;
7928 allocate = frame.to_allocate;
7930 if (!frame.save_regs_using_mov)
7931 ix86_emit_save_regs ();
7933 allocate += frame.nregs * UNITS_PER_WORD;
7935 /* When using red zone we may start register saving before allocating
7936 the stack frame saving one cycle of the prologue. However I will
7937 avoid doing this if I am going to have to probe the stack since
7938 at least on x86_64 the stack probe can turn into a call that clobbers
7939 a red zone location */
7940 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
7941 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
7942 ix86_emit_save_regs_using_mov ((frame_pointer_needed
7943 && !crtl->stack_realign_needed)
7944 ? hard_frame_pointer_rtx
7945 : stack_pointer_rtx,
7946 -frame.nregs * UNITS_PER_WORD);
7950 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
7951 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7952 GEN_INT (-allocate), -1);
7955 /* Only valid for Win32. */
7956 rtx eax = gen_rtx_REG (Pmode, AX_REG);
7960 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
7962 if (cfun->machine->call_abi == MS_ABI)
7965 eax_live = ix86_eax_live_at_start_p ();
7969 emit_insn (gen_push (eax));
7970 allocate -= UNITS_PER_WORD;
7973 emit_move_insn (eax, GEN_INT (allocate));
7976 insn = gen_allocate_stack_worker_64 (eax);
7978 insn = gen_allocate_stack_worker_32 (eax);
7979 insn = emit_insn (insn);
7980 RTX_FRAME_RELATED_P (insn) = 1;
7981 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
7982 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
7983 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
7984 t, REG_NOTES (insn));
7988 if (frame_pointer_needed)
7989 t = plus_constant (hard_frame_pointer_rtx,
7992 - frame.nregs * UNITS_PER_WORD);
7994 t = plus_constant (stack_pointer_rtx, allocate);
7995 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
7999 if (frame.save_regs_using_mov
8000 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8001 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8003 if (!frame_pointer_needed
8004 || !frame.to_allocate
8005 || crtl->stack_realign_needed)
8006 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8009 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8010 -frame.nregs * UNITS_PER_WORD);
8013 pic_reg_used = false;
8014 if (pic_offset_table_rtx
8015 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8018 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8020 if (alt_pic_reg_used != INVALID_REGNUM)
8021 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8023 pic_reg_used = true;
8030 if (ix86_cmodel == CM_LARGE_PIC)
8032 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8033 rtx label = gen_label_rtx ();
8035 LABEL_PRESERVE_P (label) = 1;
8036 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8037 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8038 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8039 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8040 pic_offset_table_rtx, tmp_reg));
8043 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8046 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8049 /* Prevent function calls from being scheduled before the call to mcount.
8050 In the pic_reg_used case, make sure that the got load isn't deleted. */
8054 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8055 emit_insn (gen_blockage ());
8058 if (crtl->drap_reg && !crtl->stack_realign_needed)
8060 /* vDRAP is setup but after reload it turns out stack realign
8061 isn't necessary, here we will emit prologue to setup DRAP
8062 without stack realign adjustment */
8063 int drap_bp_offset = UNITS_PER_WORD * 2;
8064 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8065 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8068 /* Emit cld instruction if stringops are used in the function. */
8069 if (TARGET_CLD && ix86_current_function_needs_cld)
8070 emit_insn (gen_cld ());
8073 /* Emit code to restore saved registers using MOV insns. First register
8074 is restored from POINTER + OFFSET. */
/* NOTE(review): this extract elides some original lines (embedded line
   numbers jump), so braces and local declarations (regno, r11) are not
   all visible here.  */
8076 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8077 int maybe_eh_return)
8080 rtx base_address = gen_rtx_MEM (Pmode, pointer);
/* Scan every hard register; restore the ones ix86_save_reg says were
   saved by the prologue (honoring the eh_return special case).  */
8082 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8083 if (ix86_save_reg (regno, maybe_eh_return))
8085 /* Ensure that adjust_address won't be forced to produce pointer
8086 out of range allowed by x86-64 instruction set. */
8087 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset doesn't fit a signed 32-bit displacement: materialize
   POINTER + OFFSET in r11 and rebase the addressing on it.  */
8091 r11 = gen_rtx_REG (DImode, R11_REG);
8092 emit_move_insn (r11, GEN_INT (offset));
8093 emit_insn (gen_adddi3 (r11, r11, pointer));
8094 base_address = gen_rtx_MEM (Pmode, r11);
/* Restore the register with a word-sized MOV and step to the next slot.  */
8097 emit_move_insn (gen_rtx_REG (Pmode, regno),
8098 adjust_address (base_address, Pmode, offset));
8099 offset += UNITS_PER_WORD;
8103 /* Restore function stack, frame, and registers. */
/* Emit the RTL epilogue.  STYLE selects the flavor: style == 2 is the
   eh_return path (see the style != 2 test below); style is also passed
   through to pro_epilogue_adjust_stack.  NOTE(review): several original
   lines are elided in this extract (embedded numbering jumps), so some
   braces/conditions are not visible.  */
8106 ix86_expand_epilogue (int style)
8110 struct ix86_frame frame;
8111 HOST_WIDE_INT offset;
8113 ix86_finalize_stack_realign_flags ();
8115 /* When stack is realigned, SP must be valid. */
8116 sp_valid = (!frame_pointer_needed
8117 || current_function_sp_is_unchanging
8118 || stack_realign_fp);
8120 ix86_compute_frame_layout (&frame);
8122 /* Calculate start of saved registers relative to ebp. Special care
8123 must be taken for the normal return case of a function using
8124 eh_return: the eax and edx registers are marked as saved, but not
8125 restored along this path. */
8126 offset = frame.nregs;
8127 if (crtl->calls_eh_return && style != 2)
8129 offset *= -UNITS_PER_WORD;
8131 /* If we're only restoring one register and sp is not valid then
8132 using a move instruction to restore the register since it's
8133 less work than reloading sp and popping the register.
8135 The default code result in stack adjustment using add/lea instruction,
8136 while this code results in LEAVE instruction (or discrete equivalent),
8137 so it is profitable in some other cases as well. Especially when there
8138 are no registers to restore. We also use this code when TARGET_USE_LEAVE
8139 and there is exactly one register to pop. This heuristic may need some
8140 tuning in future. */
/* Branch 1: restore registers with MOVs, then fix up SP/leave.  */
8141 if ((!sp_valid && frame.nregs <= 1)
8142 || (TARGET_EPILOGUE_USING_MOVE
8143 && cfun->machine->use_fast_prologue_epilogue
8144 && (frame.nregs > 1 || frame.to_allocate))
8145 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
8146 || (frame_pointer_needed && TARGET_USE_LEAVE
8147 && cfun->machine->use_fast_prologue_epilogue
8148 && frame.nregs == 1)
8149 || crtl->calls_eh_return)
8151 /* Restore registers. We can use ebp or esp to address the memory
8152 locations. If both are available, default to ebp, since offsets
8153 are known to be small. Only exception is esp pointing directly
8154 to the end of block of saved registers, where we may simplify
8157 If we are realigning stack with bp and sp, regs restore can't
8158 be addressed by bp. sp must be used instead. */
8160 if (!frame_pointer_needed
8161 || (sp_valid && !frame.to_allocate)
8162 || stack_realign_fp)
8163 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8164 frame.to_allocate, style == 2)
8166 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8167 offset, style == 2);
8169 /* eh_return epilogues need %ecx added to the stack pointer. */
8172 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8174 /* Stack align doesn't work with eh_return. */
8175 gcc_assert (!crtl->stack_realign_needed);
8177 if (frame_pointer_needed)
/* With a frame pointer: SA += saved-ra slot, reload %ebp from its
   stack slot, then adjust SP by SA.  */
8179 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8180 tmp = plus_constant (tmp, UNITS_PER_WORD);
8181 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8183 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8184 emit_move_insn (hard_frame_pointer_rtx, tmp);
8186 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
/* Without a frame pointer: SP = SP + SA + frame size + reg-save area.  */
8191 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8192 tmp = plus_constant (tmp, (frame.to_allocate
8193 + frame.nregs * UNITS_PER_WORD));
8194 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8197 else if (!frame_pointer_needed)
8198 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8199 GEN_INT (frame.to_allocate
8200 + frame.nregs * UNITS_PER_WORD),
8202 /* If not an i386, mov & pop is faster than "leave". */
8203 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8204 || !cfun->machine->use_fast_prologue_epilogue)
8205 emit_insn ((*ix86_gen_leave) ());
8208 pro_epilogue_adjust_stack (stack_pointer_rtx,
8209 hard_frame_pointer_rtx,
8212 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Branch 2: deallocate the frame first, then POP the saved registers.  */
8217 /* First step is to deallocate the stack frame so that we can
8220 If we realign stack with frame pointer, then stack pointer
8221 won't be able to recover via lea $offset(%bp), %sp, because
8222 there is a padding area between bp and sp for realign.
8223 "add $to_allocate, %sp" must be used instead. */
8226 gcc_assert (frame_pointer_needed);
8227 gcc_assert (!stack_realign_fp);
8228 pro_epilogue_adjust_stack (stack_pointer_rtx,
8229 hard_frame_pointer_rtx,
8230 GEN_INT (offset), style);
8232 else if (frame.to_allocate)
8233 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8234 GEN_INT (frame.to_allocate), style);
8236 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8237 if (ix86_save_reg (regno, false))
8238 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8239 if (frame_pointer_needed)
8241 /* Leave results in shorter dependency chains on CPUs that are
8242 able to grok it fast. */
8243 if (TARGET_USE_LEAVE)
8244 emit_insn ((*ix86_gen_leave) ());
8247 /* For stack realigned really happens, recover stack
8248 pointer to hard frame pointer is a must, if not using
8250 if (stack_realign_fp)
8251 pro_epilogue_adjust_stack (stack_pointer_rtx,
8252 hard_frame_pointer_rtx,
8254 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* DRAP (dynamic realign argument pointer) teardown: pop back past the
   return address and optional saved DRAP slot.  */
8259 if (crtl->drap_reg && crtl->stack_realign_needed)
8261 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8262 ? 0 : UNITS_PER_WORD);
8263 gcc_assert (stack_realign_drap);
8264 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8266 GEN_INT (-(UNITS_PER_WORD
8267 + param_ptr_offset))));
8268 if (!call_used_regs[REGNO (crtl->drap_reg)])
8269 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8273 /* Sibcall epilogues don't want a return instruction. */
/* Emit the return.  Callee-pop ("pascal"-style) conventions need a
   RET imm16, which can only pop < 64K bytes.  */
8277 if (crtl->args.pops_args && crtl->args.size)
8279 rtx popc = GEN_INT (crtl->args.pops_args);
8281 /* i386 can only pop 64K bytes. If asked to pop more, pop
8282 return address, do explicit add, and jump indirectly to the
8285 if (crtl->args.pops_args >= 65536)
8287 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8289 /* There is no "pascal" calling convention in any 64bit ABI. */
8290 gcc_assert (!TARGET_64BIT);
8292 emit_insn (gen_popsi1 (ecx));
8293 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8294 emit_jump_insn (gen_return_indirect_internal (ecx));
8297 emit_jump_insn (gen_return_pop_internal (popc));
8300 emit_jump_insn (gen_return_internal ());
8303 /* Reset from the function's potential modifications. */
/* TARGET_ASM_FUNCTION_EPILOGUE hook.  Restores the PIC register RTX to
   its real hard-register number (it may have been retargeted during
   compilation of this function) and works around a Mach-O assembler
   limitation.  */
8306 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8307 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8309 if (pic_offset_table_rtx)
8310 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8312 /* Mach-O doesn't support labels at the end of objects, so if
8313 it looks like we might want one, insert a NOP. */
/* Walk back over trailing notes; if the function ends in a (deleted)
   label, pad with a nop so the label isn't at the very end.  */
8315 rtx insn = get_last_insn ();
8318 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8319 insn = PREV_INSN (insn);
8323 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8324 fputs ("\tnop\n", file);
8330 /* Extract the parts of an RTL expression that is a valid memory address
8331 for an instruction. Return 0 if the structure of the address is
8332 grossly off. Return -1 if the address contains ASHIFT, so it is not
8333 strictly valid, but still used for computing length of lea instruction. */
/* Splits ADDR into base + index*scale + disp (+ optional segment) in
   *OUT.  NOTE(review): this extract elides many original lines, so
   several branches and the final store into *OUT are not visible.  */
8336 ix86_decompose_address (rtx addr, struct ix86_address *out)
8338 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8339 rtx base_reg, index_reg;
8340 HOST_WIDE_INT scale = 1;
8341 rtx scale_rtx = NULL_RTX;
8343 enum ix86_address_seg seg = SEG_DEFAULT;
8345 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
/* PLUS: flatten the addend tree into 'addends' and classify each.  */
8347 else if (GET_CODE (addr) == PLUS)
8357 addends[n++] = XEXP (op, 1);
8360 while (GET_CODE (op) == PLUS);
8365 for (i = n; i >= 0; --i)
8368 switch (GET_CODE (op))
8373 index = XEXP (op, 0);
8374 scale_rtx = XEXP (op, 1);
/* UNSPEC_TP inside an address selects the TLS segment register.  */
8378 if (XINT (op, 1) == UNSPEC_TP
8379 && TARGET_TLS_DIRECT_SEG_REFS
8380 && seg == SEG_DEFAULT)
8381 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8410 else if (GET_CODE (addr) == MULT)
8412 index = XEXP (addr, 0); /* index*scale */
8413 scale_rtx = XEXP (addr, 1);
8415 else if (GET_CODE (addr) == ASHIFT)
8419 /* We're called for lea too, which implements ashift on occasion. */
8420 index = XEXP (addr, 0);
8421 tmp = XEXP (addr, 1);
8422 if (!CONST_INT_P (tmp))
/* Shift count 0..3 maps to scale 1/2/4/8; anything larger is invalid.  */
8424 scale = INTVAL (tmp);
8425 if ((unsigned HOST_WIDE_INT) scale > 3)
8431 disp = addr; /* displacement */
8433 /* Extract the integral value of scale. */
8436 if (!CONST_INT_P (scale_rtx))
8438 scale = INTVAL (scale_rtx);
8441 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8442 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8444 /* Allow arg pointer and stack pointer as index if there is not scaling. */
/* %esp (and the soft pointers) cannot be an index in the encoding, so
   with scale == 1 swap base and index.  */
8445 if (base_reg && index_reg && scale == 1
8446 && (index_reg == arg_pointer_rtx
8447 || index_reg == frame_pointer_rtx
8448 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8451 tmp = base, base = index, index = tmp;
8452 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8455 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8456 if ((base_reg == hard_frame_pointer_rtx
8457 || base_reg == frame_pointer_rtx
8458 || base_reg == arg_pointer_rtx) && !disp)
8461 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8462 Avoid this by transforming to [%esi+0].
8463 Reload calls address legitimization without cfun defined, so we need
8464 to test cfun for being non-NULL. */
8465 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8466 && base_reg && !index_reg && !disp
8468 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8471 /* Special case: encode reg+reg instead of reg*2. */
8472 if (!base && index && scale && scale == 2)
8473 base = index, base_reg = index_reg, scale = 1;
8475 /* Special case: scaling cannot be encoded without base or displacement. */
8476 if (!base && !disp && index && scale != 1)
8488 /* Return cost of the memory address x.
8489 For i386, it is better to use a complex address than let gcc copy
8490 the address into a reg and make a new pseudo. But not if the address
8491 requires to two regs - that would mean more pseudos with longer
/* TARGET_ADDRESS_COST implementation.  Decomposes X and charges for
   register pressure and K6 decode hazards.  NOTE(review): the cost
   accumulation statements themselves are elided in this extract.  */
8494 ix86_address_cost (rtx x)
8496 struct ix86_address parts;
8498 int ok = ix86_decompose_address (x, &parts);
/* Look through SUBREGs so the hard/pseudo register tests below see the
   underlying register.  */
8502 if (parts.base && GET_CODE (parts.base) == SUBREG)
8503 parts.base = SUBREG_REG (parts.base);
8504 if (parts.index && GET_CODE (parts.index) == SUBREG)
8505 parts.index = SUBREG_REG (parts.index);
8507 /* Attempt to minimize number of registers in the address. */
8509 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8511 && (!REG_P (parts.index)
8512 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8516 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8518 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8519 && parts.base != parts.index)
8522 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
8523 since it's predecode logic can't detect the length of instructions
8524 and it degenerates to vector decoded. Increase cost of such
8525 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
8526 to split such addresses or even refuse such addresses at all.
8528 Following addressing modes are affected:
8533 The first and last case may be avoidable by explicitly coding the zero in
8534 memory address, but I don't have AMD-K6 machine handy to check this
8538 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8539 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8540 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8546 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8547 this is used for to form addresses to local data when -fPIC is in
/* Recognizes the Darwin PIC idiom (label/symbol minus the literal
   "<pic base>" symbol).  The return statements are elided in this
   extract; the visible logic only matches the MINUS shape.  */
8551 darwin_local_data_pic (rtx disp)
8553 if (GET_CODE (disp) == MINUS)
8555 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
8556 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
8557 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
8559 const char *sym_name = XSTR (XEXP (disp, 1), 0);
/* The Mach-O PIC base symbol has this fixed spelling.  */
8560 if (! strcmp (sym_name, "<pic base>"))
8568 /* Determine if a given RTX is a valid constant. We already know this
8569 satisfies CONSTANT_P. */
/* NOTE(review): case labels and several returns are elided in this
   extract; the visible code shows the CONST drilling and the
   symbol/TLS/DLLIMPORT rejections.  */
8572 legitimate_constant_p (rtx x)
8574 switch (GET_CODE (x))
/* CONST: peel a trailing "+ const_int", then check what remains.  */
8579 if (GET_CODE (x) == PLUS)
8581 if (!CONST_INT_P (XEXP (x, 1)))
8586 if (TARGET_MACHO && darwin_local_data_pic (x))
8589 /* Only some unspecs are valid as "constants". */
8590 if (GET_CODE (x) == UNSPEC)
8591 switch (XINT (x, 1))
8596 return TARGET_64BIT;
/* TPOFF-style unspecs are constant only for the matching TLS model.  */
8599 x = XVECEXP (x, 0, 0);
8600 return (GET_CODE (x) == SYMBOL_REF
8601 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
8603 x = XVECEXP (x, 0, 0);
8604 return (GET_CODE (x) == SYMBOL_REF
8605 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
8610 /* We must have drilled down to a symbol. */
8611 if (GET_CODE (x) == LABEL_REF)
8613 if (GET_CODE (x) != SYMBOL_REF)
8618 /* TLS symbols are never valid. */
8619 if (SYMBOL_REF_TLS_MODEL (x))
8622 /* DLLIMPORT symbols are never valid. */
8623 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
8624 && SYMBOL_REF_DLLIMPORT_P (x))
8629 if (GET_MODE (x) == TImode
8630 && x != CONST0_RTX (TImode)
8636 if (x == CONST0_RTX (GET_MODE (x)))
8644 /* Otherwise we handle everything else in the move patterns. */
8648 /* Determine if it's legal to put X into the constant pool. This
8649 is not possible for the address of thread-local symbols, which
8650 is checked above. */
/* TARGET_CANNOT_FORCE_CONST_MEM hook: anything that is not a
   legitimate constant must stay out of the constant pool.  */
8653 ix86_cannot_force_const_mem (rtx x)
8655 /* We can always put integral constants and vectors in memory. */
8656 switch (GET_CODE (x))
8666 return !legitimate_constant_p (x);
8669 /* Determine if a given RTX is a valid constant address. */
/* Strict-checking variant: the trailing 1 selects strict register
   checking in legitimate_address_p.  */
8672 constant_address_p (rtx x)
8674 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
8677 /* Nonzero if the constant value X is a legitimate general operand
8678 when generating PIC code. It is given that flag_pic is on and
8679 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): case labels and the TARGET_64BIT guard around the
   unspec check are elided in this extract.  */
8682 legitimate_pic_operand_p (rtx x)
8686 switch (GET_CODE (x))
/* CONST: strip an optional "+ const_int" wrapper before inspecting.  */
8689 inner = XEXP (x, 0);
8690 if (GET_CODE (inner) == PLUS
8691 && CONST_INT_P (XEXP (inner, 1)))
8692 inner = XEXP (inner, 0);
8694 /* Only some unspecs are valid as "constants". */
8695 if (GET_CODE (inner) == UNSPEC)
8696 switch (XINT (inner, 1))
8701 return TARGET_64BIT;
/* TPOFF unspec is a valid PIC operand only for local-exec symbols.  */
8703 x = XVECEXP (inner, 0, 0);
8704 return (GET_CODE (x) == SYMBOL_REF
8705 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
/* Symbolic operands defer to the displacement validity check.  */
8713 return legitimate_pic_address_disp_p (x);
8720 /* Determine if a given CONST RTX is a valid memory displacement
/* Validates DISP as a PIC memory displacement.  First the 64-bit
   direct-reference fast path, then the 32-bit GOT-relative unspec
   checks.  NOTE(review): several returns/case labels are elided in
   this extract.  */
8724 legitimate_pic_address_disp_p (rtx disp)
8728 /* In 64bit mode we can allow direct addresses of symbols and labels
8729 when they are not dynamic symbols. */
8732 rtx op0 = disp, op1;
8734 switch (GET_CODE (disp))
/* CONST of (PLUS sym const_int): offset must stay within +/-16MB so
   the RIP-relative displacement cannot overflow after linking.  */
8740 if (GET_CODE (XEXP (disp, 0)) != PLUS)
8742 op0 = XEXP (XEXP (disp, 0), 0);
8743 op1 = XEXP (XEXP (disp, 0), 1);
8744 if (!CONST_INT_P (op1)
8745 || INTVAL (op1) >= 16*1024*1024
8746 || INTVAL (op1) < -16*1024*1024)
8748 if (GET_CODE (op0) == LABEL_REF)
8750 if (GET_CODE (op0) != SYMBOL_REF)
8755 /* TLS references should always be enclosed in UNSPEC. */
8756 if (SYMBOL_REF_TLS_MODEL (op0))
/* Local, near symbols may be referenced directly outside large PIC.  */
8758 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
8759 && ix86_cmodel != CM_LARGE_PIC)
8767 if (GET_CODE (disp) != CONST)
8769 disp = XEXP (disp, 0);
/* 64-bit: only whole-symbol GOT unspecs are acceptable here.  */
8773 /* We are unsafe to allow PLUS expressions. This limit allowed distance
8774 of GOT tables. We should not need these anyway. */
8775 if (GET_CODE (disp) != UNSPEC
8776 || (XINT (disp, 1) != UNSPEC_GOTPCREL
8777 && XINT (disp, 1) != UNSPEC_GOTOFF
8778 && XINT (disp, 1) != UNSPEC_PLTOFF))
8781 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
8782 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip "+ const_int", allow Darwin idiom, then require
   one of the recognized GOT/TLS unspecs.  */
8788 if (GET_CODE (disp) == PLUS)
8790 if (!CONST_INT_P (XEXP (disp, 1)))
8792 disp = XEXP (disp, 0);
8796 if (TARGET_MACHO && darwin_local_data_pic (disp))
8799 if (GET_CODE (disp) != UNSPEC)
8802 switch (XINT (disp, 1))
8807 /* We need to check for both symbols and labels because VxWorks loads
8808 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
8810 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8811 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
8813 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
8814 While ABI specify also 32bit relocation but we don't produce it in
8815 small PIC model at all. */
8816 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
8817 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
8819 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
8821 case UNSPEC_GOTTPOFF:
8822 case UNSPEC_GOTNTPOFF:
8823 case UNSPEC_INDNTPOFF:
/* Each TLS unspec is valid only for the symbol's matching TLS model.  */
8826 disp = XVECEXP (disp, 0, 0);
8827 return (GET_CODE (disp) == SYMBOL_REF
8828 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
8830 disp = XVECEXP (disp, 0, 0);
8831 return (GET_CODE (disp) == SYMBOL_REF
8832 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
8834 disp = XVECEXP (disp, 0, 0);
8835 return (GET_CODE (disp) == SYMBOL_REF
8836 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
8842 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
8843 memory address for an instruction. The MODE argument is the machine mode
8844 for the MEM expression that wants to use this address.
8846 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
8847 convert common non-canonical forms to canonical form so that they will
/* Validates ADDR by decomposing it, then checking base, index, scale
   and displacement in turn.  'reason'/'reason_rtx' record why an
   address was rejected (presumably for debug output — the reporting
   code is elided in this extract, as are the success/failure returns
   and labels).  */
8851 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
8852 rtx addr, int strict)
8854 struct ix86_address parts;
8855 rtx base, index, disp;
8856 HOST_WIDE_INT scale;
8857 const char *reason = NULL;
8858 rtx reason_rtx = NULL_RTX;
8860 if (ix86_decompose_address (addr, &parts) <= 0)
8862 reason = "decomposition failed";
8867 index = parts.index;
8869 scale = parts.scale;
8871 /* Validate base register.
8873 Don't allow SUBREG's that span more than a word here. It can lead to spill
8874 failures when the base is one word out of a two word structure, which is
8875 represented internally as a DImode int. */
8884 else if (GET_CODE (base) == SUBREG
8885 && REG_P (SUBREG_REG (base))
8886 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
8888 reg = SUBREG_REG (base);
8891 reason = "base is not a register";
8895 if (GET_MODE (base) != Pmode)
8897 reason = "base is not in Pmode";
/* Strict checking (after reload) requires hard registers valid as a
   base; non-strict also accepts pseudos.  */
8901 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
8902 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
8904 reason = "base is not valid";
8909 /* Validate index register.
8911 Don't allow SUBREG's that span more than a word here -- same as above. */
8920 else if (GET_CODE (index) == SUBREG
8921 && REG_P (SUBREG_REG (index))
8922 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
8924 reg = SUBREG_REG (index);
8927 reason = "index is not a register";
8931 if (GET_MODE (index) != Pmode)
8933 reason = "index is not in Pmode";
8937 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
8938 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
8940 reason = "index is not valid";
8945 /* Validate scale factor. */
8948 reason_rtx = GEN_INT (scale);
8951 reason = "scale without index";
/* Hardware encodes only scales 1, 2, 4 and 8.  */
8955 if (scale != 2 && scale != 4 && scale != 8)
8957 reason = "scale is not a valid multiplier";
8962 /* Validate displacement. */
8967 if (GET_CODE (disp) == CONST
8968 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
8969 switch (XINT (XEXP (disp, 0), 1))
8971 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
8972 used. While ABI specify also 32bit relocations, we don't produce
8973 them at all and use IP relative instead. */
8976 gcc_assert (flag_pic);
8978 goto is_legitimate_pic;
8979 reason = "64bit address unspec";
8982 case UNSPEC_GOTPCREL:
8983 gcc_assert (flag_pic);
8984 goto is_legitimate_pic;
8986 case UNSPEC_GOTTPOFF:
8987 case UNSPEC_GOTNTPOFF:
8988 case UNSPEC_INDNTPOFF:
8994 reason = "invalid address unspec";
8998 else if (SYMBOLIC_CONST (disp)
9002 && MACHOPIC_INDIRECT
9003 && !machopic_operand_p (disp)
/* PIC displacement checks (target of the is_legitimate_pic gotos).  */
9009 if (TARGET_64BIT && (index || base))
9011 /* foo@dtpoff(%rX) is ok. */
9012 if (GET_CODE (disp) != CONST
9013 || GET_CODE (XEXP (disp, 0)) != PLUS
9014 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9015 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9016 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9017 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9019 reason = "non-constant pic memory reference";
9023 else if (! legitimate_pic_address_disp_p (disp))
9025 reason = "displacement is an invalid pic construct";
9029 /* This code used to verify that a symbolic pic displacement
9030 includes the pic_offset_table_rtx register.
9032 While this is good idea, unfortunately these constructs may
9033 be created by "adds using lea" optimization for incorrect
9042 This code is nonsensical, but results in addressing
9043 GOT table with pic_offset_table_rtx base. We can't
9044 just refuse it easily, since it gets matched by
9045 "addsi3" pattern, that later gets split to lea in the
9046 case output register differs from input. While this
9047 can be handled by separate addsi pattern for this case
9048 that never results in lea, this seems to be easier and
9049 correct fix for crash to disable this test. */
/* Non-PIC displacement: must be a constant, and on 64-bit must fit
   the signed-32-bit immediate range.  */
9051 else if (GET_CODE (disp) != LABEL_REF
9052 && !CONST_INT_P (disp)
9053 && (GET_CODE (disp) != CONST
9054 || !legitimate_constant_p (disp))
9055 && (GET_CODE (disp) != SYMBOL_REF
9056 || !legitimate_constant_p (disp)))
9058 reason = "displacement is not constant";
9061 else if (TARGET_64BIT
9062 && !x86_64_immediate_operand (disp, VOIDmode))
9064 reason = "displacement is out of range";
9069 /* Everything looks valid. */
9076 /* Return a unique alias set for the GOT. */
/* Lazily allocated on first call (the -1 sentinel check and the return
   are elided in this extract).  */
9078 static alias_set_type
9079 ix86_GOT_alias_set (void)
9081 static alias_set_type set = -1;
9083 set = new_alias_set ();
9087 /* Return a legitimate reference for ORIG (an address) using the
9088 register REG. If REG is 0, a new pseudo is generated.
9090 There are two types of references that must be handled:
9092 1. Global data references must load the address from the GOT, via
9093 the PIC reg. An insn is emitted to do this load, and the reg is
9096 2. Static data references, constant pool addresses, and code labels
9097 compute the address as an offset from the GOT, whose base is in
9098 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9099 differentiate them from global data objects. The returned
9100 address is the PIC reg + an unspec constant.
9102 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9103 reg also appears in the address. */
/* NOTE(review): this extract elides a number of lines (declarations of
   addr/new_rtx, several returns and else-branches), so the branch
   structure below is only partially visible.  */
9106 legitimize_pic_address (rtx orig, rtx reg)
9113 if (TARGET_MACHO && !TARGET_64BIT)
9116 reg = gen_reg_rtx (Pmode);
9117 /* Use the generic Mach-O PIC machinery. */
9118 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* Already-legitimate 64-bit displacement: nothing to do here.  */
9122 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
/* 64-bit medium/large models: @GOTOFF via an explicit add with the
   PIC register.  */
9124 else if (TARGET_64BIT
9125 && ix86_cmodel != CM_SMALL_PIC
9126 && gotoff_operand (addr, Pmode))
9129 /* This symbol may be referenced via a displacement from the PIC
9130 base address (@GOTOFF). */
9132 if (reload_in_progress)
9133 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9134 if (GET_CODE (addr) == CONST)
9135 addr = XEXP (addr, 0);
9136 if (GET_CODE (addr) == PLUS)
9138 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9140 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9143 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9144 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9146 tmpreg = gen_reg_rtx (Pmode);
9149 emit_move_insn (tmpreg, new_rtx);
9153 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9154 tmpreg, 1, OPTAB_DIRECT);
9157 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
/* 32-bit @GOTOFF: PIC register plus the GOTOFF unspec constant.  */
9159 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9161 /* This symbol may be referenced via a displacement from the PIC
9162 base address (@GOTOFF). */
9164 if (reload_in_progress)
9165 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9166 if (GET_CODE (addr) == CONST)
9167 addr = XEXP (addr, 0);
9168 if (GET_CODE (addr) == PLUS)
9170 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9172 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9175 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9176 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9177 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9181 emit_move_insn (reg, new_rtx);
/* Global (non-TLS) symbols and, on VxWorks, text labels: load the
   address from the GOT.  */
9185 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9186 /* We can't use @GOTOFF for text labels on VxWorks;
9187 see gotoff_operand. */
9188 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)
9190 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9192 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9193 return legitimize_dllimport_symbol (addr, true);
9194 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9195 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9196 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9198 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9199 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* 64-bit small/medium PIC: RIP-relative @GOTPCREL load.  */
9203 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9205 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9206 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9207 new_rtx = gen_const_mem (Pmode, new_rtx);
9208 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9211 reg = gen_reg_rtx (Pmode);
9212 /* Use directly gen_movsi, otherwise the address is loaded
9213 into register for CSE. We don't want to CSE this addresses,
9214 instead we CSE addresses from the GOT table, so skip this. */
9215 emit_insn (gen_movsi (reg, new_rtx));
9220 /* This symbol must be referenced via a load from the
9221 Global Offset Table (@GOT). */
9223 if (reload_in_progress)
9224 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9225 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9226 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9228 new_rtx = force_reg (Pmode, new_rtx);
9229 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9230 new_rtx = gen_const_mem (Pmode, new_rtx);
9231 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9234 reg = gen_reg_rtx (Pmode);
9235 emit_move_insn (reg, new_rtx);
/* Fallback: constants and composite addresses.  */
9241 if (CONST_INT_P (addr)
9242 && !x86_64_immediate_operand (addr, VOIDmode))
9246 emit_move_insn (reg, addr);
9250 new_rtx = force_reg (Pmode, addr);
9252 else if (GET_CODE (addr) == CONST)
9254 addr = XEXP (addr, 0);
9256 /* We must match stuff we generate before. Assume the only
9257 unspecs that can get here are ours. Not that we could do
9258 anything with them anyway.... */
9259 if (GET_CODE (addr) == UNSPEC
9260 || (GET_CODE (addr) == PLUS
9261 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9263 gcc_assert (GET_CODE (addr) == PLUS);
9265 if (GET_CODE (addr) == PLUS)
9267 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9269 /* Check first to see if this is a constant offset from a @GOTOFF
9270 symbol reference. */
9271 if (gotoff_operand (op0, Pmode)
9272 && CONST_INT_P (op1))
9276 if (reload_in_progress)
9277 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9278 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9280 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9281 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9282 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9286 emit_move_insn (reg, new_rtx);
/* 64-bit: offsets outside +/-16MB cannot be folded into the
   relocation; force the addend into a register instead.  */
9292 if (INTVAL (op1) < -16*1024*1024
9293 || INTVAL (op1) >= 16*1024*1024)
9295 if (!x86_64_immediate_operand (op1, Pmode))
9296 op1 = force_reg (Pmode, op1);
9297 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves recursively and recombine.  */
9303 base = legitimize_pic_address (XEXP (addr, 0), reg);
9304 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9305 base == reg ? NULL_RTX : reg);
9307 if (CONST_INT_P (new_rtx))
9308 new_rtx = plus_constant (base, INTVAL (new_rtx));
9311 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9313 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9314 new_rtx = XEXP (new_rtx, 1);
9316 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9324 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* Builds an UNSPEC_TP expression; when TO_REG, emits a SET copying it
   into a fresh pseudo (the TO_REG test and returns are elided in this
   extract).  */
9327 get_thread_pointer (int to_reg)
9331 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9335 reg = gen_reg_rtx (Pmode);
9336 insn = gen_rtx_SET (VOIDmode, reg, tp);
9337 insn = emit_insn (insn);
9342 /* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
9343    false if we expect this to be used for a memory address and true if
9344    we expect to load the address into a register.  */
9347 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9349   rtx dest, base, off, pic, tp;
/* General Dynamic model: the address is obtained by a call to the TLS
   resolver (64-bit uses a direct call pattern; 32-bit its own expander).
   With GNU2 TLS the descriptor result is relative to the thread pointer,
   so TP is fetched up front and added afterwards.  */
9354     case TLS_MODEL_GLOBAL_DYNAMIC:
9355       dest = gen_reg_rtx (Pmode);
9356       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9358       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9360 	  rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9363 	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9364 	  insns = get_insns ();
/* The resolver call has no side effects beyond its return value, so it may
   be treated as const and wrapped as a libcall block equivalent to X.  */
9367 	  RTL_CONST_CALL_P (insns) = 1;
9368 	  emit_libcall_block (insns, dest, rax, x);
9370       else if (TARGET_64BIT && TARGET_GNU2_TLS)
9371 	emit_insn (gen_tls_global_dynamic_64 (dest, x));
9373 	emit_insn (gen_tls_global_dynamic_32 (dest, x));
9375       if (TARGET_GNU2_TLS)
9377 	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
/* Record that TP + descriptor result is equivalent to the symbol X.  */
9379 	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Local Dynamic model: one resolver call yields the module base; each
   symbol is then BASE + symbol@DTPOFF.  */
9383     case TLS_MODEL_LOCAL_DYNAMIC:
9384       base = gen_reg_rtx (Pmode);
9385       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9387       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9389 	  rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9392 	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9393 	  insns = get_insns ();
/* Attach an (expr_list (tls_get_addr) (const_int 0)) equivalence note so the
   base computation can be CSEd across the function.  */
9396 	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9397 	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9398 	  RTL_CONST_CALL_P (insns) = 1;
9399 	  emit_libcall_block (insns, base, rax, note);
9401       else if (TARGET_64BIT && TARGET_GNU2_TLS)
9402 	emit_insn (gen_tls_local_dynamic_base_64 (base));
9404 	emit_insn (gen_tls_local_dynamic_base_32 (base));
9406       if (TARGET_GNU2_TLS)
9408 	  rtx x = ix86_tls_module_base ();
/* GNU2: the base register equals MODULE_BASE - TP; note that equivalence.  */
9410 	  set_unique_reg_note (get_last_insn (), REG_EQUIV,
9411 			       gen_rtx_MINUS (Pmode, x, tp));
/* Offset of X within its module: symbol@DTPOFF.  */
9414       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9415       off = gen_rtx_CONST (Pmode, off);
9417       dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9419       if (TARGET_GNU2_TLS)
9421 	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9423 	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Initial Exec model: the TP-relative offset is loaded from the GOT.  The
   UNSPEC flavour (GOTTPOFF / GOTNTPOFF / INDNTPOFF) depends on target and
   PIC mode; NOTE(review): some branches of that selection are elided here.  */
9428     case TLS_MODEL_INITIAL_EXEC:
9432 	    type = UNSPEC_GOTNTPOFF;
9436 	  if (reload_in_progress)
9437 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9438 	  pic = pic_offset_table_rtx;
9439 	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9441       else if (!TARGET_ANY_GNU_TLS)
9443 	  pic = gen_reg_rtx (Pmode);
9444 	  emit_insn (gen_set_got (pic));
9445 	  type = UNSPEC_GOTTPOFF;
9450 	  type = UNSPEC_INDNTPOFF;
9453       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9454       off = gen_rtx_CONST (Pmode, off);
9456 	off = gen_rtx_PLUS (Pmode, pic, off);
/* The GOT slot is read-only at runtime; use a const MEM in the GOT alias set
   so it does not conflict with ordinary stores.  */
9457       off = gen_const_mem (Pmode, off);
9458       set_mem_alias_set (off, ix86_GOT_alias_set ());
9460       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9462 	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9463 	  off = force_reg (Pmode, off);
9464 	  return gen_rtx_PLUS (Pmode, base, off);
/* Non-GNU 32-bit TLS: offsets are negative, so subtract from TP.  */
9468 	  base = get_thread_pointer (true);
9469 	  dest = gen_reg_rtx (Pmode);
9470 	  emit_insn (gen_subsi3 (dest, base, off));
/* Local Exec model: offset is a link-time constant (@NTPOFF / @TPOFF).  */
9474     case TLS_MODEL_LOCAL_EXEC:
9475       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9476 			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9477 			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9478       off = gen_rtx_CONST (Pmode, off);
9480       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9482 	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9483 	  return gen_rtx_PLUS (Pmode, base, off);
9487 	  base = get_thread_pointer (true);
9488 	  dest = gen_reg_rtx (Pmode);
9489 	  emit_insn (gen_subsi3 (dest, base, off));
9500 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* Cache of decl -> __imp_ VAR_DECL mappings, kept alive by the GC only while
   the source decl is marked (if_marked on tree_map_marked_p).  */
9503 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9504   htab_t dllimport_map;
9507 get_dllimport_decl (tree decl)
9509   struct tree_map *h, in;
9513   size_t namelen, prefixlen;
/* Lazily create the hash table on first use.  */
9519     dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
/* Look up DECL; if a mapping already exists it is returned from the table
   (NOTE(review): the early-return on a cache hit is elided in this excerpt).  */
9521   in.hash = htab_hash_pointer (decl);
9522   in.base.from = decl;
9523   loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9524   h = (struct tree_map *) *loc;
/* Cache miss: build an artificial, external, read-only pointer VAR_DECL that
   stands for the import-table slot of DECL.  */
9528   *loc = h = GGC_NEW (struct tree_map);
9530   h->base.from = decl;
9531   h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9532   DECL_ARTIFICIAL (to) = 1;
9533   DECL_IGNORED_P (to) = 1;
9534   DECL_EXTERNAL (to) = 1;
9535   TREE_READONLY (to) = 1;
/* Compose the assembler name "__imp_NAME" (or "__imp__NAME" when a user
   label prefix applies); the leading '*' suppresses further prefixing.  */
9537   name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9538   name = targetm.strip_name_encoding (name);
9539   prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9540 	   ? "*__imp_" : "*__imp__";
9541   namelen = strlen (name);
9542   prefixlen = strlen (prefix);
9543   imp_name = (char *) alloca (namelen + prefixlen + 1);
9544   memcpy (imp_name, prefix, prefixlen);
9545   memcpy (imp_name + prefixlen, name, namelen + 1);
/* Intern the name in GC storage and wrap the symbol in a const MEM (the
   import-table slot is read-only) tagged with the GOT alias set.  */
9547   name = ggc_alloc_string (imp_name, namelen + prefixlen);
9548   rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9549   SET_SYMBOL_REF_DECL (rtl, to);
9550   SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9552   rtl = gen_const_mem (Pmode, rtl);
9553   set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9555   SET_DECL_RTL (to, rtl);
9556   SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9561 /* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
9562    true if we require the result be a register.  */
9565 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
/* A dllimport SYMBOL_REF must carry its decl so we can build __imp_DECL.  */
9570   gcc_assert (SYMBOL_REF_DECL (symbol));
9571   imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
/* DECL_RTL of the import decl is a const MEM referencing the import slot;
   load it into a pseudo when the caller needs a register.  */
9573   x = DECL_RTL (imp_decl);
9575     x = force_reg (Pmode, x);
9579 /* Try machine-dependent ways of modifying an illegitimate address
9580    to be legitimate.  If we find one, return the new, valid address.
9581    This macro is used in only one place: `memory_address' in explow.c.
9583    OLDX is the address as it was before break_out_memory_refs was called.
9584    In some cases it is useful to look at this to decide what needs to be done.
9586    MODE and WIN are passed so that this macro can use
9587    GO_IF_LEGITIMATE_ADDRESS.
9589    It is always safe for this macro to do nothing.  It exists to recognize
9590    opportunities to optimize the output.
9592    For the 80386, we handle X+REG by loading X into a register R and
9593    using R+REG.  R will go in a general reg and indexing will be used.
9594    However, if REG is a broken-out memory address or multiplication,
9595    nothing needs to be done because REG can certainly go in a general reg.
9597    When -fpic is used, special handling is needed for symbolic references.
9598    See comments by legitimize_pic_address in i386.c for details.  */
9601 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
/* TLS symbols take precedence over every other transformation: a bare TLS
   SYMBOL_REF, or a (const (plus (symbol_ref tls) offset)).  */
9606   log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
9608     return legitimize_tls_address (x, (enum tls_model) log, false);
9609   if (GET_CODE (x) == CONST
9610       && GET_CODE (XEXP (x, 0)) == PLUS
9611       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9612       && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
9614       rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
9615 				      (enum tls_model) log, false);
9616       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* Next, dllimport symbols: rewrite them (and symbol+offset forms) through
   the __imp_ indirection.  */
9619   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9621       if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
9622 	return legitimize_dllimport_symbol (x, true);
9623       if (GET_CODE (x) == CONST
9624 	  && GET_CODE (XEXP (x, 0)) == PLUS
9625 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9626 	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
9628 	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
9629 	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* Under PIC, symbolic constants must go through the PIC machinery.  */
9633   if (flag_pic && SYMBOLIC_CONST (x))
9634     return legitimize_pic_address (x, 0);
9636   /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
9637   if (GET_CODE (x) == ASHIFT
9638       && CONST_INT_P (XEXP (x, 1))
9639       && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
9642       log = INTVAL (XEXP (x, 1));
9643       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
9644 			GEN_INT (1 << log));
9647   if (GET_CODE (x) == PLUS)
9649       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
9651       if (GET_CODE (XEXP (x, 0)) == ASHIFT
9652 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9653 	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
9656 	  log = INTVAL (XEXP (XEXP (x, 0), 1));
9657 	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
9658 				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
9659 				      GEN_INT (1 << log));
9662       if (GET_CODE (XEXP (x, 1)) == ASHIFT
9663 	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9664 	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
9667 	  log = INTVAL (XEXP (XEXP (x, 1), 1));
9668 	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
9669 				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
9670 				      GEN_INT (1 << log));
9673       /* Put multiply first if it isn't already.  */
9674       if (GET_CODE (XEXP (x, 1)) == MULT)
9676 	  rtx tmp = XEXP (x, 0);
9677 	  XEXP (x, 0) = XEXP (x, 1);
9682       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
9683 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
9684 	 created by virtual register instantiation, register elimination, and
9685 	 similar optimizations.  */
9686       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
9689 	  x = gen_rtx_PLUS (Pmode,
9690 			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
9691 					  XEXP (XEXP (x, 1), 0)),
9692 			    XEXP (XEXP (x, 1), 1));
9696 	   (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
9697 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
9698       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
9699 	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
9700 	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
9701 	       && CONSTANT_P (XEXP (x, 1)))
/* One of the two constants is a CONST_INT (to fold), the other is kept as
   the symbolic part.  */
9704 	  rtx other = NULL_RTX;
9706 	  if (CONST_INT_P (XEXP (x, 1)))
9708 	      constant = XEXP (x, 1);
9709 	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
9711 	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
9713 	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
9714 	      other = XEXP (x, 1);
9722 	    x = gen_rtx_PLUS (Pmode,
9723 			      gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
9724 					    XEXP (XEXP (XEXP (x, 0), 1), 0)),
9725 			      plus_constant (other, INTVAL (constant)));
/* If any canonicalization made the address legitimate, stop here.  */
9729       if (changed && legitimate_address_p (mode, x, FALSE))
/* Otherwise force MULT sub-expressions into registers and retry.  */
9732       if (GET_CODE (XEXP (x, 0)) == MULT)
9735 	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
9738       if (GET_CODE (XEXP (x, 1)) == MULT)
9741 	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
9745 	  && REG_P (XEXP (x, 1))
9746 	  && REG_P (XEXP (x, 0)))
9749       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
9752 	  x = legitimize_pic_address (x, 0);
9755       if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: materialize one side of the PLUS in a fresh register.  */
9758       if (REG_P (XEXP (x, 0)))
9760 	  rtx temp = gen_reg_rtx (Pmode);
9761 	  rtx val  = force_operand (XEXP (x, 1), temp);
9763 	    emit_move_insn (temp, val);
9769       else if (REG_P (XEXP (x, 1)))
9771 	  rtx temp = gen_reg_rtx (Pmode);
9772 	  rtx val  = force_operand (XEXP (x, 0), temp);
9774 	    emit_move_insn (temp, val);
9784 /* Print an integer constant expression in assembler syntax.  Addition
9785    and subtraction are the only arithmetic that may appear in these
9786    expressions.  FILE is the stdio stream to write to, X is the rtx, and
9787    CODE is the operand print code from the output string.  */
9790 output_pic_addr_const (FILE *file, rtx x, int code)
9794   switch (GET_CODE (x))
/* NOTE(review): several case labels are elided in this excerpt; the bodies
   below are grouped by the constructs they clearly handle.  */
9797       gcc_assert (flag_pic);
/* SYMBOL_REF: print the (possibly Mach-O stubbed) name, plus @PLT when the
   'P' code asks for a PLT-indirect call to a non-local symbol.  */
9802       if (! TARGET_MACHO || TARGET_64BIT)
9803 	output_addr_const (file, x);
9806 	  const char *name = XSTR (x, 0);
9808 	  /* Mark the decl as referenced so that cgraph will
9809 	     output the function.  */
9810 	  if (SYMBOL_REF_DECL (x))
9811 	    mark_decl_referenced (SYMBOL_REF_DECL (x));
9814 	  if (MACHOPIC_INDIRECT
9815 	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
9816 	    name = machopic_indirection_name (x, /*stub_p=*/true);
9818 	  assemble_name (file, name);
9820       if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
9821 	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
9822 	fputs ("@PLT", file);
/* CODE_LABEL / LABEL_REF: emit the internal label name.  */
9829       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
9830       assemble_name (asm_out_file, buf);
9834       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9838       /* This used to output parentheses around the expression,
9839 	 but that does not work on the 386 (either ATT or BSD assembler).  */
9840       output_pic_addr_const (file, XEXP (x, 0), code);
/* CONST_DOUBLE used as a wide integer (VOIDmode).  */
9844       if (GET_MODE (x) == VOIDmode)
9846 	  /* We can use %d if the number is <32 bits and positive.  */
9847 	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
9848 	    fprintf (file, "0x%lx%08lx",
9849 		     (unsigned long) CONST_DOUBLE_HIGH (x),
9850 		     (unsigned long) CONST_DOUBLE_LOW (x));
9852 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
9855 	/* We can't handle floating point constants;
9856 	   PRINT_OPERAND must handle them.  */
9857 	output_operand_lossage ("floating constant misused");
/* PLUS: recurse over both operands, integer constant first.  */
9861       /* Some assemblers need integer constants to appear first.  */
9862       if (CONST_INT_P (XEXP (x, 0)))
9864 	  output_pic_addr_const (file, XEXP (x, 0), code);
9866 	  output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: only an integer subtrahend is supported.  */
9870       gcc_assert (CONST_INT_P (XEXP (x, 1)));
9871       output_pic_addr_const (file, XEXP (x, 1), code);
9873       output_pic_addr_const (file, XEXP (x, 0), code);
/* Grouped difference, dialect-dependent bracket style.  */
9879       putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
9880       output_pic_addr_const (file, XEXP (x, 0), code);
9882       output_pic_addr_const (file, XEXP (x, 1), code);
9884       putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped constant followed by the relocation suffix
   selected by the unspec number.  */
9888       gcc_assert (XVECLEN (x, 0) == 1);
9889       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
9890       switch (XINT (x, 1))
9893 	  fputs ("@GOT", file);
9896 	  fputs ("@GOTOFF", file);
9899 	  fputs ("@PLTOFF", file);
9901 	case UNSPEC_GOTPCREL:
9902 	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9903 		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
9905 	case UNSPEC_GOTTPOFF:
9906 	  /* FIXME: This might be @TPOFF in Sun ld too.  */
9907 	  fputs ("@GOTTPOFF", file);
9910 	  fputs ("@TPOFF", file);
9914 	  fputs ("@TPOFF", file);
9916 	  fputs ("@NTPOFF", file);
9919 	  fputs ("@DTPOFF", file);
9921 	case UNSPEC_GOTNTPOFF:
9923 	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9924 		   "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
9926 	    fputs ("@GOTNTPOFF", file);
9928 	case UNSPEC_INDNTPOFF:
9929 	  fputs ("@INDNTPOFF", file);
9932 	  output_operand_lossage ("invalid UNSPEC as operand");
9938       output_operand_lossage ("invalid expression as operand");
9942 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9943    We need to emit DTP-relative relocations.  */
9945 static void ATTRIBUTE_UNUSED
9946 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
/* Emit an assembler long directive for X with an @DTPOFF relocation.
   NOTE(review): the switch on SIZE is elided in this excerpt; the trailing
   ", 0" appears to pad the 8-byte case — confirm against full source.  */
9948   fputs (ASM_LONG, file);
9949   output_addr_const (file, x);
9950   fputs ("@DTPOFF", file);
9956       fputs (", 0", file);
9963 /* In the name of slightly smaller debug output, and to cater to
9964    general assembler lossage, recognize PIC+GOTOFF and turn it back
9965    into a direct symbol reference.
9967    On Darwin, this is necessary to avoid a crash, because Darwin
9968    has a different PIC label for each routine but the DWARF debugging
9969    information is not associated with any particular routine, so it's
9970    necessary to remove references to the PIC label from RTL stored by
9971    the DWARF output code.  */
9974 ix86_delegitimize_address (rtx orig_x)
9977   /* reg_addend is NULL or a multiple of some register.  */
9978   rtx reg_addend = NULL_RTX;
9979   /* const_addend is NULL or a const_int.  */
9980   rtx const_addend = NULL_RTX;
9981   /* This is the result, or NULL.  */
9982   rtx result = NULL_RTX;
/* 64-bit case: peel (const (unspec [sym] UNSPEC_GOTPCREL)) back to sym.  */
9989       if (GET_CODE (x) != CONST
9990 	  || GET_CODE (XEXP (x, 0)) != UNSPEC
9991 	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
9994       return XVECEXP (XEXP (x, 0), 0, 0);
/* 32-bit case: expect (plus <pic-base-expr> (const ...)).  */
9997   if (GET_CODE (x) != PLUS
9998       || GET_CODE (XEXP (x, 1)) != CONST)
10001   if (REG_P (XEXP (x, 0))
10002       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
10003     /* %ebx + GOT/GOTOFF */
10005   else if (GET_CODE (XEXP (x, 0)) == PLUS)
10007       /* %ebx + %reg * scale + GOT/GOTOFF */
10008       reg_addend = XEXP (x, 0);
/* Strip the PIC register from whichever side of the inner PLUS holds it,
   keeping the scaled-index part as reg_addend.  */
10009       if (REG_P (XEXP (reg_addend, 0))
10010 	  && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
10011 	reg_addend = XEXP (reg_addend, 1);
10012       else if (REG_P (XEXP (reg_addend, 1))
10013 	       && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
10014 	reg_addend = XEXP (reg_addend, 0);
10017       if (!REG_P (reg_addend)
10018 	  && GET_CODE (reg_addend) != MULT
10019 	  && GET_CODE (reg_addend) != ASHIFT)
/* Descend into the CONST and split off a trailing integer addend.  */
10025   x = XEXP (XEXP (x, 1), 0);
10026   if (GET_CODE (x) == PLUS
10027       && CONST_INT_P (XEXP (x, 1)))
10029       const_addend = XEXP (x, 1);
/* GOT references appear inside a MEM; GOTOFF references do not.  */
10033   if (GET_CODE (x) == UNSPEC
10034       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10035 	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10036     result = XVECEXP (x, 0, 0);
10038   if (TARGET_MACHO && darwin_local_data_pic (x)
10039       && !MEM_P (orig_x))
10040     result = XEXP (x, 0);
/* Re-attach any addends that were peeled off above.  */
10046     result = gen_rtx_PLUS (Pmode, result, const_addend);
10048     result = gen_rtx_PLUS (Pmode, reg_addend, result);
10052 /* If X is a machine specific address (i.e. a symbol or label being
10053    referenced as a displacement from the GOT implemented using an
10054    UNSPEC), then return the base term.  Otherwise return X.  */
10057 ix86_find_base_term (rtx x)
/* 64-bit: unwrap (const (plus? (unspec [sym] UNSPEC_GOTPCREL) int)).  */
10063       if (GET_CODE (x) != CONST)
10065       term = XEXP (x, 0);
10066       if (GET_CODE (term) == PLUS
10067 	  && (CONST_INT_P (XEXP (term, 1))
10068 	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10069 	term = XEXP (term, 0);
10070       if (GET_CODE (term) != UNSPEC
10071 	  || XINT (term, 1) != UNSPEC_GOTPCREL)
10074       term = XVECEXP (term, 0, 0);
/* Only a SYMBOL_REF or LABEL_REF counts as a base term.  */
10076       if (GET_CODE (term) != SYMBOL_REF
10077 	  && GET_CODE (term) != LABEL_REF)
/* 32-bit: fall back to full delegitimization, same base-term filter.  */
10083   term = ix86_delegitimize_address (x);
10085   if (GET_CODE (term) != SYMBOL_REF
10086       && GET_CODE (term) != LABEL_REF)
/* Write the one- or two-letter condition suffix (e.g. "e", "a", "np") for
   CODE under flag mode MODE to FILE.  REVERSE inverts the condition; FP
   selects the fcmov-style spelling where it differs from the integer one.  */
10093 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10094 		    int fp, FILE *file)
10096   const char *suffix;
/* FP compare modes: map the FP comparison onto an integer condition first.  */
10098   if (mode == CCFPmode || mode == CCFPUmode)
10100       enum rtx_code second_code, bypass_code;
10101       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10102       gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10103       code = ix86_fp_compare_code_to_integer (code);
10107     code = reverse_condition (code);
/* NOTE(review): the per-rtx-code switch is heavily elided in this excerpt;
   each surviving group asserts the flag modes it is valid for.  */
10158       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10162       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10163 	 Those same assemblers have the same but opposite lossage on cmov.  */
10164       if (mode == CCmode)
10165 	suffix = fp ? "nbe" : "a";
10166       else if (mode == CCCmode)
10169 	gcc_unreachable ();
10185 	  gcc_unreachable ();
10189       gcc_assert (mode == CCmode || mode == CCCmode);
10206 	  gcc_unreachable ();
10210       /* ??? As above.  */
10211       gcc_assert (mode == CCmode || mode == CCCmode);
10212       suffix = fp ? "nb" : "ae";
10215       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10219       /* ??? As above.  */
10220       if (mode == CCmode)
10222       else if (mode == CCCmode)
10223 	suffix = fp ? "nb" : "ae";
10225 	gcc_unreachable ();
/* Parity conditions (unordered / ordered FP results).  */
10228       suffix = fp ? "u" : "p";
10231       suffix = fp ? "nu" : "np";
10234       gcc_unreachable ();
10236   fputs (suffix, file);
10239 /* Print the name of register X to FILE based on its machine mode and number.
10240    If CODE is 'w', pretend the mode is HImode.
10241    If CODE is 'b', pretend the mode is QImode.
10242    If CODE is 'k', pretend the mode is SImode.
10243    If CODE is 'q', pretend the mode is DImode.
10244    If CODE is 'x', pretend the mode is V4SFmode.
10245    If CODE is 't', pretend the mode is V8SFmode.
10246    If CODE is 'h', pretend the reg is the 'high' byte register.
10247    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10248    If CODE is 'd', duplicate the operand for AVX instruction.
10252 print_reg (rtx x, int code, FILE *file)
10255   bool duplicated = code == 'd' && TARGET_AVX;
/* Virtual/flag registers should never reach the assembler printer.  */
10257   gcc_assert (x == pc_rtx
10258 	      || (REGNO (x) != ARG_POINTER_REGNUM
10259 		  && REGNO (x) != FRAME_POINTER_REGNUM
10260 		  && REGNO (x) != FLAGS_REG
10261 		  && REGNO (x) != FPSR_REG
10262 		  && REGNO (x) != FPCR_REG));
10264   if (ASSEMBLER_DIALECT == ASM_ATT)
10269       gcc_assert (TARGET_64BIT);
10270       fputs ("rip", file);
/* Translate the print code into an operand size in bytes (NOTE(review):
   the assigned size values are on elided lines in this excerpt).  */
10274   if (code == 'w' || MMX_REG_P (x))
10276   else if (code == 'b')
10278   else if (code == 'k')
10280   else if (code == 'q')
10282   else if (code == 'y')
10284   else if (code == 'h')
10286   else if (code == 'x')
10288   else if (code == 't')
10291     code = GET_MODE_SIZE (GET_MODE (x));
10293   /* Irritatingly, AMD extended registers use different naming convention
10294      from the normal registers.  */
10295   if (REX_INT_REG_P (x))
10297       gcc_assert (TARGET_64BIT);
10301 	  error ("extended registers have no high halves");
10304 	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10307 	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10310 	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10313 	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10316 	  error ("unsupported operand size for extended register");
/* Legacy registers: pick the name table matching the requested size.  */
10326       if (STACK_TOP_P (x))
/* 4- and 8-byte integer names share the 16-bit table plus an e/r prefix.  */
10335       if (! ANY_FP_REG_P (x))
10336 	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10341       reg = hi_reg_name[REGNO (x)];
10344       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10346       reg = qi_reg_name[REGNO (x)];
10349       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10351       reg = qi_high_reg_name[REGNO (x)];
10356 	  gcc_assert (!duplicated);
/* Skip the leading size letter of the hi_reg_name entry.  */
10358 	  fputs (hi_reg_name[REGNO (x)] + 1, file);
10363       gcc_unreachable ();
/* AVX 'd' code: emit the register twice, comma-separated.  */
10369       if (ASSEMBLER_DIALECT == ASM_ATT)
10370 	fprintf (file, ", %%%s", reg);
10372 	fprintf (file, ", %s", reg);
10376 /* Locate some local-dynamic symbol still in use by this function
10377    so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: when *PX is a local-dynamic TLS SYMBOL_REF, record
   its name in cfun->machine->some_ld_name and stop the walk.  */
10381 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10385   if (GET_CODE (x) == SYMBOL_REF
10386       && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10388       cfun->machine->some_ld_name = XSTR (x, 0);
/* Return the name of some local-dynamic TLS symbol used in this function,
   caching the answer in cfun->machine.  Aborts if none exists — callers
   only ask when a '&' operand code guarantees one is present.  */
10395 static const char *
10396 get_some_local_dynamic_name (void)
10400   if (cfun->machine->some_ld_name)
10401     return cfun->machine->some_ld_name;
/* Scan every insn; the callback stores the name as a side effect.  */
10403   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10405 	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10406       return cfun->machine->some_ld_name;
10408   gcc_unreachable ();
10411 /* Meaning of CODE:
10412    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10413    C -- print opcode suffix for set/cmov insn.
10414    c -- like C, but print reversed condition
10415    E,e -- likewise, but for compare-and-branch fused insn.
10416    F,f -- likewise, but for floating-point.
10417    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10419    R -- print the prefix for register names.
10420    z -- print the opcode suffix for the size of the current operand.
10421    * -- print a star (in certain assembler syntax)
10422    A -- print an absolute memory reference.
10423    w -- print the operand as if it's a "word" (HImode) even if it isn't.
10424    s -- print a shift double count, followed by the assemblers argument
10426    b -- print the QImode name of the register for the indicated operand.
10427 	%b0 would print %al if operands[0] is reg 0.
10428    w --  likewise, print the HImode name of the register.
10429    k --  likewise, print the SImode name of the register.
10430    q --  likewise, print the DImode name of the register.
10431    x --  likewise, print the V4SFmode name of the register.
10432    t --  likewise, print the V8SFmode name of the register.
10433    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10434    y -- print "st(0)" instead of "st" as a register.
10435    d -- print duplicated register operand for AVX instruction.
10436    D -- print condition for SSE cmp instruction.
10437    P -- if PIC, print an @PLT suffix.
10438    X -- don't print any sort of PIC '@' suffix for a symbol.
10439    & -- print some in-use local-dynamic symbol name.
10440    H -- print a memory address offset by 8; used for sse high-parts
10441    Y -- print condition for SSE5 com* instruction.
10442    + -- print a branch hint as 'cs' or 'ds' prefix
10443    ; -- print a semicolon (after prefixes due to bug in older gas).
/* Main operand printer, dispatching on the % code above.  NOTE(review):
   many case labels and short bodies are elided in this excerpt.  */
10447 print_operand (FILE *file, rtx x, int code)
10454 	  if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': name of an in-use local-dynamic TLS symbol.  */
10459 	  assemble_name (file, get_some_local_dynamic_name ());
/* 'A': absolute memory reference; syntax differs per dialect.  */
10463 	  switch (ASSEMBLER_DIALECT)
10470 	      /* Intel syntax.  For absolute addresses, registers should not
10471 		 be surrounded by braces.  */
10475 		  PRINT_OPERAND (file, x, 0);
10482 	      gcc_unreachable ();
10485 	  PRINT_OPERAND (file, x, 0);
/* Size-suffix codes (L/W/B/Q/S/T): emit a letter only in AT&T syntax.  */
10490 	  if (ASSEMBLER_DIALECT == ASM_ATT)
10495 	  if (ASSEMBLER_DIALECT == ASM_ATT)
10500 	  if (ASSEMBLER_DIALECT == ASM_ATT)
10505 	  if (ASSEMBLER_DIALECT == ASM_ATT)
10510 	  if (ASSEMBLER_DIALECT == ASM_ATT)
10515 	  if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z': derive the opcode suffix from the operand's own mode.  */
10520 	  /* 387 opcodes don't get size suffixes if the operands are
10522 	  if (STACK_REG_P (x))
10525 	  /* Likewise if using Intel opcodes.  */
10526 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
10529 	  /* This is the size of op from size of operand.  */
10530 	  switch (GET_MODE_SIZE (GET_MODE (x)))
10539 #ifdef HAVE_GAS_FILDS_FISTS
10549 	      if (GET_MODE (x) == SFmode)
10564 	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10568 #ifdef GAS_MNEMONICS
10583 	      gcc_unreachable ();
/* 's': shift-double count followed by the assembler separator.  */
10600 	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
10602 	      PRINT_OPERAND (file, x, 0);
10603 	      fputs (", ", file);
10608 	  /* Little bit of braindamage here.  The SSE compare instructions
10609 	     does use completely different names for the comparisons that the
10610 	     fp conditional moves.  */
/* 'D' (and the AVX variant): SSE comparison predicate name.  */
10613 	    switch (GET_CODE (x))
10616 		fputs ("eq", file);
10619 		fputs ("eq_us", file);
10622 		fputs ("lt", file);
10625 		fputs ("nge", file);
10628 		fputs ("le", file);
10631 		fputs ("ngt", file);
10634 		fputs ("unord", file);
10637 		fputs ("neq", file);
10640 		fputs ("neq_oq", file);
10643 		fputs ("ge", file);
10646 		fputs ("nlt", file);
10649 		fputs ("gt", file);
10652 		fputs ("nle", file);
10655 		fputs ("ord", file);
10658 		gcc_unreachable ();
/* Non-AVX SSE predicate names (smaller set).  */
10663 	    switch (GET_CODE (x))
10667 		fputs ("eq", file);
10671 		fputs ("lt", file);
10675 		fputs ("le", file);
10678 		fputs ("unord", file);
10682 		fputs ("neq", file);
10686 		fputs ("nlt", file);
10690 		fputs ("nle", file);
10693 		fputs ("ord", file);
10696 		gcc_unreachable ();
/* 'O': Sun-assembler cmov size suffix ("w."/"l."/"q.").  */
10701 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10702 	  if (ASSEMBLER_DIALECT == ASM_ATT)
10704 	      switch (GET_MODE (x))
10706 		case HImode: putc ('w', file); break;
10708 		case SFmode: putc ('l', file); break;
10710 		case DFmode: putc ('q', file); break;
10711 		default: gcc_unreachable ();
/* 'C'/'F': condition suffix for set/cmov (F = floating-point form).  */
10718 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
10721 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10722 	  if (ASSEMBLER_DIALECT == ASM_ATT)
10725 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
10728 	  /* Like above, but reverse condition */
10730 	  /* Check to see if argument to %c is really a constant
10731 	     and not a condition code which needs to be reversed.  */
10732 	  if (!COMPARISON_P (x))
10734 	    output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
10737 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
10740 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
10741 	  if (ASSEMBLER_DIALECT == ASM_ATT)
10744 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'E'/'e': compare-and-branch fused condition, always CCmode.  */
10748 	  put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
10752 	  put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
/* 'H': address of the high 8 bytes of the operand (SSE high parts).  */
10756 	  /* It doesn't actually matter what mode we use here, as we're
10757 	     only going to use this for printing.  */
10758 	  x = adjust_address_nv (x, DImode, 8);
/* '+': emit ds/cs branch-hint prefixes from the REG_BR_PROB note, but only
   when the prediction disagrees with the CPU's static default.  */
10766 	      || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
10769 	  x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
10772 	      int pred_val = INTVAL (XEXP (x, 0));
10774 	      if (pred_val < REG_BR_PROB_BASE * 45 / 100
10775 		  || pred_val > REG_BR_PROB_BASE * 55 / 100)
10777 		  int taken = pred_val > REG_BR_PROB_BASE / 2;
10778 		  int cputaken = final_forward_branch_p (current_output_insn) == 0;
10780 		  /* Emit hints only in the case default branch prediction
10781 		     heuristics would fail.  */
10782 		  if (taken != cputaken)
10784 		      /* We use 3e (DS) prefix for taken branches and
10785 			 2e (CS) prefix for not taken branches.  */
10787 			fputs ("ds ; ", file);
10789 			fputs ("cs ; ", file);
/* 'Y': SSE5 com* comparison predicate name.  */
10797 	  switch (GET_CODE (x))
10800 	      fputs ("neq", file);
10803 	      fputs ("eq", file);
10807 	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
10811 	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
10815 	      fputs ("le", file);
10819 	      fputs ("lt", file);
10822 	      fputs ("unord", file);
10825 	      fputs ("ord", file);
10828 	      fputs ("ueq", file);
10831 	      fputs ("nlt", file);
10834 	      fputs ("nle", file);
10837 	      fputs ("ule", file);
10840 	      fputs ("ult", file);
10843 	      fputs ("une", file);
10846 	      gcc_unreachable ();
/* ';': statement separator (spelled " ; " for buggy older gas).  */
10852 	  fputs (" ; ", file);
10859 	  output_operand_lossage ("invalid operand code '%c'", code);
/* No code (or after code handling): print the operand itself.  */
10864     print_reg (x, code, file);
10866   else if (MEM_P (x))
10868       /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
10869       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
10870 	  && GET_MODE (x) != BLKmode)
10873 	  switch (GET_MODE_SIZE (GET_MODE (x)))
10875 	    case 1: size = "BYTE"; break;
10876 	    case 2: size = "WORD"; break;
10877 	    case 4: size = "DWORD"; break;
10878 	    case 8: size = "QWORD"; break;
10879 	    case 12: size = "XWORD"; break;
10881 	      if (GET_MODE (x) == XFmode)
10887 	      gcc_unreachable ();
10890 	  /* Check for explicit size override (codes 'b', 'w' and 'k') */
10893 	  else if (code == 'w')
10895 	  else if (code == 'k')
10898 	  fputs (size, file);
10899 	  fputs (" PTR ", file);
10903       /* Avoid (%rip) for call operands.  */
10904       if (CONSTANT_ADDRESS_P (x) && code == 'P'
10905 	  && !CONST_INT_P (x))
10906 	output_addr_const (file, x);
10907       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
10908 	output_operand_lossage ("invalid constraints for operand");
10910 	output_address (x);
/* SFmode immediate: print its exact 32-bit bit pattern in hex.  */
10913   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
10918       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10919       REAL_VALUE_TO_TARGET_SINGLE (r, l);
10921       if (ASSEMBLER_DIALECT == ASM_ATT)
10923       fprintf (file, "0x%08lx", (long unsigned int) l);
10926   /* These float cases don't actually occur as immediate operands.  */
10927   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
10931       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
10932       fprintf (file, "%s", dstr);
10935   else if (GET_CODE (x) == CONST_DOUBLE
10936 	   && GET_MODE (x) == XFmode)
10940       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
10941       fprintf (file, "%s", dstr);
10946       /* We have patterns that allow zero sets of memory, for instance.
10947 	 In 64-bit mode, we should probably support all 8-byte vectors,
10948 	 since we can in fact encode that into an immediate.  */
10949       if (GET_CODE (x) == CONST_VECTOR)
10951 	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Default: immediates get '$' (AT&T) or OFFSET FLAT: (Intel) as needed,
   then print via the PIC-aware or plain constant printer.  */
10957 	  if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
10959 	      if (ASSEMBLER_DIALECT == ASM_ATT)
10962 	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
10963 		   || GET_CODE (x) == LABEL_REF)
10965 	      if (ASSEMBLER_DIALECT == ASM_ATT)
10968 		fputs ("OFFSET FLAT:", file);
10971       if (CONST_INT_P (x))
10972 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10974 	output_pic_addr_const (file, x, code);
10976 	output_addr_const (file, x);
10980 /* Print a memory operand whose address is ADDR.  */
10983 print_operand_address (FILE *file, rtx addr)
10985   struct ix86_address parts;
10986   rtx base, index, disp;
/* Decompose ADDR into base/index/disp/scale/segment; must succeed.  */
10988   int ok = ix86_decompose_address (addr, &parts);
10993   index = parts.index;
10995   scale = parts.scale;
/* Non-default segment (fs/gs) prefixes the whole operand.  */
11003       if (ASSEMBLER_DIALECT == ASM_ATT)
11005       fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11008       gcc_unreachable ();
11011   /* Use one byte shorter RIP relative addressing for 64bit mode.  */
11012   if (TARGET_64BIT && !base && !index)
11016       if (GET_CODE (disp) == CONST
11017 	  && GET_CODE (XEXP (disp, 0)) == PLUS
11018 	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11019 	symbol = XEXP (XEXP (disp, 0), 0);
/* RIP-relative only for labels and non-TLS symbols.  */
11021       if (GET_CODE (symbol) == LABEL_REF
11022 	  || (GET_CODE (symbol) == SYMBOL_REF
11023 	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11026   if (!base && !index)
11028       /* Displacement only requires special attention.  */
11030       if (CONST_INT_P (disp))
11032 	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11033 	    fputs ("ds:", file);
11034 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11037 	output_pic_addr_const (file, disp, 0);
11039 	output_addr_const (file, disp);
/* AT&T syntax: disp(base,index,scale).  */
11043       if (ASSEMBLER_DIALECT == ASM_ATT)
11048 	    output_pic_addr_const (file, disp, 0);
11049 	  else if (GET_CODE (disp) == LABEL_REF)
11050 	    output_asm_label (disp);
11052 	    output_addr_const (file, disp);
11057 	    print_reg (base, 0, file);
11061 	      print_reg (index, 0, file);
11063 		fprintf (file, ",%d", scale);
/* Intel syntax: sym[base+index*scale+offset].  */
11069 	  rtx offset = NULL_RTX;
11073 	      /* Pull out the offset of a symbol; print any symbol itself.  */
11074 	      if (GET_CODE (disp) == CONST
11075 		  && GET_CODE (XEXP (disp, 0)) == PLUS
11076 		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11078 		  offset = XEXP (XEXP (disp, 0), 1);
11079 		  disp = gen_rtx_CONST (VOIDmode,
11080 					XEXP (XEXP (disp, 0), 0));
11084 		output_pic_addr_const (file, disp, 0);
11085 	      else if (GET_CODE (disp) == LABEL_REF)
11086 		output_asm_label (disp);
11087 	      else if (CONST_INT_P (disp))
11090 		output_addr_const (file, disp);
11096 	      print_reg (base, 0, file);
11099 		  if (INTVAL (offset) >= 0)
11101 		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11105 	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11112 	      print_reg (index, 0, file);
11114 	      fprintf (file, "*%d", scale);
/* Target hook: print target-specific UNSPEC address constants (TLS
   relocations).  Returns false for anything that is not an UNSPEC we
   know.  NOTE(review): interior lines (braces, case labels, returns)
   are elided in this listing; verify against the full file.  */
11122 output_addr_const_extra (FILE *file, rtx x)
11126   if (GET_CODE (x) != UNSPEC)
11129   op = XVECEXP (x, 0, 0);
/* Dispatch on the UNSPEC code and append the matching TLS reloc.  */
11130   switch (XINT (x, 1))
11132     case UNSPEC_GOTTPOFF:
11133       output_addr_const (file, op);
11134       /* FIXME: This might be @TPOFF in Sun ld. */
11135       fputs ("@GOTTPOFF", file);
11138       output_addr_const (file, op);
11139       fputs ("@TPOFF", file);
11141     case UNSPEC_NTPOFF:
11142       output_addr_const (file, op);
/* 64-bit uses @TPOFF; 32-bit uses @NTPOFF (branch condition elided).  */
11144 	fputs ("@TPOFF", file);
11146 	fputs ("@NTPOFF", file);
11148     case UNSPEC_DTPOFF:
11149       output_addr_const (file, op);
11150       fputs ("@DTPOFF", file);
11152     case UNSPEC_GOTNTPOFF:
11153       output_addr_const (file, op);
/* 64-bit: RIP-relative GOT TP offset; dialect picks () vs [].  */
11155 	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11156 	       "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11158 	fputs ("@GOTNTPOFF", file);
11160     case UNSPEC_INDNTPOFF:
11161       output_addr_const (file, op);
11162       fputs ("@INDNTPOFF", file);
11172 /* Split one or more DImode RTL references into pairs of SImode
11173 references. The RTL can be REG, offsettable MEM, integer constant, or
11174 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11175 split and "num" is its length. lo_half and hi_half are output arrays
11176 that parallel "operands". */
11179 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11183 rtx op = operands[num];
11185 /* simplify_subreg refuse to split volatile memory addresses,
11186 but we still have to handle it. */
11189 lo_half[num] = adjust_address (op, SImode, 0);
11190 hi_half[num] = adjust_address (op, SImode, 4);
11194 lo_half[num] = simplify_gen_subreg (SImode, op,
11195 GET_MODE (op) == VOIDmode
11196 ? DImode : GET_MODE (op), 0);
11197 hi_half[num] = simplify_gen_subreg (SImode, op,
11198 GET_MODE (op) == VOIDmode
11199 ? DImode : GET_MODE (op), 4);
11203 /* Split one or more TImode RTL references into pairs of DImode
11204 references. The RTL can be REG, offsettable MEM, integer constant, or
11205 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11206 split and "num" is its length. lo_half and hi_half are output arrays
11207 that parallel "operands". */
11210 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11214 rtx op = operands[num];
11216 /* simplify_subreg refuse to split volatile memory addresses, but we
11217 still have to handle it. */
11220 lo_half[num] = adjust_address (op, DImode, 0);
11221 hi_half[num] = adjust_address (op, DImode, 8);
11225 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11226 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11231 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11232 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11233 is the expression of the binary operation. The output may either be
11234 emitted here, or returned to the caller, like all output_* functions.
11236 There is no guarantee that the operands are the same mode, as they
11237 might be within FLOAT or FLOAT_EXTEND expressions. */
11239 #ifndef SYSV386_COMPAT
11240 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11241 wants to fix the assemblers because that causes incompatibility
11242 with gcc. No-one wants to fix gcc because that causes
11243 incompatibility with assemblers... You can use the option of
11244 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11245 #define SYSV386_COMPAT 1
/* NOTE(review): many interior lines (case labels, opcode-name
   assignments, braces) are elided in this listing; comments describe
   the visible fragments only.  */
11249 output_387_binary_op (rtx insn, rtx *operands)
11251   static char buf[40];
/* True when any operand lives in an SSE register; selects the SSE
   (scalar ss/sd) templates instead of x87 ones.  */
11254   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11256 #ifdef ENABLE_CHECKING
11257   /* Even if we do not want to check the inputs, this documents input
11258 constraints. Which helps in understanding the following code. */
11259   if (STACK_REG_P (operands[0])
11260       && ((REG_P (operands[1])
11261 	   && REGNO (operands[0]) == REGNO (operands[1])
11262 	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11263 	  || (REG_P (operands[2])
11264 	      && REGNO (operands[0]) == REGNO (operands[2])
11265 	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11266       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11269     gcc_assert (is_sse);
/* Pick the mnemonic stem from the rtx code; integer-mode operands
   select the fi* (integer-operand) forms.  */
11272   switch (GET_CODE (operands[3]))
11275       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11276 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11284       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11285 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11293       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11294 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11302       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11303 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11311       gcc_unreachable ();
/* SSE path: build "op{ss,sd}" template; the three-operand form is the
   AVX variant, the two-operand form legacy SSE.  */
11318       strcpy (buf, ssep);
11319       if (GET_MODE (operands[0]) == SFmode)
11320 	strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11322 	strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11326       strcpy (buf, ssep + 1);
11327       if (GET_MODE (operands[0]) == SFmode)
11328 	strcat (buf, "ss\t{%2, %0|%0, %2}");
11330 	strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: choose the template suffix P for the operand/stack layout.  */
11336   switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
11340       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11342 	  rtx temp = operands[2];
11343 	  operands[2] = operands[1];
11344 	  operands[1] = temp;
11347       /* know operands[0] == operands[1]. */
11349       if (MEM_P (operands[2]))
11355       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11357 	  if (STACK_TOP_P (operands[0]))
11358 	    /* How is it that we are storing to a dead operand[2]?
11359 Well, presumably operands[1] is dead too. We can't
11360 store the result to st(0) as st(0) gets popped on this
11361 instruction. Instead store to operands[2] (which I
11362 think has to be st(1)). st(1) will be popped later.
11363 gcc <= 2.8.1 didn't have this check and generated
11364 assembly code that the Unixware assembler rejected. */
11365 	    p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11367 	    p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11371       if (STACK_TOP_P (operands[0]))
11372 	p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11374 	p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): direction matters.  */
11379       if (MEM_P (operands[1]))
11385       if (MEM_P (operands[2]))
11391       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11394 	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11395 derived assemblers, confusingly reverse the direction of
11396 the operation for fsub{r} and fdiv{r} when the
11397 destination register is not st(0). The Intel assembler
11398 doesn't have this brain damage. Read !SYSV386_COMPAT to
11399 figure out what the hardware really does. */
11400 	  if (STACK_TOP_P (operands[0]))
11401 	    p = "{p\t%0, %2|rp\t%2, %0}";
11403 	    p = "{rp\t%2, %0|p\t%0, %2}";
11405 	  if (STACK_TOP_P (operands[0]))
11406 	    /* As above for fmul/fadd, we can't store to st(0). */
11407 	    p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11409 	    p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11414       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11417 	  if (STACK_TOP_P (operands[0]))
11418 	    p = "{rp\t%0, %1|p\t%1, %0}";
11420 	    p = "{p\t%1, %0|rp\t%0, %1}";
11422 	  if (STACK_TOP_P (operands[0]))
11423 	    p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11425 	    p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11430       if (STACK_TOP_P (operands[0]))
11432 	  if (STACK_TOP_P (operands[1]))
11433 	    p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11435 	    p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11438       else if (STACK_TOP_P (operands[1]))
11441 	    p = "{\t%1, %0|r\t%0, %1}";
11443 	    p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11449 	    p = "{r\t%2, %0|\t%0, %2}";
11451 	    p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11457       gcc_unreachable ();
11464 /* Return needed mode for entity in optimize_mode_switching pass. */
11467 ix86_mode_needed (int entity, rtx insn)
11469 enum attr_i387_cw mode;
11471 /* The mode UNINITIALIZED is used to store control word after a
11472 function call or ASM pattern. The mode ANY specify that function
11473 has no requirements on the control word and make no changes in the
11474 bits we are interested in. */
11477 || (NONJUMP_INSN_P (insn)
11478 && (asm_noperands (PATTERN (insn)) >= 0
11479 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11480 return I387_CW_UNINITIALIZED;
11482 if (recog_memoized (insn) < 0)
11483 return I387_CW_ANY;
11485 mode = get_attr_i387_cw (insn);
11490 if (mode == I387_CW_TRUNC)
11495 if (mode == I387_CW_FLOOR)
11500 if (mode == I387_CW_CEIL)
11505 if (mode == I387_CW_MASK_PM)
11510 gcc_unreachable ();
11513 return I387_CW_ANY;
11516 /* Output code to initialize control word copies used by trunc?f?i and
11517 rounding patterns. CURRENT_MODE is set to current control word,
11518 while NEW_MODE is set to new control word. */
11521 emit_i387_cw_initialization (int mode)
11523 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
11526 enum ix86_stack_slot slot;
11528 rtx reg = gen_reg_rtx (HImode);
11530 emit_insn (gen_x86_fnstcw_1 (stored_mode));
11531 emit_move_insn (reg, copy_rtx (stored_mode));
11533 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
11534 || optimize_function_for_size_p (cfun))
11538 case I387_CW_TRUNC:
11539 /* round toward zero (truncate) */
11540 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
11541 slot = SLOT_CW_TRUNC;
11544 case I387_CW_FLOOR:
11545 /* round down toward -oo */
11546 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11547 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
11548 slot = SLOT_CW_FLOOR;
11552 /* round up toward +oo */
11553 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
11554 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
11555 slot = SLOT_CW_CEIL;
11558 case I387_CW_MASK_PM:
11559 /* mask precision exception for nearbyint() */
11560 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11561 slot = SLOT_CW_MASK_PM;
11565 gcc_unreachable ();
11572 case I387_CW_TRUNC:
11573 /* round toward zero (truncate) */
11574 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
11575 slot = SLOT_CW_TRUNC;
11578 case I387_CW_FLOOR:
11579 /* round down toward -oo */
11580 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
11581 slot = SLOT_CW_FLOOR;
11585 /* round up toward +oo */
11586 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
11587 slot = SLOT_CW_CEIL;
11590 case I387_CW_MASK_PM:
11591 /* mask precision exception for nearbyint() */
11592 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
11593 slot = SLOT_CW_MASK_PM;
11597 gcc_unreachable ();
11601 gcc_assert (slot < MAX_386_STACK_LOCALS);
11603 new_mode = assign_386_stack_local (HImode, slot);
11604 emit_move_insn (new_mode, reg);
11607 /* Output code for INSN to convert a float to a signed int. OPERANDS
11608 are the insn operands. The output may be [HSD]Imode and the input
11609 operand may be [SDX]Fmode. */
11612 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
11614 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
11615 int dimode_p = GET_MODE (operands[0]) == DImode;
11616 int round_mode = get_attr_i387_cw (insn);
11618 /* Jump through a hoop or two for DImode, since the hardware has no
11619 non-popping instruction. We used to do this a different way, but
11620 that was somewhat fragile and broke with post-reload splitters. */
11621 if ((dimode_p || fisttp) && !stack_top_dies)
11622 output_asm_insn ("fld\t%y1", operands);
11624 gcc_assert (STACK_TOP_P (operands[1]));
11625 gcc_assert (MEM_P (operands[0]));
11626 gcc_assert (GET_MODE (operands[1]) != TFmode);
11629 output_asm_insn ("fisttp%z0\t%0", operands);
11632 if (round_mode != I387_CW_ANY)
11633 output_asm_insn ("fldcw\t%3", operands);
11634 if (stack_top_dies || dimode_p)
11635 output_asm_insn ("fistp%z0\t%0", operands);
11637 output_asm_insn ("fist%z0\t%0", operands);
11638 if (round_mode != I387_CW_ANY)
11639 output_asm_insn ("fldcw\t%2", operands);
11645 /* Output code for x87 ffreep insn. The OPNO argument, which may only
11646 have the values zero or one, indicates the ffreep insn's operand
11647 from the OPERANDS array. */
11649 static const char *
11650 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
11652 if (TARGET_USE_FFREEP)
11653 #if HAVE_AS_IX86_FFREEP
11654 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
11657 static char retval[] = ".word\t0xc_df";
11658 int regno = REGNO (operands[opno]);
11660 gcc_assert (FP_REGNO_P (regno));
11662 retval[9] = '0' + (regno - FIRST_STACK_REG);
11667 return opno ? "fstp\t%y1" : "fstp\t%y0";
11671 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
11672 should be used. UNORDERED_P is true when fucom should be used. */
/* NOTE(review): interior lines (braces, else arms, some alt[] table
   rows) are elided in this listing; comments describe only the visible
   fragments.  */
11675 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
11677   int stack_top_dies;
11678   rtx cmp_op0, cmp_op1;
11679   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Pick the compared pair depending on the insn's operand layout
   (selection condition elided).  */
11683       cmp_op0 = operands[0];
11684       cmp_op1 = operands[1];
11688       cmp_op0 = operands[1];
11689       cmp_op1 = operands[2];
/* SSE path: the leading 'v' makes the AVX form; indexing by
   TARGET_AVX ? 0 : 1 skips it for legacy SSE.  */
11694       static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
11695       static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
11696       static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
11697       static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
11699       if (GET_MODE (operands[0]) == SFmode)
11701 	  return &ucomiss[TARGET_AVX ? 0 : 1];
11703 	  return &comiss[TARGET_AVX ? 0 : 1];
11706 	  return &ucomisd[TARGET_AVX ? 0 : 1];
11708 	  return &comisd[TARGET_AVX ? 0 : 1];
/* x87 path: first operand must be at the stack top.  */
11711   gcc_assert (STACK_TOP_P (cmp_op0));
11713   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: use ftst, popping st(0) if it dies.  */
11715   if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
11717       if (stack_top_dies)
11719 	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
11720 	  return output_387_ffreep (operands, 1);
11723 	return "ftst\n\tfnstsw\t%0";
11726   if (STACK_REG_P (cmp_op1)
11728       && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
11729       && REGNO (cmp_op1) != FIRST_STACK_REG)
11731       /* If both the top of the 387 stack dies, and the other operand
11732 is also a stack register that dies, then this must be a
11733 `fcompp' float compare */
11737 	  /* There is no double popping fcomi variant. Fortunately,
11738 eflags is immune from the fstp's cc clobbering. */
11740 	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
11742 	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
11743 	  return output_387_ffreep (operands, 0);
11748 	    return "fucompp\n\tfnstsw\t%0";
11750 	    return "fcompp\n\tfnstsw\t%0";
11755       /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
11757       static const char * const alt[16] =
11759 	"fcom%z2\t%y2\n\tfnstsw\t%0",
11760 	"fcomp%z2\t%y2\n\tfnstsw\t%0",
11761 	"fucom%z2\t%y2\n\tfnstsw\t%0",
11762 	"fucomp%z2\t%y2\n\tfnstsw\t%0",
11764 	"ficom%z2\t%y2\n\tfnstsw\t%0",
11765 	"ficomp%z2\t%y2\n\tfnstsw\t%0",
11769 	"fcomi\t{%y1, %0|%0, %y1}",
11770 	"fcomip\t{%y1, %0|%0, %y1}",
11771 	"fucomi\t{%y1, %0|%0, %y1}",
11772 	"fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into alt[] from the flag bits above.  */
11783       mask = eflags_p << 3;
11784       mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
11785       mask |= unordered_p << 1;
11786       mask |= stack_top_dies;
11788       gcc_assert (mask < 16);
11797 ix86_output_addr_vec_elt (FILE *file, int value)
11799 const char *directive = ASM_LONG;
11803 directive = ASM_QUAD;
11805 gcc_assert (!TARGET_64BIT);
11808 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
11812 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
11814 const char *directive = ASM_LONG;
11817 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
11818 directive = ASM_QUAD;
11820 gcc_assert (!TARGET_64BIT);
11822 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
11823 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
11824 fprintf (file, "%s%s%d-%s%d\n",
11825 directive, LPREFIX, value, LPREFIX, rel);
11826 else if (HAVE_AS_GOTOFF_IN_DATA)
11827 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
11829 else if (TARGET_MACHO)
11831 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
11832 machopic_output_function_base_name (file);
11833 fprintf(file, "\n");
11837 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
11838 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
11841 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
11845 ix86_expand_clear (rtx dest)
11849 /* We play register width games, which are only valid after reload. */
11850 gcc_assert (reload_completed);
11852 /* Avoid HImode and its attendant prefix byte. */
11853 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
11854 dest = gen_rtx_REG (SImode, REGNO (dest));
11855 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
11857 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
11858 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
11860 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11861 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
11867 /* X is an unchanging MEM. If it is a constant pool reference, return
11868 the constant pool rtx, else NULL. */
11871 maybe_get_pool_constant (rtx x)
11873 x = ix86_delegitimize_address (XEXP (x, 0));
11875 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
11876 return get_pool_constant (x);
/* Expand a scalar move of MODE; handles TLS symbols, dllimport,
   PIC legitimization, and constant placement before emitting the SET.
   NOTE(review): many interior lines (braces, conditions, Mach-O paths)
   are elided in this listing; comments describe only the visible
   fragments.  */
11882 ix86_expand_move (enum machine_mode mode, rtx operands[])
11885   enum tls_model model;
/* A bare SYMBOL_REF source: legitimize TLS or dllimport references.  */
11890   if (GET_CODE (op1) == SYMBOL_REF)
11892       model = SYMBOL_REF_TLS_MODEL (op1);
11895 	  op1 = legitimize_tls_address (op1, model, true);
11896 	  op1 = force_operand (op1, op0);
11900       else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11901 	       && SYMBOL_REF_DLLIMPORT_P (op1))
11902 	op1 = legitimize_dllimport_symbol (op1, false);
/* CONST (PLUS symbol addend): legitimize the symbol, then re-add the
   addend.  */
11904   else if (GET_CODE (op1) == CONST
11905 	   && GET_CODE (XEXP (op1, 0)) == PLUS
11906 	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
11908       rtx addend = XEXP (XEXP (op1, 0), 1);
11909       rtx symbol = XEXP (XEXP (op1, 0), 0);
11912       model = SYMBOL_REF_TLS_MODEL (symbol);
11914 	tmp = legitimize_tls_address (symbol, model, true);
11915       else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11916 	       && SYMBOL_REF_DLLIMPORT_P (symbol))
11917 	tmp = legitimize_dllimport_symbol (symbol, true);
11921 	  tmp = force_operand (tmp, NULL);
11922 	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
11923 				     op0, 1, OPTAB_DIRECT);
/* PIC: symbolic operands need legitimization (Mach-O has its own).  */
11929   if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
11931       if (TARGET_MACHO && !TARGET_64BIT)
11936 	  rtx temp = ((reload_in_progress
11937 		       || ((op0 && REG_P (op0))
11939 		      ? op0 : gen_reg_rtx (Pmode));
11940 	  op1 = machopic_indirect_data_reference (op1, temp);
11941 	  op1 = machopic_legitimize_pic_address (op1, mode,
11942 						 temp == op1 ? 0 : temp);
11944 	  else if (MACHOPIC_INDIRECT)
11945 	    op1 = machopic_indirect_data_reference (op1, 0);
11953 	    op1 = force_reg (Pmode, op1);
11954 	  else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
11956 	      rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
11957 	      op1 = legitimize_pic_address (op1, reg);
/* Memory-to-memory and push restrictions: force source into a reg.  */
11966 	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
11967 	      || !push_operand (op0, mode))
11969 	op1 = force_reg (mode, op1);
11971       if (push_operand (op0, mode)
11972 	  && ! general_no_elim_operand (op1, mode))
11973 	op1 = copy_to_mode_reg (mode, op1);
11975       /* Force large constants in 64bit compilation into register
11976 to get them CSEed. */
11977       if (can_create_pseudo_p ()
11978 	  && (mode == DImode) && TARGET_64BIT
11979 	  && immediate_operand (op1, mode)
11980 	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
11981 	  && !register_operand (op0, mode)
11983 	op1 = copy_to_mode_reg (mode, op1);
11985       if (can_create_pseudo_p ()
11986 	  && FLOAT_MODE_P (mode)
11987 	  && GET_CODE (op1) == CONST_DOUBLE)
11989 	  /* If we are loading a floating point constant to a register,
11990 force the value to memory now, since we'll get better code
11991 out the back end. */
11993 	    op1 = validize_mem (force_const_mem (mode, op1));
11994 	  if (!register_operand (op0, mode))
11996 	      rtx temp = gen_reg_rtx (mode);
11997 	      emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
11998 	      emit_move_insn (op0, temp);
/* Finally emit the plain move.  */
12004   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12008 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12010 rtx op0 = operands[0], op1 = operands[1];
12011 unsigned int align = GET_MODE_ALIGNMENT (mode);
12013 /* Force constants other than zero into memory. We do not know how
12014 the instructions used to build constants modify the upper 64 bits
12015 of the register, once we have that information we may be able
12016 to handle some of them more efficiently. */
12017 if (can_create_pseudo_p ()
12018 && register_operand (op0, mode)
12019 && (CONSTANT_P (op1)
12020 || (GET_CODE (op1) == SUBREG
12021 && CONSTANT_P (SUBREG_REG (op1))))
12022 && standard_sse_constant_p (op1) <= 0)
12023 op1 = validize_mem (force_const_mem (mode, op1));
12025 /* We need to check memory alignment for SSE mode since attribute
12026 can make operands unaligned. */
12027 if (can_create_pseudo_p ()
12028 && SSE_REG_MODE_P (mode)
12029 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12030 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12034 /* ix86_expand_vector_move_misalign() does not like constants ... */
12035 if (CONSTANT_P (op1)
12036 || (GET_CODE (op1) == SUBREG
12037 && CONSTANT_P (SUBREG_REG (op1))))
12038 op1 = validize_mem (force_const_mem (mode, op1));
12040 /* ... nor both arguments in memory. */
12041 if (!register_operand (op0, mode)
12042 && !register_operand (op1, mode))
12043 op1 = force_reg (mode, op1);
12045 tmp[0] = op0; tmp[1] = op1;
12046 ix86_expand_vector_move_misalign (mode, tmp);
12050 /* Make operand1 a register if it isn't already. */
12051 if (can_create_pseudo_p ()
12052 && !register_operand (op0, mode)
12053 && !register_operand (op1, mode))
12055 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12059 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12062 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12063 straight to ix86_expand_vector_move. */
12064 /* Code generation for scalar reg-reg moves of single and double precision data:
12065 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12069 if (x86_sse_partial_reg_dependency == true)
12074 Code generation for scalar loads of double precision data:
12075 if (x86_sse_split_regs == true)
12076 movlpd mem, reg (gas syntax)
12080 Code generation for unaligned packed loads of single precision data
12081 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12082 if (x86_sse_unaligned_move_optimal)
12085 if (x86_sse_partial_reg_dependency == true)
12097 Code generation for unaligned packed loads of double precision data
12098 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12099 if (x86_sse_unaligned_move_optimal)
12102 if (x86_sse_split_regs == true)
/* NOTE(review): many interior lines (braces, case labels, TARGET_AVX
   guard, returns) are elided in this listing; comments describe only
   the visible fragments.  */
12115 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* AVX path (guard elided): use the vmovdqu/vmovup{s,d} family.  */
12124       switch (GET_MODE_CLASS (mode))
12126 	case MODE_VECTOR_INT:
12128 	  switch (GET_MODE_SIZE (mode))
12131 	      op0 = gen_lowpart (V16QImode, op0);
12132 	      op1 = gen_lowpart (V16QImode, op1);
12133 	      emit_insn (gen_avx_movdqu (op0, op1));
12136 	      op0 = gen_lowpart (V32QImode, op0);
12137 	      op1 = gen_lowpart (V32QImode, op1);
12138 	      emit_insn (gen_avx_movdqu256 (op0, op1));
12141 	      gcc_unreachable ();
12144 	case MODE_VECTOR_FLOAT:
12145 	  op0 = gen_lowpart (mode, op0);
12146 	  op1 = gen_lowpart (mode, op1);
12151 	      emit_insn (gen_avx_movups (op0, op1));
12154 	      emit_insn (gen_avx_movups256 (op0, op1));
12157 	      emit_insn (gen_avx_movupd (op0, op1));
12160 	      emit_insn (gen_avx_movupd256 (op0, op1));
12163 	      gcc_unreachable ();
12168 	  gcc_unreachable ();
/* Legacy SSE, unaligned load (MEM source) path.  */
12176       /* If we're optimizing for size, movups is the smallest. */
12177       if (optimize_insn_for_size_p ())
12179 	  op0 = gen_lowpart (V4SFmode, op0);
12180 	  op1 = gen_lowpart (V4SFmode, op1);
12181 	  emit_insn (gen_sse_movups (op0, op1));
12185       /* ??? If we have typed data, then it would appear that using
12186 movdqu is the only way to get unaligned data loaded with
12188       if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12190 	  op0 = gen_lowpart (V16QImode, op0);
12191 	  op1 = gen_lowpart (V16QImode, op1);
12192 	  emit_insn (gen_sse2_movdqu (op0, op1));
12196       if (TARGET_SSE2 && mode == V2DFmode)
12200 	  if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12202 	      op0 = gen_lowpart (V2DFmode, op0);
12203 	      op1 = gen_lowpart (V2DFmode, op1);
12204 	      emit_insn (gen_sse2_movupd (op0, op1));
12208 	  /* When SSE registers are split into halves, we can avoid
12209 writing to the top half twice. */
12210 	  if (TARGET_SSE_SPLIT_REGS)
12212 	      emit_clobber (op0);
12217 	      /* ??? Not sure about the best option for the Intel chips.
12218 The following would seem to satisfy; the register is
12219 entirely cleared, breaking the dependency chain. We
12220 then store to the upper half, with a dependency depth
12221 of one. A rumor has it that Intel recommends two movsd
12222 followed by an unpacklpd, but this is unconfirmed. And
12223 given that the dependency depth of the unpacklpd would
12224 still be one, I'm not sure why this would be better. */
12225 	      zero = CONST0_RTX (V2DFmode);
/* Load the two 8-byte halves separately (loadlpd then loadhpd).  */
12228 	  m = adjust_address (op1, DFmode, 0);
12229 	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
12230 	  m = adjust_address (op1, DFmode, 8);
12231 	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
12235 	  if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12237 	      op0 = gen_lowpart (V4SFmode, op0);
12238 	      op1 = gen_lowpart (V4SFmode, op1);
12239 	      emit_insn (gen_sse_movups (op0, op1));
/* Break the false dependency on op0: clear it or clobber it.  */
12243 	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12244 	    emit_move_insn (op0, CONST0_RTX (mode));
12246 	    emit_clobber (op0);
12248 	  if (mode != V4SFmode)
12249 	    op0 = gen_lowpart (V4SFmode, op0);
12250 	  m = adjust_address (op1, V2SFmode, 0);
12251 	  emit_insn (gen_sse_loadlps (op0, op0, m));
12252 	  m = adjust_address (op1, V2SFmode, 8);
12253 	  emit_insn (gen_sse_loadhps (op0, op0, m));
/* Legacy SSE, unaligned store (MEM destination) path.  */
12256   else if (MEM_P (op0))
12258       /* If we're optimizing for size, movups is the smallest. */
12259       if (optimize_insn_for_size_p ())
12261 	  op0 = gen_lowpart (V4SFmode, op0);
12262 	  op1 = gen_lowpart (V4SFmode, op1);
12263 	  emit_insn (gen_sse_movups (op0, op1));
12267       /* ??? Similar to above, only less clear because of quote
12268 typeless stores unquote. */
12269       if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12270 	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12272 	  op0 = gen_lowpart (V16QImode, op0);
12273 	  op1 = gen_lowpart (V16QImode, op1);
12274 	  emit_insn (gen_sse2_movdqu (op0, op1));
12278       if (TARGET_SSE2 && mode == V2DFmode)
/* Store the two 8-byte halves separately.  */
12280 	  m = adjust_address (op0, DFmode, 0);
12281 	  emit_insn (gen_sse2_storelpd (m, op1));
12282 	  m = adjust_address (op0, DFmode, 8);
12283 	  emit_insn (gen_sse2_storehpd (m, op1));
12287 	  if (mode != V4SFmode)
12288 	    op1 = gen_lowpart (V4SFmode, op1);
12289 	  m = adjust_address (op0, V2SFmode, 0);
12290 	  emit_insn (gen_sse_storelps (m, op1));
12291 	  m = adjust_address (op0, V2SFmode, 8);
12292 	  emit_insn (gen_sse_storehps (m, op1));
12296     gcc_unreachable ();
12299 /* Expand a push in MODE. This is some mode for which we do not support
12300 proper push instructions, at least from the registers that we expect
12301 the value to live in. */
12304 ix86_expand_push (enum machine_mode mode, rtx x)
12308 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12309 GEN_INT (-GET_MODE_SIZE (mode)),
12310 stack_pointer_rtx, 1, OPTAB_DIRECT);
12311 if (tmp != stack_pointer_rtx)
12312 emit_move_insn (stack_pointer_rtx, tmp);
12314 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12315 emit_move_insn (tmp, x);
12318 /* Helper function of ix86_fixup_binary_operands to canonicalize
12319 operand order. Returns true if the operands should be swapped. */
12322 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12325 rtx dst = operands[0];
12326 rtx src1 = operands[1];
12327 rtx src2 = operands[2];
12329 /* If the operation is not commutative, we can't do anything. */
12330 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12333 /* Highest priority is that src1 should match dst. */
12334 if (rtx_equal_p (dst, src1))
12336 if (rtx_equal_p (dst, src2))
12339 /* Next highest priority is that immediate constants come second. */
12340 if (immediate_operand (src2, mode))
12342 if (immediate_operand (src1, mode))
12345 /* Lowest priority is that memory references should come second. */
12355 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12356 destination to use for the operation. If different from the true
12357 destination in operands[0], a copy operation will be required. */
12360 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12363 rtx dst = operands[0];
12364 rtx src1 = operands[1];
12365 rtx src2 = operands[2];
12367 /* Canonicalize operand order. */
12368 if (ix86_swap_binary_operands_p (code, mode, operands))
12372 /* It is invalid to swap operands of different modes. */
12373 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12380 /* Both source operands cannot be in memory. */
12381 if (MEM_P (src1) && MEM_P (src2))
12383 /* Optimization: Only read from memory once. */
12384 if (rtx_equal_p (src1, src2))
12386 src2 = force_reg (mode, src2);
12390 src2 = force_reg (mode, src2);
12393 /* If the destination is memory, and we do not have matching source
12394 operands, do things in registers. */
12395 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12396 dst = gen_reg_rtx (mode);
12398 /* Source 1 cannot be a constant. */
12399 if (CONSTANT_P (src1))
12400 src1 = force_reg (mode, src1);
12402 /* Source 1 cannot be a non-matching memory. */
12403 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12404 src1 = force_reg (mode, src1);
12406 operands[1] = src1;
12407 operands[2] = src2;
12411 /* Similarly, but assume that the destination has already been
12412 set up properly. */
12415 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12416 enum machine_mode mode, rtx operands[])
12418 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12419 gcc_assert (dst == operands[0]);
12422 /* Attempt to expand a binary operator. Make the expansion closer to the
12423 actual machine, then just general_operand, which will allow 3 separate
12424 memory references (one output, two input) in a single insn. */
12427 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12430 rtx src1, src2, dst, op, clob;
12432 dst = ix86_fixup_binary_operands (code, mode, operands);
12433 src1 = operands[1];
12434 src2 = operands[2];
12436 /* Emit the instruction. */
12438 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12439 if (reload_in_progress)
12441 /* Reload doesn't know about the flags register, and doesn't know that
12442 it doesn't want to clobber it. We can only do this with PLUS. */
12443 gcc_assert (code == PLUS);
12448 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12449 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12452 /* Fix up the destination if needed. */
12453 if (dst != operands[0])
12454 emit_move_insn (operands[0], dst);
12457 /* Return TRUE or FALSE depending on whether the binary operator meets the
12458 appropriate constraints. */
/* NOTE(review): gapped listing -- the return statements for each test are
   not visible in this excerpt.  The visible tests are: reject two memory
   sources; canonicalize commutative operand order; require a matching
   source when the destination is memory; reject a constant src1; reject a
   non-matching memory src1. */
12461 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12464 rtx dst = operands[0];
12465 rtx src1 = operands[1];
12466 rtx src2 = operands[2];
12468 /* Both source operands cannot be in memory. */
12469 if (MEM_P (src1) && MEM_P (src2))
12472 /* Canonicalize operand order for commutative operators. */
12473 if (ix86_swap_binary_operands_p (code, mode, operands))
12480 /* If the destination is memory, we must have a matching source operand. */
12481 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12484 /* Source 1 cannot be a constant. */
12485 if (CONSTANT_P (src1))
12488 /* Source 1 cannot be a non-matching memory. */
12489 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12495 /* Attempt to expand a unary operator. Make the expansion closer to the
12496 actual machine, then just general_operand, which will allow 2 separate
12497 memory references (one output, one input) in a single insn. */
/* NOTE(review): gapped listing -- return type, braces, and the assignments
   of dst/src from operands[] are missing.  Visible behavior mirrors the
   binary expander: force a register when memory operands don't match, emit
   the unary SET (with a FLAGS clobber when reload is in progress or the
   code is NOT), and move to operands[0] if a temporary was used. */
12500 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
12503 int matching_memory;
12504 rtx src, dst, op, clob;
12509 /* If the destination is memory, and we do not have matching source
12510 operands, do things in registers. */
12511 matching_memory = 0;
12514 if (rtx_equal_p (dst, src))
12515 matching_memory = 1;
12517 dst = gen_reg_rtx (mode);
12520 /* When source operand is memory, destination must match. */
12521 if (MEM_P (src) && !matching_memory)
12522 src = force_reg (mode, src);
12524 /* Emit the instruction. */
12526 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
12527 if (reload_in_progress || code == NOT)
12529 /* Reload doesn't know about the flags register, and doesn't know that
12530 it doesn't want to clobber it. */
12531 gcc_assert (code == NOT);
12536 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12537 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12540 /* Fix up the destination if needed. */
12541 if (dst != operands[0])
12542 emit_move_insn (operands[0], dst);
12545 /* Return TRUE or FALSE depending on whether the unary operator meets the
12546 appropriate constraints. */
/* NOTE(review): gapped listing -- the two return statements are not
   visible.  The only visible constraint: if either operand is memory,
   source and destination must be rtx_equal_p. */
12549 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
12550 enum machine_mode mode ATTRIBUTE_UNUSED,
12551 rtx operands[2] ATTRIBUTE_UNUSED)
12553 /* If one of operands is memory, source and destination must match. */
12554 if ((MEM_P (operands[0])
12555 || MEM_P (operands[1]))
12556 && ! rtx_equal_p (operands[0], operands[1]))
12561 /* Post-reload splitter for converting an SF or DFmode value in an
12562 SSE register into an unsigned SImode. */
/* NOTE(review): gapped listing -- return type, braces, and the branch
   structure around the two load strategies are missing.  Visible algorithm:
   operands are (0) result, (1) large, (2) zero_or_two31, (3) input,
   (4) two31.  Loads input into the low vector element (other elements
   zeroed), compares value <= large to build a mask, masks zero_or_two31,
   subtracts it from value, shifts `large` left 31 to form the sign-bit
   correction, truncating-converts to V4SI, then XORs in the sign bits. */
12565 ix86_split_convert_uns_si_sse (rtx operands[])
12567 enum machine_mode vecmode;
12568 rtx value, large, zero_or_two31, input, two31, x;
12570 large = operands[1];
12571 zero_or_two31 = operands[2];
12572 input = operands[3];
12573 two31 = operands[4];
12574 vecmode = GET_MODE (large);
12575 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
12577 /* Load up the value into the low element. We must ensure that the other
12578 elements are valid floats -- zero is the easiest such value. */
12581 if (vecmode == V4SFmode)
12582 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
12584 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
12588 input = gen_rtx_REG (vecmode, REGNO (input));
12589 emit_move_insn (value, CONST0_RTX (vecmode));
12590 if (vecmode == V4SFmode)
12591 emit_insn (gen_sse_movss (value, value, input));
12593 emit_insn (gen_sse2_movsd (value, value, input));
12596 emit_move_insn (large, two31);
12597 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
12599 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
12600 emit_insn (gen_rtx_SET (VOIDmode, large, x));
12602 x = gen_rtx_AND (vecmode, zero_or_two31, large);
12603 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
12605 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
12606 emit_insn (gen_rtx_SET (VOIDmode, value, x));
12608 large = gen_rtx_REG (V4SImode, REGNO (large));
12609 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
12611 x = gen_rtx_REG (V4SImode, REGNO (value));
12612 if (vecmode == V4SFmode)
12613 emit_insn (gen_sse2_cvttps2dq (x, value));
12615 emit_insn (gen_sse2_cvttpd2dq (x, value));
12618 emit_insn (gen_xorv4si3 (value, value, large));
12621 /* Convert an unsigned DImode value into a DFmode, using only SSE.
12622 Expects the 64-bit DImode to be supplied in a pair of integral
12623 registers. Requires SSE2; will use SSE3 if available. For x86_32,
12624 -mfpmath=sse, !optimize_size only. */
/* NOTE(review): gapped listing -- return type, braces, and the else-branch
   structure for the three int_xmm load strategies are missing.  Visible
   algorithm: place the DImode input in a V4SI register, interleave with
   the exponent constants {0x43300000, 0x45300000} so each half becomes a
   biased double (0x1.0p52 + lo, 0x1.0p84 + hi), subtract the biases, then
   sum the two DF lanes (haddpd with SSE3, else unpckhpd + addpd) and
   extract element 0 into the target. */
12627 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
12629 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
12630 rtx int_xmm, fp_xmm;
12631 rtx biases, exponents;
12634 int_xmm = gen_reg_rtx (V4SImode);
12635 if (TARGET_INTER_UNIT_MOVES)
12636 emit_insn (gen_movdi_to_sse (int_xmm, input));
12637 else if (TARGET_SSE_SPLIT_REGS)
12639 emit_clobber (int_xmm);
12640 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
12644 x = gen_reg_rtx (V2DImode);
12645 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
12646 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
12649 x = gen_rtx_CONST_VECTOR (V4SImode,
12650 gen_rtvec (4, GEN_INT (0x43300000UL),
12651 GEN_INT (0x45300000UL),
12652 const0_rtx, const0_rtx));
12653 exponents = validize_mem (force_const_mem (V4SImode, x));
12655 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
12656 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
12658 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
12659 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
12660 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
12661 (0x1.0p84 + double(fp_value_hi_xmm)).
12662 Note these exponents differ by 32. */
12664 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
12666 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
12667 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
12668 real_ldexp (&bias_lo_rvt, &dconst1, 52);
12669 real_ldexp (&bias_hi_rvt, &dconst1, 84);
12670 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
12671 x = const_double_from_real_value (bias_hi_rvt, DFmode);
12672 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
12673 biases = validize_mem (force_const_mem (V2DFmode, biases));
12674 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
12676 /* Add the upper and lower DFmode values together. */
12678 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
12681 x = copy_to_mode_reg (V2DFmode, fp_xmm);
12682 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
12683 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
12686 ix86_expand_vector_extract (false, target, fp_xmm, 0);
12689 /* Not used, but eases macroization of patterns. */
/* NOTE(review): stub that must never be reached at runtime; it exists only
   so machine-description macroization has a symbol to reference. */
12691 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
12692 rtx input ATTRIBUTE_UNUSED)
12694 gcc_unreachable ();
12697 /* Convert an unsigned SImode value into a DFmode. Only currently used
12698 for SSE, but applicable anywhere. */
/* NOTE(review): gapped listing -- return type, braces, and a guard around
   the final move are missing.  Visible algorithm: bias the unsigned input
   by INT_MIN (making it signed), convert with floatsidf2, then add 2^31
   back in DFmode to recover the unsigned value. */
12701 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
12703 REAL_VALUE_TYPE TWO31r;
12706 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
12707 NULL, 1, OPTAB_DIRECT);
12709 fp = gen_reg_rtx (DFmode);
12710 emit_insn (gen_floatsidf2 (fp, x));
12712 real_ldexp (&TWO31r, &dconst1, 31);
12713 x = const_double_from_real_value (TWO31r, DFmode);
12715 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
12717 emit_move_insn (target, x);
12720 /* Convert a signed DImode value into a DFmode. Only used for SSE in
12721 32-bit mode; otherwise we have a direct convert instruction. */
/* NOTE(review): gapped listing -- return type, braces, and the sign/
   unsigned flag argument of the final expand_simple_binop are missing.
   Visible algorithm: convert the signed high word, scale it by 2^32,
   convert the low word as unsigned via ix86_expand_convert_uns_sidf_sse,
   and add the two DFmode halves into target. */
12724 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
12726 REAL_VALUE_TYPE TWO32r;
12727 rtx fp_lo, fp_hi, x;
12729 fp_lo = gen_reg_rtx (DFmode);
12730 fp_hi = gen_reg_rtx (DFmode);
12732 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
12734 real_ldexp (&TWO32r, &dconst1, 32);
12735 x = const_double_from_real_value (TWO32r, DFmode);
12736 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
12738 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
12740 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
12743 emit_move_insn (target, x);
12746 /* Convert an unsigned SImode value into a SFmode, using only SSE.
12747 For x86_32, -mfpmath=sse, !optimize_size only. */
/* NOTE(review): gapped listing -- return type, braces, and the trailing
   arguments of the last two expand_simple_binop calls are missing.
   Visible algorithm: split the input into low 16 bits and high 16 bits,
   convert each half with floatsisf2, scale the high half by 2^16
   (x = 0x1.0p16 built via real_ldexp), and add the halves into target. */
12749 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
12751 REAL_VALUE_TYPE ONE16r;
12752 rtx fp_hi, fp_lo, int_hi, int_lo, x;
12754 real_ldexp (&ONE16r, &dconst1, 16);
12755 x = const_double_from_real_value (ONE16r, SFmode);
12756 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
12757 NULL, 0, OPTAB_DIRECT);
12758 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
12759 NULL, 0, OPTAB_DIRECT);
12760 fp_hi = gen_reg_rtx (SFmode);
12761 fp_lo = gen_reg_rtx (SFmode);
12762 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
12763 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
12764 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
12766 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
12768 if (!rtx_equal_p (target, fp_hi))
12769 emit_move_insn (target, fp_hi);
12772 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
12773 then replicate the value for all elements of the vector
/* NOTE(review): gapped listing -- the function's return type, the switch
   on MODE, its case labels, and the vect/!vect branch structure are
   missing.  Visible behavior: builds a CONST_VECTOR in V4SI/V2DI/V4SF/
   V2DF; when not replicating, the float variants pad the non-first lanes
   with CONST0_RTX of the element mode.  Unhandled modes hit
   gcc_unreachable. */
12777 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
12784 v = gen_rtvec (4, value, value, value, value);
12785 return gen_rtx_CONST_VECTOR (V4SImode, v);
12789 v = gen_rtvec (2, value, value);
12790 return gen_rtx_CONST_VECTOR (V2DImode, v);
12794 v = gen_rtvec (4, value, value, value, value);
12796 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
12797 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
12798 return gen_rtx_CONST_VECTOR (V4SFmode, v);
12802 v = gen_rtvec (2, value, value);
12804 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
12805 return gen_rtx_CONST_VECTOR (V2DFmode, v);
12808 gcc_unreachable ();
12812 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
12813 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
12814 for an SSE register. If VECT is true, then replicate the mask for
12815 all elements of the vector register. If INVERT is true, then create
12816 a mask excluding the sign bit. */
/* NOTE(review): gapped listing -- the switch on MODE, its case labels,
   several local declarations (imode, mask, v, vec, shift) and branch
   structure are missing, so the exact per-mode flow cannot be fully
   reconstructed here.  Visible behavior: computes the sign-bit constant
   as a (lo, hi) HOST_WIDE_INT pair per mode width, complements it when
   INVERT, funnels it through immed_double_const/gen_lowpart into a scalar
   mask, and returns either force_reg of the scalar mask (vec_mode ==
   VOIDmode) or of the ix86_build_const_vector result. */
12819 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
12821 enum machine_mode vec_mode, imode;
12822 HOST_WIDE_INT hi, lo;
12827 /* Find the sign bit, sign extended to 2*HWI. */
12833 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
12834 lo = 0x80000000, hi = lo < 0;
12840 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
12841 if (HOST_BITS_PER_WIDE_INT >= 64)
12842 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
12844 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
12849 vec_mode = VOIDmode;
12850 if (HOST_BITS_PER_WIDE_INT >= 64)
12853 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
12860 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
12864 lo = ~lo, hi = ~hi;
12870 mask = immed_double_const (lo, hi, imode);
12872 vec = gen_rtvec (2, v, mask);
12873 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
12874 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
12881 gcc_unreachable ();
12885 lo = ~lo, hi = ~hi;
12887 /* Force this value into the low part of a fp vector constant. */
12888 mask = immed_double_const (lo, hi, imode);
12889 mask = gen_lowpart (mode, mask);
12891 if (vec_mode == VOIDmode)
12892 return force_reg (mode, mask);
12894 v = ix86_build_const_vector (mode, vect, mask);
12895 return force_reg (vec_mode, v);
12898 /* Generate code for floating point ABS or NEG. */
/* NOTE(review): gapped listing -- return type, braces, the operands[]
   parameter, dst/src assignments, and the emit path for the plain SSE
   SET are missing.  Visible behavior: with SSE, ABS/NEG become AND/XOR
   against a sign-bit mask from ix86_build_signbit_mask (INVERT = true
   for ABS); with x87, the plain unary SET is wrapped in a PARALLEL with
   a USE of the mask and a FLAGS clobber. */
12901 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
12904 rtx mask, set, use, clob, dst, src;
12905 bool use_sse = false;
12906 bool vector_mode = VECTOR_MODE_P (mode);
12907 enum machine_mode elt_mode = mode;
12911 elt_mode = GET_MODE_INNER (mode);
12914 else if (mode == TFmode)
12916 else if (TARGET_SSE_MATH)
12917 use_sse = SSE_FLOAT_MODE_P (mode);
12919 /* NEG and ABS performed with SSE use bitwise mask operations.
12920 Create the appropriate mask now. */
12922 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
12931 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
12932 set = gen_rtx_SET (VOIDmode, dst, set);
12937 set = gen_rtx_fmt_e (code, mode, src);
12938 set = gen_rtx_SET (VOIDmode, dst, set);
12941 use = gen_rtx_USE (VOIDmode, mask);
12942 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12943 emit_insn (gen_rtx_PARALLEL (VOIDmode,
12944 gen_rtvec (3, set, use, clob)));
12951 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* NOTE(review): gapped listing -- return type, braces, the op0/op1
   assignments from operands[1]/[2], and the else-branch structure are
   missing.  Visible behavior: when op0 is a CONST_DOUBLE its absolute
   value is taken, SF/DF constants are widened into a one-nonzero-lane
   vector constant, and a *_const copysign insn is emitted with a single
   sign mask; otherwise a *_var copysign insn is emitted with both the
   inverted and plain sign masks. */
12954 ix86_expand_copysign (rtx operands[])
12956 enum machine_mode mode;
12957 rtx dest, op0, op1, mask, nmask;
12959 dest = operands[0];
12963 mode = GET_MODE (dest);
12965 if (GET_CODE (op0) == CONST_DOUBLE)
12967 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
12969 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
12970 op0 = simplify_unary_operation (ABS, mode, op0, mode);
12972 if (mode == SFmode || mode == DFmode)
12974 enum machine_mode vmode;
12976 vmode = mode == SFmode ? V4SFmode : V2DFmode;
12978 if (op0 == CONST0_RTX (mode))
12979 op0 = CONST0_RTX (vmode);
12984 if (mode == SFmode)
12985 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
12986 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
12988 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
12990 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
12993 else if (op0 != CONST0_RTX (mode))
12994 op0 = force_reg (mode, op0);
12996 mask = ix86_build_signbit_mask (mode, 0, 0);
12998 if (mode == SFmode)
12999 copysign_insn = gen_copysignsf3_const;
13000 else if (mode == DFmode)
13001 copysign_insn = gen_copysigndf3_const;
13003 copysign_insn = gen_copysigntf3_const;
13005 emit_insn (copysign_insn (dest, op0, op1, mask));
13009 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13011 nmask = ix86_build_signbit_mask (mode, 0, 1);
13012 mask = ix86_build_signbit_mask (mode, 0, 0);
13014 if (mode == SFmode)
13015 copysign_insn = gen_copysignsf3_var;
13016 else if (mode == DFmode)
13017 copysign_insn = gen_copysigndf3_var;
13019 copysign_insn = gen_copysigntf3_var;
13021 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13025 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13026 be a constant, and so has already been expanded into a vector constant. */
/* NOTE(review): gapped listing -- return type, braces, and the op0/op1
   assignments are missing.  Visible behavior: dest (viewed in the mask's
   vector mode) is ANDed with the sign mask, then ORed with the constant
   magnitude op0 unless that constant is all-zero (in which case the OR
   is a no-op and skipped). */
13029 ix86_split_copysign_const (rtx operands[])
13031 enum machine_mode mode, vmode;
13032 rtx dest, op0, op1, mask, x;
13034 dest = operands[0];
13037 mask = operands[3];
13039 mode = GET_MODE (dest);
13040 vmode = GET_MODE (mask);
13042 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13043 x = gen_rtx_AND (vmode, dest, mask);
13044 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13046 if (op0 != CONST0_RTX (vmode))
13048 x = gen_rtx_IOR (vmode, dest, op0);
13049 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13053 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13054 so we have to do two masks. */
/* NOTE(review): gapped listing -- return type, braces, op0/op1 assignments
   from operands[2]/[3], and the early return after the op0 == op1 short
   circuit are missing.  Visible behavior: selects among the insn
   alternatives by register-number coincidences (dest==mask, op1==scratch,
   op0==dest, nmask==dest), ANDs the sign out of op1 into scratch, ANDs
   the sign off op0 into dest, then ORs the two into dest. */
13057 ix86_split_copysign_var (rtx operands[])
13059 enum machine_mode mode, vmode;
13060 rtx dest, scratch, op0, op1, mask, nmask, x;
13062 dest = operands[0];
13063 scratch = operands[1];
13066 nmask = operands[4];
13067 mask = operands[5];
13069 mode = GET_MODE (dest);
13070 vmode = GET_MODE (mask);
13072 if (rtx_equal_p (op0, op1))
13074 /* Shouldn't happen often (it's useless, obviously), but when it does
13075 we'd generate incorrect code if we continue below. */
13076 emit_move_insn (dest, op0);
13080 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13082 gcc_assert (REGNO (op1) == REGNO (scratch));
13084 x = gen_rtx_AND (vmode, scratch, mask);
13085 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13088 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13089 x = gen_rtx_NOT (vmode, dest);
13090 x = gen_rtx_AND (vmode, x, op0);
13091 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13095 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13097 x = gen_rtx_AND (vmode, scratch, mask);
13099 else /* alternative 2,4 */
13101 gcc_assert (REGNO (mask) == REGNO (scratch));
13102 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13103 x = gen_rtx_AND (vmode, scratch, op1);
13105 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13107 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13109 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13110 x = gen_rtx_AND (vmode, dest, nmask);
13112 else /* alternative 3,4 */
13114 gcc_assert (REGNO (nmask) == REGNO (dest));
13116 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13117 x = gen_rtx_AND (vmode, dest, op0);
13119 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13122 x = gen_rtx_IOR (vmode, dest, scratch);
13123 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13126 /* Return TRUE or FALSE depending on whether the first SET in INSN
13127 has source and destination with matching CC modes, and that the
13128 CC mode is at least as constrained as REQ_MODE. */
/* NOTE(review): gapped listing -- the switch on set_mode, its case labels,
   and the per-case return statements are missing; only the req_mode
   comparisons that headed each case remain.  Final visible check: the
   COMPARE source's mode must equal the destination's CC mode. */
13131 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13134 enum machine_mode set_mode;
13136 set = PATTERN (insn);
13137 if (GET_CODE (set) == PARALLEL)
13138 set = XVECEXP (set, 0, 0);
13139 gcc_assert (GET_CODE (set) == SET);
13140 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13142 set_mode = GET_MODE (SET_DEST (set));
13146 if (req_mode != CCNOmode
13147 && (req_mode != CCmode
13148 || XEXP (SET_SRC (set), 1) != const0_rtx))
13152 if (req_mode == CCGCmode)
13156 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13160 if (req_mode == CCZmode)
13171 gcc_unreachable ();
13174 return (GET_MODE (SET_SRC (set)) == set_mode);
13177 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* NOTE(review): gapped listing -- return type, braces, and the flags/tmp
   declarations are missing.  Visible behavior: selects the CC mode for
   the comparison, emits FLAGS_REG = COMPARE (op0, op1), and returns the
   (code flags 0) rtx for the eventual bcc/scc/cmov user. */
13180 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13182 enum machine_mode cmpmode;
13185 cmpmode = SELECT_CC_MODE (code, op0, op1);
13186 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13188 /* This is very simple, but making the interface the same as in the
13189 FP case makes the rest of the code easier. */
13190 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13191 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13193 /* Return the test that should be put into the flags user, i.e.
13194 the bcc, scc, or cmov instruction. */
13195 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13198 /* Figure out whether to use ordered or unordered fp comparisons.
13199 Return the appropriate mode to use. */
/* NOTE(review): return type line is missing from this excerpt.  Single
   visible statement: CCFPUmode (non-trapping/unordered) under
   TARGET_IEEE_FP, else CCFPmode. */
13202 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13204 /* ??? In order to make all comparisons reversible, we do all comparisons
13205 non-trapping when compiling for IEEE. Once gcc is able to distinguish
13206 all forms trapping and nontrapping comparisons, we can make inequality
13207 comparisons trapping again, since it results in better code when using
13208 FCOM based compares. */
13209 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* NOTE(review): gapped listing -- the return type, the switch statement
   itself, and every per-case return value are missing; only the case
   labels and guards remain.  Visible intent: pick the least-constrained
   CC mode (documented flag usage in the per-case comments) for an integer
   comparison CODE of op0/op1; scalar float modes are handed to
   ix86_fp_compare_mode. */
13213 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13215 enum machine_mode mode = GET_MODE (op0);
13217 if (SCALAR_FLOAT_MODE_P (mode))
13219 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13220 return ix86_fp_compare_mode (code);
13225 /* Only zero flag is needed. */
13226 case EQ: /* ZF=0 */
13227 case NE: /* ZF!=0 */
13229 /* Codes needing carry flag. */
13230 case GEU: /* CF=0 */
13231 case LTU: /* CF=1 */
13232 /* Detect overflow checks. They need just the carry flag. */
13233 if (GET_CODE (op0) == PLUS
13234 && rtx_equal_p (op1, XEXP (op0, 0)))
13238 case GTU: /* CF=0 & ZF=0 */
13239 case LEU: /* CF=1 | ZF=1 */
13240 /* Detect overflow checks. They need just the carry flag. */
13241 if (GET_CODE (op0) == MINUS
13242 && rtx_equal_p (op1, XEXP (op0, 0)))
13246 /* Codes possibly doable only with sign flag when
13247 comparing against zero. */
13248 case GE: /* SF=OF or SF=0 */
13249 case LT: /* SF<>OF or SF=1 */
13250 if (op1 == const0_rtx)
13253 /* For other cases Carry flag is not required. */
13255 /* Codes doable only with sign flag when comparing
13256 against zero, but we miss jump instruction for it
13257 so we need to use relational tests against overflow
13258 that thus needs to be zero. */
13259 case GT: /* ZF=0 & SF=OF */
13260 case LE: /* ZF=1 | SF<>OF */
13261 if (op1 == const0_rtx)
13265 /* strcmp pattern do (use flags) and combine may ask us for proper
13270 gcc_unreachable ();
13274 /* Return the fixed registers used for condition codes. */
/* NOTE(review): body not visible in this excerpt (original lines
   13278-13283 missing) -- presumably stores the CC register numbers
   through *p1/*p2; verify against the full source. */
13277 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13284 /* If two condition code modes are compatible, return a condition code
13285 mode which is compatible with both. Otherwise, return
/* NOTE(review): gapped listing -- the early-equality check, the switch,
   its case labels, and most return values are missing.  Visible rules:
   non-CC-class modes are rejected; CCGCmode and CCGOCmode are mutually
   compatible (common-mode result line not visible); remaining CC modes
   are only compatible with themselves. */
13288 static enum machine_mode
13289 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
13294 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
13297 if ((m1 == CCGCmode && m2 == CCGOCmode)
13298 || (m1 == CCGOCmode && m2 == CCGCmode))
13304 gcc_unreachable ();
13334 /* These are only compatible with themselves, which we already
13340 /* Split comparison code CODE into comparisons we can do using branch
13341 instructions. BYPASS_CODE is comparison code for branch that will
13342 branch around FIRST_CODE and SECOND_CODE. If some of branches
13343 is not required, set value to UNKNOWN.
13344 We never require more than two branches. */
/* NOTE(review): gapped listing -- the return type, the switch statement,
   the break statements, and a few *first_code assignments (e.g. for UNGE/
   UNGT) are missing.  Visible behavior: map each FP comparison onto FCOMI
   flag tests, adding an UNORDERED bypass or second branch for the codes
   that fail on NaNs; without TARGET_IEEE_FP the extra branches are
   dropped. */
13347 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
13348 enum rtx_code *first_code,
13349 enum rtx_code *second_code)
13351 *first_code = code;
13352 *bypass_code = UNKNOWN;
13353 *second_code = UNKNOWN;
13355 /* The fcomi comparison sets flags as follows:
13365 case GT: /* GTU - CF=0 & ZF=0 */
13366 case GE: /* GEU - CF=0 */
13367 case ORDERED: /* PF=0 */
13368 case UNORDERED: /* PF=1 */
13369 case UNEQ: /* EQ - ZF=1 */
13370 case UNLT: /* LTU - CF=1 */
13371 case UNLE: /* LEU - CF=1 | ZF=1 */
13372 case LTGT: /* EQ - ZF=0 */
13374 case LT: /* LTU - CF=1 - fails on unordered */
13375 *first_code = UNLT;
13376 *bypass_code = UNORDERED;
13378 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
13379 *first_code = UNLE;
13380 *bypass_code = UNORDERED;
13382 case EQ: /* EQ - ZF=1 - fails on unordered */
13383 *first_code = UNEQ;
13384 *bypass_code = UNORDERED;
13386 case NE: /* NE - ZF=0 - fails on unordered */
13387 *first_code = LTGT;
13388 *second_code = UNORDERED;
13390 case UNGE: /* GEU - CF=0 - fails on unordered */
13392 *second_code = UNORDERED;
13394 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
13396 *second_code = UNORDERED;
13399 gcc_unreachable ();
13401 if (!TARGET_IEEE_FP)
13403 *second_code = UNKNOWN;
13404 *bypass_code = UNKNOWN;
13408 /* Return cost of comparison done fcom + arithmetics operations on AX.
13409 All following functions do use number of instructions as a cost metrics.
13410 In future this should be tweaked to compute bytes for optimize_size and
13411 take into account performance of various instructions on various CPUs. */
/* NOTE(review): almost the entire body (original lines 13418-13440: the
   switch over CODE and the per-code cost returns) is missing from this
   excerpt; only the IEEE guard and the unreachable default survive. */
13413 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
13415 if (!TARGET_IEEE_FP)
13417 /* The cost of code output by ix86_expand_fp_compare. */
13441 gcc_unreachable ();
13445 /* Return cost of comparison done using fcomi operation.
13446 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* NOTE(review): gapped listing -- return type and the TARGET_CMOVE guard
   that returns the arbitrarily-high cost are missing.  Visible result:
   2 instructions, plus 1 if a bypass or second branch is needed. */
13448 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
13450 enum rtx_code bypass_code, first_code, second_code;
13451 /* Return arbitrarily high cost when instruction is not supported - this
13452 prevents gcc from using it. */
13455 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13456 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
13459 /* Return cost of comparison done using sahf operation.
13460 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* NOTE(review): gapped listing -- return type and the high-cost return
   under the guard are missing.  Visible result: 3 instructions
   (fnstsw + sahf + branch), plus 1 for a bypass/second branch. */
13462 ix86_fp_comparison_sahf_cost (enum rtx_code code)
13464 enum rtx_code bypass_code, first_code, second_code;
13465 /* Return arbitrarily high cost when instruction is not preferred - this
13466 avoids gcc from using it. */
13467 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
13469 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13470 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
13473 /* Compute cost of the comparison done using any method.
13474 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* NOTE(review): gapped listing -- return type, the min-update assignments,
   and the final return are missing.  Visible behavior: take the minimum
   of the arithmetics, sahf, and fcomi cost estimates. */
13476 ix86_fp_comparison_cost (enum rtx_code code)
13478 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
13481 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
13482 sahf_cost = ix86_fp_comparison_sahf_cost (code);
13484 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
13485 if (min > sahf_cost)
13487 if (min > fcomi_cost)
13492 /* Return true if we should use an FCOMI instruction for this
/* NOTE(review): return type line missing from this excerpt.  Visible
   behavior: FCOMI wins when it achieves the minimum comparison cost for
   either the code or its swapped form. */
13496 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
13498 enum rtx_code swapped_code = swap_condition (code);
13500 return ((ix86_fp_comparison_cost (code)
13501 == ix86_fp_comparison_fcomi_cost (code))
13502 || (ix86_fp_comparison_cost (swapped_code)
13503 == ix86_fp_comparison_fcomi_cost (swapped_code)));
13506 /* Swap, force into registers, or otherwise massage the two operands
13507 to a fp comparison. The operands are updated in place; the new
13508 comparison code is returned. */
/* NOTE(review): gapped listing -- several guards, the standard-constant
   load branch around line 13557, the writeback of *pop0/*pop1, and the
   final return of CODE are missing.  Visible behavior: force both
   operands to registers for SSE/unordered/fcomi/XFmode cases; for x87,
   swap operands when profitable (op1 must be the memory/constant side),
   load or force constants appropriately, and retry a swap when the
   swapped comparison is cheaper. */
13510 static enum rtx_code
13511 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
13513 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
13514 rtx op0 = *pop0, op1 = *pop1;
13515 enum machine_mode op_mode = GET_MODE (op0);
13516 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
13518 /* All of the unordered compare instructions only work on registers.
13519 The same is true of the fcomi compare instructions. The XFmode
13520 compare instructions require registers except when comparing
13521 against zero or when converting operand 1 from fixed point to
13525 && (fpcmp_mode == CCFPUmode
13526 || (op_mode == XFmode
13527 && ! (standard_80387_constant_p (op0) == 1
13528 || standard_80387_constant_p (op1) == 1)
13529 && GET_CODE (op1) != FLOAT)
13530 || ix86_use_fcomi_compare (code)))
13532 op0 = force_reg (op_mode, op0);
13533 op1 = force_reg (op_mode, op1);
13537 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
13538 things around if they appear profitable, otherwise force op0
13539 into a register. */
13541 if (standard_80387_constant_p (op0) == 0
13543 && ! (standard_80387_constant_p (op1) == 0
13547 tmp = op0, op0 = op1, op1 = tmp;
13548 code = swap_condition (code);
13552 op0 = force_reg (op_mode, op0);
13554 if (CONSTANT_P (op1))
13556 int tmp = standard_80387_constant_p (op1);
13558 op1 = validize_mem (force_const_mem (op_mode, op1));
13562 op1 = force_reg (op_mode, op1);
13565 op1 = force_reg (op_mode, op1);
13569 /* Try to rearrange the comparison to make it cheaper. */
13570 if (ix86_fp_comparison_cost (code)
13571 > ix86_fp_comparison_cost (swap_condition (code))
13572 && (REG_P (op1) || can_create_pseudo_p ()))
13575 tmp = op0, op0 = op1, op1 = tmp;
13576 code = swap_condition (code);
13578 op0 = force_reg (op_mode, op0);
13586 /* Convert comparison codes we use to represent FP comparison to integer
13587 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): body not visible in this excerpt (original lines
   13592-13619 missing) -- presumably a switch mapping FP comparison
   codes to integer branch codes; verify against the full source. */
13591 ix86_fp_compare_code_to_integer (enum rtx_code code)
13620 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* NOTE(review): gapped listing -- return type, braces, the switch over
   CODE in the fnstsw path, its case labels, the code reassignments per
   case, and several returns are missing.  Visible structure: when fcomi/
   sahf is cheaper than arithmetic, emit the compare directly into
   FLAGS_REG (sahf path clobbers an HImode scratch) and hand any bypass/
   second test back through *bypass_test/*second_test; otherwise emit
   fnstsw into a scratch and do bit tests on AH (test/and/add/cmp/xor with
   masks 0x01/0x04/0x05/0x40/0x44/0x45) to synthesize the condition,
   choosing CCNOmode or CCmode for the final flags user. */
13623 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
13624 rtx *second_test, rtx *bypass_test)
13626 enum machine_mode fpcmp_mode, intcmp_mode;
13628 int cost = ix86_fp_comparison_cost (code);
13629 enum rtx_code bypass_code, first_code, second_code;
13631 fpcmp_mode = ix86_fp_compare_mode (code);
13632 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
13635 *second_test = NULL_RTX;
13637 *bypass_test = NULL_RTX;
13639 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13641 /* Do fcomi/sahf based test when profitable. */
13642 if (ix86_fp_comparison_arithmetics_cost (code) > cost
13643 && (bypass_code == UNKNOWN || bypass_test)
13644 && (second_code == UNKNOWN || second_test))
13646 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13647 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
13653 gcc_assert (TARGET_SAHF);
13656 scratch = gen_reg_rtx (HImode);
13657 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
13659 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
13662 /* The FP codes work out to act like unsigned. */
13663 intcmp_mode = fpcmp_mode;
13665 if (bypass_code != UNKNOWN)
13666 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
13667 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13669 if (second_code != UNKNOWN)
13670 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
13671 gen_rtx_REG (intcmp_mode, FLAGS_REG),
13676 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
13677 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
13678 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
13680 scratch = gen_reg_rtx (HImode);
13681 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
13683 /* In the unordered case, we have to check C2 for NaN's, which
13684 doesn't happen to work out to anything nice combination-wise.
13685 So do some bit twiddling on the value we've got in AH to come
13686 up with an appropriate set of condition codes. */
13688 intcmp_mode = CCNOmode;
13693 if (code == GT || !TARGET_IEEE_FP)
13695 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13700 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13701 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13702 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
13703 intcmp_mode = CCmode;
13709 if (code == LT && TARGET_IEEE_FP)
13711 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13712 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
13713 intcmp_mode = CCmode;
13718 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
13724 if (code == GE || !TARGET_IEEE_FP)
13726 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
13731 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13732 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13739 if (code == LE && TARGET_IEEE_FP)
13741 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13742 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
13743 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13744 intcmp_mode = CCmode;
13749 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
13755 if (code == EQ && TARGET_IEEE_FP)
13757 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13758 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
13759 intcmp_mode = CCmode;
13764 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13771 if (code == NE && TARGET_IEEE_FP)
13773 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
13774 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
13780 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
13786 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13790 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
13795 gcc_unreachable ();
13799 /* Return the test that should be put into the flags user, i.e.
13800 the bcc, scc, or cmov instruction. */
13801 return gen_rtx_fmt_ee (code, VOIDmode,
13802 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* NOTE(review): gapped listing -- return type, braces, the ret
   declaration, and the final return are missing.  Visible behavior:
   dispatches on the globals ix86_compare_op0/op1/emitted -- reuse an
   already-emitted flags result, expand an FP compare (decimal float
   asserted away), or fall back to the integer compare expander. */
13807 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
13810 op0 = ix86_compare_op0;
13811 op1 = ix86_compare_op1;
13814 *second_test = NULL_RTX;
13816 *bypass_test = NULL_RTX;
13818 if (ix86_compare_emitted)
13820 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
13821 ix86_compare_emitted = NULL_RTX;
13823 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
13825 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
13826 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
13827 second_test, bypass_test);
13830 ret = ix86_expand_int_compare (code, op0, op1);
13835 /* Return true if the CODE will result in nontrivial jump sequence. */
/* NOTE(review): return type line missing.  Visible behavior: nontrivial
   iff ix86_fp_comparison_codes reports a bypass or second branch. */
13837 ix86_fp_jump_nontrivial_p (enum rtx_code code)
13839 enum rtx_code bypass_code, first_code, second_code;
13842 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13843 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Emit a conditional branch to LABEL for comparison CODE on the global
   ix86_compare_op0/op1.  Dispatches on the compare mode: simple integer
   modes get one compare+jump; FP modes may need a compound sequence;
   DImode (or TImode on 64-bit) is split into word-sized compares.
   NOTE(review): many interior lines (switch labels, braces, declarations)
   are elided in this extract.  */
13847 ix86_expand_branch (enum rtx_code code, rtx label)
13851 /* If we have emitted a compare insn, go straight to simple.
13852 ix86_expand_compare won't emit anything if ix86_compare_emitted
13854 if (ix86_compare_emitted)
13857 switch (GET_MODE (ix86_compare_op0))
/* Simple path: one compare, one conditional jump.  */
13863 tmp = ix86_expand_compare (code, NULL, NULL);
13864 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13865 gen_rtx_LABEL_REF (VOIDmode, label),
13867 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Floating-point path: canonicalize the operands, then decide whether
   a single natural jump suffices.  */
13876 enum rtx_code bypass_code, first_code, second_code;
13878 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
13879 &ix86_compare_op1);
13881 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
13883 /* Check whether we will use the natural sequence with one jump. If
13884 so, we can expand jump early. Otherwise delay expansion by
13885 creating compound insn to not confuse optimizers. */
13886 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
13888 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
13889 gen_rtx_LABEL_REF (VOIDmode, label),
13890 pc_rtx, NULL_RTX, NULL_RTX);
/* Multi-jump FP case: build one PARALLEL jump insn carrying clobbers
   so later passes see the whole thing as a unit.  */
13894 tmp = gen_rtx_fmt_ee (code, VOIDmode,
13895 ix86_compare_op0, ix86_compare_op1);
13896 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
13897 gen_rtx_LABEL_REF (VOIDmode, label),
13899 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
/* fcomi needs no scratch; otherwise one extra HImode scratch clobber.  */
13901 use_fcomi = ix86_use_fcomi_compare (code);
13902 vec = rtvec_alloc (3 + !use_fcomi);
13903 RTVEC_ELT (vec, 0) = tmp;
13905 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
13907 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
13910 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
13912 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
13921 /* Expand DImode branch into multiple compare+branch. */
13923 rtx lo[2], hi[2], label2;
13924 enum rtx_code code1, code2, code3;
13925 enum machine_mode submode;
/* Canonicalize: keep any constant as the second operand.  */
13927 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
13929 tmp = ix86_compare_op0;
13930 ix86_compare_op0 = ix86_compare_op1;
13931 ix86_compare_op1 = tmp;
13932 code = swap_condition (code);
/* Split each operand into low/high word halves.  */
13934 if (GET_MODE (ix86_compare_op0) == DImode)
13936 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
13937 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
13942 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
13943 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
13947 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
13948 avoid two branches. This costs one extra insn, so disable when
13949 optimizing for size. */
13951 if ((code == EQ || code == NE)
13952 && (!optimize_insn_for_size_p ()
13953 || hi[1] == const0_rtx || lo[1] == const0_rtx))
13958 if (hi[1] != const0_rtx)
13959 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
13960 NULL_RTX, 0, OPTAB_WIDEN);
13963 if (lo[1] != const0_rtx)
13964 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
13965 NULL_RTX, 0, OPTAB_WIDEN);
13967 tmp = expand_binop (submode, ior_optab, xor1, xor0,
13968 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: compare the combined xor/ior result against zero.  */
13970 ix86_compare_op0 = tmp;
13971 ix86_compare_op1 = const0_rtx;
13972 ix86_expand_branch (code, label);
13976 /* Otherwise, if we are doing less-than or greater-or-equal-than,
13977 op1 is a constant and the low word is zero, then we can just
13978 examine the high word. Similarly for low word -1 and
13979 less-or-equal-than or greater-than. */
13981 if (CONST_INT_P (hi[1]))
13984 case LT: case LTU: case GE: case GEU:
13985 if (lo[1] == const0_rtx)
13987 ix86_compare_op0 = hi[0];
13988 ix86_compare_op1 = hi[1];
13989 ix86_expand_branch (code, label);
13993 case LE: case LEU: case GT: case GTU:
13994 if (lo[1] == constm1_rtx)
13996 ix86_compare_op0 = hi[0];
13997 ix86_compare_op1 = hi[1];
13998 ix86_expand_branch (code, label);
14006 /* Otherwise, we need two or three jumps. */
14008 label2 = gen_label_rtx ();
/* code1 branches on the high word, code2 falls through to label2
   (false), code3 is the unsigned low-word compare.  */
14011 code2 = swap_condition (code);
14012 code3 = unsigned_condition (code);
14016 case LT: case GT: case LTU: case GTU:
14019 case LE: code1 = LT; code2 = GT; break;
14020 case GE: code1 = GT; code2 = LT; break;
14021 case LEU: code1 = LTU; code2 = GTU; break;
14022 case GEU: code1 = GTU; code2 = LTU; break;
14024 case EQ: code1 = UNKNOWN; code2 = NE; break;
14025 case NE: code2 = UNKNOWN; break;
14028 gcc_unreachable ();
14033 * if (hi(a) < hi(b)) goto true;
14034 * if (hi(a) > hi(b)) goto false;
14035 * if (lo(a) < lo(b)) goto true;
14039 ix86_compare_op0 = hi[0];
14040 ix86_compare_op1 = hi[1];
14042 if (code1 != UNKNOWN)
14043 ix86_expand_branch (code1, label);
14044 if (code2 != UNKNOWN)
14045 ix86_expand_branch (code2, label2);
/* Tie-break on the low words, always as an unsigned compare.  */
14047 ix86_compare_op0 = lo[0];
14048 ix86_compare_op1 = lo[1];
14049 ix86_expand_branch (code3, label);
14051 if (code2 != UNKNOWN)
14052 emit_label (label2);
14057 gcc_unreachable ();
14061 /* Split branch based on floating point condition. */
/* Emit up to three conditional jumps for an FP comparison of OP1/OP2:
   an optional bypass jump (unordered operands), the primary jump, and an
   optional second jump.  TARGET1/TARGET2 are the taken/not-taken
   destinations (one of them pc_rtx); TMP is a scratch for the compare;
   PUSHED, if set, is an operand pushed on the x87 stack to be freed.
   Branch probabilities from split_branch_probability, when available,
   are attached as REG_BR_PROB notes.  */
14063 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14064 rtx target1, rtx target2, rtx tmp, rtx pushed)
14066 rtx second, bypass;
14067 rtx label = NULL_RTX;
14069 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the "true" target is target1; reversing an FP
   condition must preserve unordered behavior.  */
14072 if (target2 != pc_rtx)
14075 code = reverse_condition_maybe_unordered (code);
14080 condition = ix86_expand_fp_compare (code, op1, op2,
14081 tmp, &second, &bypass);
14083 /* Remove pushed operand from stack. */
14085 ix86_free_from_memory (GET_MODE (pushed));
14087 if (split_branch_probability >= 0)
14089 /* Distribute the probabilities across the jumps.
14090 Assume the BYPASS and SECOND to be always test
14092 probability = split_branch_probability;
14094 /* Value of 1 is low enough to make no need for probability
14095 to be updated. Later we may run some experiments and see
14096 if unordered values are more frequent in practice. */
14098 bypass_probability = 1;
14100 second_probability = 1;
/* Bypass jump: skip over the primary jump when operands are such that
   the main test would give the wrong answer.  */
14102 if (bypass != NULL_RTX)
14104 label = gen_label_rtx ();
14105 i = emit_jump_insn (gen_rtx_SET
14107 gen_rtx_IF_THEN_ELSE (VOIDmode,
14109 gen_rtx_LABEL_REF (VOIDmode,
14112 if (bypass_probability >= 0)
14114 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14115 GEN_INT (bypass_probability),
/* Primary conditional jump.  */
14118 i = emit_jump_insn (gen_rtx_SET
14120 gen_rtx_IF_THEN_ELSE (VOIDmode,
14121 condition, target1, target2)));
14122 if (probability >= 0)
14124 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14125 GEN_INT (probability),
/* Optional second jump for comparisons needing two tests.  */
14127 if (second != NULL_RTX)
14129 i = emit_jump_insn (gen_rtx_SET
14131 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14133 if (second_probability >= 0)
14135 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14136 GEN_INT (second_probability),
/* Land the bypass jump here, past all the conditional jumps.  */
14139 if (label != NULL_RTX)
14140 emit_label (label);
/* Expand a setcc: store the boolean result of comparison CODE (on the
   global ix86_compare_op0/op1) into QImode register DEST.  Returns
   0 (FAIL) for double-word compares the caller must handle otherwise,
   1 (DONE) on success.  FP comparisons needing auxiliary tests combine
   two setcc results with AND (bypass) or IOR (second).  */
14144 ix86_expand_setcc (enum rtx_code code, rtx dest)
14146 rtx ret, tmp, tmpreg, equiv;
14147 rtx second_test, bypass_test;
/* Double-word integer compares go through the branch splitter instead.  */
14149 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14150 return 0; /* FAIL */
14152 gcc_assert (GET_MODE (dest) == QImode);
14154 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14155 PUT_MODE (ret, QImode);
14160 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* FP comparisons can require a second setcc whose result is merged in.  */
14161 if (bypass_test || second_test)
14163 rtx test = second_test;
14165 rtx tmp2 = gen_reg_rtx (QImode);
14168 gcc_assert (!second_test);
/* Bypass test is applied inverted so it can be ANDed in below.  */
14169 test = bypass_test;
14171 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14173 PUT_MODE (test, QImode);
14174 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
/* AND for an (inverted) bypass test, IOR for a second test.  */
14177 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14179 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14182 /* Attach a REG_EQUAL note describing the comparison result. */
14183 if (ix86_compare_op0 && ix86_compare_op1)
14185 equiv = simplify_gen_relational (code, QImode,
14186 GET_MODE (ix86_compare_op0),
14187 ix86_compare_op0, ix86_compare_op1);
14188 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14191 return 1; /* DONE */
14194 /* Expand comparison setting or clearing carry flag. Return true when
14195 successful and set pop for the operation. */
/* Only LTU/GEU map directly onto the carry flag, so integer codes are
   rewritten toward those forms by adjusting OP1 (e.g. a==0 -> a<u 1,
   a>u b -> a>=u b+1); FP codes are verified after expansion.  */
14197 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14199 enum machine_mode mode =
14200 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14202 /* Do not handle DImode compares that go through special path. */
14203 if (mode == (TARGET_64BIT ? TImode : DImode))
14206 if (SCALAR_FLOAT_MODE_P (mode))
14208 rtx second_test = NULL, bypass_test = NULL;
14209 rtx compare_op, compare_seq;
14211 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14213 /* Shortcut: following common codes never translate
14214 into carry flag compares. */
14215 if (code == EQ || code == NE || code == UNEQ || code == LTGT
14216 || code == ORDERED || code == UNORDERED)
14219 /* These comparisons require zero flag; swap operands so they won't. */
14220 if ((code == GT || code == UNLE || code == LE || code == UNGT)
14221 && !TARGET_IEEE_FP)
14226 code = swap_condition (code);
14229 /* Try to expand the comparison and verify that we end up with
14230 carry flag based comparison. This fails to be true only when
14231 we decide to expand comparison using arithmetic that is not
14232 too common scenario. */
14234 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14235 &second_test, &bypass_test);
14236 compare_seq = get_insns ();
/* Auxiliary tests mean more than one flag is consumed: give up.  */
14239 if (second_test || bypass_test)
14242 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14243 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14244 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
14246 code = GET_CODE (compare_op);
/* Succeed only when the expansion landed on a carry-flag compare.  */
14248 if (code != LTU && code != GEU)
14251 emit_insn (compare_seq);
14256 if (!INTEGRAL_MODE_P (mode))
14265 /* Convert a==0 into (unsigned)a<1. */
14268 if (op1 != const0_rtx)
14271 code = (code == EQ ? LTU : GEU);
14274 /* Convert a>b into b<a or a>=b-1. */
14277 if (CONST_INT_P (op1))
14279 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
14280 /* Bail out on overflow. We still can swap operands but that
14281 would force loading of the constant into register. */
14282 if (op1 == const0_rtx
14283 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
14285 code = (code == GTU ? GEU : LTU);
14292 code = (code == GTU ? LTU : GEU);
14296 /* Convert a>=0 into (unsigned)a<0x80000000. */
14299 if (mode == DImode || op1 != const0_rtx)
14301 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14302 code = (code == LT ? GEU : LTU);
14306 if (mode == DImode || op1 != constm1_rtx)
14308 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14309 code = (code == LE ? GEU : LTU);
14315 /* Swapping operands may cause constant to appear as first operand. */
14316 if (!nonimmediate_operand (op0, VOIDmode))
14318 if (!can_create_pseudo_p ())
14320 op0 = force_reg (mode, op0);
/* Emit via the generic expander and verify we got a carry compare.  */
14322 ix86_compare_op0 = op0;
14323 ix86_compare_op1 = op1;
14324 *pop = ix86_expand_compare (code, NULL, NULL);
14325 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move: operands[0] = operands[1](cmp)
   ? operands[2] : operands[3].  Tries, in order: sbb/setcc arithmetic
   tricks for constant arms, lea-based forms, setcc+and+add sequences,
   logical masking against a variable arm, and finally real cmov.
   Returns 1 (DONE) or 0 (FAIL).  NOTE(review): this extract elides many
   interior lines; comments below describe only the visible structure.  */
14330 ix86_expand_int_movcc (rtx operands[])
14332 enum rtx_code code = GET_CODE (operands[1]), compare_code;
14333 rtx compare_seq, compare_op;
14334 rtx second_test, bypass_test;
14335 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): stray double semicolon below is in the original source;
   harmless, but worth cleaning up upstream.  */
14336 bool sign_bit_compare_p = false;;
14339 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14340 compare_seq = get_insns ();
14343 compare_code = GET_CODE (compare_op);
14345 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
14346 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
14347 sign_bit_compare_p = true;
14349 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
14350 HImode insns, we'd be swallowed in word prefix ops. */
14352 if ((mode != HImode || TARGET_FAST_PREFIX)
14353 && (mode != (TARGET_64BIT ? TImode : DImode))
14354 && CONST_INT_P (operands[2])
14355 && CONST_INT_P (operands[3]))
14357 rtx out = operands[0];
14358 HOST_WIDE_INT ct = INTVAL (operands[2]);
14359 HOST_WIDE_INT cf = INTVAL (operands[3]);
14360 HOST_WIDE_INT diff;
14363 /* Sign bit compares are better done using shifts than we do by using
14365 if (sign_bit_compare_p
14366 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14367 ix86_compare_op1, &compare_op))
14369 /* Detect overlap between destination and compare sources. */
14372 if (!sign_bit_compare_p)
14374 bool fpcmp = false;
14376 compare_code = GET_CODE (compare_op);
14378 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14379 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14382 compare_code = ix86_fp_compare_code_to_integer (compare_code);
14385 /* To simplify rest of code, restrict to the GEU case. */
14386 if (compare_code == LTU)
14388 HOST_WIDE_INT tmp = ct;
14391 compare_code = reverse_condition (compare_code);
14392 code = reverse_condition (code);
14397 PUT_CODE (compare_op,
14398 reverse_condition_maybe_unordered
14399 (GET_CODE (compare_op)));
14401 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14405 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
14406 || reg_overlap_mentioned_p (out, ix86_compare_op1))
14407 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag via sbb.  */
14409 if (mode == DImode)
14410 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
14412 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
14416 if (code == GT || code == GE)
14417 code = reverse_condition (code);
14420 HOST_WIDE_INT tmp = ct;
/* Sign-bit path: emit_store_flag produces the 0/-1 mask directly.  */
14425 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
14426 ix86_compare_op1, VOIDmode, 0, -1);
14439 tmp = expand_simple_binop (mode, PLUS,
14441 copy_rtx (tmp), 1, OPTAB_DIRECT);
14452 tmp = expand_simple_binop (mode, IOR,
14454 copy_rtx (tmp), 1, OPTAB_DIRECT);
14456 else if (diff == -1 && ct)
14466 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14468 tmp = expand_simple_binop (mode, PLUS,
14469 copy_rtx (tmp), GEN_INT (cf),
14470 copy_rtx (tmp), 1, OPTAB_DIRECT);
14478 * andl cf - ct, dest
/* General constant arms: mask the 0/-1 value with (cf - ct), then
   add ct, yielding ct or cf.  */
14488 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
14491 tmp = expand_simple_binop (mode, AND,
14493 gen_int_mode (cf - ct, mode),
14494 copy_rtx (tmp), 1, OPTAB_DIRECT);
14496 tmp = expand_simple_binop (mode, PLUS,
14497 copy_rtx (tmp), GEN_INT (ct),
14498 copy_rtx (tmp), 1, OPTAB_DIRECT);
14501 if (!rtx_equal_p (tmp, out))
14502 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
14504 return 1; /* DONE */
14509 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14512 tmp = ct, ct = cf, cf = tmp;
14515 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14517 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14519 /* We may be reversing unordered compare to normal compare, that
14520 is not valid in general (we may convert non-trapping condition
14521 to trapping one), however on i386 we currently emit all
14522 comparisons unordered. */
14523 compare_code = reverse_condition_maybe_unordered (compare_code);
14524 code = reverse_condition_maybe_unordered (code);
14528 compare_code = reverse_condition (compare_code);
14529 code = reverse_condition (code);
14533 compare_code = UNKNOWN;
14534 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
14535 && CONST_INT_P (ix86_compare_op1))
14537 if (ix86_compare_op1 == const0_rtx
14538 && (code == LT || code == GE))
14539 compare_code = code;
14540 else if (ix86_compare_op1 == constm1_rtx)
14544 else if (code == GT)
14549 /* Optimize dest = (op0 < 0) ? -1 : cf. */
14550 if (compare_code != UNKNOWN
14551 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
14552 && (cf == -1 || ct == -1))
14554 /* If lea code below could be used, only optimize
14555 if it results in a 2 insn sequence. */
14557 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
14558 || diff == 3 || diff == 5 || diff == 9)
14559 || (compare_code == LT && ct == -1)
14560 || (compare_code == GE && cf == -1))
14563 * notl op1 (if necessary)
14571 code = reverse_condition (code);
14574 out = emit_store_flag (out, code, ix86_compare_op0,
14575 ix86_compare_op1, VOIDmode, 0, -1);
14577 out = expand_simple_binop (mode, IOR,
14579 out, 1, OPTAB_DIRECT);
14580 if (out != operands[0])
14581 emit_move_insn (operands[0], out);
14583 return 1; /* DONE */
/* lea path: when diff is a scale lea can encode (1,2,3,4,5,8,9),
   compute cf + setcc*diff in one or two insns.  */
14588 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
14589 || diff == 3 || diff == 5 || diff == 9)
14590 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
14592 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
14598 * lea cf(dest*(ct-cf)),dest
14602 * This also catches the degenerate setcc-only case.
14608 out = emit_store_flag (out, code, ix86_compare_op0,
14609 ix86_compare_op1, VOIDmode, 0, 1);
14612 /* On x86_64 the lea instruction operates on Pmode, so we need
14613 to get arithmetics done in proper mode to match. */
14615 tmp = copy_rtx (out);
14619 out1 = copy_rtx (out);
14620 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
14624 tmp = gen_rtx_PLUS (mode, tmp, out1);
14630 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
14633 if (!rtx_equal_p (tmp, out))
14636 out = force_operand (tmp, copy_rtx (out));
14638 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
14640 if (!rtx_equal_p (out, operands[0]))
14641 emit_move_insn (operands[0], copy_rtx (out));
14643 return 1; /* DONE */
14647 * General case: Jumpful:
14648 * xorl dest,dest cmpl op1, op2
14649 * cmpl op1, op2 movl ct, dest
14650 * setcc dest jcc 1f
14651 * decl dest movl cf, dest
14652 * andl (cf-ct),dest 1:
14655 * Size 20. Size 14.
14657 * This is reasonably steep, but branch mispredict costs are
14658 * high on modern cpus, so consider failing only if optimizing
14662 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14663 && BRANCH_COST (optimize_insn_for_speed_p (),
14668 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
14673 if (SCALAR_FLOAT_MODE_P (cmp_mode))
14675 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
14677 /* We may be reversing unordered compare to normal compare,
14678 that is not valid in general (we may convert non-trapping
14679 condition to trapping one), however on i386 we currently
14680 emit all comparisons unordered. */
14681 code = reverse_condition_maybe_unordered (code);
14685 code = reverse_condition (code);
14686 if (compare_code != UNKNOWN)
14687 compare_code = reverse_condition (compare_code);
14691 if (compare_code != UNKNOWN)
14693 /* notl op1 (if needed)
14698 For x < 0 (resp. x <= -1) there will be no notl,
14699 so if possible swap the constants to get rid of the
14701 True/false will be -1/0 while code below (store flag
14702 followed by decrement) is 0/-1, so the constants need
14703 to be exchanged once more. */
14705 if (compare_code == GE || !cf)
14707 code = reverse_condition (code);
14712 HOST_WIDE_INT tmp = cf;
14717 out = emit_store_flag (out, code, ix86_compare_op0,
14718 ix86_compare_op1, VOIDmode, 0, -1);
14722 out = emit_store_flag (out, code, ix86_compare_op0,
14723 ix86_compare_op1, VOIDmode, 0, 1);
/* Turn the 0/1 setcc result into a 0/-1 mask, then select via
   AND (cf-ct) + ADD ct.  */
14725 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
14726 copy_rtx (out), 1, OPTAB_DIRECT);
14729 out = expand_simple_binop (mode, AND, copy_rtx (out),
14730 gen_int_mode (cf - ct, mode),
14731 copy_rtx (out), 1, OPTAB_DIRECT);
14733 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
14734 copy_rtx (out), 1, OPTAB_DIRECT);
14735 if (!rtx_equal_p (out, operands[0]))
14736 emit_move_insn (operands[0], copy_rtx (out));
14738 return 1; /* DONE */
14742 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
14744 /* Try a few things more with specific constants and a variable. */
14747 rtx var, orig_out, out, tmp;
14749 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
14750 return 0; /* FAIL */
14752 /* If one of the two operands is an interesting constant, load a
14753 constant with the above and mask it in with a logical operation. */
14755 if (CONST_INT_P (operands[2]))
14758 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
14759 operands[3] = constm1_rtx, op = and_optab;
14760 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
14761 operands[3] = const0_rtx, op = ior_optab;
14763 return 0; /* FAIL */
14765 else if (CONST_INT_P (operands[3]))
14768 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
14769 operands[2] = constm1_rtx, op = and_optab;
14770 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
14771 operands[2] = const0_rtx, op = ior_optab;
14773 return 0; /* FAIL */
14776 return 0; /* FAIL */
14778 orig_out = operands[0];
14779 tmp = gen_reg_rtx (mode);
14782 /* Recurse to get the constant loaded. */
14783 if (ix86_expand_int_movcc (operands) == 0)
14784 return 0; /* FAIL */
14786 /* Mask in the interesting variable. */
14787 out = expand_binop (mode, op, var, tmp, orig_out, 0,
14789 if (!rtx_equal_p (out, orig_out))
14790 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
14792 return 1; /* DONE */
14796 * For comparison with above,
/* cmov path: both arms must be register/memory operands, and arms
   overlapping the destination are copied to fresh registers first.  */
14806 if (! nonimmediate_operand (operands[2], mode))
14807 operands[2] = force_reg (mode, operands[2]);
14808 if (! nonimmediate_operand (operands[3], mode))
14809 operands[3] = force_reg (mode, operands[3]);
14811 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
14813 rtx tmp = gen_reg_rtx (mode);
14814 emit_move_insn (tmp, operands[3]);
14817 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
14819 rtx tmp = gen_reg_rtx (mode);
14820 emit_move_insn (tmp, operands[2]);
14824 if (! register_operand (operands[2], VOIDmode)
14826 || ! register_operand (operands[3], VOIDmode)))
14827 operands[2] = force_reg (mode, operands[2]);
14830 && ! register_operand (operands[3], VOIDmode))
14831 operands[3] = force_reg (mode, operands[3]);
14833 emit_insn (compare_seq);
14834 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
14835 gen_rtx_IF_THEN_ELSE (mode,
14836 compare_op, operands[2],
/* Extra cmovs fold in the bypass/second FP tests when present.  */
14839 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
14840 gen_rtx_IF_THEN_ELSE (mode,
14842 copy_rtx (operands[3]),
14843 copy_rtx (operands[0]))));
14845 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
14846 gen_rtx_IF_THEN_ELSE (mode,
14848 copy_rtx (operands[2]),
14849 copy_rtx (operands[0]))));
14851 return 1; /* DONE */
14854 /* Swap, force into registers, or otherwise massage the two operands
14855 to an sse comparison with a mask result. Thus we differ a bit from
14856 ix86_prepare_fp_compare_args which expects to produce a flags result.
14858 The DEST operand exists to help determine whether to commute commutative
14859 operators. The POP0/POP1 operands are updated in place. The new
14860 comparison code is returned, or UNKNOWN if not implementable. */
14862 static enum rtx_code
14863 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
14864 rtx *pop0, rtx *pop1)
/* NOTE(review): the switch dispatching on CODE is elided in this
   extract; only its comment/action fragments are visible below.  */
14872 /* We have no LTGT as an operator. We could implement it with
14873 NE & ORDERED, but this requires an extra temporary. It's
14874 not clear that it's worth it. */
14881 /* These are supported directly. */
14888 /* For commutative operators, try to canonicalize the destination
14889 operand to be first in the comparison - this helps reload to
14890 avoid extra moves. */
14891 if (!dest || !rtx_equal_p (dest, *pop1))
14899 /* These are not supported directly. Swap the comparison operands
14900 to transform into something that is supported. */
14904 code = swap_condition (code);
14908 gcc_unreachable ();
14914 /* Detect conditional moves that exactly match min/max operational
14915 semantics. Note that this is IEEE safe, as long as we don't
14916 interchange the operands.
14918 Returns FALSE if this conditional move doesn't match a MIN/MAX,
14919 and TRUE if the operation is successful and instructions are emitted. */
14922 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
14923 rtx cmp_op1, rtx if_true, rtx if_false)
14925 enum machine_mode mode;
/* Canonicalize UNGE to its LT-style dual by swapping the arms.  */
14931 else if (code == UNGE)
14934 if_true = if_false;
/* The cmov matches min/max only when its arms are exactly the compare
   operands (in either order); the order determines min vs max.  */
14940 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
14942 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
14947 mode = GET_MODE (dest);
14949 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
14950 but MODE may be a vector mode and thus not appropriate. */
14951 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-careful path: use the UNSPEC min/max patterns, which preserve
   SSE min/max operand-order semantics for NaNs and signed zeros.  */
14953 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
14956 if_true = force_reg (mode, if_true);
14957 v = gen_rtvec (2, if_true, if_false);
14958 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math path: plain SMIN/SMAX rtx is sufficient.  */
14962 code = is_min ? SMIN : SMAX;
14963 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
14966 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
14970 /* Expand an sse vector comparison. Return the register with the result. */
/* DEST receives the all-ones/all-zeros comparison mask; OP_TRUE/OP_FALSE
   are only inspected for overlap with DEST, forcing a fresh register
   when they alias it (the caller combines the mask with them later).  */
14973 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
14974 rtx op_true, rtx op_false)
14976 enum machine_mode mode = GET_MODE (dest);
14979 cmp_op0 = force_reg (mode, cmp_op0);
14980 if (!nonimmediate_operand (cmp_op1, mode))
14981 cmp_op1 = force_reg (mode, cmp_op1);
/* Don't clobber DEST while its old value is still needed by an arm.  */
14984 || reg_overlap_mentioned_p (dest, op_true)
14985 || reg_overlap_mentioned_p (dest, op_false))
14986 dest = gen_reg_rtx (mode);
14988 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
14989 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14994 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
14995 operations. This is used for both scalar and vector conditional moves. */
14998 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15000 enum machine_mode mode = GET_MODE (dest);
/* One arm is zero: a single AND (with CMP or its complement) suffices.  */
15003 if (op_false == CONST0_RTX (mode))
15005 op_true = force_reg (mode, op_true);
15006 x = gen_rtx_AND (mode, cmp, op_true);
15007 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15009 else if (op_true == CONST0_RTX (mode))
15011 op_false = force_reg (mode, op_false);
15012 x = gen_rtx_NOT (mode, cmp);
15013 x = gen_rtx_AND (mode, x, op_false);
15014 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a real conditional-move (pcmov) pattern.  */
15016 else if (TARGET_SSE5)
15018 rtx pcmov = gen_rtx_SET (mode, dest,
15019 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* Generic path: dest = (op_true & cmp) | (op_false & ~cmp).  */
15026 op_true = force_reg (mode, op_true);
15027 op_false = force_reg (mode, op_false);
15029 t2 = gen_reg_rtx (mode);
15031 t3 = gen_reg_rtx (mode);
15035 x = gen_rtx_AND (mode, op_true, cmp);
15036 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15038 x = gen_rtx_NOT (mode, cmp);
15039 x = gen_rtx_AND (mode, x, op_false);
15040 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15042 x = gen_rtx_IOR (mode, t3, t2);
15043 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15047 /* Expand a floating-point conditional move. Return true if successful. */
15050 ix86_expand_fp_movcc (rtx operands[])
15052 enum machine_mode mode = GET_MODE (operands[0]);
15053 enum rtx_code code = GET_CODE (operands[1]);
15054 rtx tmp, compare_op, second_test, bypass_test;
/* SSE scalar math: build a mask compare + logical select instead of
   x87 fcmov, but only when compare mode matches the move mode.  */
15056 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15058 enum machine_mode cmode;
15060 /* Since we've no cmove for sse registers, don't force bad register
15061 allocation just to gain access to it. Deny movcc when the
15062 comparison mode doesn't match the move mode. */
15063 cmode = GET_MODE (ix86_compare_op0);
15064 if (cmode == VOIDmode)
15065 cmode = GET_MODE (ix86_compare_op1);
15069 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15071 &ix86_compare_op1);
15072 if (code == UNKNOWN)
/* Prefer a direct min/max when the cmov matches that shape.  */
15075 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15076 ix86_compare_op1, operands[2],
15080 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15081 ix86_compare_op1, operands[2], operands[3]);
15082 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15086 /* The floating point conditional move instructions don't directly
15087 support conditions resulting from a signed integer comparison. */
15089 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15091 /* The floating point conditional move instructions don't directly
15092 support signed integer comparisons. */
/* Unsupported condition: materialize it as a QImode setcc and compare
   that against zero instead.  */
15094 if (!fcmov_comparison_operator (compare_op, VOIDmode))
15096 gcc_assert (!second_test && !bypass_test);
15097 tmp = gen_reg_rtx (QImode);
15098 ix86_expand_setcc (code, tmp);
15100 ix86_compare_op0 = tmp;
15101 ix86_compare_op1 = const0_rtx;
15102 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy arms that overlap the destination before the extra fcmovs.  */
15104 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15106 tmp = gen_reg_rtx (mode);
15107 emit_move_insn (tmp, operands[3]);
15110 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15112 tmp = gen_reg_rtx (mode);
15113 emit_move_insn (tmp, operands[2]);
/* Primary fcmov, then optional fcmovs for the bypass/second tests.  */
15117 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15118 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15119 operands[2], operands[3])));
15121 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15122 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15123 operands[3], operands[0])));
15125 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15126 gen_rtx_IF_THEN_ELSE (mode, second_test,
15127 operands[2], operands[0])));
15132 /* Expand a floating-point vector conditional move; a vcond operation
15133 rather than a movcc operation. */
/* operands: 0 = dest, 1/2 = true/false arms, 3 = comparison rtx,
   4/5 = comparison operands.  Same strategy as the scalar SSE movcc:
   canonicalize, try min/max, else mask-compare + logical select.  */
15136 ix86_expand_fp_vcond (rtx operands[])
15138 enum rtx_code code = GET_CODE (operands[3]);
15141 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15142 &operands[4], &operands[5]);
15143 if (code == UNKNOWN)
15146 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15147 operands[5], operands[1], operands[2]))
15150 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15151 operands[1], operands[2]);
15152 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15156 /* Expand a signed/unsigned integral vector conditional move. */
/* operands as in ix86_expand_fp_vcond.  Hardware only has EQ/GT
   (and GTU only via tricks), so the condition is canonicalized and
   NEGATE flips which arm is selected.  */
15159 ix86_expand_int_vcond (rtx operands[])
15161 enum machine_mode mode = GET_MODE (operands[0]);
15162 enum rtx_code code = GET_CODE (operands[3]);
15163 bool negate = false;
15166 cop0 = operands[4];
15167 cop1 = operands[5];
15169 /* SSE5 supports all of the comparisons on all vector int types. */
15172 /* Canonicalize the comparison to EQ, GT, GTU. */
15183 code = reverse_condition (code);
15189 code = reverse_condition (code);
15195 code = swap_condition (code);
15196 x = cop0, cop0 = cop1, cop1 = x;
15200 gcc_unreachable ();
15203 /* Only SSE4.1/SSE4.2 supports V2DImode. */
15204 if (mode == V2DImode)
15209 /* SSE4.1 supports EQ. */
15210 if (!TARGET_SSE4_1)
15216 /* SSE4.2 supports GT/GTU. */
15217 if (!TARGET_SSE4_2)
15222 gcc_unreachable ();
15226 /* Unsigned parallel compare is not supported by the hardware. Play some
15227 tricks to turn this into a signed comparison against 0. */
15230 cop0 = force_reg (mode, cop0);
15239 /* Perform a parallel modulo subtraction. */
15240 t1 = gen_reg_rtx (mode);
15241 emit_insn ((mode == V4SImode
15243 : gen_subv2di3) (t1, cop0, cop1));
15245 /* Extract the original sign bit of op0. */
15246 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
15248 t2 = gen_reg_rtx (mode);
15249 emit_insn ((mode == V4SImode
15251 : gen_andv2di3) (t2, cop0, mask));
15253 /* XOR it back into the result of the subtraction. This results
15254 in the sign bit set iff we saw unsigned underflow. */
15255 x = gen_reg_rtx (mode);
15256 emit_insn ((mode == V4SImode
15258 : gen_xorv2di3) (x, t1, t2));
15266 /* Perform a parallel unsigned saturating subtraction. */
15267 x = gen_reg_rtx (mode);
15268 emit_insn (gen_rtx_SET (VOIDmode, x,
15269 gen_rtx_US_MINUS (mode, cop0, cop1)));
15276 gcc_unreachable ();
/* After the tricks above the compare is against zero.  */
15280 cop1 = CONST0_RTX (mode);
/* NEGATE swaps which arm goes with the mask via the 1+negate /
   2-negate operand indices.  */
15284 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
15285 operands[1+negate], operands[2-negate]);
15287 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
15288 operands[2-negate]);
15292 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
15293 true if we should do zero extension, else sign extension. HIGH_P is
15294 true if we want the N/2 high elements, else the low elements. */
15297 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15299 enum machine_mode imode = GET_MODE (operands[1]);
15300 rtx (*unpack)(rtx, rtx, rtx);
/* Pick the interleave insn by source element width and half.  */
15307 unpack = gen_vec_interleave_highv16qi;
15309 unpack = gen_vec_interleave_lowv16qi;
15313 unpack = gen_vec_interleave_highv8hi;
15315 unpack = gen_vec_interleave_lowv8hi;
15319 unpack = gen_vec_interleave_highv4si;
15321 unpack = gen_vec_interleave_lowv4si;
15324 gcc_unreachable ();
15327 dest = gen_lowpart (imode, operands[0]);
/* Second interleave operand: zero vector for zero-extend, else a
   per-element sign mask (x > 0 compare of 0 against the source).  */
15330 se = force_reg (imode, CONST0_RTX (imode));
15332 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
15333 operands[1], pc_rtx, pc_rtx);
15335 emit_insn (unpack (dest, operands[1], se));
15338 /* This function performs the same task as ix86_expand_sse_unpack,
15339 but with SSE4.1 instructions. */
15342 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15344 enum machine_mode imode = GET_MODE (operands[1]);
15345 rtx (*unpack)(rtx, rtx);
/* Select the SSE4.1 pmovzx/pmovsx expander for the source mode:
   zero-extend when UNSIGNED_P, sign-extend otherwise.  */
15352 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
15354 unpack = gen_sse4_1_extendv8qiv8hi2;
15358 unpack = gen_sse4_1_zero_extendv4hiv4si2;
15360 unpack = gen_sse4_1_extendv4hiv4si2;
15364 unpack = gen_sse4_1_zero_extendv2siv2di2;
15366 unpack = gen_sse4_1_extendv2siv2di2;
15369 gcc_unreachable ();
15372 dest = operands[0];
/* pmovzx/pmovsx read the low half of the source; for HIGH_P, first
   move the high 8 bytes down with a 128-bit logical right shift.  */
15375 /* Shift higher 8 bytes to lower 8 bytes. */
15376 src = gen_reg_rtx (imode);
15377 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
15378 gen_lowpart (TImode, operands[1]),
15384 emit_insn (unpack (dest, src));
15387 /* This function performs the same task as ix86_expand_sse_unpack,
15388 but with sse5 instructions. */
15391 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15393 enum machine_mode imode = GET_MODE (operands[1]);
/* Selector bytes for the SSE5 PPERM instruction; one entry per byte of
   the 16-byte result.  */
15394 int pperm_bytes[16];
/* H is the byte offset of the half being widened (8 for the high half).  */
15396 int h = (high_p) ? 8 : 0;
15399 rtvec v = rtvec_alloc (16);
15402 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: each result element is a source byte followed by
   either zero (unsigned) or a PPERM_SIGN replication of its sign.  */
15407 vs = rtvec_alloc (8);
15408 h2 = (high_p) ? 8 : 0;
15409 for (i = 0; i < 8; i++)
15411 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
15412 pperm_bytes[2*i+1] = ((unsigned_p)
15414 : PPERM_SIGN | PPERM_SRC2 | i | h);
15417 for (i = 0; i < 16; i++)
15418 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15420 for (i = 0; i < 8; i++)
15421 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15423 p = gen_rtx_PARALLEL (VOIDmode, vs);
15424 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15426 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
15428 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: copy the two source bytes of each element, then two
   bytes of zero or replicated sign.  */
15432 vs = rtvec_alloc (4);
15433 h2 = (high_p) ? 4 : 0;
15434 for (i = 0; i < 4; i++)
15436 sign_extend = ((unsigned_p)
15438 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
15439 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
15440 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
15441 pperm_bytes[4*i+2] = sign_extend;
15442 pperm_bytes[4*i+3] = sign_extend;
15445 for (i = 0; i < 16; i++)
15446 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15448 for (i = 0; i < 4; i++)
15449 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15451 p = gen_rtx_PARALLEL (VOIDmode, vs);
15452 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15454 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
15456 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four source bytes per element followed by four bytes of
   zero or replicated sign (taken from the element's top byte).  */
15460 vs = rtvec_alloc (2);
15461 h2 = (high_p) ? 2 : 0;
15462 for (i = 0; i < 2; i++)
15464 sign_extend = ((unsigned_p)
15466 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
15467 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
15468 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
15469 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
15470 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
15471 pperm_bytes[8*i+4] = sign_extend;
15472 pperm_bytes[8*i+5] = sign_extend;
15473 pperm_bytes[8*i+6] = sign_extend;
15474 pperm_bytes[8*i+7] = sign_extend;
15477 for (i = 0; i < 16; i++)
15478 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15480 for (i = 0; i < 2; i++)
15481 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15483 p = gen_rtx_PARALLEL (VOIDmode, vs);
15484 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15486 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
15488 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
15492 gcc_unreachable ();
15498 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
15499 next narrower integer vector type */
15501 ix86_expand_sse5_pack (rtx operands[3])
15503 enum machine_mode imode = GET_MODE (operands[0]);
/* PPERM selector bytes for the 16-byte result.  */
15504 int pperm_bytes[16];
15506 rtvec v = rtvec_alloc (16);
15508 rtx op0 = operands[0];
15509 rtx op1 = operands[1];
15510 rtx op2 = operands[2];
/* V16QI result: take the even bytes of op1 for the low 8 result bytes
   and the even bytes of op2 for the high 8.  */
15515 for (i = 0; i < 8; i++)
15517 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
15518 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
15521 for (i = 0; i < 16; i++)
15522 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15524 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15525 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V8HI result: take the low two bytes of each 32-bit element.  */
15529 for (i = 0; i < 4; i++)
15531 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
15532 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
15533 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
15534 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
15537 for (i = 0; i < 16; i++)
15538 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15540 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15541 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V4SI result: take the low four bytes of each 64-bit element.  */
15545 for (i = 0; i < 2; i++)
15547 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
15548 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
15549 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
15550 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
15551 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
15552 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
15553 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
15554 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
15557 for (i = 0; i < 16; i++)
15558 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15560 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15561 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
15565 gcc_unreachable ();
15571 /* Expand conditional increment or decrement using adc/sbb instructions.
15572 The default case using setcc followed by the conditional move can be
15573 done by generic code. */
15575 ix86_expand_int_addcc (rtx operands[])
15577 enum rtx_code code = GET_CODE (operands[1]);
15579 rtx val = const0_rtx;
15580 bool fpcmp = false;
15581 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +/-1 adjustments can be done via the carry flag.  */
15583 if (operands[3] != const1_rtx
15584 && operands[3] != constm1_rtx)
/* The comparison must be expressible as a carry-flag test.  */
15586 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15587 ix86_compare_op1, &compare_op))
15589 code = GET_CODE (compare_op);
15591 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15592 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15595 code = ix86_fp_compare_code_to_integer (code);
/* Reverse the condition in place; FP comparisons must preserve
   unordered-ness when reversed.  */
15602 PUT_CODE (compare_op,
15603 reverse_condition_maybe_unordered
15604 (GET_CODE (compare_op)));
15606 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15608 PUT_MODE (compare_op, mode);
15610 /* Construct either adc or sbb insn. */
15611 if ((code == LTU) == (operands[3] == constm1_rtx))
15613 switch (GET_MODE (operands[0]))
15616 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
15619 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
15622 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
15625 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15628 gcc_unreachable ();
15633 switch (GET_MODE (operands[0]))
15636 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
15639 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
15642 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
15645 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
15648 gcc_unreachable ();
15651 return 1; /* DONE */
15655 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
15656 works for floating point parameters and nonoffsetable memories.
15657 For pushes, it returns just stack offsets; the values will be saved
15658 in the right order. Maximally three parts are generated. */
/* NOTE(review): the assert below allows up to four parts (size <= 4);
   the "three parts" in the comment above looks stale -- confirm.  */
15661 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* XFmode occupies 12 bytes on 32-bit targets, hence 3 SImode parts.  */
15666 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
15668 size = (GET_MODE_SIZE (mode) + 4) / 8;
15670 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
15671 gcc_assert (size >= 2 && size <= 4);
15673 /* Optimize constant pool reference to immediates. This is used by fp
15674 moves, that force all constants to memory to allow combining. */
15675 if (MEM_P (operand) && MEM_READONLY_P (operand))
15677 rtx tmp = maybe_get_pool_constant (operand);
15682 if (MEM_P (operand) && !offsettable_memref_p (operand))
15684 /* The only non-offsetable memories we handle are pushes. */
15685 int ok = push_operand (operand, VOIDmode);
15689 operand = copy_rtx (operand);
15690 PUT_MODE (operand, Pmode);
15691 parts[0] = parts[1] = parts[2] = parts[3] = operand;
15695 if (GET_CODE (operand) == CONST_VECTOR)
15697 enum machine_mode imode = int_mode_for_mode (mode);
15698 /* Caution: if we looked through a constant pool memory above,
15699 the operand may actually have a different mode now. That's
15700 ok, since we want to pun this all the way back to an integer. */
15701 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
15702 gcc_assert (operand != NULL);
/* 32-bit target path: split into SImode pieces.  */
15708 if (mode == DImode)
15709 split_di (&operand, 1, &parts[0], &parts[1]);
15714 if (REG_P (operand))
/* Hard-register splitting assumes consecutive regnos; only valid
   after reload.  */
15716 gcc_assert (reload_completed);
15717 for (i = 0; i < size; i++)
15718 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
15720 else if (offsettable_memref_p (operand))
15722 operand = adjust_address (operand, SImode, 0);
15723 parts[0] = operand;
15724 for (i = 1; i < size; i++)
15725 parts[i] = adjust_address (operand, SImode, 4 * i);
15727 else if (GET_CODE (operand) == CONST_DOUBLE)
15732 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* Convert the FP constant into target-format 32-bit words.  */
15736 real_to_target (l, &r, mode);
15737 parts[3] = gen_int_mode (l[3], SImode);
15738 parts[2] = gen_int_mode (l[2], SImode);
15741 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
15742 parts[2] = gen_int_mode (l[2], SImode);
15745 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15748 gcc_unreachable ();
15750 parts[1] = gen_int_mode (l[1], SImode);
15751 parts[0] = gen_int_mode (l[0], SImode);
15754 gcc_unreachable ();
/* 64-bit target path: split into DImode pieces.  */
15759 if (mode == TImode)
15760 split_ti (&operand, 1, &parts[0], &parts[1]);
15761 if (mode == XFmode || mode == TFmode)
/* XFmode's upper part is only 32 bits on 64-bit targets.  */
15763 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
15764 if (REG_P (operand))
15766 gcc_assert (reload_completed);
15767 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
15768 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
15770 else if (offsettable_memref_p (operand))
15772 operand = adjust_address (operand, DImode, 0);
15773 parts[0] = operand;
15774 parts[1] = adjust_address (operand, upper_mode, 8);
15776 else if (GET_CODE (operand) == CONST_DOUBLE)
15781 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
15782 real_to_target (l, &r, mode);
15784 /* Do not use shift by 32 to avoid warning on 32bit systems. */
15785 if (HOST_BITS_PER_WIDE_INT >= 64)
15788 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
15789 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
15792 parts[0] = immed_double_const (l[0], l[1], DImode);
15794 if (upper_mode == SImode)
15795 parts[1] = gen_int_mode (l[2], SImode);
15796 else if (HOST_BITS_PER_WIDE_INT >= 64)
15799 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
15800 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
15803 parts[1] = immed_double_const (l[2], l[3], DImode);
15806 gcc_unreachable ();
15813 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
15814 Return false when normal moves are needed; true when all required
15815 insns have been emitted. Operands 2-5 contain the input values
15816 in the correct order; operands 6-9 contain the output values. */
15819 ix86_split_long_move (rtx operands[])
15824 int collisions = 0;
15825 enum machine_mode mode = GET_MODE (operands[0]);
/* Per-part record of destination/source-address overlap, up to 4 parts.  */
15826 bool collisionparts[4];
15828 /* The DFmode expanders may ask us to move double.
15829 For 64bit target this is single move. By hiding the fact
15830 here we simplify i386.md splitters. */
15831 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
15833 /* Optimize constant pool reference to immediates. This is used by
15834 fp moves, that force all constants to memory to allow combining. */
15836 if (MEM_P (operands[1])
15837 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
15838 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
15839 operands[1] = get_pool_constant (XEXP (operands[1], 0));
15840 if (push_operand (operands[0], VOIDmode))
15842 operands[0] = copy_rtx (operands[0]);
15843 PUT_MODE (operands[0], Pmode);
15846 operands[0] = gen_lowpart (DImode, operands[0]);
15847 operands[1] = gen_lowpart (DImode, operands[1]);
15848 emit_move_insn (operands[0], operands[1]);
15852 /* The only non-offsettable memory we handle is push. */
15853 if (push_operand (operands[0], VOIDmode))
15856 gcc_assert (!MEM_P (operands[0])
15857 || offsettable_memref_p (operands[0]));
/* Decompose both operands into word-sized parts.  */
15859 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
15860 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
15862 /* When emitting push, take care for source operands on the stack. */
15863 if (push && MEM_P (operands[1])
15864 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
15865 for (i = 0; i < nparts - 1; i++)
15866 part[1][i] = change_address (part[1][i],
15867 GET_MODE (part[1][i]),
15868 XEXP (part[1][i + 1], 0));
15870 /* We need to do copy in the right order in case an address register
15871 of the source overlaps the destination. */
15872 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
15876 for (i = 0; i < nparts; i++)
15879 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
15880 if (collisionparts[i])
15884 /* Collision in the middle part can be handled by reordering. */
15885 if (collisions == 1 && nparts == 3 && collisionparts [1])
15887 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
15888 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
15890 else if (collisions == 1
15892 && (collisionparts [1] || collisionparts [2]))
15894 if (collisionparts [1])
15896 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
15897 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
15901 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
15902 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
15906 /* If there are more collisions, we can't handle it by reordering.
15907 Do an lea to the last part and use only one colliding move. */
15908 else if (collisions > 1)
15914 base = part[0][nparts - 1];
15916 /* Handle the case when the last part isn't valid for lea.
15917 Happens in 64-bit mode storing the 12-byte XFmode. */
15918 if (GET_MODE (base) != Pmode)
15919 base = gen_rtx_REG (Pmode, REGNO (base));
/* Materialize the source address in BASE, then rewrite every
   source part to be BASE + constant offset.  */
15921 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
15922 part[1][0] = replace_equiv_address (part[1][0], base);
15923 for (i = 1; i < nparts; i++)
15925 tmp = plus_constant (base, UNITS_PER_WORD * i);
15926 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path: 12-byte XF pushes need the stack pointer adjusted by an
   extra 4 bytes so the total push is 16 bytes.  */
15937 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
15938 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
15939 emit_move_insn (part[0][2], part[1][2]);
15941 else if (nparts == 4)
15943 emit_move_insn (part[0][3], part[1][3]);
15944 emit_move_insn (part[0][2], part[1][2]);
15949 /* In 64bit mode we don't have 32bit push available. In case this is
15950 register, it is OK - we will just use larger counterpart. We also
15951 retype memory - these come from attempt to avoid REX prefix on
15952 moving of second half of TFmode value. */
15953 if (GET_MODE (part[1][1]) == SImode)
15955 switch (GET_CODE (part[1][1]))
15958 part[1][1] = adjust_address (part[1][1], DImode, 0);
15962 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
15966 gcc_unreachable ();
15969 if (GET_MODE (part[1][0]) == SImode)
15970 part[1][0] = part[1][1];
15973 emit_move_insn (part[0][1], part[1][1]);
15974 emit_move_insn (part[0][0], part[1][0]);
15978 /* Choose correct order to not overwrite the source before it is copied. */
15979 if ((REG_P (part[0][0])
15980 && REG_P (part[1][1])
15981 && (REGNO (part[0][0]) == REGNO (part[1][1])
15983 && REGNO (part[0][0]) == REGNO (part[1][2]))
15985 && REGNO (part[0][0]) == REGNO (part[1][3]))))
15987 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Overlap detected: emit the part moves in reverse order.  */
15989 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
15991 operands[2 + i] = part[0][j];
15992 operands[6 + i] = part[1][j];
15997 for (i = 0; i < nparts; i++)
15999 operands[2 + i] = part[0][i];
16000 operands[6 + i] = part[1][i];
16004 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16005 if (optimize_insn_for_size_p ())
16007 for (j = 0; j < nparts - 1; j++)
16008 if (CONST_INT_P (operands[6 + j])
16009 && operands[6 + j] != const0_rtx
16010 && REG_P (operands[2 + j]))
16011 for (i = j; i < nparts - 1; i++)
16012 if (CONST_INT_P (operands[7 + i])
16013 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
/* Reuse the register already holding the constant.  */
16014 operands[7 + i] = operands[2 + j];
16017 for (i = 0; i < nparts; i++)
16018 emit_move_insn (operands[2 + i], operands[6 + i]);
16023 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16024 left shift by a constant, either using a single shift or
16025 a sequence of add instructions. */
16028 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* count == 1: a single self-add doubles the value.  */
16032 emit_insn ((mode == DImode
16034 : gen_adddi3) (operand, operand, operand));
/* Use repeated adds only when not optimizing for size and the add
   sequence is no more costly than one constant shift.  */
16036 else if (!optimize_insn_for_size_p ()
16037 && count * ix86_cost->add <= ix86_cost->shift_const)
16040 for (i=0; i<count; i++)
16042 emit_insn ((mode == DImode
16044 : gen_adddi3) (operand, operand, operand));
16048 emit_insn ((mode == DImode
16050 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into single-word operations.  SCRATCH, if non-null, is a spare register
   used on the TARGET_CMOVE path.  */
16054 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16056 rtx low[2], high[2];
16058 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count: resolve everything at expand time.  */
16060 if (CONST_INT_P (operands[2]))
16062 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16063 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shifting by a full word or more: low word becomes zero, high word
   takes the old low word shifted by the remainder.  */
16065 if (count >= single_width)
16067 emit_move_insn (high[0], low[1]);
16068 emit_move_insn (low[0], const0_rtx);
16070 if (count > single_width)
16071 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Sub-word constant shift: shld the carried-out bits into the high
   word, then shift the low word.  */
16075 if (!rtx_equal_p (operands[0], operands[1]))
16076 emit_move_insn (operands[0], operands[1]);
16077 emit_insn ((mode == DImode
16079 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16080 ix86_expand_ashl_const (low[0], count, mode);
16085 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16087 if (operands[1] == const1_rtx)
16089 /* Assuming we've chosen a QImode capable registers, then 1 << N
16090 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16091 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16093 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
/* Place 1 in the word selected by the count's word-select bit via
   setcc on ZF, then shift both words by the count.  */
16095 ix86_expand_clear (low[0]);
16096 ix86_expand_clear (high[0]);
16097 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16099 d = gen_lowpart (QImode, low[0]);
16100 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16101 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16102 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16104 d = gen_lowpart (QImode, high[0]);
16105 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16106 s = gen_rtx_NE (QImode, flags, const0_rtx);
16107 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16110 /* Otherwise, we can get the same results by manually performing
16111 a bit extract operation on bit 5/6, and then performing the two
16112 shifts. The two methods of getting 0/1 into low/high are exactly
16113 the same size. Avoiding the shift in the bit extract case helps
16114 pentium4 a bit; no one else seems to care much either way. */
16119 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16120 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16122 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16123 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract the word-select bit (bit 5 for 32-bit words, 6 for 64).  */
16125 emit_insn ((mode == DImode
16127 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16128 emit_insn ((mode == DImode
16130 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16131 emit_move_insn (low[0], high[0]);
16132 emit_insn ((mode == DImode
16134 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16137 emit_insn ((mode == DImode
16139 : gen_ashldi3) (low[0], low[0], operands[2]));
16140 emit_insn ((mode == DImode
16142 : gen_ashldi3) (high[0], high[0], operands[2]));
16146 if (operands[1] == constm1_rtx)
16148 /* For -1 << N, we can avoid the shld instruction, because we
16149 know that we're shifting 0...31/63 ones into a -1. */
16150 emit_move_insn (low[0], constm1_rtx);
16151 if (optimize_insn_for_size_p ())
16152 emit_move_insn (high[0], low[0]);
16154 emit_move_insn (high[0], constm1_rtx);
/* General variable-count path: shld + shift, then fix up when the
   count reaches a full word.  */
16158 if (!rtx_equal_p (operands[0], operands[1]))
16159 emit_move_insn (operands[0], operands[1]);
16161 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16162 emit_insn ((mode == DImode
16164 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16167 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
16169 if (TARGET_CMOVE && scratch)
16171 ix86_expand_clear (scratch);
16172 emit_insn ((mode == DImode
16173 ? gen_x86_shift_adj_1
16174 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16178 emit_insn ((mode == DImode
16179 ? gen_x86_shift_adj_2
16180 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into single-word
   operations; mirror of ix86_split_ashl.  */
16184 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16186 rtx low[2], high[2];
16188 const int single_width = mode == DImode ? 32 : 64;
16190 if (CONST_INT_P (operands[2]))
16192 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16193 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximal count: both words become copies of the sign bit.  */
16195 if (count == single_width * 2 - 1)
16197 emit_move_insn (high[0], high[1]);
16198 emit_insn ((mode == DImode
16200 : gen_ashrdi3) (high[0], high[0],
16201 GEN_INT (single_width - 1)));
16202 emit_move_insn (low[0], high[0]);
/* Count of a full word or more: low word gets the old high word
   (shifted by the remainder); high word becomes sign extension.  */
16205 else if (count >= single_width)
16207 emit_move_insn (low[0], high[1]);
16208 emit_move_insn (high[0], low[0]);
16209 emit_insn ((mode == DImode
16211 : gen_ashrdi3) (high[0], high[0],
16212 GEN_INT (single_width - 1)));
16213 if (count > single_width)
16214 emit_insn ((mode == DImode
16216 : gen_ashrdi3) (low[0], low[0],
16217 GEN_INT (count - single_width)));
/* Sub-word constant count: shrd into the low word, arithmetic shift
   of the high word.  */
16221 if (!rtx_equal_p (operands[0], operands[1]))
16222 emit_move_insn (operands[0], operands[1]);
16223 emit_insn ((mode == DImode
16225 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16226 emit_insn ((mode == DImode
16228 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
16233 if (!rtx_equal_p (operands[0], operands[1]))
16234 emit_move_insn (operands[0], operands[1]);
16236 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16238 emit_insn ((mode == DImode
16240 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16241 emit_insn ((mode == DImode
16243 : gen_ashrdi3) (high[0], high[0], operands[2]));
16245 if (TARGET_CMOVE && scratch)
/* Keep a sign-extended copy in SCRATCH for the >= word fix-up.  */
16247 emit_move_insn (scratch, high[0]);
16248 emit_insn ((mode == DImode
16250 : gen_ashrdi3) (scratch, scratch,
16251 GEN_INT (single_width - 1)));
16252 emit_insn ((mode == DImode
16253 ? gen_x86_shift_adj_1
16254 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16258 emit_insn ((mode == DImode
16259 ? gen_x86_shift_adj_3
16260 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into single-word operations;
   like ix86_split_ashr but the high word fills with zero, not sign.  */
16265 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
16267 rtx low[2], high[2];
16269 const int single_width = mode == DImode ? 32 : 64;
16271 if (CONST_INT_P (operands[2]))
16273 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16274 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Full word or more: low word gets the old high word, high word is
   cleared.  */
16276 if (count >= single_width)
16278 emit_move_insn (low[0], high[1]);
16279 ix86_expand_clear (high[0]);
16281 if (count > single_width)
16282 emit_insn ((mode == DImode
16284 : gen_lshrdi3) (low[0], low[0],
16285 GEN_INT (count - single_width)));
/* Sub-word constant count: shrd + logical shift of the high word.  */
16289 if (!rtx_equal_p (operands[0], operands[1]))
16290 emit_move_insn (operands[0], operands[1]);
16291 emit_insn ((mode == DImode
16293 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16294 emit_insn ((mode == DImode
16296 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
16301 if (!rtx_equal_p (operands[0], operands[1]))
16302 emit_move_insn (operands[0], operands[1]);
16304 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16306 emit_insn ((mode == DImode
16308 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16309 emit_insn ((mode == DImode
16311 : gen_lshrdi3) (high[0], high[0], operands[2]));
16313 /* Heh. By reversing the arguments, we can reuse this pattern. */
16314 if (TARGET_CMOVE && scratch)
16316 ix86_expand_clear (scratch);
16317 emit_insn ((mode == DImode
16318 ? gen_x86_shift_adj_1
16319 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16323 emit_insn ((mode == DImode
16324 ? gen_x86_shift_adj_2
16325 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
16329 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* PROB is in REG_BR_PROB_BASE units; the note is attached to the last
   emitted insn, which must be a jump.  */
16331 predict_jump (int prob)
16333 rtx insn = get_last_insn ();
16334 gcc_assert (JUMP_P (insn));
16336 = gen_rtx_EXPR_LIST (REG_BR_PROB,
16341 /* Helper function for the string operations below. Test VARIABLE whether
16342 it is aligned to VALUE bytes. If true, jump to the label. */
16344 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
16346 rtx label = gen_label_rtx ();
16347 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* tmpcount = variable & value; the jump is taken when the masked
   bits are zero.  */
16348 if (GET_MODE (variable) == DImode)
16349 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
16351 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
16352 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
/* Epilogue tests are even odds; prologue alignment tests are usually
   taken (90%).  */
16355 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16357 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16361 /* Adjust COUNTER by the VALUE. */
/* Emits countreg -= value using the add pattern for the counter's mode.  */
16363 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
16365 if (GET_MODE (countreg) == DImode)
16366 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
16368 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
16371 /* Zero extend possibly SImode EXP to Pmode register. */
16373 ix86_zero_extend_to_Pmode (rtx exp)
/* Constants (VOIDmode) are simply forced into a Pmode register.  */
16376 if (GET_MODE (exp) == VOIDmode)
16377 return force_reg (Pmode, exp);
/* Already Pmode: copy into a fresh pseudo.  */
16378 if (GET_MODE (exp) == Pmode)
16379 return copy_to_mode_reg (Pmode, exp);
/* Otherwise zero-extend SImode up to DImode (Pmode on 64-bit).  */
16380 r = gen_reg_rtx (Pmode);
16381 emit_insn (gen_zero_extendsidi2 (r, exp));
16385 /* Divide COUNTREG by SCALE. */
/* SCALE must be a power of two (exact_log2 below); constants are folded
   at expand time.  */
16387 scale_counter (rtx countreg, int scale)
16390 rtx piece_size_mask;
16394 if (CONST_INT_P (countreg))
16395 return GEN_INT (INTVAL (countreg) / scale);
16396 gcc_assert (REG_P (countreg));
16398 piece_size_mask = GEN_INT (scale - 1);
/* Divide via logical right shift by log2(scale).  */
16399 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
16400 GEN_INT (exact_log2 (scale)),
16401 NULL, 1, OPTAB_DIRECT);
16405 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
16406 DImode for constant loop counts. */
16408 static enum machine_mode
16409 counter_mode (rtx count_exp)
16411 if (GET_MODE (count_exp) != VOIDmode)
16412 return GET_MODE (count_exp);
16413 if (GET_CODE (count_exp) != CONST_INT)
/* DImode only when the constant does not fit in 32 bits.  */
16415 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
16420 /* When SRCPTR is non-NULL, output simple loop to move memory
16421 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
16422 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
16423 equivalent loop to set memory by VALUE (supposed to be in MODE).
16425 The size is rounded down to whole number of chunk size moved at once.
16426 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
16430 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
16431 rtx destptr, rtx srcptr, rtx value,
16432 rtx count, enum machine_mode mode, int unroll,
16435 rtx out_label, top_label, iter, tmp;
16436 enum machine_mode iter_mode = counter_mode (count);
16437 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
16438 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
16444 top_label = gen_label_rtx ();
16445 out_label = gen_label_rtx ();
16446 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a whole number of pieces.  */
16448 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
16449 NULL, 1, OPTAB_DIRECT);
16450 /* Those two should combine. */
16451 if (piece_size == const1_rtx)
/* Skip the loop entirely when SIZE is zero (unlikely: 10%).  */
16453 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
16455 predict_jump (REG_BR_PROB_BASE * 10 / 100);
16457 emit_move_insn (iter, const0_rtx);
16459 emit_label (top_label);
/* Address each chunk as ptr + iter so the pointers are only bumped
   once, after the loop.  */
16461 tmp = convert_modes (Pmode, iter_mode, iter, true);
16462 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
16463 destmem = change_address (destmem, mode, x_addr);
16467 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
16468 srcmem = change_address (srcmem, mode, y_addr);
16470 /* When unrolling for chips that reorder memory reads and writes,
16471 we can save registers by using single temporary.
16472 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the "&& 0" below permanently disables this branch --
   presumably kept for experimentation; confirm before relying on it.  */
16473 if (!TARGET_64BIT && 0)
16475 for (i = 0; i < unroll; i++)
16480 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16482 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16484 emit_move_insn (destmem, srcmem);
/* Enabled copy path: load all chunks into temporaries first, then
   store them, to decouple the reads from the writes.  */
16490 gcc_assert (unroll <= 4);
16491 for (i = 0; i < unroll; i++)
16493 tmpreg[i] = gen_reg_rtx (mode);
16497 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
16499 emit_move_insn (tmpreg[i], srcmem);
16501 for (i = 0; i < unroll; i++)
16506 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16508 emit_move_insn (destmem, tmpreg[i]);
/* Memset path: store VALUE into each chunk.  */
16513 for (i = 0; i < unroll; i++)
16517 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
16518 emit_move_insn (destmem, value);
16521 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
16522 true, OPTAB_LIB_WIDEN);
16524 emit_move_insn (iter, tmp);
16526 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive the back-edge probability from the expected trip count when
   the caller supplied one.  */
16528 if (expected_size != -1)
16530 expected_size /= GET_MODE_SIZE (mode) * unroll;
16531 if (expected_size == 0)
16533 else if (expected_size > REG_BR_PROB_BASE)
16534 predict_jump (REG_BR_PROB_BASE - 1);
16536 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
16539 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the pointers past the copied region.  */
16540 iter = ix86_zero_extend_to_Pmode (iter);
16541 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
16542 true, OPTAB_LIB_WIDEN);
16543 if (tmp != destptr)
16544 emit_move_insn (destptr, tmp);
16547 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
16548 true, OPTAB_LIB_WIDEN);
16550 emit_move_insn (srcptr, tmp);
16552 emit_label (out_label);
16555 /* Output "rep; mov" instruction.
16556 Arguments have same meaning as for previous function */
16558 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
16559 rtx destptr, rtx srcptr,
16561 enum machine_mode mode)
16567 /* If the size is known, it is shorter to use rep movs. */
/* Widen QImode rep-moves to word moves when the count is a known
   multiple of 4.  */
16568 if (mode == QImode && CONST_INT_P (count)
16569 && !(INTVAL (count) & 3))
/* Ensure the MEMs are BLKmode and based on the pointer registers the
   string insn actually uses.  */
16572 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16573 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
16574 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
16575 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
16576 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* DESTEXP/SRCEXP describe the final pointer values (ptr + count
   scaled by the element size) for the rep_mov pattern.  */
16577 if (mode != QImode)
16579 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16580 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16581 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
16582 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
16583 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16584 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
16588 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16589 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
16591 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
/* NOTE(review): incomplete listing -- original lines 16597, 16599, 16601-16604,
   16610, 16614-16615 (return type, braces, declarations) are missing from
   view.  Code left byte-identical; only comments added.  */
16595 /* Output "rep; stos" instruction.
16596 Arguments have same meaning as for previous function */
16598 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
16600 enum machine_mode mode)
/* Re-base DESTMEM as BLKmode at DESTPTR so the rep-stos pattern's address
   operand matches the pointer it updates.  */
16605 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
16606 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* VALUE must live in a register of the store mode for the stos pattern.  */
16607 value = force_reg (mode, gen_lowpart (mode, value));
16608 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* DESTEXP describes the final destination pointer:
   destptr + (countreg << log2(mode size)) for wide modes, or
   destptr + countreg for byte mode.  */
16609 if (mode != QImode)
16611 destexp = gen_rtx_ASHIFT (Pmode, countreg,
16612 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
16613 destexp = gen_rtx_PLUS (Pmode, destexp, destptr)
16616 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
16617 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single string-move of MODE at byte OFFSET from SRCMEM to DESTMEM,
   advancing DESTPTR and SRCPTR via the strmov pattern.
   NOTE(review): the `static void` line, braces and surrounding lines
   (orig. 16620, 16623, 16627) are missing from this listing; code kept
   byte-identical.  */
16621 emit_strmov (rtx destmem, rtx srcmem,
16622 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
16624 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
16625 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
16626 emit_insn (gen_strmov (destptr, dest, srcptr, src));
/* NOTE(review): incomplete listing -- many original lines (braces, offset
   bookkeeping such as `offset += 16;`, TARGET_64BIT tests around 16641-16643
   and 16652-16656, returns) are elided, as the jumps in the embedded line
   numbers show.  Code left byte-identical; only comments added.  */
16629 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
16631 expand_movmem_epilogue (rtx destmem, rtx srcmem,
16632 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant count: emit a straight-line jump-free sequence, testing each bit
   of the residual count from 16 down to 1.  */
16635 if (CONST_INT_P (count))
16637 HOST_WIDE_INT countval = INTVAL (count);
16640 if ((countval & 0x10) && max_size > 16)
16644 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
16645 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
/* Presumably the 32-bit (non-DImode-capable) case; unreachable here --
   TODO confirm against the elided TARGET_64BIT test above.  */
16648 gcc_unreachable ();
16651 if ((countval & 0x08) && max_size > 8)
16654 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
/* 32-bit fallback: two SImode moves instead of one DImode move.  */
16657 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16658 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
16662 if ((countval & 0x04) && max_size > 4)
16664 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
16667 if ((countval & 0x02) && max_size > 2)
16669 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
16672 if ((countval & 0x01) && max_size > 1)
16674 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large variable residue: mask the count and fall back to a byte loop
   unrolled by 4.  */
16681 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
16682 count, 1, OPTAB_DIRECT);
16683 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
16684 count, QImode, 1, 4);
16688 /* When there are stringops, we can cheaply increase dest and src pointers.
16689 Otherwise we save code size by maintaining offset (zero is readily
16690 available from preceding rep operation) and using x86 addressing modes.
/* Variable residue, small max_size: test one alignment bit at a time,
   conditionally copying 4, then 2, then 1 bytes.  */
16692 if (TARGET_SINGLE_STRINGOP)
16696 rtx label = ix86_expand_aligntest (count, 4, true);
16697 src = change_address (srcmem, SImode, srcptr);
16698 dest = change_address (destmem, SImode, destptr);
16699 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16700 emit_label (label);
16701 LABEL_NUSES (label) = 1;
16705 rtx label = ix86_expand_aligntest (count, 2, true);
16706 src = change_address (srcmem, HImode, srcptr);
16707 dest = change_address (destmem, HImode, destptr);
16708 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16709 emit_label (label);
16710 LABEL_NUSES (label) = 1;
16714 rtx label = ix86_expand_aligntest (count, 1, true);
16715 src = change_address (srcmem, QImode, srcptr);
16716 dest = change_address (destmem, QImode, destptr);
16717 emit_insn (gen_strmov (destptr, dest, srcptr, src));
16718 emit_label (label);
16719 LABEL_NUSES (label) = 1;
/* No single-stringop: keep a register OFFSET (starting at 0) and use
   base+offset addressing instead of bumping the pointers.  */
16724 rtx offset = force_reg (Pmode, const0_rtx);
16729 rtx label = ix86_expand_aligntest (count, 4, true);
16730 src = change_address (srcmem, SImode, srcptr);
16731 dest = change_address (destmem, SImode, destptr);
16732 emit_move_insn (dest, src);
16733 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
16734 true, OPTAB_LIB_WIDEN);
16736 emit_move_insn (offset, tmp);
16737 emit_label (label);
16738 LABEL_NUSES (label) = 1;
16742 rtx label = ix86_expand_aligntest (count, 2, true);
16743 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16744 src = change_address (srcmem, HImode, tmp);
16745 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16746 dest = change_address (destmem, HImode, tmp);
16747 emit_move_insn (dest, src);
16748 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
16749 true, OPTAB_LIB_WIDEN);
16751 emit_move_insn (offset, tmp);
16752 emit_label (label);
16753 LABEL_NUSES (label) = 1;
16757 rtx label = ix86_expand_aligntest (count, 1, true);
16758 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
16759 src = change_address (srcmem, QImode, tmp);
16760 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
16761 dest = change_address (destmem, QImode, tmp);
16762 emit_move_insn (dest, src);
16763 emit_label (label);
16764 LABEL_NUSES (label) = 1;
/* NOTE(review): the return-type line, braces and trailing arguments
   (orig. 16770, 16773-16774, 16779-16780) are missing from this listing;
   code kept byte-identical.  */
16769 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
16771 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
16772 rtx count, int max_size)
/* Mask COUNT down to the residue, then store it with a byte loop using the
   low byte of VALUE.  */
16775 expand_simple_binop (counter_mode (count), AND, count,
16776 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
16777 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
16778 gen_lowpart (QImode, value), count, QImode,
/* NOTE(review): incomplete listing -- braces, `offset += ...` bookkeeping,
   TARGET_64BIT tests (e.g. orig. 16794-16796, 16807-16809), returns and the
   call guarded by force_loopy_epilogue are elided, per the gaps in the
   embedded line numbers.  Code left byte-identical; only comments added.  */
16782 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
16784 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
/* Constant count: straight-line stores testing each residual bit from 16
   down to 1.  VALUE is assumed already promoted/duplicated to the widest
   store mode by the caller -- TODO confirm against elided context.  */
16788 if (CONST_INT_P (count))
16790 HOST_WIDE_INT countval = INTVAL (count);
16793 if ((countval & 0x10) && max_size > 16)
16797 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
16798 emit_insn (gen_strset (destptr, dest, value));
16799 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
16800 emit_insn (gen_strset (destptr, dest, value));
16803 gcc_unreachable ();
16806 if ((countval & 0x08) && max_size > 8)
16810 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
16811 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit fallback: two SImode stores.  */
16815 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
16816 emit_insn (gen_strset (destptr, dest, value));
16817 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
16818 emit_insn (gen_strset (destptr, dest, value));
16822 if ((countval & 0x04) && max_size > 4)
16824 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
16825 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
16828 if ((countval & 0x02) && max_size > 2)
16830 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
16831 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
16834 if ((countval & 0x01) && max_size > 1)
16836 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
16837 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Large variable residue: fall back to the masked byte loop.  */
16844 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable residue: conditional stores per alignment bit, widest first.  */
16849 rtx label = ix86_expand_aligntest (count, 16, true);
16852 dest = change_address (destmem, DImode, destptr);
16853 emit_insn (gen_strset (destptr, dest, value));
16854 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit path: four SImode strsets cover the same 16 bytes.  */
16858 dest = change_address (destmem, SImode, destptr);
16859 emit_insn (gen_strset (destptr, dest, value));
16860 emit_insn (gen_strset (destptr, dest, value));
16861 emit_insn (gen_strset (destptr, dest, value));
16862 emit_insn (gen_strset (destptr, dest, value));
16864 emit_label (label);
16865 LABEL_NUSES (label) = 1;
16869 rtx label = ix86_expand_aligntest (count, 8, true);
16872 dest = change_address (destmem, DImode, destptr);
16873 emit_insn (gen_strset (destptr, dest, value));
16877 dest = change_address (destmem, SImode, destptr);
16878 emit_insn (gen_strset (destptr, dest, value));
16879 emit_insn (gen_strset (destptr, dest, value));
16881 emit_label (label);
16882 LABEL_NUSES (label) = 1;
16886 rtx label = ix86_expand_aligntest (count, 4, true);
16887 dest = change_address (destmem, SImode, destptr);
16888 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
16889 emit_label (label);
16890 LABEL_NUSES (label) = 1;
16894 rtx label = ix86_expand_aligntest (count, 2, true);
16895 dest = change_address (destmem, HImode, destptr);
16896 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
16897 emit_label (label);
16898 LABEL_NUSES (label) = 1;
16902 rtx label = ix86_expand_aligntest (count, 1, true);
16903 dest = change_address (destmem, QImode, destptr);
16904 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
16905 emit_label (label);
16906 LABEL_NUSES (label) = 1;
/* NOTE(review): the `static void` line, braces (orig. 16912, 16916, 16918,
   16926, ...) are missing from this listing; code kept byte-identical.
   Also note the original comment's "DEST to SRC" reads reversed -- this
   routine copies from SRC to DEST.  */
16910 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
16911 DESIRED_ALIGNMENT. */
16913 expand_movmem_prologue (rtx destmem, rtx srcmem,
16914 rtx destptr, rtx srcptr, rtx count,
16915 int align, int desired_alignment)
/* For each power of two below DESIRED_ALIGNMENT that ALIGN does not already
   guarantee, conditionally copy 1/2/4 bytes until DESTPTR is aligned,
   shrinking COUNT accordingly.  */
16917 if (align <= 1 && desired_alignment > 1)
16919 rtx label = ix86_expand_aligntest (destptr, 1, false);
16920 srcmem = change_address (srcmem, QImode, srcptr);
16921 destmem = change_address (destmem, QImode, destptr);
16922 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16923 ix86_adjust_counter (count, 1);
16924 emit_label (label);
16925 LABEL_NUSES (label) = 1;
16927 if (align <= 2 && desired_alignment > 2)
16929 rtx label = ix86_expand_aligntest (destptr, 2, false);
16930 srcmem = change_address (srcmem, HImode, srcptr);
16931 destmem = change_address (destmem, HImode, destptr);
16932 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16933 ix86_adjust_counter (count, 2);
16934 emit_label (label);
16935 LABEL_NUSES (label) = 1;
16937 if (align <= 4 && desired_alignment > 4)
16939 rtx label = ix86_expand_aligntest (destptr, 4, false);
16940 srcmem = change_address (srcmem, SImode, srcptr);
16941 destmem = change_address (destmem, SImode, destptr);
16942 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
16943 ix86_adjust_counter (count, 4);
16944 emit_label (label);
16945 LABEL_NUSES (label) = 1;
/* This ladder only handles alignment up to 8 bytes.  */
16947 gcc_assert (desired_alignment <= 8);
/* NOTE(review): braces and the `static void` line (orig. 16952, 16955, 16957,
   ...) are missing from this listing; code kept byte-identical.
   Mirrors expand_movmem_prologue but stores VALUE instead of copying.  */
16950 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
16951 DESIRED_ALIGNMENT. */
16953 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
16954 int align, int desired_alignment)
/* Conditionally store 1, then 2, then 4 bytes of VALUE until DESTPTR reaches
   DESIRED_ALIGNMENT, shrinking COUNT as we go.  VALUE is assumed already
   duplicated across the wider modes by the caller -- TODO confirm.  */
16956 if (align <= 1 && desired_alignment > 1)
16958 rtx label = ix86_expand_aligntest (destptr, 1, false);
16959 destmem = change_address (destmem, QImode, destptr);
16960 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
16961 ix86_adjust_counter (count, 1);
16962 emit_label (label);
16963 LABEL_NUSES (label) = 1;
16965 if (align <= 2 && desired_alignment > 2)
16967 rtx label = ix86_expand_aligntest (destptr, 2, false);
16968 destmem = change_address (destmem, HImode, destptr);
16969 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
16970 ix86_adjust_counter (count, 2);
16971 emit_label (label);
16972 LABEL_NUSES (label) = 1;
16974 if (align <= 4 && desired_alignment > 4)
16976 rtx label = ix86_expand_aligntest (destptr, 4, false);
16977 destmem = change_address (destmem, SImode, destptr);
16978 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
16979 ix86_adjust_counter (count, 4);
16980 emit_label (label);
16981 LABEL_NUSES (label) = 1;
16983 gcc_assert (desired_alignment <= 8);
/* NOTE(review): incomplete listing -- braces, `int i;`, `break;` statements,
   the TARGET_64BIT operand of the ?: at orig. 16997, and several statement
   lines are elided (see the jumps in the embedded line numbers).  Code kept
   byte-identical; only comments added.
   BUG(review): `NAX_STRINGOP_ALGS` below (two occurrences) looks like a
   transcription corruption of `MAX_STRINGOP_ALGS` -- verify against the
   upstream source; it would not compile as written.  */
16986 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
16987 static enum stringop_alg
16988 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
16989 int *dynamic_check)
16991 const struct stringop_algs * algs;
16992 /* Algorithms using the rep prefix want at least edi and ecx;
16993 additionally, memset wants eax and memcpy wants esi. Don't
16994 consider such algorithms if the user has appropriated those
16995 registers for their own purposes. */
16996 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
16998 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
/* An algorithm is usable either when rep prefixes are allowed or when it is
   not one of the rep-prefix variants.  */
17000 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17001 || (alg != rep_prefix_1_byte \
17002 && alg != rep_prefix_4_byte \
17003 && alg != rep_prefix_8_byte))
17004 const struct processor_costs *cost;
17006 cost = optimize_insn_for_size_p () ? &ix86_size_cost : ix86_cost;
17008 *dynamic_check = -1;
/* Pick the per-CPU cost table for memset vs. memcpy, 32- vs. 64-bit.  */
17010 algs = &cost->memset[TARGET_64BIT != 0];
17012 algs = &cost->memcpy[TARGET_64BIT != 0];
/* A -mstringop-strategy= override wins when its register needs are met.  */
17013 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17014 return stringop_alg;
17015 /* rep; movq or rep; movl is the smallest variant. */
17016 else if (optimize_insn_for_size_p ())
17018 if (!count || (count & 3))
17019 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17021 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17023 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
17025 else if (expected_size != -1 && expected_size < 4)
17026 return loop_1_byte;
/* Known (or profiled) size: walk the cost table's size buckets and take the
   first usable algorithm whose bucket covers EXPECTED_SIZE.  */
17027 else if (expected_size != -1)
17030 enum stringop_alg alg = libcall;
17031 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
17033 /* We get here if the algorithms that were not libcall-based
17034 were rep-prefix based and we are unable to use rep prefixes
17035 based on global register usage. Break out of the loop and
17036 use the heuristic below. */
17037 if (algs->size[i].max == 0)
17039 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17041 enum stringop_alg candidate = algs->size[i].alg;
17043 if (candidate != libcall && ALG_USABLE_P (candidate))
17045 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17046 last non-libcall inline algorithm. */
17047 if (TARGET_INLINE_ALL_STRINGOPS)
17049 /* When the current size is best to be copied by a libcall,
17050 but we are still forced to inline, run the heuristic below
17051 that will pick code for medium sized blocks. */
17052 if (alg != libcall)
17056 else if (ALG_USABLE_P (candidate))
17060 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17062 /* When asked to inline the call anyway, try to pick meaningful choice.
17063 We look for maximal size of block that is faster to copy by hand and
17064 take blocks of at most of that size guessing that average size will
17065 be roughly half of the block.
17067 If this turns out to be bad, we might simply specify the preferred
17068 choice in ix86_costs. */
17069 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17070 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
17073 enum stringop_alg alg;
17075 bool any_alg_usable_p = true;
17077 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
17079 enum stringop_alg candidate = algs->size[i].alg;
17080 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
17082 if (candidate != libcall && candidate
17083 && ALG_USABLE_P (candidate))
17084 max = algs->size[i].max;
17086 /* If there aren't any usable algorithms, then recursing on
17087 smaller sizes isn't going to find anything. Just return the
17088 simple byte-at-a-time copy loop. */
17089 if (!any_alg_usable_p)
17091 /* Pick something reasonable. */
17092 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17093 *dynamic_check = 128;
17094 return loop_1_byte;
/* Recurse with half the maximal inline-profitable size as the expected
   size; the recursion must settle on a concrete inline algorithm.  */
17098 alg = decide_alg (count, max / 2, memset, dynamic_check);
17099 gcc_assert (*dynamic_check == -1);
17100 gcc_assert (alg != libcall);
17101 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17102 *dynamic_check = max;
17105 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
17106 #undef ALG_USABLE_P
/* NOTE(review): incomplete listing -- the `static int` line, the
   EXPECTED_SIZE parameter declaration, the switch head, `case no_stringop:`,
   `break;` lines and the `desired_align = 8;` bodies of the PentiumPro tests
   are elided (orig. 17111, 17114-17115, 17117-17119, 17124, 17126-17127,
   17132-17135, 17140-17152).  Code kept byte-identical.  */
17109 /* Decide on alignment. We know that the operand is already aligned to ALIGN
17110 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
17112 decide_alignment (int align,
17113 enum stringop_alg alg,
17116 int desired_align = 0;
17120 gcc_unreachable ();
17122 case unrolled_loop:
/* Word-sized loops want word alignment.  */
17123 desired_align = GET_MODE_SIZE (Pmode);
17125 case rep_prefix_8_byte:
17128 case rep_prefix_4_byte:
17129 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
17130 copying whole cacheline at once. */
17131 if (TARGET_PENTIUMPRO)
17136 case rep_prefix_1_byte:
17137 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
17138 copying whole cacheline at once. */
17139 if (TARGET_PENTIUMPRO)
/* Never ask for less alignment than we already have, and don't bother
   aligning blocks expected to be tiny.  */
17153 if (desired_align < align)
17154 desired_align = align;
17155 if (expected_size != -1 && expected_size < 4)
17156 desired_align = align;
17157 return desired_align;
17160 /* Return the smallest power of 2 greater than VAL. */
17162 smallest_pow2_greater_than (int val)
/* NOTE(review): incomplete listing -- the `int` return-type line, many
   braces, local declarations (label, destreg, srcreg, dynamic_check, tmp),
   `return 0/1;` statements, the switch heads and several `break;`/`case`
   lines are elided, per the gaps in the embedded line numbers.  Code kept
   byte-identical; only comments added.  */
17170 /* Expand string move (memcpy) operation. Use i386 string operations when
17171 profitable. expand_setmem contains similar code. The code depends upon
17172 architecture, block size and alignment, but always has the same
17175 1) Prologue guard: Conditional that jumps up to epilogues for small
17176 blocks that can be handled by epilogue alone. This is faster but
17177 also needed for correctness, since prologue assume the block is larger
17178 than the desired alignment.
17180 Optional dynamic check for size and libcall for large
17181 blocks is emitted here too, with -minline-stringops-dynamically.
17183 2) Prologue: copy first few bytes in order to get destination aligned
17184 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
17185 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
17186 We emit either a jump tree on power of two sized blocks, or a byte loop.
17188 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
17189 with specified algorithm.
17191 4) Epilogue: code copying tail of the block that is too small to be
17192 handled by main body (or up to size guarded by prologue guard). */
17195 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
17196 rtx expected_align_exp, rtx expected_size_exp)
17202 rtx jump_around_label = NULL;
17203 HOST_WIDE_INT align = 1;
17204 unsigned HOST_WIDE_INT count = 0;
17205 HOST_WIDE_INT expected_size = -1;
17206 int size_needed = 0, epilogue_size_needed;
17207 int desired_align = 0;
17208 enum stringop_alg alg;
/* Gather compile-time facts: alignment, constant count, expected size.  */
17211 if (CONST_INT_P (align_exp))
17212 align = INTVAL (align_exp);
17213 /* i386 can do misaligned access on reasonably increased cost. */
17214 if (CONST_INT_P (expected_align_exp)
17215 && INTVAL (expected_align_exp) > align)
17216 align = INTVAL (expected_align_exp);
17217 if (CONST_INT_P (count_exp))
17218 count = expected_size = INTVAL (count_exp);
17219 if (CONST_INT_P (expected_size_exp) && count == 0)
17220 expected_size = INTVAL (expected_size_exp);
17222 /* Make sure we don't need to care about overflow later on. */
17223 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17226 /* Step 0: Decide on preferred algorithm, desired alignment and
17227 size of chunks to be copied by main loop. */
17229 alg = decide_alg (count, expected_size, false, &dynamic_check);
17230 desired_align = decide_alignment (align, alg, expected_size);
17232 if (!TARGET_ALIGN_STRINGOPS)
17233 align = desired_align;
/* libcall means "give up and let the caller emit a library call".  */
17235 if (alg == libcall)
17237 gcc_assert (alg != no_stringop);
17239 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
17240 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17241 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* SIZE_NEEDED = bytes moved per main-loop iteration for each algorithm.  */
17246 gcc_unreachable ();
17248 size_needed = GET_MODE_SIZE (Pmode);
17250 case unrolled_loop:
17251 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
17253 case rep_prefix_8_byte:
17256 case rep_prefix_4_byte:
17259 case rep_prefix_1_byte:
17265 epilogue_size_needed = size_needed;
17267 /* Step 1: Prologue guard. */
17269 /* Alignment code needs count to be in register. */
17270 if (CONST_INT_P (count_exp) && desired_align > align)
17271 count_exp = force_reg (counter_mode (count_exp), count_exp);
17272 gcc_assert (desired_align >= 1 && align >= 1);
17274 /* Ensure that alignment prologue won't copy past end of block. */
17275 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17277 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17278 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17279 Make sure it is power of 2. */
17280 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17282 if (CONST_INT_P (count_exp))
17284 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
/* Variable count: branch to the epilogue when the block is too small for
   prologue + main loop; predict by EXPECTED_SIZE when available.  */
17289 label = gen_label_rtx ();
17290 emit_cmp_and_jump_insns (count_exp,
17291 GEN_INT (epilogue_size_needed),
17292 LTU, 0, counter_mode (count_exp), 1, label);
17293 if (expected_size == -1 || expected_size < epilogue_size_needed)
17294 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17296 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17300 /* Emit code to decide on runtime whether library call or inline should be
17302 if (dynamic_check != -1)
17304 if (CONST_INT_P (count_exp))
17306 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
17308 emit_block_move_via_libcall (dst, src, count_exp, false);
17309 count_exp = const0_rtx;
/* Runtime size check: big blocks take the libcall path, small ones fall
   through to the inline code; the libcall side is predicted cold.  */
17315 rtx hot_label = gen_label_rtx ();
17316 jump_around_label = gen_label_rtx ();
17317 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17318 LEU, 0, GET_MODE (count_exp), 1, hot_label);
17319 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17320 emit_block_move_via_libcall (dst, src, count_exp, false);
17321 emit_jump (jump_around_label);
17322 emit_label (hot_label);
17326 /* Step 2: Alignment prologue. */
17328 if (desired_align > align)
17330 /* Except for the first move in epilogue, we no longer know
17331 constant offset in aliasing info. It don't seems to worth
17332 the pain to maintain it for the first move, so throw away
17334 src = change_address (src, BLKmode, srcreg);
17335 dst = change_address (dst, BLKmode, destreg);
17336 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
/* If the main loop moves single bytes, the small-block guard can land
   right here -- no separate epilogue needed.  */
17339 if (label && size_needed == 1)
17341 emit_label (label);
17342 LABEL_NUSES (label) = 1;
17346 /* Step 3: Main loop. */
17352 gcc_unreachable ();
17354 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17355 count_exp, QImode, 1, expected_size);
17358 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17359 count_exp, Pmode, 1, expected_size);
17361 case unrolled_loop:
17362 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
17363 registers for 4 temporaries anyway. */
17364 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
17365 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
17368 case rep_prefix_8_byte:
17369 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17372 case rep_prefix_4_byte:
17373 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17376 case rep_prefix_1_byte:
17377 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
17381 /* Adjust properly the offset of src and dest memory for aliasing. */
17382 if (CONST_INT_P (count_exp))
17384 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
17385 (count / size_needed) * size_needed);
17386 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17387 (count / size_needed) * size_needed);
17391 src = change_address (src, BLKmode, srcreg);
17392 dst = change_address (dst, BLKmode, destreg);
17395 /* Step 4: Epilogue to copy the remaining bytes. */
17399 /* When the main loop is done, COUNT_EXP might hold original count,
17400 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
17401 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
17402 bytes. Compensate if needed. */
17404 if (size_needed < epilogue_size_needed)
17407 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17408 GEN_INT (size_needed - 1), count_exp, 1,
17410 if (tmp != count_exp)
17411 emit_move_insn (count_exp, tmp);
17413 emit_label (label);
17414 LABEL_NUSES (label) = 1;
17417 if (count_exp != const0_rtx && epilogue_size_needed > 1)
17418 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
17419 epilogue_size_needed);
17420 if (jump_around_label)
17421 emit_label (jump_around_label);
/* NOTE(review): incomplete listing -- the `static rtx` line, braces,
   `rtx tmp;`, `return ...` statements, and the bodies of several
   conditionals (e.g. orig. 17443-17445, 17449-17450, 17456, 17458,
   17467-17470, 17493-17496) are elided.  Code kept byte-identical.
   (Despite the comment saying "memcpy", this helper serves memset: it
   splats a byte value across a wider register.)  */
17425 /* Helper function for memcpy. For QImode value 0xXY produce
17426 0xXYXYXYXY of wide specified by MODE. This is essentially
17427 a * 0x10101010, but we can do slightly better than
17428 synth_mult by unwinding the sequence by hand on CPUs with
17431 promote_duplicated_reg (enum machine_mode mode, rtx val)
17433 enum machine_mode valmode = GET_MODE (val);
17435 int nops = mode == DImode ? 3 : 2;
17437 gcc_assert (mode == SImode || mode == DImode);
17438 if (val == const0_rtx)
17439 return copy_to_mode_reg (mode, const0_rtx);
/* Constant byte: fold the splat at compile time.  */
17440 if (CONST_INT_P (val))
17442 HOST_WIDE_INT v = INTVAL (val) & 255;
17446 if (mode == DImode)
17447 v |= (v << 16) << 16;
17448 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
17451 if (valmode == VOIDmode)
17453 if (valmode != QImode)
17454 val = gen_lowpart (QImode, val);
17455 if (mode == QImode)
/* Cost decision: multiply by 0x01010101 vs. a shift/or ladder of NOPS
   steps; partial-register-stall targets bias toward the multiply.  */
17457 if (!TARGET_PARTIAL_REG_STALL)
17459 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
17460 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
17461 <= (ix86_cost->shift_const + ix86_cost->add) * nops
17462 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
17464 rtx reg = convert_modes (mode, QImode, val, true);
17465 tmp = promote_duplicated_reg (mode, const1_rtx);
17466 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
/* Shift/or ladder: duplicate the byte into 16, then 32, then (DImode)
   64 bits; insv patterns handle the first doubling when available.  */
17471 rtx reg = convert_modes (mode, QImode, val, true);
17473 if (!TARGET_PARTIAL_REG_STALL)
17474 if (mode == SImode)
17475 emit_insn (gen_movsi_insv_1 (reg, reg));
17477 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
17480 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
17481 NULL, 1, OPTAB_DIRECT);
17483 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
17485 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
17486 NULL, 1, OPTAB_DIRECT);
17487 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* NOTE(review): guard reads `mode == SImode` before a 32-bit shift, which
   only makes sense for DImode -- presumably an elided `!` or surrounding
   TARGET_64BIT context changes the meaning; verify against upstream.  */
17488 if (mode == SImode)
17490 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
17491 NULL, 1, OPTAB_DIRECT);
17492 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* NOTE(review): the `static rtx` line, braces, the local declaration of
   promoted_val and the first operand of the && at orig. 17505-17506 (likely
   a TARGET_64BIT test) are elided from this listing.  Code kept
   byte-identical.  */
17497 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
17498 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
17499 alignment from ALIGN to DESIRED_ALIGN. */
17501 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
/* Pick the widest mode that either the main loop's chunk size or the
   alignment prologue will store, and splat VAL across it.  */
17506 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
17507 promoted_val = promote_duplicated_reg (DImode, val);
17508 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
17509 promoted_val = promote_duplicated_reg (SImode, val);
17510 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
17511 promoted_val = promote_duplicated_reg (HImode, val);
17513 promoted_val = val;
17515 return promoted_val;
17518 /* Expand string clear operation (bzero). Use i386 string operations when
17519 profitable. See expand_movmem comment for explanation of individual
17520 steps performed. */
17522 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
17523 rtx expected_align_exp, rtx expected_size_exp)
17528 rtx jump_around_label = NULL;
17529 HOST_WIDE_INT align = 1;
17530 unsigned HOST_WIDE_INT count = 0;
17531 HOST_WIDE_INT expected_size = -1;
17532 int size_needed = 0, epilogue_size_needed;
17533 int desired_align = 0;
17534 enum stringop_alg alg;
17535 rtx promoted_val = NULL;
17536 bool force_loopy_epilogue = false;
17539 if (CONST_INT_P (align_exp))
17540 align = INTVAL (align_exp);
17541 /* i386 can do misaligned access on reasonably increased cost. */
17542 if (CONST_INT_P (expected_align_exp)
17543 && INTVAL (expected_align_exp) > align)
17544 align = INTVAL (expected_align_exp);
17545 if (CONST_INT_P (count_exp))
17546 count = expected_size = INTVAL (count_exp);
17547 if (CONST_INT_P (expected_size_exp) && count == 0)
17548 expected_size = INTVAL (expected_size_exp);
17550 /* Make sure we don't need to care about overflow later on. */
17551 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17554 /* Step 0: Decide on preferred algorithm, desired alignment and
17555 size of chunks to be copied by main loop. */
17557 alg = decide_alg (count, expected_size, true, &dynamic_check);
17558 desired_align = decide_alignment (align, alg, expected_size);
17560 if (!TARGET_ALIGN_STRINGOPS)
17561 align = desired_align;
17563 if (alg == libcall)
17565 gcc_assert (alg != no_stringop);
17567 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
17568 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17573 gcc_unreachable ();
17575 size_needed = GET_MODE_SIZE (Pmode);
17577 case unrolled_loop:
17578 size_needed = GET_MODE_SIZE (Pmode) * 4;
17580 case rep_prefix_8_byte:
17583 case rep_prefix_4_byte:
17586 case rep_prefix_1_byte:
17591 epilogue_size_needed = size_needed;
17593 /* Step 1: Prologue guard. */
17595 /* Alignment code needs count to be in register. */
17596 if (CONST_INT_P (count_exp) && desired_align > align)
17598 enum machine_mode mode = SImode;
17599 if (TARGET_64BIT && (count & ~0xffffffff))
17601 count_exp = force_reg (mode, count_exp);
17603 /* Do the cheap promotion to allow better CSE across the
17604 main loop and epilogue (ie one load of the big constant in the
17605 front of all code. */
17606 if (CONST_INT_P (val_exp))
17607 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
17608 desired_align, align);
17609 /* Ensure that alignment prologue won't copy past end of block. */
17610 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
17612 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
17613 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
17614 Make sure it is power of 2. */
17615 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
17617 /* To improve performance of small blocks, we jump around the VAL
17618 promoting mode. This mean that if the promoted VAL is not constant,
17619 we might not use it in the epilogue and have to use byte
17621 if (epilogue_size_needed > 2 && !promoted_val)
17622 force_loopy_epilogue = true;
17623 label = gen_label_rtx ();
17624 emit_cmp_and_jump_insns (count_exp,
17625 GEN_INT (epilogue_size_needed),
17626 LTU, 0, counter_mode (count_exp), 1, label);
17627 if (GET_CODE (count_exp) == CONST_INT)
17629 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
17630 predict_jump (REG_BR_PROB_BASE * 60 / 100);
17632 predict_jump (REG_BR_PROB_BASE * 20 / 100);
17634 if (dynamic_check != -1)
17636 rtx hot_label = gen_label_rtx ();
17637 jump_around_label = gen_label_rtx ();
17638 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
17639 LEU, 0, counter_mode (count_exp), 1, hot_label);
17640 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17641 set_storage_via_libcall (dst, count_exp, val_exp, false);
17642 emit_jump (jump_around_label);
17643 emit_label (hot_label);
17646 /* Step 2: Alignment prologue. */
17648 /* Do the expensive promotion once we branched off the small blocks. */
17650 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
17651 desired_align, align);
17652 gcc_assert (desired_align >= 1 && align >= 1);
17654 if (desired_align > align)
17656 /* Except for the first move in epilogue, we no longer know
17657 constant offset in aliasing info. It don't seems to worth
17658 the pain to maintain it for the first move, so throw away
17660 dst = change_address (dst, BLKmode, destreg);
17661 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
17664 if (label && size_needed == 1)
17666 emit_label (label);
17667 LABEL_NUSES (label) = 1;
17671 /* Step 3: Main loop. */
17677 gcc_unreachable ();
17679 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17680 count_exp, QImode, 1, expected_size);
17683 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17684 count_exp, Pmode, 1, expected_size);
17686 case unrolled_loop:
17687 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
17688 count_exp, Pmode, 4, expected_size);
17690 case rep_prefix_8_byte:
17691 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17694 case rep_prefix_4_byte:
17695 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17698 case rep_prefix_1_byte:
17699 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
17703 /* Adjust properly the offset of src and dest memory for aliasing. */
17704 if (CONST_INT_P (count_exp))
17705 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
17706 (count / size_needed) * size_needed);
17708 dst = change_address (dst, BLKmode, destreg);
17710 /* Step 4: Epilogue to copy the remaining bytes. */
17714 /* When the main loop is done, COUNT_EXP might hold original count,
17715 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
17716 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
17717 bytes. Compensate if needed. */
17719 if (size_needed < desired_align - align)
17722 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
17723 GEN_INT (size_needed - 1), count_exp, 1,
17725 size_needed = desired_align - align + 1;
17726 if (tmp != count_exp)
17727 emit_move_insn (count_exp, tmp);
17729 emit_label (label);
17730 LABEL_NUSES (label) = 1;
17732 if (count_exp != const0_rtx && epilogue_size_needed > 1)
17734 if (force_loopy_epilogue)
17735 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
17738 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
17741 if (jump_around_label)
17742 emit_label (jump_around_label);
17746 /* Expand the appropriate insns for doing strlen if not just doing
17749 out = result, initialized with the start address
17750 align_rtx = alignment of the address.
17751    scratch = scratch register, initialized with the start address when
17752    not aligned, otherwise undefined
17754 This is just the body. It needs the initializations mentioned above and
17755 some address computing at the end. These things are done in i386.md. */
/* Body of the unrolled inline strlen: test up to 3 leading bytes one at a
   time until OUT is 4-byte aligned, then loop over the string four bytes
   at a time using a bit trick to detect a zero byte, and finally adjust
   OUT to point exactly at the terminating zero byte.  OUT holds the start
   address on entry and the end-of-string address on exit; SRC is the
   source MEM; ALIGN_RTX is the (possibly constant) address alignment.  */
17758 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
17762 rtx align_2_label = NULL_RTX;
17763 rtx align_3_label = NULL_RTX;
17764 rtx align_4_label = gen_label_rtx ();
17765 rtx end_0_label = gen_label_rtx ();
17767 rtx tmpreg = gen_reg_rtx (SImode);
17768 rtx scratch = gen_reg_rtx (SImode);
/* A compile-time-constant alignment lets us omit run-time alignment
   checks below.  */
17772 if (CONST_INT_P (align_rtx))
17773 align = INTVAL (align_rtx);
17775 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
17777 /* Is there a known alignment and is it less than 4? */
17780 rtx scratch1 = gen_reg_rtx (Pmode);
17781 emit_move_insn (scratch1, out);
17782 /* Is there a known alignment and is it not 2? */
17785 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
17786 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
17788 /* Leave just the 3 lower bits. */
17789 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
17790 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (address & 3): 0 -> already aligned, 2 -> two bytes to
   check, > 2 -> three-byte case; otherwise fall through to the
   single-byte check.  */
17792 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
17793 Pmode, 1, align_4_label);
17794 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
17795 Pmode, 1, align_2_label);
17796 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
17797 Pmode, 1, align_3_label);
17801 /* Since the alignment is 2, we have to check 2 or 0 bytes;
17802 check if it is aligned to a 4-byte boundary. */
17804 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
17805 NULL_RTX, 0, OPTAB_WIDEN);
17807 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
17808 Pmode, 1, align_4_label);
17811 mem = change_address (src, QImode, out);
17813 /* Now compare the bytes. */
17815 /* Compare the first n unaligned bytes on a byte-per-byte basis. */
17816 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
17817 QImode, 1, end_0_label);
17819 /* Increment the address. */
17820 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17822 /* Not needed with an alignment of 2 */
17825 emit_label (align_2_label);
17827 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
17830 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17832 emit_label (align_3_label);
17835 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
17838 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
17841 /* Generate loop to check 4 bytes at a time. It is not a good idea to
17842 align this loop. It gives only huge programs, but does not help to
17844 emit_label (align_4_label);
17846 mem = change_address (src, SImode, out);
17847 emit_move_insn (scratch, mem);
/* OUT is advanced before the zero test, so on loop exit it points four
   past the word containing the zero byte; the fixup below compensates.  */
17848 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
17850 /* This formula yields a nonzero result iff one of the bytes is zero.
17851 This saves three branches inside loop and many cycles. */
/* tmpreg = (word - 0x01010101) & ~word & 0x80808080.  */
17853 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
17854 emit_insn (gen_one_cmplsi2 (scratch, scratch));
17855 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
17856 emit_insn (gen_andsi3 (tmpreg, tmpreg,
17857 gen_int_mode (0x80808080, SImode)));
17858 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found in the last word loaded; locate which of the
   four bytes it is.  This arm uses conditional moves on the flags
   from the 0x8080 test.  */
17863 rtx reg = gen_reg_rtx (SImode);
17864 rtx reg2 = gen_reg_rtx (Pmode);
17865 emit_move_insn (reg, tmpreg);
17866 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
17868 /* If zero is not in the first two bytes, move two bytes forward. */
17869 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
17870 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17871 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17872 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
17873 gen_rtx_IF_THEN_ELSE (SImode, tmp,
17876 /* Emit lea manually to avoid clobbering of flags. */
17877 emit_insn (gen_rtx_SET (SImode, reg2,
17878 gen_rtx_PLUS (Pmode, out, const2_rtx)));
17880 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17881 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
17882 emit_insn (gen_rtx_SET (VOIDmode, out,
17883 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Alternative arm (without conditional moves): branch around the
   two-byte advance instead.  */
17890 rtx end_2_label = gen_label_rtx ();
17891 /* Is zero in the first two bytes? */
17893 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
17894 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
17895 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
17896 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
17897 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
17899 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
17900 JUMP_LABEL (tmp) = end_2_label;
17902 /* Not in the first two. Move two bytes forward. */
17903 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
17904 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
17906 emit_label (end_2_label);
17910 /* Avoid branch in fixing the byte. */
/* Shift the low byte's 0x80 flag into the carry, then use
   subtract-with-borrow so OUT becomes OUT - 3 or OUT - 4 branchlessly.  */
17911 tmpreg = gen_lowpart (QImode, tmpreg);
17912 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
17913 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
17914 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
17916 emit_label (end_0_label);
17919 /* Expand strlen. */
/* Expand strlen of SRC into OUT.  EOSCHAR is the end-of-string character
   (const0_rtx for a real strlen), ALIGN the known source alignment.
   Emits either the unrolled word-at-a-time scan or a repnz-scasb based
   sequence; falls back to the library call otherwise.  */
17922 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
17924 rtx addr, scratch1, scratch2, scratch3, scratch4;
17926 /* The generic case of strlen expander is long. Avoid its
17927 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
17929 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
17930 && !TARGET_INLINE_ALL_STRINGOPS
17931 && !optimize_insn_for_size_p ()
17932 && (!CONST_INT_P (align) || INTVAL (align) < 4))
17935 addr = force_reg (Pmode, XEXP (src, 0));
17936 scratch1 = gen_reg_rtx (Pmode);
17938 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
17939 && !optimize_insn_for_size_p ())
17941 /* Well it seems that some optimizer does not combine a call like
17942 foo(strlen(bar), strlen(bar));
17943 when the move and the subtraction is done here. It does calculate
17944 the length just once when these instructions are done inside of
17945 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
17946 often used and I use one fewer register for the lifetime of
17947 output_strlen_unroll() this is better. */
17949 emit_move_insn (out, addr);
17951 ix86_expand_strlensi_unroll_1 (out, src, align);
17953 /* strlensi_unroll_1 returns the address of the zero at the end of
17954 the string, like memchr(), so compute the length by subtracting
17955 the start address. */
17956 emit_insn ((*ix86_gen_sub3) (out, out, addr));
/* Fall back to a repnz-scasb style sequence, which implicitly uses
   eax, ecx and edi.  */
17962 /* Can't use this if the user has appropriated eax, ecx, or edi. */
17963 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
17966 scratch2 = gen_reg_rtx (Pmode);
17967 scratch3 = gen_reg_rtx (Pmode);
/* scratch4 is the -1 initial count for the scan.  */
17968 scratch4 = force_reg (Pmode, constm1_rtx);
17970 emit_move_insn (scratch3, addr);
17971 eoschar = force_reg (QImode, eoschar);
17973 src = replace_equiv_address_nv (src, scratch3);
17975 /* If .md starts supporting :P, this can be done in .md. */
17976 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
17977 scratch4), UNSPEC_SCAS);
17978 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* NOTE(review): scratch1 appears to hold the counter left by the scas
   pattern; the length is computed as ~scratch1 - 1 below -- confirm
   against the strlenqi_1 pattern in i386.md.  */
17979 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
17980 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
17985 /* For given symbol (function) construct code to compute address of its PLT
17986 entry in large x86-64 PIC model. */
/* Return a fresh pseudo holding the address of SYMBOL's PLT entry,
   computed as pltoff(SYMBOL) + PIC register.  Only valid in the large
   x86-64 PIC code model (asserted below).  */
17988 construct_plt_address (rtx symbol)
17990 rtx tmp = gen_reg_rtx (Pmode);
17991 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
17993 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
17994 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
/* tmp = PLT offset of SYMBOL, then bias by the PIC base register.  */
17996 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
17997 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Emit a call (or sibcall) to FNADDR returning RETVAL (or NULL for void).
   CALLARG1 is the argument-bytes rtx for the CALL rtx; CALLARG2, when
   nonnegative on 64-bit, is loaded into AL (varargs SSE register count
   per the x86-64 psABI); POP is the number of bytes the callee pops.  */
18002 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
18003 rtx callarg2 ATTRIBUTE_UNUSED,
18004 rtx pop, int sibcall)
18006 rtx use = NULL, call;
18008 if (pop == const0_rtx)
/* 64-bit calls never pop caller arguments.  */
18010 gcc_assert (!TARGET_64BIT || !pop);
18012 if (TARGET_MACHO && !TARGET_64BIT)
18015 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
18016 fnaddr = machopic_indirect_call_target (fnaddr);
18021 /* Static functions and indirect calls don't need the pic register. */
18022 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
18023 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18024 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
18025 use_reg (&use, pic_offset_table_rtx);
/* Record AL as used so the vararg setup is not deleted.  */
18028 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
18030 rtx al = gen_rtx_REG (QImode, AX_REG);
18031 emit_move_insn (al, callarg2);
18032 use_reg (&use, al);
/* In the large PIC model, a direct call to a non-local symbol must go
   through its PLT entry address computed by construct_plt_address.  */
18035 if (ix86_cmodel == CM_LARGE_PIC
18036 && GET_CODE (fnaddr) == MEM
18037 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18038 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
18039 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
18040 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
18042 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18043 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit indirect sibcalls go through R11 (visible below); R11 is not
   used for argument passing, so it stays live across the epilogue.  */
18045 if (sibcall && TARGET_64BIT
18046 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
18049 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18050 fnaddr = gen_rtx_REG (Pmode, R11_REG);
18051 emit_move_insn (fnaddr, addr);
18052 fnaddr = gen_rtx_MEM (QImode, fnaddr);
18055 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
18057 call = gen_rtx_SET (VOIDmode, retval, call);
/* A callee-pop call is a PARALLEL of the call and the sp adjustment.  */
18060 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
18061 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
18062 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
18065 call = emit_call_insn (call);
18067 CALL_INSN_FUNCTION_USAGE (call) = use;
18071 /* Clear stack slot assignments remembered from previous functions.
18072 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and initialize the per-function machine-dependent state
   (GC-allocated and zeroed by GGC_CNEW); called via INIT_EXPANDERS
   before RTL is emitted for each function.  */
18075 static struct machine_function *
18076 ix86_init_machine_status (void)
18078 struct machine_function *f;
18080 f = GGC_CNEW (struct machine_function);
/* -1 means "not yet computed" for the prologue/epilogue heuristic.  */
18081 f->use_fast_prologue_epilogue_nregs = -1;
18082 f->tls_descriptor_call_expanded_p = 0;
18083 f->call_abi = DEFAULT_ABI;
18088 /* Return a MEM corresponding to a stack slot with mode MODE.
18089 Allocate a new slot if necessary.
18091 The RTL for a function can have several slots available: N is
18092 which slot to use. */
/* Return a MEM for stack slot N in MODE, allocating it on first use and
   memoizing it in the ix86_stack_locals list so later requests for the
   same (MODE, N) pair reuse the slot.  */
18095 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
18097 struct stack_local_entry *s;
18099 gcc_assert (n < MAX_386_STACK_LOCALS);
18101 /* Virtual slot is valid only before vregs are instantiated. */
18102 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Linear search of the small per-function cache.  */
18104 for (s = ix86_stack_locals; s; s = s->next)
18105 if (s->mode == mode && s->n == n)
/* copy_rtx so each caller gets an independent MEM.  */
18106 return copy_rtx (s->rtl);
18108 s = (struct stack_local_entry *)
18109 ggc_alloc (sizeof (struct stack_local_entry));
18112 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
/* Prepend the new entry to the cache list.  */
18114 s->next = ix86_stack_locals;
18115 ix86_stack_locals = s;
18119 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* Cached SYMBOL_REF for the TLS resolver function; GTY-rooted so it
   survives garbage collection.  */
18121 static GTY(()) rtx ix86_tls_symbol;
/* Return (building lazily on first use) the SYMBOL_REF for the
   tls_get_addr function; the leading-underscore variant is chosen
   for GNU TLS.  */
18123 ix86_tls_get_addr (void)
18126 if (!ix86_tls_symbol)
18128 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
18129 (TARGET_ANY_GNU_TLS
18131 ? "___tls_get_addr"
18132 : "__tls_get_addr");
18135 return ix86_tls_symbol;
18138 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
/* Cached SYMBOL_REF for _TLS_MODULE_BASE_; GTY-rooted across GC.  */
18140 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Return (building lazily) the SYMBOL_REF for the _TLS_MODULE_BASE_
   symbol, marked with the global-dynamic TLS model flag.  */
18142 ix86_tls_module_base (void)
18145 if (!ix86_tls_module_base_symbol)
18147 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
18148 "_TLS_MODULE_BASE_");
18149 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
18150 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
18153 return ix86_tls_module_base_symbol;
18156 /* Calculate the length of the memory address in the instruction
18157 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Return the number of bytes ADDR contributes to the instruction
   encoding (SIB byte plus displacement), not counting the one-byte
   modrm, opcode, or prefixes.  */
18160 memory_address_length (rtx addr)
18162 struct ix86_address parts;
18163 rtx base, index, disp;
/* Auto-inc/dec addresses encode no extra bytes.  */
18167 if (GET_CODE (addr) == PRE_DEC
18168 || GET_CODE (addr) == POST_INC
18169 || GET_CODE (addr) == PRE_MODIFY
18170 || GET_CODE (addr) == POST_MODIFY)
18173 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the hard-register comparisons below work.  */
18176 if (parts.base && GET_CODE (parts.base) == SUBREG)
18177 parts.base = SUBREG_REG (parts.base);
18178 if (parts.index && GET_CODE (parts.index) == SUBREG)
18179 parts.index = SUBREG_REG (parts.index);
18182 index = parts.index;
18187 - esp as the base always wants an index,
18188 - ebp as the base always wants a displacement. */
18190 /* Register Indirect. */
18191 if (base && !index && !disp)
18193 /* esp (for its index) and ebp (for its displacement) need
18194 the two-byte modrm form. */
18195 if (addr == stack_pointer_rtx
18196 || addr == arg_pointer_rtx
18197 || addr == frame_pointer_rtx
18198 || addr == hard_frame_pointer_rtx)
18202 /* Direct Addressing. */
18203 else if (disp && !base && !index)
18208 /* Find the length of the displacement constant. */
/* constraint K = signed 8-bit immediate, i.e. a disp8 encoding.  */
18211 if (base && satisfies_constraint_K (disp))
18216 /* ebp always wants a displacement. */
18217 else if (base == hard_frame_pointer_rtx)
18220 /* An index requires the two-byte modrm form.... */
18222 /* ...like esp, which always wants an index. */
18223 || base == stack_pointer_rtx
18224 || base == arg_pointer_rtx
18225 || base == frame_pointer_rtx)
18232 /* Compute default value for "length_immediate" attribute. When SHORTFORM
18233 is set, expect that insn have 8bit immediate alternative. */
/* Compute default value for "length_immediate" attribute: the number of
   bytes the immediate operand of INSN occupies in the encoding.  When
   SHORTFORM is set the insn has an imm8 alternative (constraint K).  */
18235 ix86_attr_length_immediate_default (rtx insn, int shortform)
18239 extract_insn_cached (insn);
/* Scan operands for the (at most one counted) immediate.  */
18240 for (i = recog_data.n_operands - 1; i >= 0; --i)
18241 if (CONSTANT_P (recog_data.operand[i]))
18244 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
/* Otherwise the immediate width follows the insn's mode attribute.  */
18248 switch (get_attr_mode (insn))
18259 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
18264 fatal_insn ("unknown insn mode", insn);
18270 /* Compute default value for "length_address" attribute. */
/* Compute default value for "length_address" attribute: the encoding
   bytes contributed by INSN's memory (or lea source) address.  */
18272 ix86_attr_length_address_default (rtx insn)
/* For lea, measure the address expression in the SET source.  */
18276 if (get_attr_type (insn) == TYPE_LEA)
18278 rtx set = PATTERN (insn);
18280 if (GET_CODE (set) == PARALLEL)
18281 set = XVECEXP (set, 0, 0);
18283 gcc_assert (GET_CODE (set) == SET);
18285 return memory_address_length (SET_SRC (set));
/* Otherwise measure the first MEM operand found, if any.  */
18288 extract_insn_cached (insn);
18289 for (i = recog_data.n_operands - 1; i >= 0; --i)
18290 if (MEM_P (recog_data.operand[i]))
18292 return memory_address_length (XEXP (recog_data.operand[i], 0));
18298 /* Compute default value for "length_vex" attribute. It includes
18299 2 or 3 byte VEX prefix and 1 opcode byte. */
/* Compute default value for "length_vex" attribute: the size of the
   2- or 3-byte VEX prefix plus the 1 opcode byte.  */
18302 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
18307 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
18308 byte VEX prefix. */
18309 if (!has_0f_opcode || has_vex_w)
18312 /* We can always use 2 byte VEX prefix in 32bit. */
/* In 64-bit mode the operands decide: anything needing REX.W/X/B
   forces the 3-byte form.  */
18316 extract_insn_cached (insn);
18318 for (i = recog_data.n_operands - 1; i >= 0; --i)
18319 if (REG_P (recog_data.operand[i]))
18321 /* REX.W bit uses 3 byte VEX prefix. */
18322 if (GET_MODE (recog_data.operand[i]) == DImode)
18327 /* REX.X or REX.B bits use 3 byte VEX prefix. */
18328 if (MEM_P (recog_data.operand[i])
18329 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
18336 /* Return the maximum number of instructions a cpu can issue. */
/* Return the maximum number of instructions the scheduler may issue per
   cycle for the tuned-for processor (return values per case are elided
   in this listing).  */
18339 ix86_issue_rate (void)
18343 case PROCESSOR_PENTIUM:
18347 case PROCESSOR_PENTIUMPRO:
18348 case PROCESSOR_PENTIUM4:
18349 case PROCESSOR_ATHLON:
18351 case PROCESSOR_AMDFAM10:
18352 case PROCESSOR_NOCONA:
18353 case PROCESSOR_GENERIC32:
18354 case PROCESSOR_GENERIC64:
18357 case PROCESSOR_CORE2:
18365 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
18366 by DEP_INSN and nothing set by DEP_INSN. */
18369 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
18373 /* Simplify the test for uninteresting insns. */
18374 if (insn_type != TYPE_SETCC
18375 && insn_type != TYPE_ICMOV
18376 && insn_type != TYPE_FCMOV
18377 && insn_type != TYPE_IBR)
18380 if ((set = single_set (dep_insn)) != 0)
18382 set = SET_DEST (set);
18385 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
18386 && XVECLEN (PATTERN (dep_insn), 0) == 2
18387 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
18388 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
18390 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
18391 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
18396 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
18399 /* This test is true if the dependent insn reads the flags but
18400 not any other potentially set register. */
18401 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
18404 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
18410 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
18411 address with operands set by DEP_INSN. */
/* A subroutine of ix86_adjust_cost: return true iff INSN's memory (or
   lea) address uses a register modified by DEP_INSN, i.e. an address
   generation interlock exists.  */
18414 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
/* For lea, the "address" is the SET source of the pattern.  */
18418 if (insn_type == TYPE_LEA
18421 addr = PATTERN (insn);
18423 if (GET_CODE (addr) == PARALLEL)
18424 addr = XVECEXP (addr, 0, 0);
18426 gcc_assert (GET_CODE (addr) == SET);
18428 addr = SET_SRC (addr);
/* Otherwise find the first MEM operand and take its address.  */
18433 extract_insn_cached (insn);
18434 for (i = recog_data.n_operands - 1; i >= 0; --i)
18435 if (MEM_P (recog_data.operand[i]))
18437 addr = XEXP (recog_data.operand[i], 0);
18444 return modified_in_p (addr, dep_insn);
/* Scheduler hook (TARGET_SCHED_ADJUST_COST): adjust COST of the
   dependency LINK between INSN and DEP_INSN according to the tuned-for
   processor's pipeline quirks.  The actual cost arithmetic per case is
   elided in this listing.  */
18448 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
18450 enum attr_type insn_type, dep_insn_type;
18451 enum attr_memory memory;
18453 int dep_insn_code_number;
18455 /* Anti and output dependencies have zero cost on all CPUs. */
18456 if (REG_NOTE_KIND (link) != 0)
18459 dep_insn_code_number = recog_memoized (dep_insn);
18461 /* If we can't recognize the insns, we can't really do anything. */
18462 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
18465 insn_type = get_attr_type (insn);
18466 dep_insn_type = get_attr_type (dep_insn);
18470 case PROCESSOR_PENTIUM:
18471 /* Address Generation Interlock adds a cycle of latency. */
18472 if (ix86_agi_dependent (insn, dep_insn, insn_type))
18475 /* ??? Compares pair with jump/setcc. */
18476 if (ix86_flags_dependent (insn, dep_insn, insn_type))
18479 /* Floating point stores require value to be ready one cycle earlier. */
18480 if (insn_type == TYPE_FMOV
18481 && get_attr_memory (insn) == MEMORY_STORE
18482 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18486 case PROCESSOR_PENTIUMPRO:
18487 memory = get_attr_memory (insn);
18489 /* INT->FP conversion is expensive. */
18490 if (get_attr_fp_int_src (dep_insn))
18493 /* There is one cycle extra latency between an FP op and a store. */
/* Detect "fp op feeds a store of the same value" chains.  */
18494 if (insn_type == TYPE_FMOV
18495 && (set = single_set (dep_insn)) != NULL_RTX
18496 && (set2 = single_set (insn)) != NULL_RTX
18497 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
18498 && MEM_P (SET_DEST (set2)))
18501 /* Show ability of reorder buffer to hide latency of load by executing
18502 in parallel with previous instruction in case
18503 previous instruction is not needed to compute the address. */
18504 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18505 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18507 /* Claim moves to take one cycle, as core can issue one load
18508 at time and the next load can start cycle later. */
18509 if (dep_insn_type == TYPE_IMOV
18510 || dep_insn_type == TYPE_FMOV)
18518 memory = get_attr_memory (insn);
18520 /* The esp dependency is resolved before the instruction is really
18522 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
18523 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
18526 /* INT->FP conversion is expensive. */
18527 if (get_attr_fp_int_src (dep_insn))
18530 /* Show ability of reorder buffer to hide latency of load by executing
18531 in parallel with previous instruction in case
18532 previous instruction is not needed to compute the address. */
18533 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18534 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18536 /* Claim moves to take one cycle, as core can issue one load
18537 at time and the next load can start cycle later. */
18538 if (dep_insn_type == TYPE_IMOV
18539 || dep_insn_type == TYPE_FMOV)
18548 case PROCESSOR_ATHLON:
18550 case PROCESSOR_AMDFAM10:
18551 case PROCESSOR_GENERIC32:
18552 case PROCESSOR_GENERIC64:
18553 memory = get_attr_memory (insn);
18555 /* Show ability of reorder buffer to hide latency of load by executing
18556 in parallel with previous instruction in case
18557 previous instruction is not needed to compute the address. */
18558 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
18559 && !ix86_agi_dependent (insn, dep_insn, insn_type))
18561 enum attr_unit unit = get_attr_unit (insn);
18564 /* Because of the difference between the length of integer and
18565 floating unit pipeline preparation stages, the memory operands
18566 for floating point are cheaper.
18568 ??? For Athlon the difference is most probably 2. */
18569 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
18572 loadcost = TARGET_ATHLON ? 2 : 0;
18574 if (cost >= loadcost)
18587 /* How many alternative schedules to try. This should be as wide as the
18588 scheduling freedom in the DFA, but no wider. Making this value too
18589 large results extra work for the scheduler. */
/* Return how many alternative schedules the scheduler should try; per
   the comment above, this should match the DFA's scheduling freedom
   (return values per case are elided in this listing).  */
18592 ia32_multipass_dfa_lookahead (void)
18596 case PROCESSOR_PENTIUM:
18599 case PROCESSOR_PENTIUMPRO:
18609 /* Compute the alignment given to a constant that is being placed in memory.
18610 EXP is the constant and ALIGN is the alignment that the object would
18612 The value of this function is used instead of that alignment to align
/* Return the alignment (in bits) to give constant EXP when placed in
   memory, given its default alignment ALIGN; raises alignment for
   doubles, 128-bit-aligned modes and long string constants.  */
18616 ix86_constant_alignment (tree exp, int align)
18618 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
18619 || TREE_CODE (exp) == INTEGER_CST)
18621 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
18623 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Word-align longer strings so block ops on them are faster.  */
18626 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
18627 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
18628 return BITS_PER_WORD;
18633 /* Compute the alignment for a static variable.
18634 TYPE is the data type, and ALIGN is the alignment that
18635 the object would ordinarily have. The value of this function is used
18636 instead of that alignment to align the object. */
/* Return the alignment (in bits) for a static variable of TYPE whose
   default alignment is ALIGN; bumps alignment for large aggregates and
   for double/128-bit-aligned component modes.  */
18639 ix86_data_alignment (tree type, int align)
/* Cap the boost: word alignment when optimizing for size, else up to
   256 bits bounded by what the object file format supports.  */
18641 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* TREE_INT_CST_HIGH != 0 means the size exceeds the low word, i.e. is
   certainly large enough.  */
18643 if (AGGREGATE_TYPE_P (type)
18644 && TYPE_SIZE (type)
18645 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18646 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
18647 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
18648 && align < max_align)
18651 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18652 to 16byte boundary. */
18655 if (AGGREGATE_TYPE_P (type)
18656 && TYPE_SIZE (type)
18657 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18658 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
18659 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Otherwise align based on the element/field/scalar mode.  */
18663 if (TREE_CODE (type) == ARRAY_TYPE)
18665 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18667 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18670 else if (TREE_CODE (type) == COMPLEX_TYPE)
18673 if (TYPE_MODE (type) == DCmode && align < 64)
18675 if ((TYPE_MODE (type) == XCmode
18676 || TYPE_MODE (type) == TCmode) && align < 128)
18679 else if ((TREE_CODE (type) == RECORD_TYPE
18680 || TREE_CODE (type) == UNION_TYPE
18681 || TREE_CODE (type) == QUAL_UNION_TYPE)
18682 && TYPE_FIELDS (type))
/* NOTE(review): only the first field's mode is consulted here.  */
18684 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18686 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18689 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
18690 || TREE_CODE (type) == INTEGER_TYPE)
18692 if (TYPE_MODE (type) == DFmode && align < 64)
18694 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18701 /* Compute the alignment for a local variable or a stack slot. TYPE is
18702 the data type, MODE is the widest mode available and ALIGN is the
18703 alignment that the object would ordinarily have. The value of this
18704 macro is used instead of that alignment to align the object. */
/* Return the alignment (in bits) for a local variable or stack slot.
   TYPE is the data type (may be NULL for a caller-save slot), MODE the
   widest mode available, ALIGN the default alignment; structure mirrors
   ix86_data_alignment above but with a 16-byte threshold.  */
18707 ix86_local_alignment (tree type, enum machine_mode mode,
18708 unsigned int align)
18710 /* If TYPE is NULL, we are allocating a stack slot for caller-save
18711 register in MODE. We will return the largest alignment of XF
18715 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
18716 align = GET_MODE_ALIGNMENT (DFmode);
18720 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
18721 to 16byte boundary. */
/* TREE_INT_CST_HIGH != 0 means the size overflows the low word, i.e.
   is certainly larger than the threshold.  */
18724 if (AGGREGATE_TYPE_P (type)
18725 && TYPE_SIZE (type)
18726 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
18727 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
18728 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Otherwise align based on the element/field/scalar mode.  */
18731 if (TREE_CODE (type) == ARRAY_TYPE)
18733 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
18735 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
18738 else if (TREE_CODE (type) == COMPLEX_TYPE)
18740 if (TYPE_MODE (type) == DCmode && align < 64)
18742 if ((TYPE_MODE (type) == XCmode
18743 || TYPE_MODE (type) == TCmode) && align < 128)
18746 else if ((TREE_CODE (type) == RECORD_TYPE
18747 || TREE_CODE (type) == UNION_TYPE
18748 || TREE_CODE (type) == QUAL_UNION_TYPE)
18749 && TYPE_FIELDS (type))
/* NOTE(review): only the first field's mode is consulted here.  */
18751 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
18753 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
18756 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
18757 || TREE_CODE (type) == INTEGER_TYPE)
18760 if (TYPE_MODE (type) == DFmode && align < 64)
18762 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
18768 /* Emit RTL insns to initialize the variable parts of a trampoline.
18769 FNADDR is an RTX for the address of the function's pure code.
18770 CXT is an RTX for the static chain value for the function. */
/* Emit RTL storing the variable parts of a trampoline at TRAMP.
   FNADDR is the address of the target function's code; CXT is the
   static chain value.  The 32-bit form writes "mov $cxt,%ecx; jmp
   target"; the 64-bit form loads FNADDR into r11 and CXT into r10,
   then jumps through r11.  */
18772 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
18776 /* Compute offset from the end of the jmp to the target function. */
/* The jmp rel32 operand is relative to the end of the 10-byte
   sequence (1 + 4 + 1 + 4 bytes), hence tramp + 10.  */
18777 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
18778 plus_constant (tramp, 10),
18779 NULL_RTX, 1, OPTAB_DIRECT);
/* 0xb9 = mov $imm32, %ecx (static chain register).  */
18780 emit_move_insn (gen_rtx_MEM (QImode, tramp),
18781 gen_int_mode (0xb9, QImode));
18782 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
/* 0xe9 = jmp rel32.  */
18783 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
18784 gen_int_mode (0xe9, QImode));
18785 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
18790 /* Try to load address using shorter movl instead of movabs.
18791 We may want to support movq for kernel mode, but kernel does not use
18792 trampolines at the moment. */
18793 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
18795 fnaddr = copy_to_mode_reg (DImode, fnaddr);
/* 0x41 0xbb = movl $imm32, %r11d (zero-extends into r11).  */
18796 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18797 gen_int_mode (0xbb41, HImode));
18798 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
18799 gen_lowpart (SImode, fnaddr));
/* 0x49 0xbb = movabs $imm64, %r11.  */
18804 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18805 gen_int_mode (0xbb49, HImode));
18806 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
18810 /* Load static chain using movabs to r10. */
18811 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18812 gen_int_mode (0xba49, HImode));
18813 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
18816 /* Jump to the r11 */
/* 0x49 0xff 0xe3 = jmp *%r11.  */
18817 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
18818 gen_int_mode (0xff49, HImode));
18819 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
18820 gen_int_mode (0xe3, QImode));
18822 gcc_assert (offset <= TRAMPOLINE_SIZE);
/* On targets that need it, mark the trampoline's stack executable.  */
18825 #ifdef ENABLE_EXECUTE_STACK
18826 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
18827 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
18831 /* Codes for all the SSE/MMX builtins. */
18834 IX86_BUILTIN_ADDPS,
18835 IX86_BUILTIN_ADDSS,
18836 IX86_BUILTIN_DIVPS,
18837 IX86_BUILTIN_DIVSS,
18838 IX86_BUILTIN_MULPS,
18839 IX86_BUILTIN_MULSS,
18840 IX86_BUILTIN_SUBPS,
18841 IX86_BUILTIN_SUBSS,
18843 IX86_BUILTIN_CMPEQPS,
18844 IX86_BUILTIN_CMPLTPS,
18845 IX86_BUILTIN_CMPLEPS,
18846 IX86_BUILTIN_CMPGTPS,
18847 IX86_BUILTIN_CMPGEPS,
18848 IX86_BUILTIN_CMPNEQPS,
18849 IX86_BUILTIN_CMPNLTPS,
18850 IX86_BUILTIN_CMPNLEPS,
18851 IX86_BUILTIN_CMPNGTPS,
18852 IX86_BUILTIN_CMPNGEPS,
18853 IX86_BUILTIN_CMPORDPS,
18854 IX86_BUILTIN_CMPUNORDPS,
18855 IX86_BUILTIN_CMPEQSS,
18856 IX86_BUILTIN_CMPLTSS,
18857 IX86_BUILTIN_CMPLESS,
18858 IX86_BUILTIN_CMPNEQSS,
18859 IX86_BUILTIN_CMPNLTSS,
18860 IX86_BUILTIN_CMPNLESS,
18861 IX86_BUILTIN_CMPNGTSS,
18862 IX86_BUILTIN_CMPNGESS,
18863 IX86_BUILTIN_CMPORDSS,
18864 IX86_BUILTIN_CMPUNORDSS,
18866 IX86_BUILTIN_COMIEQSS,
18867 IX86_BUILTIN_COMILTSS,
18868 IX86_BUILTIN_COMILESS,
18869 IX86_BUILTIN_COMIGTSS,
18870 IX86_BUILTIN_COMIGESS,
18871 IX86_BUILTIN_COMINEQSS,
18872 IX86_BUILTIN_UCOMIEQSS,
18873 IX86_BUILTIN_UCOMILTSS,
18874 IX86_BUILTIN_UCOMILESS,
18875 IX86_BUILTIN_UCOMIGTSS,
18876 IX86_BUILTIN_UCOMIGESS,
18877 IX86_BUILTIN_UCOMINEQSS,
18879 IX86_BUILTIN_CVTPI2PS,
18880 IX86_BUILTIN_CVTPS2PI,
18881 IX86_BUILTIN_CVTSI2SS,
18882 IX86_BUILTIN_CVTSI642SS,
18883 IX86_BUILTIN_CVTSS2SI,
18884 IX86_BUILTIN_CVTSS2SI64,
18885 IX86_BUILTIN_CVTTPS2PI,
18886 IX86_BUILTIN_CVTTSS2SI,
18887 IX86_BUILTIN_CVTTSS2SI64,
18889 IX86_BUILTIN_MAXPS,
18890 IX86_BUILTIN_MAXSS,
18891 IX86_BUILTIN_MINPS,
18892 IX86_BUILTIN_MINSS,
18894 IX86_BUILTIN_LOADUPS,
18895 IX86_BUILTIN_STOREUPS,
18896 IX86_BUILTIN_MOVSS,
18898 IX86_BUILTIN_MOVHLPS,
18899 IX86_BUILTIN_MOVLHPS,
18900 IX86_BUILTIN_LOADHPS,
18901 IX86_BUILTIN_LOADLPS,
18902 IX86_BUILTIN_STOREHPS,
18903 IX86_BUILTIN_STORELPS,
18905 IX86_BUILTIN_MASKMOVQ,
18906 IX86_BUILTIN_MOVMSKPS,
18907 IX86_BUILTIN_PMOVMSKB,
18909 IX86_BUILTIN_MOVNTPS,
18910 IX86_BUILTIN_MOVNTQ,
18912 IX86_BUILTIN_LOADDQU,
18913 IX86_BUILTIN_STOREDQU,
18915 IX86_BUILTIN_PACKSSWB,
18916 IX86_BUILTIN_PACKSSDW,
18917 IX86_BUILTIN_PACKUSWB,
18919 IX86_BUILTIN_PADDB,
18920 IX86_BUILTIN_PADDW,
18921 IX86_BUILTIN_PADDD,
18922 IX86_BUILTIN_PADDQ,
18923 IX86_BUILTIN_PADDSB,
18924 IX86_BUILTIN_PADDSW,
18925 IX86_BUILTIN_PADDUSB,
18926 IX86_BUILTIN_PADDUSW,
18927 IX86_BUILTIN_PSUBB,
18928 IX86_BUILTIN_PSUBW,
18929 IX86_BUILTIN_PSUBD,
18930 IX86_BUILTIN_PSUBQ,
18931 IX86_BUILTIN_PSUBSB,
18932 IX86_BUILTIN_PSUBSW,
18933 IX86_BUILTIN_PSUBUSB,
18934 IX86_BUILTIN_PSUBUSW,
18937 IX86_BUILTIN_PANDN,
18941 IX86_BUILTIN_PAVGB,
18942 IX86_BUILTIN_PAVGW,
18944 IX86_BUILTIN_PCMPEQB,
18945 IX86_BUILTIN_PCMPEQW,
18946 IX86_BUILTIN_PCMPEQD,
18947 IX86_BUILTIN_PCMPGTB,
18948 IX86_BUILTIN_PCMPGTW,
18949 IX86_BUILTIN_PCMPGTD,
18951 IX86_BUILTIN_PMADDWD,
18953 IX86_BUILTIN_PMAXSW,
18954 IX86_BUILTIN_PMAXUB,
18955 IX86_BUILTIN_PMINSW,
18956 IX86_BUILTIN_PMINUB,
18958 IX86_BUILTIN_PMULHUW,
18959 IX86_BUILTIN_PMULHW,
18960 IX86_BUILTIN_PMULLW,
18962 IX86_BUILTIN_PSADBW,
18963 IX86_BUILTIN_PSHUFW,
18965 IX86_BUILTIN_PSLLW,
18966 IX86_BUILTIN_PSLLD,
18967 IX86_BUILTIN_PSLLQ,
18968 IX86_BUILTIN_PSRAW,
18969 IX86_BUILTIN_PSRAD,
18970 IX86_BUILTIN_PSRLW,
18971 IX86_BUILTIN_PSRLD,
18972 IX86_BUILTIN_PSRLQ,
18973 IX86_BUILTIN_PSLLWI,
18974 IX86_BUILTIN_PSLLDI,
18975 IX86_BUILTIN_PSLLQI,
18976 IX86_BUILTIN_PSRAWI,
18977 IX86_BUILTIN_PSRADI,
18978 IX86_BUILTIN_PSRLWI,
18979 IX86_BUILTIN_PSRLDI,
18980 IX86_BUILTIN_PSRLQI,
18982 IX86_BUILTIN_PUNPCKHBW,
18983 IX86_BUILTIN_PUNPCKHWD,
18984 IX86_BUILTIN_PUNPCKHDQ,
18985 IX86_BUILTIN_PUNPCKLBW,
18986 IX86_BUILTIN_PUNPCKLWD,
18987 IX86_BUILTIN_PUNPCKLDQ,
18989 IX86_BUILTIN_SHUFPS,
18991 IX86_BUILTIN_RCPPS,
18992 IX86_BUILTIN_RCPSS,
18993 IX86_BUILTIN_RSQRTPS,
18994 IX86_BUILTIN_RSQRTPS_NR,
18995 IX86_BUILTIN_RSQRTSS,
18996 IX86_BUILTIN_RSQRTF,
18997 IX86_BUILTIN_SQRTPS,
18998 IX86_BUILTIN_SQRTPS_NR,
18999 IX86_BUILTIN_SQRTSS,
19001 IX86_BUILTIN_UNPCKHPS,
19002 IX86_BUILTIN_UNPCKLPS,
19004 IX86_BUILTIN_ANDPS,
19005 IX86_BUILTIN_ANDNPS,
19007 IX86_BUILTIN_XORPS,
19010 IX86_BUILTIN_LDMXCSR,
19011 IX86_BUILTIN_STMXCSR,
19012 IX86_BUILTIN_SFENCE,
19014 /* 3DNow! Original */
19015 IX86_BUILTIN_FEMMS,
19016 IX86_BUILTIN_PAVGUSB,
19017 IX86_BUILTIN_PF2ID,
19018 IX86_BUILTIN_PFACC,
19019 IX86_BUILTIN_PFADD,
19020 IX86_BUILTIN_PFCMPEQ,
19021 IX86_BUILTIN_PFCMPGE,
19022 IX86_BUILTIN_PFCMPGT,
19023 IX86_BUILTIN_PFMAX,
19024 IX86_BUILTIN_PFMIN,
19025 IX86_BUILTIN_PFMUL,
19026 IX86_BUILTIN_PFRCP,
19027 IX86_BUILTIN_PFRCPIT1,
19028 IX86_BUILTIN_PFRCPIT2,
19029 IX86_BUILTIN_PFRSQIT1,
19030 IX86_BUILTIN_PFRSQRT,
19031 IX86_BUILTIN_PFSUB,
19032 IX86_BUILTIN_PFSUBR,
19033 IX86_BUILTIN_PI2FD,
19034 IX86_BUILTIN_PMULHRW,
19036 /* 3DNow! Athlon Extensions */
19037 IX86_BUILTIN_PF2IW,
19038 IX86_BUILTIN_PFNACC,
19039 IX86_BUILTIN_PFPNACC,
19040 IX86_BUILTIN_PI2FW,
19041 IX86_BUILTIN_PSWAPDSI,
19042 IX86_BUILTIN_PSWAPDSF,
19045 IX86_BUILTIN_ADDPD,
19046 IX86_BUILTIN_ADDSD,
19047 IX86_BUILTIN_DIVPD,
19048 IX86_BUILTIN_DIVSD,
19049 IX86_BUILTIN_MULPD,
19050 IX86_BUILTIN_MULSD,
19051 IX86_BUILTIN_SUBPD,
19052 IX86_BUILTIN_SUBSD,
19054 IX86_BUILTIN_CMPEQPD,
19055 IX86_BUILTIN_CMPLTPD,
19056 IX86_BUILTIN_CMPLEPD,
19057 IX86_BUILTIN_CMPGTPD,
19058 IX86_BUILTIN_CMPGEPD,
19059 IX86_BUILTIN_CMPNEQPD,
19060 IX86_BUILTIN_CMPNLTPD,
19061 IX86_BUILTIN_CMPNLEPD,
19062 IX86_BUILTIN_CMPNGTPD,
19063 IX86_BUILTIN_CMPNGEPD,
19064 IX86_BUILTIN_CMPORDPD,
19065 IX86_BUILTIN_CMPUNORDPD,
19066 IX86_BUILTIN_CMPEQSD,
19067 IX86_BUILTIN_CMPLTSD,
19068 IX86_BUILTIN_CMPLESD,
19069 IX86_BUILTIN_CMPNEQSD,
19070 IX86_BUILTIN_CMPNLTSD,
19071 IX86_BUILTIN_CMPNLESD,
19072 IX86_BUILTIN_CMPORDSD,
19073 IX86_BUILTIN_CMPUNORDSD,
19075 IX86_BUILTIN_COMIEQSD,
19076 IX86_BUILTIN_COMILTSD,
19077 IX86_BUILTIN_COMILESD,
19078 IX86_BUILTIN_COMIGTSD,
19079 IX86_BUILTIN_COMIGESD,
19080 IX86_BUILTIN_COMINEQSD,
19081 IX86_BUILTIN_UCOMIEQSD,
19082 IX86_BUILTIN_UCOMILTSD,
19083 IX86_BUILTIN_UCOMILESD,
19084 IX86_BUILTIN_UCOMIGTSD,
19085 IX86_BUILTIN_UCOMIGESD,
19086 IX86_BUILTIN_UCOMINEQSD,
19088 IX86_BUILTIN_MAXPD,
19089 IX86_BUILTIN_MAXSD,
19090 IX86_BUILTIN_MINPD,
19091 IX86_BUILTIN_MINSD,
19093 IX86_BUILTIN_ANDPD,
19094 IX86_BUILTIN_ANDNPD,
19096 IX86_BUILTIN_XORPD,
19098 IX86_BUILTIN_SQRTPD,
19099 IX86_BUILTIN_SQRTSD,
19101 IX86_BUILTIN_UNPCKHPD,
19102 IX86_BUILTIN_UNPCKLPD,
19104 IX86_BUILTIN_SHUFPD,
19106 IX86_BUILTIN_LOADUPD,
19107 IX86_BUILTIN_STOREUPD,
19108 IX86_BUILTIN_MOVSD,
19110 IX86_BUILTIN_LOADHPD,
19111 IX86_BUILTIN_LOADLPD,
19113 IX86_BUILTIN_CVTDQ2PD,
19114 IX86_BUILTIN_CVTDQ2PS,
19116 IX86_BUILTIN_CVTPD2DQ,
19117 IX86_BUILTIN_CVTPD2PI,
19118 IX86_BUILTIN_CVTPD2PS,
19119 IX86_BUILTIN_CVTTPD2DQ,
19120 IX86_BUILTIN_CVTTPD2PI,
19122 IX86_BUILTIN_CVTPI2PD,
19123 IX86_BUILTIN_CVTSI2SD,
19124 IX86_BUILTIN_CVTSI642SD,
19126 IX86_BUILTIN_CVTSD2SI,
19127 IX86_BUILTIN_CVTSD2SI64,
19128 IX86_BUILTIN_CVTSD2SS,
19129 IX86_BUILTIN_CVTSS2SD,
19130 IX86_BUILTIN_CVTTSD2SI,
19131 IX86_BUILTIN_CVTTSD2SI64,
19133 IX86_BUILTIN_CVTPS2DQ,
19134 IX86_BUILTIN_CVTPS2PD,
19135 IX86_BUILTIN_CVTTPS2DQ,
19137 IX86_BUILTIN_MOVNTI,
19138 IX86_BUILTIN_MOVNTPD,
19139 IX86_BUILTIN_MOVNTDQ,
19141 IX86_BUILTIN_MOVQ128,
19144 IX86_BUILTIN_MASKMOVDQU,
19145 IX86_BUILTIN_MOVMSKPD,
19146 IX86_BUILTIN_PMOVMSKB128,
19148 IX86_BUILTIN_PACKSSWB128,
19149 IX86_BUILTIN_PACKSSDW128,
19150 IX86_BUILTIN_PACKUSWB128,
19152 IX86_BUILTIN_PADDB128,
19153 IX86_BUILTIN_PADDW128,
19154 IX86_BUILTIN_PADDD128,
19155 IX86_BUILTIN_PADDQ128,
19156 IX86_BUILTIN_PADDSB128,
19157 IX86_BUILTIN_PADDSW128,
19158 IX86_BUILTIN_PADDUSB128,
19159 IX86_BUILTIN_PADDUSW128,
19160 IX86_BUILTIN_PSUBB128,
19161 IX86_BUILTIN_PSUBW128,
19162 IX86_BUILTIN_PSUBD128,
19163 IX86_BUILTIN_PSUBQ128,
19164 IX86_BUILTIN_PSUBSB128,
19165 IX86_BUILTIN_PSUBSW128,
19166 IX86_BUILTIN_PSUBUSB128,
19167 IX86_BUILTIN_PSUBUSW128,
19169 IX86_BUILTIN_PAND128,
19170 IX86_BUILTIN_PANDN128,
19171 IX86_BUILTIN_POR128,
19172 IX86_BUILTIN_PXOR128,
19174 IX86_BUILTIN_PAVGB128,
19175 IX86_BUILTIN_PAVGW128,
19177 IX86_BUILTIN_PCMPEQB128,
19178 IX86_BUILTIN_PCMPEQW128,
19179 IX86_BUILTIN_PCMPEQD128,
19180 IX86_BUILTIN_PCMPGTB128,
19181 IX86_BUILTIN_PCMPGTW128,
19182 IX86_BUILTIN_PCMPGTD128,
19184 IX86_BUILTIN_PMADDWD128,
19186 IX86_BUILTIN_PMAXSW128,
19187 IX86_BUILTIN_PMAXUB128,
19188 IX86_BUILTIN_PMINSW128,
19189 IX86_BUILTIN_PMINUB128,
19191 IX86_BUILTIN_PMULUDQ,
19192 IX86_BUILTIN_PMULUDQ128,
19193 IX86_BUILTIN_PMULHUW128,
19194 IX86_BUILTIN_PMULHW128,
19195 IX86_BUILTIN_PMULLW128,
19197 IX86_BUILTIN_PSADBW128,
19198 IX86_BUILTIN_PSHUFHW,
19199 IX86_BUILTIN_PSHUFLW,
19200 IX86_BUILTIN_PSHUFD,
19202 IX86_BUILTIN_PSLLDQI128,
19203 IX86_BUILTIN_PSLLWI128,
19204 IX86_BUILTIN_PSLLDI128,
19205 IX86_BUILTIN_PSLLQI128,
19206 IX86_BUILTIN_PSRAWI128,
19207 IX86_BUILTIN_PSRADI128,
19208 IX86_BUILTIN_PSRLDQI128,
19209 IX86_BUILTIN_PSRLWI128,
19210 IX86_BUILTIN_PSRLDI128,
19211 IX86_BUILTIN_PSRLQI128,
19213 IX86_BUILTIN_PSLLDQ128,
19214 IX86_BUILTIN_PSLLW128,
19215 IX86_BUILTIN_PSLLD128,
19216 IX86_BUILTIN_PSLLQ128,
19217 IX86_BUILTIN_PSRAW128,
19218 IX86_BUILTIN_PSRAD128,
19219 IX86_BUILTIN_PSRLW128,
19220 IX86_BUILTIN_PSRLD128,
19221 IX86_BUILTIN_PSRLQ128,
19223 IX86_BUILTIN_PUNPCKHBW128,
19224 IX86_BUILTIN_PUNPCKHWD128,
19225 IX86_BUILTIN_PUNPCKHDQ128,
19226 IX86_BUILTIN_PUNPCKHQDQ128,
19227 IX86_BUILTIN_PUNPCKLBW128,
19228 IX86_BUILTIN_PUNPCKLWD128,
19229 IX86_BUILTIN_PUNPCKLDQ128,
19230 IX86_BUILTIN_PUNPCKLQDQ128,
19232 IX86_BUILTIN_CLFLUSH,
19233 IX86_BUILTIN_MFENCE,
19234 IX86_BUILTIN_LFENCE,
19237 IX86_BUILTIN_ADDSUBPS,
19238 IX86_BUILTIN_HADDPS,
19239 IX86_BUILTIN_HSUBPS,
19240 IX86_BUILTIN_MOVSHDUP,
19241 IX86_BUILTIN_MOVSLDUP,
19242 IX86_BUILTIN_ADDSUBPD,
19243 IX86_BUILTIN_HADDPD,
19244 IX86_BUILTIN_HSUBPD,
19245 IX86_BUILTIN_LDDQU,
19247 IX86_BUILTIN_MONITOR,
19248 IX86_BUILTIN_MWAIT,
19251 IX86_BUILTIN_PHADDW,
19252 IX86_BUILTIN_PHADDD,
19253 IX86_BUILTIN_PHADDSW,
19254 IX86_BUILTIN_PHSUBW,
19255 IX86_BUILTIN_PHSUBD,
19256 IX86_BUILTIN_PHSUBSW,
19257 IX86_BUILTIN_PMADDUBSW,
19258 IX86_BUILTIN_PMULHRSW,
19259 IX86_BUILTIN_PSHUFB,
19260 IX86_BUILTIN_PSIGNB,
19261 IX86_BUILTIN_PSIGNW,
19262 IX86_BUILTIN_PSIGND,
19263 IX86_BUILTIN_PALIGNR,
19264 IX86_BUILTIN_PABSB,
19265 IX86_BUILTIN_PABSW,
19266 IX86_BUILTIN_PABSD,
19268 IX86_BUILTIN_PHADDW128,
19269 IX86_BUILTIN_PHADDD128,
19270 IX86_BUILTIN_PHADDSW128,
19271 IX86_BUILTIN_PHSUBW128,
19272 IX86_BUILTIN_PHSUBD128,
19273 IX86_BUILTIN_PHSUBSW128,
19274 IX86_BUILTIN_PMADDUBSW128,
19275 IX86_BUILTIN_PMULHRSW128,
19276 IX86_BUILTIN_PSHUFB128,
19277 IX86_BUILTIN_PSIGNB128,
19278 IX86_BUILTIN_PSIGNW128,
19279 IX86_BUILTIN_PSIGND128,
19280 IX86_BUILTIN_PALIGNR128,
19281 IX86_BUILTIN_PABSB128,
19282 IX86_BUILTIN_PABSW128,
19283 IX86_BUILTIN_PABSD128,
19285 /* AMDFAM10 - SSE4A New Instructions. */
19286 IX86_BUILTIN_MOVNTSD,
19287 IX86_BUILTIN_MOVNTSS,
19288 IX86_BUILTIN_EXTRQI,
19289 IX86_BUILTIN_EXTRQ,
19290 IX86_BUILTIN_INSERTQI,
19291 IX86_BUILTIN_INSERTQ,
19294 IX86_BUILTIN_BLENDPD,
19295 IX86_BUILTIN_BLENDPS,
19296 IX86_BUILTIN_BLENDVPD,
19297 IX86_BUILTIN_BLENDVPS,
19298 IX86_BUILTIN_PBLENDVB128,
19299 IX86_BUILTIN_PBLENDW128,
19304 IX86_BUILTIN_INSERTPS128,
19306 IX86_BUILTIN_MOVNTDQA,
19307 IX86_BUILTIN_MPSADBW128,
19308 IX86_BUILTIN_PACKUSDW128,
19309 IX86_BUILTIN_PCMPEQQ,
19310 IX86_BUILTIN_PHMINPOSUW128,
19312 IX86_BUILTIN_PMAXSB128,
19313 IX86_BUILTIN_PMAXSD128,
19314 IX86_BUILTIN_PMAXUD128,
19315 IX86_BUILTIN_PMAXUW128,
19317 IX86_BUILTIN_PMINSB128,
19318 IX86_BUILTIN_PMINSD128,
19319 IX86_BUILTIN_PMINUD128,
19320 IX86_BUILTIN_PMINUW128,
19322 IX86_BUILTIN_PMOVSXBW128,
19323 IX86_BUILTIN_PMOVSXBD128,
19324 IX86_BUILTIN_PMOVSXBQ128,
19325 IX86_BUILTIN_PMOVSXWD128,
19326 IX86_BUILTIN_PMOVSXWQ128,
19327 IX86_BUILTIN_PMOVSXDQ128,
19329 IX86_BUILTIN_PMOVZXBW128,
19330 IX86_BUILTIN_PMOVZXBD128,
19331 IX86_BUILTIN_PMOVZXBQ128,
19332 IX86_BUILTIN_PMOVZXWD128,
19333 IX86_BUILTIN_PMOVZXWQ128,
19334 IX86_BUILTIN_PMOVZXDQ128,
19336 IX86_BUILTIN_PMULDQ128,
19337 IX86_BUILTIN_PMULLD128,
19339 IX86_BUILTIN_ROUNDPD,
19340 IX86_BUILTIN_ROUNDPS,
19341 IX86_BUILTIN_ROUNDSD,
19342 IX86_BUILTIN_ROUNDSS,
19344 IX86_BUILTIN_PTESTZ,
19345 IX86_BUILTIN_PTESTC,
19346 IX86_BUILTIN_PTESTNZC,
19348 IX86_BUILTIN_VEC_INIT_V2SI,
19349 IX86_BUILTIN_VEC_INIT_V4HI,
19350 IX86_BUILTIN_VEC_INIT_V8QI,
19351 IX86_BUILTIN_VEC_EXT_V2DF,
19352 IX86_BUILTIN_VEC_EXT_V2DI,
19353 IX86_BUILTIN_VEC_EXT_V4SF,
19354 IX86_BUILTIN_VEC_EXT_V4SI,
19355 IX86_BUILTIN_VEC_EXT_V8HI,
19356 IX86_BUILTIN_VEC_EXT_V2SI,
19357 IX86_BUILTIN_VEC_EXT_V4HI,
19358 IX86_BUILTIN_VEC_EXT_V16QI,
19359 IX86_BUILTIN_VEC_SET_V2DI,
19360 IX86_BUILTIN_VEC_SET_V4SF,
19361 IX86_BUILTIN_VEC_SET_V4SI,
19362 IX86_BUILTIN_VEC_SET_V8HI,
19363 IX86_BUILTIN_VEC_SET_V4HI,
19364 IX86_BUILTIN_VEC_SET_V16QI,
19366 IX86_BUILTIN_VEC_PACK_SFIX,
19369 IX86_BUILTIN_CRC32QI,
19370 IX86_BUILTIN_CRC32HI,
19371 IX86_BUILTIN_CRC32SI,
19372 IX86_BUILTIN_CRC32DI,
19374 IX86_BUILTIN_PCMPESTRI128,
19375 IX86_BUILTIN_PCMPESTRM128,
19376 IX86_BUILTIN_PCMPESTRA128,
19377 IX86_BUILTIN_PCMPESTRC128,
19378 IX86_BUILTIN_PCMPESTRO128,
19379 IX86_BUILTIN_PCMPESTRS128,
19380 IX86_BUILTIN_PCMPESTRZ128,
19381 IX86_BUILTIN_PCMPISTRI128,
19382 IX86_BUILTIN_PCMPISTRM128,
19383 IX86_BUILTIN_PCMPISTRA128,
19384 IX86_BUILTIN_PCMPISTRC128,
19385 IX86_BUILTIN_PCMPISTRO128,
19386 IX86_BUILTIN_PCMPISTRS128,
19387 IX86_BUILTIN_PCMPISTRZ128,
19389 IX86_BUILTIN_PCMPGTQ,
19391 /* AES instructions */
19392 IX86_BUILTIN_AESENC128,
19393 IX86_BUILTIN_AESENCLAST128,
19394 IX86_BUILTIN_AESDEC128,
19395 IX86_BUILTIN_AESDECLAST128,
19396 IX86_BUILTIN_AESIMC128,
19397 IX86_BUILTIN_AESKEYGENASSIST128,
19399 /* PCLMUL instruction */
19400 IX86_BUILTIN_PCLMULQDQ128,
19403 IX86_BUILTIN_ADDPD256,
19404 IX86_BUILTIN_ADDPS256,
19405 IX86_BUILTIN_ADDSUBPD256,
19406 IX86_BUILTIN_ADDSUBPS256,
19407 IX86_BUILTIN_ANDPD256,
19408 IX86_BUILTIN_ANDPS256,
19409 IX86_BUILTIN_ANDNPD256,
19410 IX86_BUILTIN_ANDNPS256,
19411 IX86_BUILTIN_BLENDPD256,
19412 IX86_BUILTIN_BLENDPS256,
19413 IX86_BUILTIN_BLENDVPD256,
19414 IX86_BUILTIN_BLENDVPS256,
19415 IX86_BUILTIN_DIVPD256,
19416 IX86_BUILTIN_DIVPS256,
19417 IX86_BUILTIN_DPPS256,
19418 IX86_BUILTIN_HADDPD256,
19419 IX86_BUILTIN_HADDPS256,
19420 IX86_BUILTIN_HSUBPD256,
19421 IX86_BUILTIN_HSUBPS256,
19422 IX86_BUILTIN_MAXPD256,
19423 IX86_BUILTIN_MAXPS256,
19424 IX86_BUILTIN_MINPD256,
19425 IX86_BUILTIN_MINPS256,
19426 IX86_BUILTIN_MULPD256,
19427 IX86_BUILTIN_MULPS256,
19428 IX86_BUILTIN_ORPD256,
19429 IX86_BUILTIN_ORPS256,
19430 IX86_BUILTIN_SHUFPD256,
19431 IX86_BUILTIN_SHUFPS256,
19432 IX86_BUILTIN_SUBPD256,
19433 IX86_BUILTIN_SUBPS256,
19434 IX86_BUILTIN_XORPD256,
19435 IX86_BUILTIN_XORPS256,
19436 IX86_BUILTIN_CMPSD,
19437 IX86_BUILTIN_CMPSS,
19438 IX86_BUILTIN_CMPPD,
19439 IX86_BUILTIN_CMPPS,
19440 IX86_BUILTIN_CMPPD256,
19441 IX86_BUILTIN_CMPPS256,
19442 IX86_BUILTIN_CVTDQ2PD256,
19443 IX86_BUILTIN_CVTDQ2PS256,
19444 IX86_BUILTIN_CVTPD2PS256,
19445 IX86_BUILTIN_CVTPS2DQ256,
19446 IX86_BUILTIN_CVTPS2PD256,
19447 IX86_BUILTIN_CVTTPD2DQ256,
19448 IX86_BUILTIN_CVTPD2DQ256,
19449 IX86_BUILTIN_CVTTPS2DQ256,
19450 IX86_BUILTIN_EXTRACTF128PD256,
19451 IX86_BUILTIN_EXTRACTF128PS256,
19452 IX86_BUILTIN_EXTRACTF128SI256,
19453 IX86_BUILTIN_VZEROALL,
19454 IX86_BUILTIN_VZEROUPPER,
19455 IX86_BUILTIN_VZEROUPPER_REX64,
19456 IX86_BUILTIN_VPERMILVARPD,
19457 IX86_BUILTIN_VPERMILVARPS,
19458 IX86_BUILTIN_VPERMILVARPD256,
19459 IX86_BUILTIN_VPERMILVARPS256,
19460 IX86_BUILTIN_VPERMILPD,
19461 IX86_BUILTIN_VPERMILPS,
19462 IX86_BUILTIN_VPERMILPD256,
19463 IX86_BUILTIN_VPERMILPS256,
19464 IX86_BUILTIN_VPERMIL2PD,
19465 IX86_BUILTIN_VPERMIL2PS,
19466 IX86_BUILTIN_VPERMIL2PD256,
19467 IX86_BUILTIN_VPERMIL2PS256,
19468 IX86_BUILTIN_VPERM2F128PD256,
19469 IX86_BUILTIN_VPERM2F128PS256,
19470 IX86_BUILTIN_VPERM2F128SI256,
19471 IX86_BUILTIN_VBROADCASTSS,
19472 IX86_BUILTIN_VBROADCASTSD256,
19473 IX86_BUILTIN_VBROADCASTSS256,
19474 IX86_BUILTIN_VBROADCASTPD256,
19475 IX86_BUILTIN_VBROADCASTPS256,
19476 IX86_BUILTIN_VINSERTF128PD256,
19477 IX86_BUILTIN_VINSERTF128PS256,
19478 IX86_BUILTIN_VINSERTF128SI256,
19479 IX86_BUILTIN_LOADUPD256,
19480 IX86_BUILTIN_LOADUPS256,
19481 IX86_BUILTIN_STOREUPD256,
19482 IX86_BUILTIN_STOREUPS256,
19483 IX86_BUILTIN_LDDQU256,
19484 IX86_BUILTIN_LOADDQU256,
19485 IX86_BUILTIN_STOREDQU256,
19486 IX86_BUILTIN_MASKLOADPD,
19487 IX86_BUILTIN_MASKLOADPS,
19488 IX86_BUILTIN_MASKSTOREPD,
19489 IX86_BUILTIN_MASKSTOREPS,
19490 IX86_BUILTIN_MASKLOADPD256,
19491 IX86_BUILTIN_MASKLOADPS256,
19492 IX86_BUILTIN_MASKSTOREPD256,
19493 IX86_BUILTIN_MASKSTOREPS256,
19494 IX86_BUILTIN_MOVSHDUP256,
19495 IX86_BUILTIN_MOVSLDUP256,
19496 IX86_BUILTIN_MOVDDUP256,
19498 IX86_BUILTIN_SQRTPD256,
19499 IX86_BUILTIN_SQRTPS256,
19500 IX86_BUILTIN_SQRTPS_NR256,
19501 IX86_BUILTIN_RSQRTPS256,
19502 IX86_BUILTIN_RSQRTPS_NR256,
19504 IX86_BUILTIN_RCPPS256,
19506 IX86_BUILTIN_ROUNDPD256,
19507 IX86_BUILTIN_ROUNDPS256,
19509 IX86_BUILTIN_UNPCKHPD256,
19510 IX86_BUILTIN_UNPCKLPD256,
19511 IX86_BUILTIN_UNPCKHPS256,
19512 IX86_BUILTIN_UNPCKLPS256,
19514 IX86_BUILTIN_SI256_SI,
19515 IX86_BUILTIN_PS256_PS,
19516 IX86_BUILTIN_PD256_PD,
19517 IX86_BUILTIN_SI_SI256,
19518 IX86_BUILTIN_PS_PS256,
19519 IX86_BUILTIN_PD_PD256,
19521 IX86_BUILTIN_VTESTZPD,
19522 IX86_BUILTIN_VTESTCPD,
19523 IX86_BUILTIN_VTESTNZCPD,
19524 IX86_BUILTIN_VTESTZPS,
19525 IX86_BUILTIN_VTESTCPS,
19526 IX86_BUILTIN_VTESTNZCPS,
19527 IX86_BUILTIN_VTESTZPD256,
19528 IX86_BUILTIN_VTESTCPD256,
19529 IX86_BUILTIN_VTESTNZCPD256,
19530 IX86_BUILTIN_VTESTZPS256,
19531 IX86_BUILTIN_VTESTCPS256,
19532 IX86_BUILTIN_VTESTNZCPS256,
19533 IX86_BUILTIN_PTESTZ256,
19534 IX86_BUILTIN_PTESTC256,
19535 IX86_BUILTIN_PTESTNZC256,
19537 IX86_BUILTIN_MOVMSKPD256,
19538 IX86_BUILTIN_MOVMSKPS256,
19540 /* TFmode support builtins. */
19542 IX86_BUILTIN_FABSQ,
19543 IX86_BUILTIN_COPYSIGNQ,
19545 /* SSE5 instructions */
19546 IX86_BUILTIN_FMADDSS,
19547 IX86_BUILTIN_FMADDSD,
19548 IX86_BUILTIN_FMADDPS,
19549 IX86_BUILTIN_FMADDPD,
19550 IX86_BUILTIN_FMSUBSS,
19551 IX86_BUILTIN_FMSUBSD,
19552 IX86_BUILTIN_FMSUBPS,
19553 IX86_BUILTIN_FMSUBPD,
19554 IX86_BUILTIN_FNMADDSS,
19555 IX86_BUILTIN_FNMADDSD,
19556 IX86_BUILTIN_FNMADDPS,
19557 IX86_BUILTIN_FNMADDPD,
19558 IX86_BUILTIN_FNMSUBSS,
19559 IX86_BUILTIN_FNMSUBSD,
19560 IX86_BUILTIN_FNMSUBPS,
19561 IX86_BUILTIN_FNMSUBPD,
19562 IX86_BUILTIN_PCMOV,
19563 IX86_BUILTIN_PCMOV_V2DI,
19564 IX86_BUILTIN_PCMOV_V4SI,
19565 IX86_BUILTIN_PCMOV_V8HI,
19566 IX86_BUILTIN_PCMOV_V16QI,
19567 IX86_BUILTIN_PCMOV_V4SF,
19568 IX86_BUILTIN_PCMOV_V2DF,
19569 IX86_BUILTIN_PPERM,
19570 IX86_BUILTIN_PERMPS,
19571 IX86_BUILTIN_PERMPD,
19572 IX86_BUILTIN_PMACSSWW,
19573 IX86_BUILTIN_PMACSWW,
19574 IX86_BUILTIN_PMACSSWD,
19575 IX86_BUILTIN_PMACSWD,
19576 IX86_BUILTIN_PMACSSDD,
19577 IX86_BUILTIN_PMACSDD,
19578 IX86_BUILTIN_PMACSSDQL,
19579 IX86_BUILTIN_PMACSSDQH,
19580 IX86_BUILTIN_PMACSDQL,
19581 IX86_BUILTIN_PMACSDQH,
19582 IX86_BUILTIN_PMADCSSWD,
19583 IX86_BUILTIN_PMADCSWD,
19584 IX86_BUILTIN_PHADDBW,
19585 IX86_BUILTIN_PHADDBD,
19586 IX86_BUILTIN_PHADDBQ,
19587 IX86_BUILTIN_PHADDWD,
19588 IX86_BUILTIN_PHADDWQ,
19589 IX86_BUILTIN_PHADDDQ,
19590 IX86_BUILTIN_PHADDUBW,
19591 IX86_BUILTIN_PHADDUBD,
19592 IX86_BUILTIN_PHADDUBQ,
19593 IX86_BUILTIN_PHADDUWD,
19594 IX86_BUILTIN_PHADDUWQ,
19595 IX86_BUILTIN_PHADDUDQ,
19596 IX86_BUILTIN_PHSUBBW,
19597 IX86_BUILTIN_PHSUBWD,
19598 IX86_BUILTIN_PHSUBDQ,
19599 IX86_BUILTIN_PROTB,
19600 IX86_BUILTIN_PROTW,
19601 IX86_BUILTIN_PROTD,
19602 IX86_BUILTIN_PROTQ,
19603 IX86_BUILTIN_PROTB_IMM,
19604 IX86_BUILTIN_PROTW_IMM,
19605 IX86_BUILTIN_PROTD_IMM,
19606 IX86_BUILTIN_PROTQ_IMM,
19607 IX86_BUILTIN_PSHLB,
19608 IX86_BUILTIN_PSHLW,
19609 IX86_BUILTIN_PSHLD,
19610 IX86_BUILTIN_PSHLQ,
19611 IX86_BUILTIN_PSHAB,
19612 IX86_BUILTIN_PSHAW,
19613 IX86_BUILTIN_PSHAD,
19614 IX86_BUILTIN_PSHAQ,
19615 IX86_BUILTIN_FRCZSS,
19616 IX86_BUILTIN_FRCZSD,
19617 IX86_BUILTIN_FRCZPS,
19618 IX86_BUILTIN_FRCZPD,
19619 IX86_BUILTIN_CVTPH2PS,
19620 IX86_BUILTIN_CVTPS2PH,
19622 IX86_BUILTIN_COMEQSS,
19623 IX86_BUILTIN_COMNESS,
19624 IX86_BUILTIN_COMLTSS,
19625 IX86_BUILTIN_COMLESS,
19626 IX86_BUILTIN_COMGTSS,
19627 IX86_BUILTIN_COMGESS,
19628 IX86_BUILTIN_COMUEQSS,
19629 IX86_BUILTIN_COMUNESS,
19630 IX86_BUILTIN_COMULTSS,
19631 IX86_BUILTIN_COMULESS,
19632 IX86_BUILTIN_COMUGTSS,
19633 IX86_BUILTIN_COMUGESS,
19634 IX86_BUILTIN_COMORDSS,
19635 IX86_BUILTIN_COMUNORDSS,
19636 IX86_BUILTIN_COMFALSESS,
19637 IX86_BUILTIN_COMTRUESS,
19639 IX86_BUILTIN_COMEQSD,
19640 IX86_BUILTIN_COMNESD,
19641 IX86_BUILTIN_COMLTSD,
19642 IX86_BUILTIN_COMLESD,
19643 IX86_BUILTIN_COMGTSD,
19644 IX86_BUILTIN_COMGESD,
19645 IX86_BUILTIN_COMUEQSD,
19646 IX86_BUILTIN_COMUNESD,
19647 IX86_BUILTIN_COMULTSD,
19648 IX86_BUILTIN_COMULESD,
19649 IX86_BUILTIN_COMUGTSD,
19650 IX86_BUILTIN_COMUGESD,
19651 IX86_BUILTIN_COMORDSD,
19652 IX86_BUILTIN_COMUNORDSD,
19653 IX86_BUILTIN_COMFALSESD,
19654 IX86_BUILTIN_COMTRUESD,
19656 IX86_BUILTIN_COMEQPS,
19657 IX86_BUILTIN_COMNEPS,
19658 IX86_BUILTIN_COMLTPS,
19659 IX86_BUILTIN_COMLEPS,
19660 IX86_BUILTIN_COMGTPS,
19661 IX86_BUILTIN_COMGEPS,
19662 IX86_BUILTIN_COMUEQPS,
19663 IX86_BUILTIN_COMUNEPS,
19664 IX86_BUILTIN_COMULTPS,
19665 IX86_BUILTIN_COMULEPS,
19666 IX86_BUILTIN_COMUGTPS,
19667 IX86_BUILTIN_COMUGEPS,
19668 IX86_BUILTIN_COMORDPS,
19669 IX86_BUILTIN_COMUNORDPS,
19670 IX86_BUILTIN_COMFALSEPS,
19671 IX86_BUILTIN_COMTRUEPS,
19673 IX86_BUILTIN_COMEQPD,
19674 IX86_BUILTIN_COMNEPD,
19675 IX86_BUILTIN_COMLTPD,
19676 IX86_BUILTIN_COMLEPD,
19677 IX86_BUILTIN_COMGTPD,
19678 IX86_BUILTIN_COMGEPD,
19679 IX86_BUILTIN_COMUEQPD,
19680 IX86_BUILTIN_COMUNEPD,
19681 IX86_BUILTIN_COMULTPD,
19682 IX86_BUILTIN_COMULEPD,
19683 IX86_BUILTIN_COMUGTPD,
19684 IX86_BUILTIN_COMUGEPD,
19685 IX86_BUILTIN_COMORDPD,
19686 IX86_BUILTIN_COMUNORDPD,
19687 IX86_BUILTIN_COMFALSEPD,
19688 IX86_BUILTIN_COMTRUEPD,
19690 IX86_BUILTIN_PCOMEQUB,
19691 IX86_BUILTIN_PCOMNEUB,
19692 IX86_BUILTIN_PCOMLTUB,
19693 IX86_BUILTIN_PCOMLEUB,
19694 IX86_BUILTIN_PCOMGTUB,
19695 IX86_BUILTIN_PCOMGEUB,
19696 IX86_BUILTIN_PCOMFALSEUB,
19697 IX86_BUILTIN_PCOMTRUEUB,
19698 IX86_BUILTIN_PCOMEQUW,
19699 IX86_BUILTIN_PCOMNEUW,
19700 IX86_BUILTIN_PCOMLTUW,
19701 IX86_BUILTIN_PCOMLEUW,
19702 IX86_BUILTIN_PCOMGTUW,
19703 IX86_BUILTIN_PCOMGEUW,
19704 IX86_BUILTIN_PCOMFALSEUW,
19705 IX86_BUILTIN_PCOMTRUEUW,
19706 IX86_BUILTIN_PCOMEQUD,
19707 IX86_BUILTIN_PCOMNEUD,
19708 IX86_BUILTIN_PCOMLTUD,
19709 IX86_BUILTIN_PCOMLEUD,
19710 IX86_BUILTIN_PCOMGTUD,
19711 IX86_BUILTIN_PCOMGEUD,
19712 IX86_BUILTIN_PCOMFALSEUD,
19713 IX86_BUILTIN_PCOMTRUEUD,
19714 IX86_BUILTIN_PCOMEQUQ,
19715 IX86_BUILTIN_PCOMNEUQ,
19716 IX86_BUILTIN_PCOMLTUQ,
19717 IX86_BUILTIN_PCOMLEUQ,
19718 IX86_BUILTIN_PCOMGTUQ,
19719 IX86_BUILTIN_PCOMGEUQ,
19720 IX86_BUILTIN_PCOMFALSEUQ,
19721 IX86_BUILTIN_PCOMTRUEUQ,
19723 IX86_BUILTIN_PCOMEQB,
19724 IX86_BUILTIN_PCOMNEB,
19725 IX86_BUILTIN_PCOMLTB,
19726 IX86_BUILTIN_PCOMLEB,
19727 IX86_BUILTIN_PCOMGTB,
19728 IX86_BUILTIN_PCOMGEB,
19729 IX86_BUILTIN_PCOMFALSEB,
19730 IX86_BUILTIN_PCOMTRUEB,
19731 IX86_BUILTIN_PCOMEQW,
19732 IX86_BUILTIN_PCOMNEW,
19733 IX86_BUILTIN_PCOMLTW,
19734 IX86_BUILTIN_PCOMLEW,
19735 IX86_BUILTIN_PCOMGTW,
19736 IX86_BUILTIN_PCOMGEW,
19737 IX86_BUILTIN_PCOMFALSEW,
19738 IX86_BUILTIN_PCOMTRUEW,
19739 IX86_BUILTIN_PCOMEQD,
19740 IX86_BUILTIN_PCOMNED,
19741 IX86_BUILTIN_PCOMLTD,
19742 IX86_BUILTIN_PCOMLED,
19743 IX86_BUILTIN_PCOMGTD,
19744 IX86_BUILTIN_PCOMGED,
19745 IX86_BUILTIN_PCOMFALSED,
19746 IX86_BUILTIN_PCOMTRUED,
19747 IX86_BUILTIN_PCOMEQQ,
19748 IX86_BUILTIN_PCOMNEQ,
19749 IX86_BUILTIN_PCOMLTQ,
19750 IX86_BUILTIN_PCOMLEQ,
19751 IX86_BUILTIN_PCOMGTQ,
19752 IX86_BUILTIN_PCOMGEQ,
19753 IX86_BUILTIN_PCOMFALSEQ,
19754 IX86_BUILTIN_PCOMTRUEQ,
19759 /* Table for the ix86 builtin decls. */
19760 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
/* Builtins whose required ISA was not enabled at startup are recorded in
   ix86_builtins_isa (indexed by the same IX86_BUILTIN_* code) and are
   materialized lazily by ix86_add_new_builtins once function-specific
   options enable the ISA.  GTY(()) marks both tables as roots for GCC's
   garbage collector so the trees stay live across collections. */
19762 /* Table of all of the builtin functions that are possible with different ISA's
19763 but are waiting to be built until a function is declared to use that
19765 struct builtin_isa GTY(())
19767 tree type; /* builtin type to use in the declaration */
19768 const char *name; /* function name */
19769 int isa; /* isa_flags this builtin is defined for */
19770 bool const_p; /* true if the declaration is constant */
19773 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
19776 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
19777 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
19778 * function decl in the ix86_builtins array. Returns the function decl or
19779 * NULL_TREE, if the builtin was not added.
19781 * If the front end has a special hook for builtin functions, delay adding
19782 * builtin functions that aren't in the current ISA until the ISA is changed
19783 * with function specific optimization. Doing so, can save about 300K for the
19784 * default compiler. When the builtin is expanded, check at that time whether
19787 * If the front end doesn't have a special hook, record all builtins, even if
19788 * it isn't an instruction set in the current ISA in case the user uses
19789 * function specific options for a different ISA, so that we don't get scope
19790 * errors if a builtin is added in the middle of a function scope. */
19793 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
19795 tree decl = NULL_TREE;
/* Builtins that require 64-bit support are dropped entirely on 32-bit
   targets; for them NULL_TREE is returned. */
19797 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
19799 ix86_builtins_isa[(int) code].isa = mask;
/* Create the decl immediately if the builtin's ISA is already enabled,
   or if the front end has no special builtin hook (lang hook is the
   plain ext-scope function), in which case deferral is not possible
   and everything must be recorded up front. */
19801 if ((mask & ix86_isa_flags) != 0
19802 || (lang_hooks.builtin_function
19803 == lang_hooks.builtin_function_ext_scope))
19806 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
/* Remember the decl and clear the deferred-type slot so this code is
   not re-added by ix86_add_new_builtins. */
19808 ix86_builtins[(int) code] = decl;
19809 ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Deferred case: record name/type so ix86_add_new_builtins can build
   the decl later, when the ISA becomes enabled.  const_p starts false;
   def_builtin_const flips it when appropriate. */
19813 ix86_builtins[(int) code] = NULL_TREE;
19814 ix86_builtins_isa[(int) code].const_p = false;
19815 ix86_builtins_isa[(int) code].type = type;
19816 ix86_builtins_isa[(int) code].name = name;
19823 /* Like def_builtin, but also marks the function decl "const". */
19826 def_builtin_const (int mask, const char *name, tree type,
19827 enum ix86_builtins code)
19829 tree decl = def_builtin (mask, name, type, code);
/* If the decl was created now, mark it "const" directly. */
19831 TREE_READONLY (decl) = 1;
/* If creation was deferred, record const-ness so ix86_add_new_builtins
   applies TREE_READONLY when it finally builds the decl. */
19833 ix86_builtins_isa[(int) code].const_p = true;
19838 /* Add any new builtin functions for a given ISA that may not have been
19839 declared. This saves a bit of space compared to adding all of the
19840 declarations to the tree, even if we didn't use them. */
19843 ix86_add_new_builtins (int isa)
/* Scan every builtin code; a non-NULL deferred type recorded by
   def_builtin marks a candidate that still needs its decl built. */
19848 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
19850 if ((ix86_builtins_isa[i].isa & isa) != 0
19851 && ix86_builtins_isa[i].type != NULL_TREE)
/* Build the decl at extended (file) scope so it is usable even when
   the ISA change happens in the middle of a function body. */
19853 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
19854 ix86_builtins_isa[i].type,
19855 i, BUILT_IN_MD, NULL,
19858 ix86_builtins[i] = decl;
/* Clear the deferred type so this builtin is only ever added once. */
19859 ix86_builtins_isa[i].type = NULL_TREE;
/* Honor the const-ness recorded by def_builtin_const. */
19860 if (ix86_builtins_isa[i].const_p)
19861 TREE_READONLY (decl) = 1;
19866 /* Bits for builtin_description.flag. */
19868 /* Set when we don't support the comparison natively, and should
19869 swap_comparison in order to support it. */
19870 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One table entry per expandable builtin: the ISA mask that enables it,
   the insn pattern to emit, the user-visible __builtin_ia32_* name, the
   IX86_BUILTIN_* code, and — for comparison builtins — the RTL
   comparison code to expand with. */
19872 struct builtin_description
19874 const unsigned int mask;
19875 const enum insn_code icode;
19876 const char *const name;
19877 const enum ix86_builtins code;
19878 const enum rtx_code comparison;
/* comis/ucomis scalar float compare builtins (SSE for SS, SSE2 for SD).
   Note the eq/lt/le entries carry the unordered RTL codes (UNEQ, UNLT,
   UNLE) and neq carries LTGT — NOTE(review): presumably to match the
   instructions' behavior for NaN operands; confirm against the expander. */
19882 static const struct builtin_description bdesc_comi[] =
19884 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
19885 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
19886 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
19887 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
19888 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
19889 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
19890 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
19891 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
19892 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
19893 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
19894 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
19895 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
/* SSE2 double-precision (comisd/ucomisd) variants follow. */
19896 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
19897 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
19898 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
19899 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
19900 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
19901 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
19902 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
19903 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
19904 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
19905 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
19906 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
19907 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 pcmpestri/pcmpestrm (explicit-length string compare) builtins.
   The index/mask forms leave the comparison slot at 0; the flag-reading
   forms (a/c/o/s/z) stash the condition-code mode there instead, so the
   expander knows which EFLAGS bit to extract. */
19910 static const struct builtin_description bdesc_pcmpestr[] =
19913 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
19914 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
19915 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
19916 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
19917 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
19918 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
19919 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 pcmpistri/pcmpistrm (implicit-length string compare) builtins.
   Layout mirrors bdesc_pcmpestr: flag-reading variants carry the CC mode
   in the comparison slot. */
19922 static const struct builtin_description bdesc_pcmpistr[] =
19925 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
19926 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
19927 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
19928 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
19929 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
19930 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
19931 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
19934 /* Special builtin types */
/* Naming scheme: RET_FTYPE_ARG1[_ARG2...].  P prefix = pointer, PC =
   pointer to const; e.g. V4SF_FTYPE_PCFLOAT is "V4SF f(const float *)".
   These encode the signatures of load/store-style builtins whose pointer
   arguments need special handling by the expander. */
19937 SPECIAL_FTYPE_UNKNOWN,
19939 V32QI_FTYPE_PCCHAR,
19940 V16QI_FTYPE_PCCHAR,
19942 V8SF_FTYPE_PCFLOAT,
19944 V4DF_FTYPE_PCDOUBLE,
19945 V4SF_FTYPE_PCFLOAT,
19946 V2DF_FTYPE_PCDOUBLE,
19947 V8SF_FTYPE_PCV8SF_V8SF,
19948 V4DF_FTYPE_PCV4DF_V4DF,
19949 V4SF_FTYPE_V4SF_PCV2SF,
19950 V4SF_FTYPE_PCV4SF_V4SF,
19951 V2DF_FTYPE_V2DF_PCDOUBLE,
19952 V2DF_FTYPE_PCV2DF_V2DF,
/* Store-style signatures: void return, destination pointer first. */
19954 VOID_FTYPE_PV2SF_V4SF,
19955 VOID_FTYPE_PV2DI_V2DI,
19956 VOID_FTYPE_PCHAR_V32QI,
19957 VOID_FTYPE_PCHAR_V16QI,
19958 VOID_FTYPE_PFLOAT_V8SF,
19959 VOID_FTYPE_PFLOAT_V4SF,
19960 VOID_FTYPE_PDOUBLE_V4DF,
19961 VOID_FTYPE_PDOUBLE_V2DF,
19963 VOID_FTYPE_PINT_INT,
/* Masked-store signatures (pointer, mask, value). */
19964 VOID_FTYPE_PV8SF_V8SF_V8SF,
19965 VOID_FTYPE_PV4DF_V4DF_V4DF,
19966 VOID_FTYPE_PV4SF_V4SF_V4SF,
19967 VOID_FTYPE_PV2DF_V2DF_V2DF
19970 /* Builtin types */
/* Function-type codes for the builtins in bdesc_args below; stored,
   cast to int, in each entry's last field.  Same RET_FTYPE_ARGS
   encoding as ix86_special_builtin_type, with extra suffixes that tell
   the expander how to treat the operands:
     _COUNT - the last operand is a shift count (see the psll*/psrl*/
              psra* entries in bdesc_args);
     _SWAP  - the two vector operands are swapped before emitting the
              insn (used to express cmpgt*/cmpge* via the LT/LE
              compares, see the cmpgtps entry);
     _PTEST - ptest-style comparison returning an int condition.
   NOTE(review): _VEC_MERGE appears only on single-input scalar ops
   (sqrtss/rsqrtss/rcpss) whose result presumably merges into the first
   operand's upper elements -- confirm in the expander.  Likewise
   V2DI2TI/V1DI2DI look like "DI vector viewed as TImode/DImode" for
   the palignr-style insns -- verify against the expander.  */
19971 enum ix86_builtin_type
19974 FLOAT128_FTYPE_FLOAT128,
19976 FLOAT128_FTYPE_FLOAT128_FLOAT128,
/* ptest-style comparisons: int condition from two vector operands.  */
19977 INT_FTYPE_V8SF_V8SF_PTEST,
19978 INT_FTYPE_V4DI_V4DI_PTEST,
19979 INT_FTYPE_V4DF_V4DF_PTEST,
19980 INT_FTYPE_V4SF_V4SF_PTEST,
19981 INT_FTYPE_V2DI_V2DI_PTEST,
19982 INT_FTYPE_V2DF_V2DF_PTEST,
20014 V4SF_FTYPE_V4SF_VEC_MERGE,
20023 V2DF_FTYPE_V2DF_VEC_MERGE,
/* Two-operand vector forms.  */
20034 V16QI_FTYPE_V16QI_V16QI,
20035 V16QI_FTYPE_V8HI_V8HI,
20036 V8QI_FTYPE_V8QI_V8QI,
20037 V8QI_FTYPE_V4HI_V4HI,
20038 V8HI_FTYPE_V8HI_V8HI,
20039 V8HI_FTYPE_V8HI_V8HI_COUNT,
20040 V8HI_FTYPE_V16QI_V16QI,
20041 V8HI_FTYPE_V4SI_V4SI,
20042 V8HI_FTYPE_V8HI_SI_COUNT,
20043 V8SF_FTYPE_V8SF_V8SF,
20044 V8SF_FTYPE_V8SF_V8SI,
20045 V4SI_FTYPE_V4SI_V4SI,
20046 V4SI_FTYPE_V4SI_V4SI_COUNT,
20047 V4SI_FTYPE_V8HI_V8HI,
20048 V4SI_FTYPE_V4SF_V4SF,
20049 V4SI_FTYPE_V2DF_V2DF,
20050 V4SI_FTYPE_V4SI_SI_COUNT,
20051 V4HI_FTYPE_V4HI_V4HI,
20052 V4HI_FTYPE_V4HI_V4HI_COUNT,
20053 V4HI_FTYPE_V8QI_V8QI,
20054 V4HI_FTYPE_V2SI_V2SI,
20055 V4HI_FTYPE_V4HI_SI_COUNT,
20056 V4DF_FTYPE_V4DF_V4DF,
20057 V4DF_FTYPE_V4DF_V4DI,
20058 V4SF_FTYPE_V4SF_V4SF,
20059 V4SF_FTYPE_V4SF_V4SF_SWAP,
20060 V4SF_FTYPE_V4SF_V4SI,
20061 V4SF_FTYPE_V4SF_V2SI,
20062 V4SF_FTYPE_V4SF_V2DF,
20063 V4SF_FTYPE_V4SF_DI,
20064 V4SF_FTYPE_V4SF_SI,
20065 V2DI_FTYPE_V2DI_V2DI,
20066 V2DI_FTYPE_V2DI_V2DI_COUNT,
20067 V2DI_FTYPE_V16QI_V16QI,
20068 V2DI_FTYPE_V4SI_V4SI,
20069 V2DI_FTYPE_V2DI_V16QI,
20070 V2DI_FTYPE_V2DF_V2DF,
20071 V2DI_FTYPE_V2DI_SI_COUNT,
20072 V2SI_FTYPE_V2SI_V2SI,
20073 V2SI_FTYPE_V2SI_V2SI_COUNT,
20074 V2SI_FTYPE_V4HI_V4HI,
20075 V2SI_FTYPE_V2SF_V2SF,
20076 V2SI_FTYPE_V2SI_SI_COUNT,
20077 V2DF_FTYPE_V2DF_V2DF,
20078 V2DF_FTYPE_V2DF_V2DF_SWAP,
20079 V2DF_FTYPE_V2DF_V4SF,
20080 V2DF_FTYPE_V2DF_V2DI,
20081 V2DF_FTYPE_V2DF_DI,
20082 V2DF_FTYPE_V2DF_SI,
20083 V2SF_FTYPE_V2SF_V2SF,
20084 V1DI_FTYPE_V1DI_V1DI,
20085 V1DI_FTYPE_V1DI_V1DI_COUNT,
20086 V1DI_FTYPE_V8QI_V8QI,
20087 V1DI_FTYPE_V2SI_V2SI,
20088 V1DI_FTYPE_V1DI_SI_COUNT,
/* Scalar integer forms (crc32 and friends).  */
20089 UINT64_FTYPE_UINT64_UINT64,
20090 UINT_FTYPE_UINT_UINT,
20091 UINT_FTYPE_UINT_USHORT,
20092 UINT_FTYPE_UINT_UCHAR,
/* Vector-plus-immediate forms.  */
20093 V8HI_FTYPE_V8HI_INT,
20094 V4SI_FTYPE_V4SI_INT,
20095 V4HI_FTYPE_V4HI_INT,
20096 V8SF_FTYPE_V8SF_INT,
20097 V4SI_FTYPE_V8SI_INT,
20098 V4SF_FTYPE_V8SF_INT,
20099 V2DF_FTYPE_V4DF_INT,
20100 V4DF_FTYPE_V4DF_INT,
20101 V4SF_FTYPE_V4SF_INT,
20102 V2DI_FTYPE_V2DI_INT,
20103 V2DI2TI_FTYPE_V2DI_INT,
20104 V2DF_FTYPE_V2DF_INT,
/* Three-operand vector forms.  */
20105 V16QI_FTYPE_V16QI_V16QI_V16QI,
20106 V8SF_FTYPE_V8SF_V8SF_V8SF,
20107 V4DF_FTYPE_V4DF_V4DF_V4DF,
20108 V4SF_FTYPE_V4SF_V4SF_V4SF,
20109 V2DF_FTYPE_V2DF_V2DF_V2DF,
/* Two vectors plus an immediate.  */
20110 V16QI_FTYPE_V16QI_V16QI_INT,
20111 V8SI_FTYPE_V8SI_V8SI_INT,
20112 V8SI_FTYPE_V8SI_V4SI_INT,
20113 V8HI_FTYPE_V8HI_V8HI_INT,
20114 V8SF_FTYPE_V8SF_V8SF_INT,
20115 V8SF_FTYPE_V8SF_V4SF_INT,
20116 V4SI_FTYPE_V4SI_V4SI_INT,
20117 V4DF_FTYPE_V4DF_V4DF_INT,
20118 V4DF_FTYPE_V4DF_V2DF_INT,
20119 V4SF_FTYPE_V4SF_V4SF_INT,
20120 V2DI_FTYPE_V2DI_V2DI_INT,
20121 V2DI2TI_FTYPE_V2DI_V2DI_INT,
20122 V1DI2DI_FTYPE_V1DI_V1DI_INT,
20123 V2DF_FTYPE_V2DF_V2DF_INT,
/* AVX blendv-style: two vectors, a selector vector and an immediate.  */
20124 V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
20125 V4DF_FTYPE_V4DF_V4DF_V4DI_INT,
20126 V4SF_FTYPE_V4SF_V4SF_V4SI_INT,
20127 V2DF_FTYPE_V2DF_V2DF_V2DI_INT,
20128 V2DI_FTYPE_V2DI_UINT_UINT,
20129 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
20132 /* Special builtins with variable number of arguments. */
/* Each entry gives: the ISA option mask that must be enabled for the
   builtin to exist, the insn code used to expand it, the builtin's
   source-level name (0 when no name is attached by this table), the
   IX86_BUILTIN_* enumerator, a comparison code (UNKNOWN when unused),
   and the entry's ix86_special_builtin_type, cast to int.  */
20133 static const struct builtin_description bdesc_special_args[] =
/* MMX */
20136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* 3DNow! */
20139 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE */
20142 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20143 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
20144 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20146 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20147 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
20148 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20149 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
20151 /* SSE or 3DNow!A */
20152 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20153 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
/* SSE2 */
20156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
20158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
20160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
20162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
20163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
20164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
20166 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
20167 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
/* SSE3 */
20170 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
/* SSE4.1 */
20173 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
/* SSE4A */
20176 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
20177 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
/* AVX */
20180 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
20181 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
20182 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
20184 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
20185 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20186 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20187 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
20188 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
20190 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
20191 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
20192 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
20193 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
20194 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20195 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
20196 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
20198 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
20199 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
20200 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
20201 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
20202 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
20203 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
20204 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
20205 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
20208 /* Builtins with variable number of arguments. */
20209 static const struct builtin_description bdesc_args[] =
20212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20215 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20247 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
20248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
20250 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
20252 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
20254 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
20256 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20257 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20258 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20259 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20260 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20261 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20263 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20264 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20265 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
20266 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20267 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20268 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
20270 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
20271 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
20272 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
20273 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
20276 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20277 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20278 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20279 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20281 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20282 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20283 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20284 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20285 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20286 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
20287 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20288 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20289 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20290 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20291 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20292 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20293 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20294 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20295 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20298 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
20299 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
20300 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
20301 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
20302 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20303 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
20306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
20307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20308 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20310 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
20312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
20313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
20314 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
20315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
20316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
20317 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
20319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20321 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20322 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20323 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20327 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20328 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20330 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
20331 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
20332 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
20333 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20334 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20335 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20336 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
20337 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
20338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
20339 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20340 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
20341 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20342 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
20343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
20344 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
20345 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20346 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
20347 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
20348 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
20349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20350 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
20351 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
20353 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20354 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20355 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20356 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20358 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20359 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20360 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20361 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20363 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20364 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20365 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20366 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20367 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
20369 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
20370 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
20371 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
20373 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
20375 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20376 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20377 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
20379 /* SSE MMX or 3Dnow!A */
20380 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20381 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20382 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20384 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20385 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20386 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20387 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20389 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
20390 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
20392 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
20395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
20398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
20399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
20400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
20401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
20403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
20404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
20405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
20406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
20407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
20409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
20411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
20412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
20413 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
20414 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
20416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
20417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
20418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
20420 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20421 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20422 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20423 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
20430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
20431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
20432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
20434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
20436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
20437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
20438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
20440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
20442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
20443 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
20444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20445 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
20446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
20447 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
20448 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
20450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20451 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20455 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20457 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20458 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
20464 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
20466 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20467 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20468 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20469 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20470 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20471 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20472 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20473 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20484 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20485 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
20487 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20489 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20490 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20502 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20503 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20504 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
20517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
20518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
20520 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
20523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
20524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
20526 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
20528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
20529 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
20530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
20531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
20533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
20534 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
20535 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
20536 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
20537 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
20538 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
20539 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
20541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
20542 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
20543 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
20544 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
20545 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
20546 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
20547 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
20549 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
20550 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
20551 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
20552 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
20554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
20555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
20556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
20558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
20560 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
20561 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
20563 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  /* SSE2 MMX (64-bit vector paddq/psubq).  */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  /* Horizontal and interleaved add/subtract.  */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  /* SSSE3 (packed absolute value, 128-bit and MMX forms).  */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
20588 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20589 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20590 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20591 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20592 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20593 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20594 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20595 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20596 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20597 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20598 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20599 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20600 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
20601 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
20602 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20603 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20604 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20605 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20606 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20607 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
20608 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20609 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
20610 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20611 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
20614 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
20615 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
20621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
20622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
20626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
20627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
20629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
20630 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
20631 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
20632 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
20633 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
20634 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
20635 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
20636 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
20637 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
20638 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
20639 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
20640 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
20641 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
20643 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
20644 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20645 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20646 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20647 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20648 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20649 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
20650 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20651 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20652 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
20653 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
20654 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
20656 /* SSE4.1 and SSE5 */
20657 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
20658 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
20659 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20660 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20662 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
20663 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
20664 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  /* crc32di operates on 64-bit data, hence the extra 64BIT gate.  */
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
20674 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
20675 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
20676 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
20677 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20680 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
20681 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
20683 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20684 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20685 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20686 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
20689 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
20692 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20693 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20696 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20697 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_nandv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_nandv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20700 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20706 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20707 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20708 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20709 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20710 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20711 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20712 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20713 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20714 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20715 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20716 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20717 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
20720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
20721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
20722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
20724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
20727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
20728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
20734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
20735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
20738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
20739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
20740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
20741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
20742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
20743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
20744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
20745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
20746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
20747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
20748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
20749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
20750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
20751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
20752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
20753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
20754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
20755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT },
20756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT },
20757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT },
20758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT },
20759 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
20760 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
20761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
20763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20765 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
20767 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
20768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20769 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20771 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
20775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
20776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
20778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
20780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
20783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
20784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
20785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
20786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
20787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
20788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
20790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
20791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
20792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
20793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
20794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
20795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
20796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
20797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
20798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
20799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
20800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
20801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
20802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
20803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
20804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
20806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
20807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
20811 enum multi_arg_type {
20821 MULTI_ARG_3_PERMPS,
20822 MULTI_ARG_3_PERMPD,
20829 MULTI_ARG_2_DI_IMM,
20830 MULTI_ARG_2_SI_IMM,
20831 MULTI_ARG_2_HI_IMM,
20832 MULTI_ARG_2_QI_IMM,
20833 MULTI_ARG_2_SF_CMP,
20834 MULTI_ARG_2_DF_CMP,
20835 MULTI_ARG_2_DI_CMP,
20836 MULTI_ARG_2_SI_CMP,
20837 MULTI_ARG_2_HI_CMP,
20838 MULTI_ARG_2_QI_CMP,
20861 static const struct builtin_description bdesc_multi_arg[] =
20863 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
20864 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
20865 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
20866 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
20867 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
20868 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
20869 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
20870 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
20871 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
20872 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
20873 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
20874 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
20875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
20876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
20877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
20878 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
20879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
20880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
20881 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
20882 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
20883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
20884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
20885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
20886 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
20887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
20888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
20889 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
20890 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
20891 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
20892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
20893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
20894 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
20895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
20896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
20897 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
20898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
20899 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
20900 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
20901 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
20902 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
20903 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
20904 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
20905 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
20906 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
20907 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
20908 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
20909 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
20910 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
20911 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
20912 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
20913 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
20914 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
20915 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
20916 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
20917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
20918 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
20919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
20920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
20921 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
20922 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
20923 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
20924 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
20925 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
20926 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
20927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
20928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
20929 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
20930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
20931 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
20932 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
20933 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
20934 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
20935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
20936 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
20937 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
20939 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
20940 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
20941 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
20942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
20943 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
20944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
20945 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
20946 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
20947 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
20948 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
20949 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
20950 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
20951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
20952 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
20953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
20954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
20956 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
20957 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
/* NOTE(review): the "...neq..." spellings below are deliberate aliases — they
   map a second builtin name onto the same IX86_BUILTIN_* code and RTX
   comparison as the corresponding "...ne..." entry (pattern holds for every
   group in this table).  */
20958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
20959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
20960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
20961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
20962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
20963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
20964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
20965 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
20966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
20967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
20968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
20969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
20970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
20971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
/* SSE5 packed single-precision FP compares (comps).  */
20973 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
20974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
20975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
20976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
20977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
20978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
20979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
20980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
20981 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
20982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
20983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
20984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
20985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
20986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
20987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
20988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
/* SSE5 packed double-precision FP compares (compd).  */
20990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
20991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
20992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
20993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
20994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
20995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
20996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
20997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
20998 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
20999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
/* SSE5 signed integer compares, bytes (pcomb).  */
21007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
21008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
21011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
21012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
21013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
/* SSE5 signed integer compares, words (pcomw).  */
21015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
21016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
21019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
21020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
21021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
/* SSE5 signed integer compares, doublewords (pcomd).  */
21023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
21024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
21027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
21028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
21029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
/* SSE5 signed integer compares, quadwords (pcomq).  */
21031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
21035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
21036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
21037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
/* SSE5 unsigned integer compares, bytes (pcomub).  EQ/NE use the "uns2"
   pattern; the ordered compares use the "uns" pattern with LTU/LEU/GTU/GEU.  */
21039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
21040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
21043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
21044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
21045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
/* SSE5 unsigned integer compares, words (pcomuw).  */
21047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
21048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
21051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
21052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
21053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
/* SSE5 unsigned integer compares, doublewords (pcomud).  */
21055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
21056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
21059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
21060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
21061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
/* SSE5 unsigned integer compares, quadwords (pcomuq).  */
21063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
21067 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
21068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
21069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
/* SSE5 constant-result FP compare forms (always false / always true).  */
21071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
21072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
21073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
21074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
21075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
21076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
21077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
21078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
/* SSE5 always-false packed integer compare forms.  */
21080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
21084 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21086 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
21087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
/* SSE5 always-true packed integer compare forms.  */
21089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21091 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
21094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
21095 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
21096 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
21099 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
21100 in the current target ISA to allow the user to compile particular modules
21101 with different target specific options that differ from the command line
21104 ix86_init_mmx_sse_builtins (void)
/* NOTE(review): presumably iterates the builtin_description tables above when
   registering builtins — the loop itself is outside this chunk; confirm.  */
21106 const struct builtin_description * d;
/* Vector type nodes used to build the builtin function signatures below.  */
21109 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
21110 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
21111 tree V1DI_type_node
21112 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
21113 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
21114 tree V2DI_type_node
21115 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
21116 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
21117 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
21118 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
21119 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
21120 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
21121 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
/* Pointer type nodes (the "pc" prefix marks pointer-to-const variants).  */
21123 tree pchar_type_node = build_pointer_type (char_type_node);
21124 tree pcchar_type_node
21125 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
21126 tree pfloat_type_node = build_pointer_type (float_type_node);
21127 tree pcfloat_type_node
21128 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
21129 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
21130 tree pcv2sf_type_node
21131 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
21132 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
21133 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
/* Comparisons.  */
21136 tree int_ftype_v4sf_v4sf
21137 = build_function_type_list (integer_type_node,
21138 V4SF_type_node, V4SF_type_node, NULL_TREE);
21139 tree v4si_ftype_v4sf_v4sf
21140 = build_function_type_list (V4SI_type_node,
21141 V4SF_type_node, V4SF_type_node, NULL_TREE);
21142 /* MMX/SSE/integer conversions. */
21143 tree int_ftype_v4sf
21144 = build_function_type_list (integer_type_node,
21145 V4SF_type_node, NULL_TREE);
21146 tree int64_ftype_v4sf
21147 = build_function_type_list (long_long_integer_type_node,
21148 V4SF_type_node, NULL_TREE);
21149 tree int_ftype_v8qi
21150 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
21151 tree v4sf_ftype_v4sf_int
21152 = build_function_type_list (V4SF_type_node,
21153 V4SF_type_node, integer_type_node, NULL_TREE);
21154 tree v4sf_ftype_v4sf_int64
21155 = build_function_type_list (V4SF_type_node,
21156 V4SF_type_node, long_long_integer_type_node,
21158 tree v4sf_ftype_v4sf_v2si
21159 = build_function_type_list (V4SF_type_node,
21160 V4SF_type_node, V2SI_type_node, NULL_TREE);
21162 /* Miscellaneous. */
21163 tree v8qi_ftype_v4hi_v4hi
21164 = build_function_type_list (V8QI_type_node,
21165 V4HI_type_node, V4HI_type_node, NULL_TREE);
21166 tree v4hi_ftype_v2si_v2si
21167 = build_function_type_list (V4HI_type_node,
21168 V2SI_type_node, V2SI_type_node, NULL_TREE);
21169 tree v4sf_ftype_v4sf_v4sf_int
21170 = build_function_type_list (V4SF_type_node,
21171 V4SF_type_node, V4SF_type_node,
21172 integer_type_node, NULL_TREE);
21173 tree v2si_ftype_v4hi_v4hi
21174 = build_function_type_list (V2SI_type_node,
21175 V4HI_type_node, V4HI_type_node, NULL_TREE);
21176 tree v4hi_ftype_v4hi_int
21177 = build_function_type_list (V4HI_type_node,
21178 V4HI_type_node, integer_type_node, NULL_TREE);
21179 tree v2si_ftype_v2si_int
21180 = build_function_type_list (V2SI_type_node,
21181 V2SI_type_node, integer_type_node, NULL_TREE);
21182 tree v1di_ftype_v1di_int
21183 = build_function_type_list (V1DI_type_node,
21184 V1DI_type_node, integer_type_node, NULL_TREE);
21186 tree void_ftype_void
21187 = build_function_type (void_type_node, void_list_node);
21188 tree void_ftype_unsigned
21189 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
21190 tree void_ftype_unsigned_unsigned
21191 = build_function_type_list (void_type_node, unsigned_type_node,
21192 unsigned_type_node, NULL_TREE);
21193 tree void_ftype_pcvoid_unsigned_unsigned
21194 = build_function_type_list (void_type_node, const_ptr_type_node,
21195 unsigned_type_node, unsigned_type_node,
21197 tree unsigned_ftype_void
21198 = build_function_type (unsigned_type_node, void_list_node);
21199 tree v2si_ftype_v4sf
21200 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
21201 /* Loads/stores. */
21202 tree void_ftype_v8qi_v8qi_pchar
21203 = build_function_type_list (void_type_node,
21204 V8QI_type_node, V8QI_type_node,
21205 pchar_type_node, NULL_TREE);
21206 tree v4sf_ftype_pcfloat
21207 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
21208 tree v4sf_ftype_v4sf_pcv2sf
21209 = build_function_type_list (V4SF_type_node,
21210 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
21211 tree void_ftype_pv2sf_v4sf
21212 = build_function_type_list (void_type_node,
21213 pv2sf_type_node, V4SF_type_node, NULL_TREE);
21214 tree void_ftype_pfloat_v4sf
21215 = build_function_type_list (void_type_node,
21216 pfloat_type_node, V4SF_type_node, NULL_TREE);
21217 tree void_ftype_pdi_di
21218 = build_function_type_list (void_type_node,
21219 pdi_type_node, long_long_unsigned_type_node,
21221 tree void_ftype_pv2di_v2di
21222 = build_function_type_list (void_type_node,
21223 pv2di_type_node, V2DI_type_node, NULL_TREE);
21224 /* Normal vector unops. */
21225 tree v4sf_ftype_v4sf
21226 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
21227 tree v16qi_ftype_v16qi
21228 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
21229 tree v8hi_ftype_v8hi
21230 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
21231 tree v4si_ftype_v4si
21232 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
21233 tree v8qi_ftype_v8qi
21234 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
21235 tree v4hi_ftype_v4hi
21236 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
21238 /* Normal vector binops. */
21239 tree v4sf_ftype_v4sf_v4sf
21240 = build_function_type_list (V4SF_type_node,
21241 V4SF_type_node, V4SF_type_node, NULL_TREE);
21242 tree v8qi_ftype_v8qi_v8qi
21243 = build_function_type_list (V8QI_type_node,
21244 V8QI_type_node, V8QI_type_node, NULL_TREE);
21245 tree v4hi_ftype_v4hi_v4hi
21246 = build_function_type_list (V4HI_type_node,
21247 V4HI_type_node, V4HI_type_node, NULL_TREE);
21248 tree v2si_ftype_v2si_v2si
21249 = build_function_type_list (V2SI_type_node,
21250 V2SI_type_node, V2SI_type_node, NULL_TREE);
21251 tree v1di_ftype_v1di_v1di
21252 = build_function_type_list (V1DI_type_node,
21253 V1DI_type_node, V1DI_type_node, NULL_TREE);
21254 tree v1di_ftype_v1di_v1di_int
21255 = build_function_type_list (V1DI_type_node,
21256 V1DI_type_node, V1DI_type_node,
21257 integer_type_node, NULL_TREE);
21258 tree v2si_ftype_v2sf
21259 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
21260 tree v2sf_ftype_v2si
21261 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
21262 tree v2si_ftype_v2si
21263 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
21264 tree v2sf_ftype_v2sf
21265 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
21266 tree v2sf_ftype_v2sf_v2sf
21267 = build_function_type_list (V2SF_type_node,
21268 V2SF_type_node, V2SF_type_node, NULL_TREE);
21269 tree v2si_ftype_v2sf_v2sf
21270 = build_function_type_list (V2SI_type_node,
21271 V2SF_type_node, V2SF_type_node, NULL_TREE);
21272 tree pint_type_node = build_pointer_type (integer_type_node);
21273 tree pdouble_type_node = build_pointer_type (double_type_node);
21274 tree pcdouble_type_node = build_pointer_type (
21275 build_type_variant (double_type_node, 1, 0));
21276 tree int_ftype_v2df_v2df
21277 = build_function_type_list (integer_type_node,
21278 V2DF_type_node, V2DF_type_node, NULL_TREE);
21280 tree void_ftype_pcvoid
21281 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
21282 tree v4sf_ftype_v4si
21283 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
21284 tree v4si_ftype_v4sf
21285 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
21286 tree v2df_ftype_v4si
21287 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
21288 tree v4si_ftype_v2df
21289 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
21290 tree v4si_ftype_v2df_v2df
21291 = build_function_type_list (V4SI_type_node,
21292 V2DF_type_node, V2DF_type_node, NULL_TREE);
21293 tree v2si_ftype_v2df
21294 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
21295 tree v4sf_ftype_v2df
21296 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
21297 tree v2df_ftype_v2si
21298 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
21299 tree v2df_ftype_v4sf
21300 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
21301 tree int_ftype_v2df
21302 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
21303 tree int64_ftype_v2df
21304 = build_function_type_list (long_long_integer_type_node,
21305 V2DF_type_node, NULL_TREE);
21306 tree v2df_ftype_v2df_int
21307 = build_function_type_list (V2DF_type_node,
21308 V2DF_type_node, integer_type_node, NULL_TREE);
21309 tree v2df_ftype_v2df_int64
21310 = build_function_type_list (V2DF_type_node,
21311 V2DF_type_node, long_long_integer_type_node,
21313 tree v4sf_ftype_v4sf_v2df
21314 = build_function_type_list (V4SF_type_node,
21315 V4SF_type_node, V2DF_type_node, NULL_TREE);
21316 tree v2df_ftype_v2df_v4sf
21317 = build_function_type_list (V2DF_type_node,
21318 V2DF_type_node, V4SF_type_node, NULL_TREE);
21319 tree v2df_ftype_v2df_v2df_int
21320 = build_function_type_list (V2DF_type_node,
21321 V2DF_type_node, V2DF_type_node,
21324 tree v2df_ftype_v2df_pcdouble
21325 = build_function_type_list (V2DF_type_node,
21326 V2DF_type_node, pcdouble_type_node, NULL_TREE);
21327 tree void_ftype_pdouble_v2df
21328 = build_function_type_list (void_type_node,
21329 pdouble_type_node, V2DF_type_node, NULL_TREE);
21330 tree void_ftype_pint_int
21331 = build_function_type_list (void_type_node,
21332 pint_type_node, integer_type_node, NULL_TREE);
21333 tree void_ftype_v16qi_v16qi_pchar
21334 = build_function_type_list (void_type_node,
21335 V16QI_type_node, V16QI_type_node,
21336 pchar_type_node, NULL_TREE);
21337 tree v2df_ftype_pcdouble
21338 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
21339 tree v2df_ftype_v2df_v2df
21340 = build_function_type_list (V2DF_type_node,
21341 V2DF_type_node, V2DF_type_node, NULL_TREE);
21342 tree v16qi_ftype_v16qi_v16qi
21343 = build_function_type_list (V16QI_type_node,
21344 V16QI_type_node, V16QI_type_node, NULL_TREE);
21345 tree v8hi_ftype_v8hi_v8hi
21346 = build_function_type_list (V8HI_type_node,
21347 V8HI_type_node, V8HI_type_node, NULL_TREE);
21348 tree v4si_ftype_v4si_v4si
21349 = build_function_type_list (V4SI_type_node,
21350 V4SI_type_node, V4SI_type_node, NULL_TREE);
21351 tree v2di_ftype_v2di_v2di
21352 = build_function_type_list (V2DI_type_node,
21353 V2DI_type_node, V2DI_type_node, NULL_TREE);
21354 tree v2di_ftype_v2df_v2df
21355 = build_function_type_list (V2DI_type_node,
21356 V2DF_type_node, V2DF_type_node, NULL_TREE);
21357 tree v2df_ftype_v2df
21358 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
21359 tree v2di_ftype_v2di_int
21360 = build_function_type_list (V2DI_type_node,
21361 V2DI_type_node, integer_type_node, NULL_TREE);
21362 tree v2di_ftype_v2di_v2di_int
21363 = build_function_type_list (V2DI_type_node, V2DI_type_node,
21364 V2DI_type_node, integer_type_node, NULL_TREE);
21365 tree v4si_ftype_v4si_int
21366 = build_function_type_list (V4SI_type_node,
21367 V4SI_type_node, integer_type_node, NULL_TREE);
21368 tree v8hi_ftype_v8hi_int
21369 = build_function_type_list (V8HI_type_node,
21370 V8HI_type_node, integer_type_node, NULL_TREE);
21371 tree v4si_ftype_v8hi_v8hi
21372 = build_function_type_list (V4SI_type_node,
21373 V8HI_type_node, V8HI_type_node, NULL_TREE);
21374 tree v1di_ftype_v8qi_v8qi
21375 = build_function_type_list (V1DI_type_node,
21376 V8QI_type_node, V8QI_type_node, NULL_TREE);
21377 tree v1di_ftype_v2si_v2si
21378 = build_function_type_list (V1DI_type_node,
21379 V2SI_type_node, V2SI_type_node, NULL_TREE);
21380 tree v2di_ftype_v16qi_v16qi
21381 = build_function_type_list (V2DI_type_node,
21382 V16QI_type_node, V16QI_type_node, NULL_TREE);
21383 tree v2di_ftype_v4si_v4si
21384 = build_function_type_list (V2DI_type_node,
21385 V4SI_type_node, V4SI_type_node, NULL_TREE);
21386 tree int_ftype_v16qi
21387 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
21388 tree v16qi_ftype_pcchar
21389 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
21390 tree void_ftype_pchar_v16qi
21391 = build_function_type_list (void_type_node,
21392 pchar_type_node, V16QI_type_node, NULL_TREE);
21394 tree v2di_ftype_v2di_unsigned_unsigned
21395 = build_function_type_list (V2DI_type_node, V2DI_type_node,
21396 unsigned_type_node, unsigned_type_node,
21398 tree v2di_ftype_v2di_v2di_unsigned_unsigned
21399 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
21400 unsigned_type_node, unsigned_type_node,
21402 tree v2di_ftype_v2di_v16qi
21403 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
21405 tree v2df_ftype_v2df_v2df_v2df
21406 = build_function_type_list (V2DF_type_node,
21407 V2DF_type_node, V2DF_type_node,
21408 V2DF_type_node, NULL_TREE);
21409 tree v4sf_ftype_v4sf_v4sf_v4sf
21410 = build_function_type_list (V4SF_type_node,
21411 V4SF_type_node, V4SF_type_node,
21412 V4SF_type_node, NULL_TREE);
21413 tree v8hi_ftype_v16qi
21414 = build_function_type_list (V8HI_type_node, V16QI_type_node,
21416 tree v4si_ftype_v16qi
21417 = build_function_type_list (V4SI_type_node, V16QI_type_node,
21419 tree v2di_ftype_v16qi
21420 = build_function_type_list (V2DI_type_node, V16QI_type_node,
21422 tree v4si_ftype_v8hi
21423 = build_function_type_list (V4SI_type_node, V8HI_type_node,
21425 tree v2di_ftype_v8hi
21426 = build_function_type_list (V2DI_type_node, V8HI_type_node,
21428 tree v2di_ftype_v4si
21429 = build_function_type_list (V2DI_type_node, V4SI_type_node,
21431 tree v2di_ftype_pv2di
21432 = build_function_type_list (V2DI_type_node, pv2di_type_node,
21434 tree v16qi_ftype_v16qi_v16qi_int
21435 = build_function_type_list (V16QI_type_node, V16QI_type_node,
21436 V16QI_type_node, integer_type_node,
21438 tree v16qi_ftype_v16qi_v16qi_v16qi
21439 = build_function_type_list (V16QI_type_node, V16QI_type_node,
21440 V16QI_type_node, V16QI_type_node,
21442 tree v8hi_ftype_v8hi_v8hi_int
21443 = build_function_type_list (V8HI_type_node, V8HI_type_node,
21444 V8HI_type_node, integer_type_node,
21446 tree v4si_ftype_v4si_v4si_int
21447 = build_function_type_list (V4SI_type_node, V4SI_type_node,
21448 V4SI_type_node, integer_type_node,
21450 tree int_ftype_v2di_v2di
21451 = build_function_type_list (integer_type_node,
21452 V2DI_type_node, V2DI_type_node,
21454 tree int_ftype_v16qi_int_v16qi_int_int
21455 = build_function_type_list (integer_type_node,
21462 tree v16qi_ftype_v16qi_int_v16qi_int_int
21463 = build_function_type_list (V16QI_type_node,
21470 tree int_ftype_v16qi_v16qi_int
21471 = build_function_type_list (integer_type_node,
21477 /* SSE5 instructions */
21478 tree v2di_ftype_v2di_v2di_v2di
21479 = build_function_type_list (V2DI_type_node,
21485 tree v4si_ftype_v4si_v4si_v4si
21486 = build_function_type_list (V4SI_type_node,
21492 tree v4si_ftype_v4si_v4si_v2di
21493 = build_function_type_list (V4SI_type_node,
21499 tree v8hi_ftype_v8hi_v8hi_v8hi
21500 = build_function_type_list (V8HI_type_node,
21506 tree v8hi_ftype_v8hi_v8hi_v4si
21507 = build_function_type_list (V8HI_type_node,
21513 tree v2df_ftype_v2df_v2df_v16qi
21514 = build_function_type_list (V2DF_type_node,
21520 tree v4sf_ftype_v4sf_v4sf_v16qi
21521 = build_function_type_list (V4SF_type_node,
21527 tree v2di_ftype_v2di_si
21528 = build_function_type_list (V2DI_type_node,
21533 tree v4si_ftype_v4si_si
21534 = build_function_type_list (V4SI_type_node,
21539 tree v8hi_ftype_v8hi_si
21540 = build_function_type_list (V8HI_type_node,
21545 tree v16qi_ftype_v16qi_si
21546 = build_function_type_list (V16QI_type_node,
21550 tree v4sf_ftype_v4hi
21551 = build_function_type_list (V4SF_type_node,
21555 tree v4hi_ftype_v4sf
21556 = build_function_type_list (V4HI_type_node,
21560 tree v2di_ftype_v2di
21561 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
21563 tree v16qi_ftype_v8hi_v8hi
21564 = build_function_type_list (V16QI_type_node,
21565 V8HI_type_node, V8HI_type_node,
21567 tree v8hi_ftype_v4si_v4si
21568 = build_function_type_list (V8HI_type_node,
21569 V4SI_type_node, V4SI_type_node,
21571 tree v8hi_ftype_v16qi_v16qi
21572 = build_function_type_list (V8HI_type_node,
21573 V16QI_type_node, V16QI_type_node,
21575 tree v4hi_ftype_v8qi_v8qi
21576 = build_function_type_list (V4HI_type_node,
21577 V8QI_type_node, V8QI_type_node,
21579 tree unsigned_ftype_unsigned_uchar
21580 = build_function_type_list (unsigned_type_node,
21581 unsigned_type_node,
21582 unsigned_char_type_node,
21584 tree unsigned_ftype_unsigned_ushort
21585 = build_function_type_list (unsigned_type_node,
21586 unsigned_type_node,
21587 short_unsigned_type_node,
21589 tree unsigned_ftype_unsigned_unsigned
21590 = build_function_type_list (unsigned_type_node,
21591 unsigned_type_node,
21592 unsigned_type_node,
21594 tree uint64_ftype_uint64_uint64
21595 = build_function_type_list (long_long_unsigned_type_node,
21596 long_long_unsigned_type_node,
21597 long_long_unsigned_type_node,
21599 tree float_ftype_float
21600 = build_function_type_list (float_type_node,
21605 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
21607 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
21609 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
21611 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
21613 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
21615 tree v8sf_ftype_v8sf
21616 = build_function_type_list (V8SF_type_node,
21619 tree v8si_ftype_v8sf
21620 = build_function_type_list (V8SI_type_node,
21623 tree v8sf_ftype_v8si
21624 = build_function_type_list (V8SF_type_node,
21627 tree v4si_ftype_v4df
21628 = build_function_type_list (V4SI_type_node,
21631 tree v4df_ftype_v4df
21632 = build_function_type_list (V4DF_type_node,
21635 tree v4df_ftype_v4si
21636 = build_function_type_list (V4DF_type_node,
21639 tree v4df_ftype_v4sf
21640 = build_function_type_list (V4DF_type_node,
21643 tree v4sf_ftype_v4df
21644 = build_function_type_list (V4SF_type_node,
21647 tree v8sf_ftype_v8sf_v8sf
21648 = build_function_type_list (V8SF_type_node,
21649 V8SF_type_node, V8SF_type_node,
21651 tree v4df_ftype_v4df_v4df
21652 = build_function_type_list (V4DF_type_node,
21653 V4DF_type_node, V4DF_type_node,
21655 tree v8sf_ftype_v8sf_int
21656 = build_function_type_list (V8SF_type_node,
21657 V8SF_type_node, integer_type_node,
21659 tree v4si_ftype_v8si_int
21660 = build_function_type_list (V4SI_type_node,
21661 V8SI_type_node, integer_type_node,
21663 tree v4df_ftype_v4df_int
21664 = build_function_type_list (V4DF_type_node,
21665 V4DF_type_node, integer_type_node,
21667 tree v4sf_ftype_v8sf_int
21668 = build_function_type_list (V4SF_type_node,
21669 V8SF_type_node, integer_type_node,
21671 tree v2df_ftype_v4df_int
21672 = build_function_type_list (V2DF_type_node,
21673 V4DF_type_node, integer_type_node,
21675 tree v8sf_ftype_v8sf_v8sf_int
21676 = build_function_type_list (V8SF_type_node,
21677 V8SF_type_node, V8SF_type_node,
21680 tree v8sf_ftype_v8sf_v8sf_v8sf
21681 = build_function_type_list (V8SF_type_node,
21682 V8SF_type_node, V8SF_type_node,
21685 tree v4df_ftype_v4df_v4df_v4df
21686 = build_function_type_list (V4DF_type_node,
21687 V4DF_type_node, V4DF_type_node,
21690 tree v8si_ftype_v8si_v8si_int
21691 = build_function_type_list (V8SI_type_node,
21692 V8SI_type_node, V8SI_type_node,
21695 tree v4df_ftype_v4df_v4df_int
21696 = build_function_type_list (V4DF_type_node,
21697 V4DF_type_node, V4DF_type_node,
21700 tree v8sf_ftype_v8sf_v8sf_v8si_int
21701 = build_function_type_list (V8SF_type_node,
21702 V8SF_type_node, V8SF_type_node,
21703 V8SI_type_node, integer_type_node,
21705 tree v4df_ftype_v4df_v4df_v4di_int
21706 = build_function_type_list (V4DF_type_node,
21707 V4DF_type_node, V4DF_type_node,
21708 V4DI_type_node, integer_type_node,
21710 tree v4sf_ftype_v4sf_v4sf_v4si_int
21711 = build_function_type_list (V4SF_type_node,
21712 V4SF_type_node, V4SF_type_node,
21713 V4SI_type_node, integer_type_node,
21715 tree v2df_ftype_v2df_v2df_v2di_int
21716 = build_function_type_list (V2DF_type_node,
21717 V2DF_type_node, V2DF_type_node,
21718 V2DI_type_node, integer_type_node,
21720 tree v8sf_ftype_pcfloat
21721 = build_function_type_list (V8SF_type_node,
21724 tree v4df_ftype_pcdouble
21725 = build_function_type_list (V4DF_type_node,
21726 pcdouble_type_node,
21728 tree pcv4sf_type_node
21729 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
21730 tree pcv2df_type_node
21731 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
21732 tree v8sf_ftype_pcv4sf
21733 = build_function_type_list (V8SF_type_node,
21736 tree v4df_ftype_pcv2df
21737 = build_function_type_list (V4DF_type_node,
21740 tree v32qi_ftype_pcchar
21741 = build_function_type_list (V32QI_type_node,
21744 tree void_ftype_pchar_v32qi
21745 = build_function_type_list (void_type_node,
21746 pchar_type_node, V32QI_type_node,
21748 tree v8si_ftype_v8si_v4si_int
21749 = build_function_type_list (V8SI_type_node,
21750 V8SI_type_node, V4SI_type_node,
21753 tree v8sf_ftype_v8sf_v4sf_int
21754 = build_function_type_list (V8SF_type_node,
21755 V8SF_type_node, V4SF_type_node,
21758 tree v4df_ftype_v4df_v2df_int
21759 = build_function_type_list (V4DF_type_node,
21760 V4DF_type_node, V2DF_type_node,
21763 tree void_ftype_pfloat_v8sf
21764 = build_function_type_list (void_type_node,
21765 pfloat_type_node, V8SF_type_node,
21767 tree void_ftype_pdouble_v4df
21768 = build_function_type_list (void_type_node,
21769 pdouble_type_node, V4DF_type_node,
21771 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
21772 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
21773 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
21774 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
21775 tree pcv8sf_type_node
21776 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
21777 tree pcv4df_type_node
21778 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
21779 tree v8sf_ftype_pcv8sf_v8sf
21780 = build_function_type_list (V8SF_type_node,
21781 pcv8sf_type_node, V8SF_type_node,
21783 tree v4df_ftype_pcv4df_v4df
21784 = build_function_type_list (V4DF_type_node,
21785 pcv4df_type_node, V4DF_type_node,
21787 tree v4sf_ftype_pcv4sf_v4sf
21788 = build_function_type_list (V4SF_type_node,
21789 pcv4sf_type_node, V4SF_type_node,
21791 tree v2df_ftype_pcv2df_v2df
21792 = build_function_type_list (V2DF_type_node,
21793 pcv2df_type_node, V2DF_type_node,
21795 tree void_ftype_pv8sf_v8sf_v8sf
21796 = build_function_type_list (void_type_node,
21797 pv8sf_type_node, V8SF_type_node,
21800 tree void_ftype_pv4df_v4df_v4df
21801 = build_function_type_list (void_type_node,
21802 pv4df_type_node, V4DF_type_node,
21805 tree void_ftype_pv4sf_v4sf_v4sf
21806 = build_function_type_list (void_type_node,
21807 pv4sf_type_node, V4SF_type_node,
21810 tree void_ftype_pv2df_v2df_v2df
21811 = build_function_type_list (void_type_node,
21812 pv2df_type_node, V2DF_type_node,
21815 tree v4df_ftype_v2df
21816 = build_function_type_list (V4DF_type_node,
21819 tree v8sf_ftype_v4sf
21820 = build_function_type_list (V8SF_type_node,
21823 tree v8si_ftype_v4si
21824 = build_function_type_list (V8SI_type_node,
21827 tree v2df_ftype_v4df
21828 = build_function_type_list (V2DF_type_node,
21831 tree v4sf_ftype_v8sf
21832 = build_function_type_list (V4SF_type_node,
21835 tree v4si_ftype_v8si
21836 = build_function_type_list (V4SI_type_node,
21839 tree int_ftype_v4df
21840 = build_function_type_list (integer_type_node,
21843 tree int_ftype_v8sf
21844 = build_function_type_list (integer_type_node,
21847 tree int_ftype_v8sf_v8sf
21848 = build_function_type_list (integer_type_node,
21849 V8SF_type_node, V8SF_type_node,
21851 tree int_ftype_v4di_v4di
21852 = build_function_type_list (integer_type_node,
21853 V4DI_type_node, V4DI_type_node,
21855 tree int_ftype_v4df_v4df
21856 = build_function_type_list (integer_type_node,
21857 V4DF_type_node, V4DF_type_node,
21859 tree v8sf_ftype_v8sf_v8si
21860 = build_function_type_list (V8SF_type_node,
21861 V8SF_type_node, V8SI_type_node,
21863 tree v4df_ftype_v4df_v4di
21864 = build_function_type_list (V4DF_type_node,
21865 V4DF_type_node, V4DI_type_node,
21867 tree v4sf_ftype_v4sf_v4si
21868 = build_function_type_list (V4SF_type_node,
21869 V4SF_type_node, V4SI_type_node, NULL_TREE);
21870 tree v2df_ftype_v2df_v2di
21871 = build_function_type_list (V2DF_type_node,
21872 V2DF_type_node, V2DI_type_node, NULL_TREE);
21876 /* Add all special builtins with variable number of operands. */
21877 for (i = 0, d = bdesc_special_args;
21878 i < ARRAY_SIZE (bdesc_special_args);
21886 switch ((enum ix86_special_builtin_type) d->flag)
21888 case VOID_FTYPE_VOID:
21889 type = void_ftype_void;
21891 case V32QI_FTYPE_PCCHAR:
21892 type = v32qi_ftype_pcchar;
21894 case V16QI_FTYPE_PCCHAR:
21895 type = v16qi_ftype_pcchar;
21897 case V8SF_FTYPE_PCV4SF:
21898 type = v8sf_ftype_pcv4sf;
21900 case V8SF_FTYPE_PCFLOAT:
21901 type = v8sf_ftype_pcfloat;
21903 case V4DF_FTYPE_PCV2DF:
21904 type = v4df_ftype_pcv2df;
21906 case V4DF_FTYPE_PCDOUBLE:
21907 type = v4df_ftype_pcdouble;
21909 case V4SF_FTYPE_PCFLOAT:
21910 type = v4sf_ftype_pcfloat;
21912 case V2DI_FTYPE_PV2DI:
21913 type = v2di_ftype_pv2di;
21915 case V2DF_FTYPE_PCDOUBLE:
21916 type = v2df_ftype_pcdouble;
21918 case V8SF_FTYPE_PCV8SF_V8SF:
21919 type = v8sf_ftype_pcv8sf_v8sf;
21921 case V4DF_FTYPE_PCV4DF_V4DF:
21922 type = v4df_ftype_pcv4df_v4df;
21924 case V4SF_FTYPE_V4SF_PCV2SF:
21925 type = v4sf_ftype_v4sf_pcv2sf;
21927 case V4SF_FTYPE_PCV4SF_V4SF:
21928 type = v4sf_ftype_pcv4sf_v4sf;
21930 case V2DF_FTYPE_V2DF_PCDOUBLE:
21931 type = v2df_ftype_v2df_pcdouble;
21933 case V2DF_FTYPE_PCV2DF_V2DF:
21934 type = v2df_ftype_pcv2df_v2df;
21936 case VOID_FTYPE_PV2SF_V4SF:
21937 type = void_ftype_pv2sf_v4sf;
21939 case VOID_FTYPE_PV2DI_V2DI:
21940 type = void_ftype_pv2di_v2di;
21942 case VOID_FTYPE_PCHAR_V32QI:
21943 type = void_ftype_pchar_v32qi;
21945 case VOID_FTYPE_PCHAR_V16QI:
21946 type = void_ftype_pchar_v16qi;
21948 case VOID_FTYPE_PFLOAT_V8SF:
21949 type = void_ftype_pfloat_v8sf;
21951 case VOID_FTYPE_PFLOAT_V4SF:
21952 type = void_ftype_pfloat_v4sf;
21954 case VOID_FTYPE_PDOUBLE_V4DF:
21955 type = void_ftype_pdouble_v4df;
21957 case VOID_FTYPE_PDOUBLE_V2DF:
21958 type = void_ftype_pdouble_v2df;
21960 case VOID_FTYPE_PDI_DI:
21961 type = void_ftype_pdi_di;
21963 case VOID_FTYPE_PINT_INT:
21964 type = void_ftype_pint_int;
21966 case VOID_FTYPE_PV8SF_V8SF_V8SF:
21967 type = void_ftype_pv8sf_v8sf_v8sf;
21969 case VOID_FTYPE_PV4DF_V4DF_V4DF:
21970 type = void_ftype_pv4df_v4df_v4df;
21972 case VOID_FTYPE_PV4SF_V4SF_V4SF:
21973 type = void_ftype_pv4sf_v4sf_v4sf;
21975 case VOID_FTYPE_PV2DF_V2DF_V2DF:
21976 type = void_ftype_pv2df_v2df_v2df;
21979 gcc_unreachable ();
21982 def_builtin (d->mask, d->name, type, d->code);
21985 /* Add all builtins with variable number of operands. */
21986 for (i = 0, d = bdesc_args;
21987 i < ARRAY_SIZE (bdesc_args);
21995 switch ((enum ix86_builtin_type) d->flag)
21997 case FLOAT_FTYPE_FLOAT:
21998 type = float_ftype_float;
22000 case INT_FTYPE_V8SF_V8SF_PTEST:
22001 type = int_ftype_v8sf_v8sf;
22003 case INT_FTYPE_V4DI_V4DI_PTEST:
22004 type = int_ftype_v4di_v4di;
22006 case INT_FTYPE_V4DF_V4DF_PTEST:
22007 type = int_ftype_v4df_v4df;
22009 case INT_FTYPE_V4SF_V4SF_PTEST:
22010 type = int_ftype_v4sf_v4sf;
22012 case INT_FTYPE_V2DI_V2DI_PTEST:
22013 type = int_ftype_v2di_v2di;
22015 case INT_FTYPE_V2DF_V2DF_PTEST:
22016 type = int_ftype_v2df_v2df;
22018 case INT64_FTYPE_V4SF:
22019 type = int64_ftype_v4sf;
22021 case INT64_FTYPE_V2DF:
22022 type = int64_ftype_v2df;
22024 case INT_FTYPE_V16QI:
22025 type = int_ftype_v16qi;
22027 case INT_FTYPE_V8QI:
22028 type = int_ftype_v8qi;
22030 case INT_FTYPE_V8SF:
22031 type = int_ftype_v8sf;
22033 case INT_FTYPE_V4DF:
22034 type = int_ftype_v4df;
22036 case INT_FTYPE_V4SF:
22037 type = int_ftype_v4sf;
22039 case INT_FTYPE_V2DF:
22040 type = int_ftype_v2df;
22042 case V16QI_FTYPE_V16QI:
22043 type = v16qi_ftype_v16qi;
22045 case V8SI_FTYPE_V8SF:
22046 type = v8si_ftype_v8sf;
22048 case V8SI_FTYPE_V4SI:
22049 type = v8si_ftype_v4si;
22051 case V8HI_FTYPE_V8HI:
22052 type = v8hi_ftype_v8hi;
22054 case V8HI_FTYPE_V16QI:
22055 type = v8hi_ftype_v16qi;
22057 case V8QI_FTYPE_V8QI:
22058 type = v8qi_ftype_v8qi;
22060 case V8SF_FTYPE_V8SF:
22061 type = v8sf_ftype_v8sf;
22063 case V8SF_FTYPE_V8SI:
22064 type = v8sf_ftype_v8si;
22066 case V8SF_FTYPE_V4SF:
22067 type = v8sf_ftype_v4sf;
22069 case V4SI_FTYPE_V4DF:
22070 type = v4si_ftype_v4df;
22072 case V4SI_FTYPE_V4SI:
22073 type = v4si_ftype_v4si;
22075 case V4SI_FTYPE_V16QI:
22076 type = v4si_ftype_v16qi;
22078 case V4SI_FTYPE_V8SI:
22079 type = v4si_ftype_v8si;
22081 case V4SI_FTYPE_V8HI:
22082 type = v4si_ftype_v8hi;
22084 case V4SI_FTYPE_V4SF:
22085 type = v4si_ftype_v4sf;
22087 case V4SI_FTYPE_V2DF:
22088 type = v4si_ftype_v2df;
22090 case V4HI_FTYPE_V4HI:
22091 type = v4hi_ftype_v4hi;
22093 case V4DF_FTYPE_V4DF:
22094 type = v4df_ftype_v4df;
22096 case V4DF_FTYPE_V4SI:
22097 type = v4df_ftype_v4si;
22099 case V4DF_FTYPE_V4SF:
22100 type = v4df_ftype_v4sf;
22102 case V4DF_FTYPE_V2DF:
22103 type = v4df_ftype_v2df;
22105 case V4SF_FTYPE_V4SF:
22106 case V4SF_FTYPE_V4SF_VEC_MERGE:
22107 type = v4sf_ftype_v4sf;
22109 case V4SF_FTYPE_V8SF:
22110 type = v4sf_ftype_v8sf;
22112 case V4SF_FTYPE_V4SI:
22113 type = v4sf_ftype_v4si;
22115 case V4SF_FTYPE_V4DF:
22116 type = v4sf_ftype_v4df;
22118 case V4SF_FTYPE_V2DF:
22119 type = v4sf_ftype_v2df;
22121 case V2DI_FTYPE_V2DI:
22122 type = v2di_ftype_v2di;
22124 case V2DI_FTYPE_V16QI:
22125 type = v2di_ftype_v16qi;
22127 case V2DI_FTYPE_V8HI:
22128 type = v2di_ftype_v8hi;
22130 case V2DI_FTYPE_V4SI:
22131 type = v2di_ftype_v4si;
22133 case V2SI_FTYPE_V2SI:
22134 type = v2si_ftype_v2si;
22136 case V2SI_FTYPE_V4SF:
22137 type = v2si_ftype_v4sf;
22139 case V2SI_FTYPE_V2DF:
22140 type = v2si_ftype_v2df;
22142 case V2SI_FTYPE_V2SF:
22143 type = v2si_ftype_v2sf;
22145 case V2DF_FTYPE_V4DF:
22146 type = v2df_ftype_v4df;
22148 case V2DF_FTYPE_V4SF:
22149 type = v2df_ftype_v4sf;
22151 case V2DF_FTYPE_V2DF:
22152 case V2DF_FTYPE_V2DF_VEC_MERGE:
22153 type = v2df_ftype_v2df;
22155 case V2DF_FTYPE_V2SI:
22156 type = v2df_ftype_v2si;
22158 case V2DF_FTYPE_V4SI:
22159 type = v2df_ftype_v4si;
22161 case V2SF_FTYPE_V2SF:
22162 type = v2sf_ftype_v2sf;
22164 case V2SF_FTYPE_V2SI:
22165 type = v2sf_ftype_v2si;
22167 case V16QI_FTYPE_V16QI_V16QI:
22168 type = v16qi_ftype_v16qi_v16qi;
22170 case V16QI_FTYPE_V8HI_V8HI:
22171 type = v16qi_ftype_v8hi_v8hi;
22173 case V8QI_FTYPE_V8QI_V8QI:
22174 type = v8qi_ftype_v8qi_v8qi;
22176 case V8QI_FTYPE_V4HI_V4HI:
22177 type = v8qi_ftype_v4hi_v4hi;
22179 case V8HI_FTYPE_V8HI_V8HI:
22180 case V8HI_FTYPE_V8HI_V8HI_COUNT:
22181 type = v8hi_ftype_v8hi_v8hi;
22183 case V8HI_FTYPE_V16QI_V16QI:
22184 type = v8hi_ftype_v16qi_v16qi;
22186 case V8HI_FTYPE_V4SI_V4SI:
22187 type = v8hi_ftype_v4si_v4si;
22189 case V8HI_FTYPE_V8HI_SI_COUNT:
22190 type = v8hi_ftype_v8hi_int;
22192 case V8SF_FTYPE_V8SF_V8SF:
22193 type = v8sf_ftype_v8sf_v8sf;
22195 case V8SF_FTYPE_V8SF_V8SI:
22196 type = v8sf_ftype_v8sf_v8si;
22198 case V4SI_FTYPE_V4SI_V4SI:
22199 case V4SI_FTYPE_V4SI_V4SI_COUNT:
22200 type = v4si_ftype_v4si_v4si;
22202 case V4SI_FTYPE_V8HI_V8HI:
22203 type = v4si_ftype_v8hi_v8hi;
22205 case V4SI_FTYPE_V4SF_V4SF:
22206 type = v4si_ftype_v4sf_v4sf;
22208 case V4SI_FTYPE_V2DF_V2DF:
22209 type = v4si_ftype_v2df_v2df;
22211 case V4SI_FTYPE_V4SI_SI_COUNT:
22212 type = v4si_ftype_v4si_int;
22214 case V4HI_FTYPE_V4HI_V4HI:
22215 case V4HI_FTYPE_V4HI_V4HI_COUNT:
22216 type = v4hi_ftype_v4hi_v4hi;
22218 case V4HI_FTYPE_V8QI_V8QI:
22219 type = v4hi_ftype_v8qi_v8qi;
22221 case V4HI_FTYPE_V2SI_V2SI:
22222 type = v4hi_ftype_v2si_v2si;
22224 case V4HI_FTYPE_V4HI_SI_COUNT:
22225 type = v4hi_ftype_v4hi_int;
22227 case V4DF_FTYPE_V4DF_V4DF:
22228 type = v4df_ftype_v4df_v4df;
22230 case V4DF_FTYPE_V4DF_V4DI:
22231 type = v4df_ftype_v4df_v4di;
22233 case V4SF_FTYPE_V4SF_V4SF:
22234 case V4SF_FTYPE_V4SF_V4SF_SWAP:
22235 type = v4sf_ftype_v4sf_v4sf;
22237 case V4SF_FTYPE_V4SF_V4SI:
22238 type = v4sf_ftype_v4sf_v4si;
22240 case V4SF_FTYPE_V4SF_V2SI:
22241 type = v4sf_ftype_v4sf_v2si;
22243 case V4SF_FTYPE_V4SF_V2DF:
22244 type = v4sf_ftype_v4sf_v2df;
22246 case V4SF_FTYPE_V4SF_DI:
22247 type = v4sf_ftype_v4sf_int64;
22249 case V4SF_FTYPE_V4SF_SI:
22250 type = v4sf_ftype_v4sf_int;
22252 case V2DI_FTYPE_V2DI_V2DI:
22253 case V2DI_FTYPE_V2DI_V2DI_COUNT:
22254 type = v2di_ftype_v2di_v2di;
22256 case V2DI_FTYPE_V16QI_V16QI:
22257 type = v2di_ftype_v16qi_v16qi;
22259 case V2DI_FTYPE_V4SI_V4SI:
22260 type = v2di_ftype_v4si_v4si;
22262 case V2DI_FTYPE_V2DI_V16QI:
22263 type = v2di_ftype_v2di_v16qi;
22265 case V2DI_FTYPE_V2DF_V2DF:
22266 type = v2di_ftype_v2df_v2df;
22268 case V2DI_FTYPE_V2DI_SI_COUNT:
22269 type = v2di_ftype_v2di_int;
22271 case V2SI_FTYPE_V2SI_V2SI:
22272 case V2SI_FTYPE_V2SI_V2SI_COUNT:
22273 type = v2si_ftype_v2si_v2si;
22275 case V2SI_FTYPE_V4HI_V4HI:
22276 type = v2si_ftype_v4hi_v4hi;
22278 case V2SI_FTYPE_V2SF_V2SF:
22279 type = v2si_ftype_v2sf_v2sf;
22281 case V2SI_FTYPE_V2SI_SI_COUNT:
22282 type = v2si_ftype_v2si_int;
22284 case V2DF_FTYPE_V2DF_V2DF:
22285 case V2DF_FTYPE_V2DF_V2DF_SWAP:
22286 type = v2df_ftype_v2df_v2df;
22288 case V2DF_FTYPE_V2DF_V4SF:
22289 type = v2df_ftype_v2df_v4sf;
22291 case V2DF_FTYPE_V2DF_V2DI:
22292 type = v2df_ftype_v2df_v2di;
22294 case V2DF_FTYPE_V2DF_DI:
22295 type = v2df_ftype_v2df_int64;
22297 case V2DF_FTYPE_V2DF_SI:
22298 type = v2df_ftype_v2df_int;
22300 case V2SF_FTYPE_V2SF_V2SF:
22301 type = v2sf_ftype_v2sf_v2sf;
22303 case V1DI_FTYPE_V1DI_V1DI:
22304 case V1DI_FTYPE_V1DI_V1DI_COUNT:
22305 type = v1di_ftype_v1di_v1di;
22307 case V1DI_FTYPE_V8QI_V8QI:
22308 type = v1di_ftype_v8qi_v8qi;
22310 case V1DI_FTYPE_V2SI_V2SI:
22311 type = v1di_ftype_v2si_v2si;
22313 case V1DI_FTYPE_V1DI_SI_COUNT:
22314 type = v1di_ftype_v1di_int;
22316 case UINT64_FTYPE_UINT64_UINT64:
22317 type = uint64_ftype_uint64_uint64;
22319 case UINT_FTYPE_UINT_UINT:
22320 type = unsigned_ftype_unsigned_unsigned;
22322 case UINT_FTYPE_UINT_USHORT:
22323 type = unsigned_ftype_unsigned_ushort;
22325 case UINT_FTYPE_UINT_UCHAR:
22326 type = unsigned_ftype_unsigned_uchar;
22328 case V8HI_FTYPE_V8HI_INT:
22329 type = v8hi_ftype_v8hi_int;
22331 case V8SF_FTYPE_V8SF_INT:
22332 type = v8sf_ftype_v8sf_int;
22334 case V4SI_FTYPE_V4SI_INT:
22335 type = v4si_ftype_v4si_int;
22337 case V4SI_FTYPE_V8SI_INT:
22338 type = v4si_ftype_v8si_int;
22340 case V4HI_FTYPE_V4HI_INT:
22341 type = v4hi_ftype_v4hi_int;
22343 case V4DF_FTYPE_V4DF_INT:
22344 type = v4df_ftype_v4df_int;
22346 case V4SF_FTYPE_V4SF_INT:
22347 type = v4sf_ftype_v4sf_int;
22349 case V4SF_FTYPE_V8SF_INT:
22350 type = v4sf_ftype_v8sf_int;
22352 case V2DI_FTYPE_V2DI_INT:
22353 case V2DI2TI_FTYPE_V2DI_INT:
22354 type = v2di_ftype_v2di_int;
22356 case V2DF_FTYPE_V2DF_INT:
22357 type = v2df_ftype_v2df_int;
22359 case V2DF_FTYPE_V4DF_INT:
22360 type = v2df_ftype_v4df_int;
22362 case V16QI_FTYPE_V16QI_V16QI_V16QI:
22363 type = v16qi_ftype_v16qi_v16qi_v16qi;
22365 case V8SF_FTYPE_V8SF_V8SF_V8SF:
22366 type = v8sf_ftype_v8sf_v8sf_v8sf;
22368 case V4DF_FTYPE_V4DF_V4DF_V4DF:
22369 type = v4df_ftype_v4df_v4df_v4df;
22371 case V4SF_FTYPE_V4SF_V4SF_V4SF:
22372 type = v4sf_ftype_v4sf_v4sf_v4sf;
22374 case V2DF_FTYPE_V2DF_V2DF_V2DF:
22375 type = v2df_ftype_v2df_v2df_v2df;
22377 case V16QI_FTYPE_V16QI_V16QI_INT:
22378 type = v16qi_ftype_v16qi_v16qi_int;
22380 case V8SI_FTYPE_V8SI_V8SI_INT:
22381 type = v8si_ftype_v8si_v8si_int;
22383 case V8SI_FTYPE_V8SI_V4SI_INT:
22384 type = v8si_ftype_v8si_v4si_int;
22386 case V8HI_FTYPE_V8HI_V8HI_INT:
22387 type = v8hi_ftype_v8hi_v8hi_int;
22389 case V8SF_FTYPE_V8SF_V8SF_INT:
22390 type = v8sf_ftype_v8sf_v8sf_int;
22392 case V8SF_FTYPE_V8SF_V4SF_INT:
22393 type = v8sf_ftype_v8sf_v4sf_int;
22395 case V4SI_FTYPE_V4SI_V4SI_INT:
22396 type = v4si_ftype_v4si_v4si_int;
22398 case V4DF_FTYPE_V4DF_V4DF_INT:
22399 type = v4df_ftype_v4df_v4df_int;
22401 case V4DF_FTYPE_V4DF_V2DF_INT:
22402 type = v4df_ftype_v4df_v2df_int;
22404 case V4SF_FTYPE_V4SF_V4SF_INT:
22405 type = v4sf_ftype_v4sf_v4sf_int;
22407 case V2DI_FTYPE_V2DI_V2DI_INT:
22408 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
22409 type = v2di_ftype_v2di_v2di_int;
22411 case V2DF_FTYPE_V2DF_V2DF_INT:
22412 type = v2df_ftype_v2df_v2df_int;
22414 case V2DI_FTYPE_V2DI_UINT_UINT:
22415 type = v2di_ftype_v2di_unsigned_unsigned;
22417 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
22418 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
22420 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
22421 type = v1di_ftype_v1di_v1di_int;
22423 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
22424 type = v8sf_ftype_v8sf_v8sf_v8si_int;
22426 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
22427 type = v4df_ftype_v4df_v4df_v4di_int;
22429 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
22430 type = v4sf_ftype_v4sf_v4sf_v4si_int;
22432 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
22433 type = v2df_ftype_v2df_v2df_v2di_int;
22436 gcc_unreachable ();
22439 def_builtin_const (d->mask, d->name, type, d->code);
22442 /* pcmpestr[im] insns. */
22443 for (i = 0, d = bdesc_pcmpestr;
22444 i < ARRAY_SIZE (bdesc_pcmpestr);
22447 if (d->code == IX86_BUILTIN_PCMPESTRM128)
22448 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
22450 ftype = int_ftype_v16qi_int_v16qi_int_int;
22451 def_builtin_const (d->mask, d->name, ftype, d->code);
22454 /* pcmpistr[im] insns. */
22455 for (i = 0, d = bdesc_pcmpistr;
22456 i < ARRAY_SIZE (bdesc_pcmpistr);
22459 if (d->code == IX86_BUILTIN_PCMPISTRM128)
22460 ftype = v16qi_ftype_v16qi_v16qi_int;
22462 ftype = int_ftype_v16qi_v16qi_int;
22463 def_builtin_const (d->mask, d->name, ftype, d->code);
22466 /* comi/ucomi insns. */
22467 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
22468 if (d->mask == OPTION_MASK_ISA_SSE2)
22469 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
22471 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
22474 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
22475 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
22477 /* SSE or 3DNow!A */
22478 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
22481 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
22483 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
22484 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
22487 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
22488 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
22491 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
22492 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
22493 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
22494 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
22495 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
22496 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
22499 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
22502 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
22503 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
22505 /* Access to the vec_init patterns. */
22506 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
22507 integer_type_node, NULL_TREE);
22508 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
22510 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
22511 short_integer_type_node,
22512 short_integer_type_node,
22513 short_integer_type_node, NULL_TREE);
22514 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
22516 ftype = build_function_type_list (V8QI_type_node, char_type_node,
22517 char_type_node, char_type_node,
22518 char_type_node, char_type_node,
22519 char_type_node, char_type_node,
22520 char_type_node, NULL_TREE);
22521 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
22523 /* Access to the vec_extract patterns. */
22524 ftype = build_function_type_list (double_type_node, V2DF_type_node,
22525 integer_type_node, NULL_TREE);
22526 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
22528 ftype = build_function_type_list (long_long_integer_type_node,
22529 V2DI_type_node, integer_type_node,
22531 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
22533 ftype = build_function_type_list (float_type_node, V4SF_type_node,
22534 integer_type_node, NULL_TREE);
22535 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
22537 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
22538 integer_type_node, NULL_TREE);
22539 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
22541 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
22542 integer_type_node, NULL_TREE);
22543 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
22545 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
22546 integer_type_node, NULL_TREE);
22547 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
22549 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
22550 integer_type_node, NULL_TREE);
22551 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
22553 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
22554 integer_type_node, NULL_TREE);
22555 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
22557 /* Access to the vec_set patterns. */
22558 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
22560 integer_type_node, NULL_TREE);
22561 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
22563 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
22565 integer_type_node, NULL_TREE);
22566 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
22568 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
22570 integer_type_node, NULL_TREE);
22571 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
22573 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
22575 integer_type_node, NULL_TREE);
22576 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
22578 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
22580 integer_type_node, NULL_TREE);
22581 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
22583 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
22585 integer_type_node, NULL_TREE);
22586 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
22588 /* Add SSE5 multi-arg argument instructions */
22589 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
22591 tree mtype = NULL_TREE;
22596 switch ((enum multi_arg_type)d->flag)
22598 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
22599 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
22600 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
22601 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
22602 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
22603 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
22604 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
22605 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
22606 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
22607 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
22608 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
22609 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
22610 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
22611 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
22612 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
22613 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
22614 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
22615 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
22616 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
22617 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
22618 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
22619 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
22620 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
22621 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
22622 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
22623 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
22624 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
22625 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
22626 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
22627 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
22628 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
22629 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
22630 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
22631 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
22632 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
22633 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
22634 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
22635 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
22636 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
22637 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
22638 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
22639 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
22640 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
22641 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
22642 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
22643 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
22644 case MULTI_ARG_UNKNOWN:
22646 gcc_unreachable ();
22650 def_builtin_const (d->mask, d->name, mtype, d->code);
22654 /* Internal method for ix86_init_builtins. */
/* Register the ABI-specific va_start/va_end/va_copy builtins so that
   x86-64 code can manipulate both the Microsoft (ms_abi) and the
   System V (sysv_abi) variable-argument lists, regardless of which
   ABI the current function uses.  Each builtin is tagged with the
   matching ABI attribute list so later expansion knows which va_list
   layout to use.
   NOTE(review): this extract is elided -- the "static void" header,
   several assignments (e.g. the targets of lines 22671/22674) and the
   closing brace are missing between the numbered lines.  */
22657 ix86_init_builtins_va_builtins_abi (void)
/* Function types for the six builtins, plus the ABI attribute lists.  */
22659 tree ms_va_ref, sysv_va_ref;
22660 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
22661 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
22662 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
22663 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists that force the named ABI on each builtin.  */
22667 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
22668 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* The builtins take the va_list by reference.  The sysv va_list is an
   array type, hence the pointer to its element type below.  */
22669 ms_va_ref = build_reference_type (ms_va_list_type_node);
22671 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
22674 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
22675 fnvoid_va_start_ms =
22676 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
22677 fnvoid_va_end_sysv =
22678 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
22679 fnvoid_va_start_sysv =
22680 build_varargs_function_type_list (void_type_node, sysv_va_ref,
22682 fnvoid_va_copy_ms =
22683 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
22685 fnvoid_va_copy_sysv =
22686 build_function_type_list (void_type_node, sysv_va_ref,
22687 sysv_va_ref, NULL_TREE);
/* Register all six builtins; the BUILT_IN_VA_* codes let the middle
   end treat them like the generic va_* builtins.  */
22689 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
22690 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
22691 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
22692 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
22693 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
22694 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
22695 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
22696 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
22697 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
22698 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
22699 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
22700 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Set up all ix86 target builtins: the extended float types (__float80,
   __float128) with their TFmode support builtins (__builtin_infq,
   __builtin_fabsq, __builtin_copysignq), then the MMX/SSE builtins and
   the per-ABI va_* builtins.
   NOTE(review): elided extract -- the "static void" header, several
   name-string arguments to register_builtin_type, conditional guards,
   and the closing brace are missing between the numbered lines.  */
22704 ix86_init_builtins (void)
22706 tree float128_type_node = make_node (REAL_TYPE);
/* If long double is already XFmode, reuse it for __float80 rather than
   building a fresh 80-bit type.  */
22709 /* The __float80 type. */
22710 if (TYPE_MODE (long_double_type_node) == XFmode)
22711 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
22715 /* The __float80 type. */
22716 tree float80_type_node = make_node (REAL_TYPE);
22718 TYPE_PRECISION (float80_type_node) = 80;
22719 layout_type (float80_type_node);
22720 (*lang_hooks.types.register_builtin_type) (float80_type_node,
22724 /* The __float128 type. */
22725 TYPE_PRECISION (float128_type_node) = 128;
22726 layout_type (float128_type_node);
22727 (*lang_hooks.types.register_builtin_type) (float128_type_node,
22730 /* TFmode support builtins. */
22731 ftype = build_function_type (float128_type_node, void_list_node);
22732 decl = add_builtin_function ("__builtin_infq", ftype,
22733 IX86_BUILTIN_INFQ, BUILT_IN_MD,
22735 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
22737 /* We will expand them to normal call if SSE2 isn't available since
22738 they are used by libgcc. */
22739 ftype = build_function_type_list (float128_type_node,
22740 float128_type_node,
22742 decl = add_builtin_function ("__builtin_fabsq", ftype,
22743 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
22744 "__fabstf2", NULL_TREE);
22745 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
/* Pure functions: mark read-only so the optimizers can CSE calls.  */
22746 TREE_READONLY (decl) = 1;
22748 ftype = build_function_type_list (float128_type_node,
22749 float128_type_node,
22750 float128_type_node,
22752 decl = add_builtin_function ("__builtin_copysignq", ftype,
22753 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
22754 "__copysigntf3", NULL_TREE);
22755 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
22756 TREE_READONLY (decl) = 1;
/* Register the big table of MMX/SSE builtins and the ABI va builtins.  */
22758 ix86_init_mmx_sse_builtins ();
22760 ix86_init_builtins_va_builtins_abi ();
22763 /* Errors in the source file can cause expand_expr to return const0_rtx
22764 where we expect a vector. To avoid crashing, use one of the vector
22765 clear instructions. */
/* Return X unchanged unless it is the scalar const0_rtx, in which case
   substitute the all-zero vector constant of MODE so downstream insn
   generation sees an operand of the right mode.
   NOTE(review): elided extract -- the "static rtx" header, braces and
   the trailing "return x;" are missing between the numbered lines.  */
22767 safe_vector_operand (rtx x, enum machine_mode mode)
22769 if (x == const0_rtx)
22770 x = CONST0_RTX (mode);
22774 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin call EXP through insn pattern ICODE,
   placing the result in TARGET (or a fresh register when TARGET is
   unsuitable).  Operands are coerced to the modes the insn pattern
   demands.
   NOTE(review): elided extract -- the "static rtx" header, pat/rtx
   declarations, the emit of PAT and the return are missing.  */
22777 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
22780 tree arg0 = CALL_EXPR_ARG (exp, 0);
22781 tree arg1 = CALL_EXPR_ARG (exp, 1);
22782 rtx op0 = expand_normal (arg0);
22783 rtx op1 = expand_normal (arg1);
22784 enum machine_mode tmode = insn_data[icode].operand[0].mode;
22785 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
22786 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector (error recovery).  */
22788 if (VECTOR_MODE_P (mode0))
22789 op0 = safe_vector_operand (op0, mode0);
22790 if (VECTOR_MODE_P (mode1))
22791 op1 = safe_vector_operand (op1, mode1);
22793 if (optimize || !target
22794 || GET_MODE (target) != tmode
22795 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22796 target = gen_reg_rtx (tmode);
/* An SImode shift count destined for a TImode operand is widened by
   loading it into a V4SI register and reinterpreting as TImode.  */
22798 if (GET_MODE (op1) == SImode && mode1 == TImode)
22800 rtx x = gen_reg_rtx (V4SImode);
22801 emit_insn (gen_sse2_loadd (x, op1));
22802 op1 = gen_lowpart (TImode, x);
/* Force operands into registers when the pattern predicates reject
   them as-is.  */
22805 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
22806 op0 = copy_to_mode_reg (mode0, op0);
22807 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
22808 op1 = copy_to_mode_reg (mode1, op1);
22810 pat = GEN_FCN (icode) (target, op0, op1);
22819 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expand a builtin with 2-4 operands described by M_TYPE through insn
   pattern ICODE.  M_TYPE selects the operand count and whether the
   last argument is an immediate, whether the insn is a comparison
   (SUB_CODE supplies the rtx comparison code), or a "TF" test form.
   NOTE(review): elided extract -- the function header, the nargs
   assignments inside each switch arm, the args[] declaration, emit of
   PAT and the return are missing between the numbered lines.  */
22822 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
22823 enum multi_arg_type m_type,
22824 enum insn_code sub_code)
22829 bool comparison_p = false;
22831 bool last_arg_constant = false;
22832 int num_memory = 0;
22835 enum machine_mode mode;
22838 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: three-operand forms.  */
22842 case MULTI_ARG_3_SF:
22843 case MULTI_ARG_3_DF:
22844 case MULTI_ARG_3_DI:
22845 case MULTI_ARG_3_SI:
22846 case MULTI_ARG_3_SI_DI:
22847 case MULTI_ARG_3_HI:
22848 case MULTI_ARG_3_HI_SI:
22849 case MULTI_ARG_3_QI:
22850 case MULTI_ARG_3_PERMPS:
22851 case MULTI_ARG_3_PERMPD:
/* Two-operand forms.  */
22855 case MULTI_ARG_2_SF:
22856 case MULTI_ARG_2_DF:
22857 case MULTI_ARG_2_DI:
22858 case MULTI_ARG_2_SI:
22859 case MULTI_ARG_2_HI:
22860 case MULTI_ARG_2_QI:
/* Two operands, second must be an immediate.  */
22864 case MULTI_ARG_2_DI_IMM:
22865 case MULTI_ARG_2_SI_IMM:
22866 case MULTI_ARG_2_HI_IMM:
22867 case MULTI_ARG_2_QI_IMM:
22869 last_arg_constant = true;
/* One-operand forms, including widening conversions.  */
22872 case MULTI_ARG_1_SF:
22873 case MULTI_ARG_1_DF:
22874 case MULTI_ARG_1_DI:
22875 case MULTI_ARG_1_SI:
22876 case MULTI_ARG_1_HI:
22877 case MULTI_ARG_1_QI:
22878 case MULTI_ARG_1_SI_DI:
22879 case MULTI_ARG_1_HI_DI:
22880 case MULTI_ARG_1_HI_SI:
22881 case MULTI_ARG_1_QI_DI:
22882 case MULTI_ARG_1_QI_SI:
22883 case MULTI_ARG_1_QI_HI:
22884 case MULTI_ARG_1_PH2PS:
22885 case MULTI_ARG_1_PS2PH:
/* Two-operand comparisons: the pattern takes an extra comparison rtx.  */
22889 case MULTI_ARG_2_SF_CMP:
22890 case MULTI_ARG_2_DF_CMP:
22891 case MULTI_ARG_2_DI_CMP:
22892 case MULTI_ARG_2_SI_CMP:
22893 case MULTI_ARG_2_HI_CMP:
22894 case MULTI_ARG_2_QI_CMP:
22896 comparison_p = true;
/* Two-operand "TF" test forms: SUB_CODE is passed as an immediate.  */
22899 case MULTI_ARG_2_SF_TF:
22900 case MULTI_ARG_2_DF_TF:
22901 case MULTI_ARG_2_DI_TF:
22902 case MULTI_ARG_2_SI_TF:
22903 case MULTI_ARG_2_HI_TF:
22904 case MULTI_ARG_2_QI_TF:
22909 case MULTI_ARG_UNKNOWN:
22911 gcc_unreachable ();
22914 if (optimize || !target
22915 || GET_MODE (target) != tmode
22916 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22917 target = gen_reg_rtx (tmode);
22919 gcc_assert (nargs <= 4);
/* Expand each call argument and coerce it to the operand mode the
   pattern expects.  Comparisons shift operand indices by one because
   operand 1 of the pattern is the comparison rtx itself.  */
22921 for (i = 0; i < nargs; i++)
22923 tree arg = CALL_EXPR_ARG (exp, i);
22924 rtx op = expand_normal (arg);
22925 int adjust = (comparison_p) ? 1 : 0;
22926 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
22928 if (last_arg_constant && i == nargs-1)
22930 if (GET_CODE (op) != CONST_INT)
22932 error ("last argument must be an immediate");
/* Error recovery: return a dummy register so expansion can continue.  */
22933 return gen_reg_rtx (tmode);
22938 if (VECTOR_MODE_P (mode))
22939 op = safe_vector_operand (op, mode);
22941 /* If we aren't optimizing, only allow one memory operand to be
22943 if (memory_operand (op, mode))
22946 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
22949 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
22951 op = force_reg (mode, op);
22955 args[i].mode = mode;
/* Emit the insn in the shape the pattern requires.  */
22961 pat = GEN_FCN (icode) (target, args[0].op);
22966 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
22967 GEN_INT ((int)sub_code));
22968 else if (! comparison_p)
22969 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
22972 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
22976 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
22981 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
22985 gcc_unreachable ();
22995 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
22996 insns with vec_merge. */
/* Expand a one-argument scalar builtin whose pattern is a vec_merge
   (operand 2 duplicates the input so the untouched vector lanes pass
   through).  NOTE(review): elided extract -- the function header, the
   op1 = op0 assignment before line 23021, emit of PAT and the return
   are missing between the numbered lines.  */
22999 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23003 tree arg0 = CALL_EXPR_ARG (exp, 0);
23004 rtx op1, op0 = expand_normal (arg0);
23005 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23006 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23008 if (optimize || !target
23009 || GET_MODE (target) != tmode
23010 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23011 target = gen_reg_rtx (tmode);
23013 if (VECTOR_MODE_P (mode0))
23014 op0 = safe_vector_operand (op0, mode0);
23016 if ((optimize && !register_operand (op0, mode0))
23017 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23018 op0 = copy_to_mode_reg (mode0, op0);
/* op1 feeds the pass-through side of the vec_merge; coerce it too.  */
23021 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
23022 op1 = copy_to_mode_reg (mode0, op1);
23024 pat = GEN_FCN (icode) (target, op0, op1);
23031 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE comparison builtin D applied to EXP into TARGET.
   SWAP requests exchanging the two operands, used for comparisons the
   hardware only implements one way around (e.g. GT via LT).
   NOTE(review): elided extract -- the pat/op2 declarations, the
   operand swap completion after line 23058, emit of PAT and the
   return are missing between the numbered lines.  */
23034 ix86_expand_sse_compare (const struct builtin_description *d,
23035 tree exp, rtx target, bool swap)
23038 tree arg0 = CALL_EXPR_ARG (exp, 0);
23039 tree arg1 = CALL_EXPR_ARG (exp, 1);
23040 rtx op0 = expand_normal (arg0);
23041 rtx op1 = expand_normal (arg1);
23043 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23044 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23045 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
23046 enum rtx_code comparison = d->comparison;
23048 if (VECTOR_MODE_P (mode0))
23049 op0 = safe_vector_operand (op0, mode0);
23050 if (VECTOR_MODE_P (mode1))
23051 op1 = safe_vector_operand (op1, mode1);
23053 /* Swap operands if we have a comparison that isn't available in
/* Copy op1 through a register as part of the swap (completion of the
   exchange is in the elided lines).  */
23057 rtx tmp = gen_reg_rtx (mode1);
23058 emit_move_insn (tmp, op1);
23063 if (optimize || !target
23064 || GET_MODE (target) != tmode
23065 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
23066 target = gen_reg_rtx (tmode);
23068 if ((optimize && !register_operand (op0, mode0))
23069 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
23070 op0 = copy_to_mode_reg (mode0, op0);
23071 if ((optimize && !register_operand (op1, mode1))
23072 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
23073 op1 = copy_to_mode_reg (mode1, op1);
/* The pattern embeds the comparison rtx as its last operand.  */
23075 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23076 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23083 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a COMIS/UCOMIS builtin: emit the flag-setting compare, then
   materialize the boolean result by setting the low byte of a fresh
   SImode register (pre-zeroed) from the flags via STRICT_LOW_PART.
   Returns the SImode register.  NOTE(review): elided extract -- the
   function header, the swap body after line 23105, the pat checks and
   the FLAGS_REG reference inside the SET are missing.  */
23086 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23090 tree arg0 = CALL_EXPR_ARG (exp, 0);
23091 tree arg1 = CALL_EXPR_ARG (exp, 1);
23092 rtx op0 = expand_normal (arg0);
23093 rtx op1 = expand_normal (arg1);
23094 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23095 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23096 enum rtx_code comparison = d->comparison;
23098 if (VECTOR_MODE_P (mode0))
23099 op0 = safe_vector_operand (op0, mode0);
23100 if (VECTOR_MODE_P (mode1))
23101 op1 = safe_vector_operand (op1, mode1);
23103 /* Swap operands if we have a comparison that isn't available in
23105 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Zero the full SImode result first so only the low byte is written.  */
23112 target = gen_reg_rtx (SImode);
23113 emit_move_insn (target, const0_rtx);
23114 target = gen_rtx_SUBREG (QImode, target, 0);
23116 if ((optimize && !register_operand (op0, mode0))
23117 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23118 op0 = copy_to_mode_reg (mode0, op0);
23119 if ((optimize && !register_operand (op1, mode1))
23120 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23121 op1 = copy_to_mode_reg (mode1, op1);
23123 pat = GEN_FCN (d->icode) (op0, op1);
/* setcc-style: write the comparison of the flags into the low byte.  */
23127 emit_insn (gen_rtx_SET (VOIDmode,
23128 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23129 gen_rtx_fmt_ee (comparison, QImode,
23133 return SUBREG_REG (target);
23136 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a PTEST-style builtin: emit the flag-setting test insn, then
   extract the requested flag (D->comparison) into the low byte of a
   zeroed SImode register, mirroring ix86_expand_sse_comi.
   NOTE(review): elided extract -- the function header, pat checks and
   the flags-register operand of the final SET are missing.  */
23139 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
23143 tree arg0 = CALL_EXPR_ARG (exp, 0);
23144 tree arg1 = CALL_EXPR_ARG (exp, 1);
23145 rtx op0 = expand_normal (arg0);
23146 rtx op1 = expand_normal (arg1);
23147 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23148 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23149 enum rtx_code comparison = d->comparison;
23151 if (VECTOR_MODE_P (mode0))
23152 op0 = safe_vector_operand (op0, mode0);
23153 if (VECTOR_MODE_P (mode1))
23154 op1 = safe_vector_operand (op1, mode1);
/* Zero the result register, then write only its low byte below.  */
23156 target = gen_reg_rtx (SImode);
23157 emit_move_insn (target, const0_rtx);
23158 target = gen_rtx_SUBREG (QImode, target, 0);
23160 if ((optimize && !register_operand (op0, mode0))
23161 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23162 op0 = copy_to_mode_reg (mode0, op0);
23163 if ((optimize && !register_operand (op1, mode1))
23164 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23165 op1 = copy_to_mode_reg (mode1, op1);
23167 pat = GEN_FCN (d->icode) (op0, op1);
23171 emit_insn (gen_rtx_SET (VOIDmode,
23172 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23173 gen_rtx_fmt_ee (comparison, QImode,
23177 return SUBREG_REG (target);
23180 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expand the five-argument SSE4.2 PCMPESTRI/PCMPESTRM builtins.  The
   underlying pattern produces two outputs (index and mask); depending
   on D->code the caller wants the index, the mask, or (for the flag
   variants) a single bit of the resulting EFLAGS, extracted via the
   zeroed-SImode / STRICT_LOW_PART idiom.  NOTE(review): elided
   extract -- the function header, pat emit/checks and parts of the
   final flag extraction are missing between the numbered lines.  */
23183 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
23184 tree exp, rtx target)
23187 tree arg0 = CALL_EXPR_ARG (exp, 0);
23188 tree arg1 = CALL_EXPR_ARG (exp, 1);
23189 tree arg2 = CALL_EXPR_ARG (exp, 2);
23190 tree arg3 = CALL_EXPR_ARG (exp, 3);
23191 tree arg4 = CALL_EXPR_ARG (exp, 4);
23192 rtx scratch0, scratch1;
23193 rtx op0 = expand_normal (arg0);
23194 rtx op1 = expand_normal (arg1);
23195 rtx op2 = expand_normal (arg2);
23196 rtx op3 = expand_normal (arg3);
23197 rtx op4 = expand_normal (arg4);
23198 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* Pattern operands: 0/1 are the two outputs, 2-5 the string operands
   and lengths, 6 the control immediate.  */
23200 tmode0 = insn_data[d->icode].operand[0].mode;
23201 tmode1 = insn_data[d->icode].operand[1].mode;
23202 modev2 = insn_data[d->icode].operand[2].mode;
23203 modei3 = insn_data[d->icode].operand[3].mode;
23204 modev4 = insn_data[d->icode].operand[4].mode;
23205 modei5 = insn_data[d->icode].operand[5].mode;
23206 modeimm = insn_data[d->icode].operand[6].mode;
23208 if (VECTOR_MODE_P (modev2))
23209 op0 = safe_vector_operand (op0, modev2);
23210 if (VECTOR_MODE_P (modev4))
23211 op2 = safe_vector_operand (op2, modev4);
23213 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23214 op0 = copy_to_mode_reg (modev2, op0);
23215 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
23216 op1 = copy_to_mode_reg (modei3, op1);
23217 if ((optimize && !register_operand (op2, modev4))
23218 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
23219 op2 = copy_to_mode_reg (modev4, op2);
23220 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
23221 op3 = copy_to_mode_reg (modei5, op3);
23223 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
/* NOTE(review): message reads "a 8-bit"; should be "an 8-bit" -- a
   user-visible string, so left untouched in this comment-only pass.  */
23225 error ("the fifth argument must be a 8-bit immediate");
/* PCMPESTRI wants the index output; the mask output goes to scratch.  */
23229 if (d->code == IX86_BUILTIN_PCMPESTRI128)
23231 if (optimize || !target
23232 || GET_MODE (target) != tmode0
23233 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23234 target = gen_reg_rtx (tmode0);
23236 scratch1 = gen_reg_rtx (tmode1);
23238 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* PCMPESTRM wants the mask output; the index output goes to scratch.  */
23240 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
23242 if (optimize || !target
23243 || GET_MODE (target) != tmode1
23244 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23245 target = gen_reg_rtx (tmode1);
23247 scratch0 = gen_reg_rtx (tmode0);
23249 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag variants: both outputs are dead, only EFLAGS matters.  */
23253 gcc_assert (d->flag);
23255 scratch0 = gen_reg_rtx (tmode0);
23256 scratch1 = gen_reg_rtx (tmode1);
23258 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Extract the flag named by d->flag into the low byte of a zeroed
   SImode register.  */
23268 target = gen_reg_rtx (SImode);
23269 emit_move_insn (target, const0_rtx);
23270 target = gen_rtx_SUBREG (QImode, target, 0);
23273 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23274 gen_rtx_fmt_ee (EQ, QImode,
23275 gen_rtx_REG ((enum machine_mode) d->flag,
23278 return SUBREG_REG (target);
23285 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expand the three-argument SSE4.2 PCMPISTRI/PCMPISTRM builtins.
   Structured exactly like ix86_expand_sse_pcmpestr, minus the two
   explicit length operands (the "implicit length" string forms).
   NOTE(review): elided extract -- the function header, pat
   emit/checks and parts of the final flag extraction are missing.  */
23288 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
23289 tree exp, rtx target)
23292 tree arg0 = CALL_EXPR_ARG (exp, 0);
23293 tree arg1 = CALL_EXPR_ARG (exp, 1);
23294 tree arg2 = CALL_EXPR_ARG (exp, 2);
23295 rtx scratch0, scratch1;
23296 rtx op0 = expand_normal (arg0);
23297 rtx op1 = expand_normal (arg1);
23298 rtx op2 = expand_normal (arg2);
23299 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Operands 0/1 are the two outputs, 2/3 the strings, 4 the control.  */
23301 tmode0 = insn_data[d->icode].operand[0].mode;
23302 tmode1 = insn_data[d->icode].operand[1].mode;
23303 modev2 = insn_data[d->icode].operand[2].mode;
23304 modev3 = insn_data[d->icode].operand[3].mode;
23305 modeimm = insn_data[d->icode].operand[4].mode;
23307 if (VECTOR_MODE_P (modev2))
23308 op0 = safe_vector_operand (op0, modev2);
23309 if (VECTOR_MODE_P (modev3))
23310 op1 = safe_vector_operand (op1, modev3);
23312 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23313 op0 = copy_to_mode_reg (modev2, op0);
23314 if ((optimize && !register_operand (op1, modev3))
23315 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
23316 op1 = copy_to_mode_reg (modev3, op1);
23318 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
/* NOTE(review): "a 8-bit" should read "an 8-bit"; user-visible string,
   left untouched in this comment-only pass.  */
23320 error ("the third argument must be a 8-bit immediate");
/* PCMPISTRI wants the index output; the mask output goes to scratch.  */
23324 if (d->code == IX86_BUILTIN_PCMPISTRI128)
23326 if (optimize || !target
23327 || GET_MODE (target) != tmode0
23328 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23329 target = gen_reg_rtx (tmode0);
23331 scratch1 = gen_reg_rtx (tmode1);
23333 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* PCMPISTRM wants the mask output; the index output goes to scratch.  */
23335 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
23337 if (optimize || !target
23338 || GET_MODE (target) != tmode1
23339 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23340 target = gen_reg_rtx (tmode1);
23342 scratch0 = gen_reg_rtx (tmode0);
23344 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag variants: both outputs are dead, only EFLAGS matters.  */
23348 gcc_assert (d->flag);
23350 scratch0 = gen_reg_rtx (tmode0);
23351 scratch1 = gen_reg_rtx (tmode1);
23353 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Extract the flag named by d->flag into the low byte of a zeroed
   SImode register.  */
23363 target = gen_reg_rtx (SImode);
23364 emit_move_insn (target, const0_rtx);
23365 target = gen_rtx_SUBREG (QImode, target, 0);
23368 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23369 gen_rtx_fmt_ee (EQ, QImode,
23370 gen_rtx_REG ((enum machine_mode) d->flag,
23373 return SUBREG_REG (target);
23379 /* Subroutine of ix86_expand_builtin to take care of insns with
23380 variable number of operands. */
/* General expander for table-driven builtins.  D->flag encodes the
   builtin's function type; the big switch below maps it to an operand
   count (nargs), how many trailing arguments must be immediates
   (nargs_constant), whether the last argument is a shift count
   (last_arg_count), whether this is really a comparison (dispatched
   to ix86_expand_sse_compare), and an optional result-mode override
   (rmode) for patterns whose output mode differs from the declared
   return type (the *2TI / 2DI forms).  NOTE(review): heavily elided
   extract -- the function header, the nargs/rmode assignments in each
   switch arm, several `break`s, the args[] declaration and the final
   emit/return are missing between the numbered lines.  */
23383 ix86_expand_args_builtin (const struct builtin_description *d,
23384 tree exp, rtx target)
23386 rtx pat, real_target;
23387 unsigned int i, nargs;
23388 unsigned int nargs_constant = 0;
23389 int num_memory = 0;
23393 enum machine_mode mode;
23395 bool last_arg_count = false;
23396 enum insn_code icode = d->icode;
23397 const struct insn_data *insn_p = &insn_data[icode];
23398 enum machine_mode tmode = insn_p->operand[0].mode;
23399 enum machine_mode rmode = VOIDmode;
23401 enum rtx_code comparison = d->comparison;
23403 switch ((enum ix86_builtin_type) d->flag)
/* PTEST forms are handled wholesale by the dedicated expander.  */
23405 case INT_FTYPE_V8SF_V8SF_PTEST:
23406 case INT_FTYPE_V4DI_V4DI_PTEST:
23407 case INT_FTYPE_V4DF_V4DF_PTEST:
23408 case INT_FTYPE_V4SF_V4SF_PTEST:
23409 case INT_FTYPE_V2DI_V2DI_PTEST:
23410 case INT_FTYPE_V2DF_V2DF_PTEST:
23411 return ix86_expand_sse_ptest (d, exp, target);
/* One-argument forms.  */
23412 case FLOAT128_FTYPE_FLOAT128:
23413 case FLOAT_FTYPE_FLOAT:
23414 case INT64_FTYPE_V4SF:
23415 case INT64_FTYPE_V2DF:
23416 case INT_FTYPE_V16QI:
23417 case INT_FTYPE_V8QI:
23418 case INT_FTYPE_V8SF:
23419 case INT_FTYPE_V4DF:
23420 case INT_FTYPE_V4SF:
23421 case INT_FTYPE_V2DF:
23422 case V16QI_FTYPE_V16QI:
23423 case V8SI_FTYPE_V8SF:
23424 case V8SI_FTYPE_V4SI:
23425 case V8HI_FTYPE_V8HI:
23426 case V8HI_FTYPE_V16QI:
23427 case V8QI_FTYPE_V8QI:
23428 case V8SF_FTYPE_V8SF:
23429 case V8SF_FTYPE_V8SI:
23430 case V8SF_FTYPE_V4SF:
23431 case V4SI_FTYPE_V4SI:
23432 case V4SI_FTYPE_V16QI:
23433 case V4SI_FTYPE_V4SF:
23434 case V4SI_FTYPE_V8SI:
23435 case V4SI_FTYPE_V8HI:
23436 case V4SI_FTYPE_V4DF:
23437 case V4SI_FTYPE_V2DF:
23438 case V4HI_FTYPE_V4HI:
23439 case V4DF_FTYPE_V4DF:
23440 case V4DF_FTYPE_V4SI:
23441 case V4DF_FTYPE_V4SF:
23442 case V4DF_FTYPE_V2DF:
23443 case V4SF_FTYPE_V4SF:
23444 case V4SF_FTYPE_V4SI:
23445 case V4SF_FTYPE_V8SF:
23446 case V4SF_FTYPE_V4DF:
23447 case V4SF_FTYPE_V2DF:
23448 case V2DI_FTYPE_V2DI:
23449 case V2DI_FTYPE_V16QI:
23450 case V2DI_FTYPE_V8HI:
23451 case V2DI_FTYPE_V4SI:
23452 case V2DF_FTYPE_V2DF:
23453 case V2DF_FTYPE_V4SI:
23454 case V2DF_FTYPE_V4DF:
23455 case V2DF_FTYPE_V4SF:
23456 case V2DF_FTYPE_V2SI:
23457 case V2SI_FTYPE_V2SI:
23458 case V2SI_FTYPE_V4SF:
23459 case V2SI_FTYPE_V2SF:
23460 case V2SI_FTYPE_V2DF:
23461 case V2SF_FTYPE_V2SF:
23462 case V2SF_FTYPE_V2SI:
/* Scalar unops with vec_merge pass-through of the high lanes.  */
23465 case V4SF_FTYPE_V4SF_VEC_MERGE:
23466 case V2DF_FTYPE_V2DF_VEC_MERGE:
23467 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Two-argument forms; comparisons divert to the compare expander.  */
23468 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
23469 case V16QI_FTYPE_V16QI_V16QI:
23470 case V16QI_FTYPE_V8HI_V8HI:
23471 case V8QI_FTYPE_V8QI_V8QI:
23472 case V8QI_FTYPE_V4HI_V4HI:
23473 case V8HI_FTYPE_V8HI_V8HI:
23474 case V8HI_FTYPE_V16QI_V16QI:
23475 case V8HI_FTYPE_V4SI_V4SI:
23476 case V8SF_FTYPE_V8SF_V8SF:
23477 case V8SF_FTYPE_V8SF_V8SI:
23478 case V4SI_FTYPE_V4SI_V4SI:
23479 case V4SI_FTYPE_V8HI_V8HI:
23480 case V4SI_FTYPE_V4SF_V4SF:
23481 case V4SI_FTYPE_V2DF_V2DF:
23482 case V4HI_FTYPE_V4HI_V4HI:
23483 case V4HI_FTYPE_V8QI_V8QI:
23484 case V4HI_FTYPE_V2SI_V2SI:
23485 case V4DF_FTYPE_V4DF_V4DF:
23486 case V4DF_FTYPE_V4DF_V4DI:
23487 case V4SF_FTYPE_V4SF_V4SF:
23488 case V4SF_FTYPE_V4SF_V4SI:
23489 case V4SF_FTYPE_V4SF_V2SI:
23490 case V4SF_FTYPE_V4SF_V2DF:
23491 case V4SF_FTYPE_V4SF_DI:
23492 case V4SF_FTYPE_V4SF_SI:
23493 case V2DI_FTYPE_V2DI_V2DI:
23494 case V2DI_FTYPE_V16QI_V16QI:
23495 case V2DI_FTYPE_V4SI_V4SI:
23496 case V2DI_FTYPE_V2DI_V16QI:
23497 case V2DI_FTYPE_V2DF_V2DF:
23498 case V2SI_FTYPE_V2SI_V2SI:
23499 case V2SI_FTYPE_V4HI_V4HI:
23500 case V2SI_FTYPE_V2SF_V2SF:
23501 case V2DF_FTYPE_V2DF_V2DF:
23502 case V2DF_FTYPE_V2DF_V4SF:
23503 case V2DF_FTYPE_V2DF_V2DI:
23504 case V2DF_FTYPE_V2DF_DI:
23505 case V2DF_FTYPE_V2DF_SI:
23506 case V2SF_FTYPE_V2SF_V2SF:
23507 case V1DI_FTYPE_V1DI_V1DI:
23508 case V1DI_FTYPE_V8QI_V8QI:
23509 case V1DI_FTYPE_V2SI_V2SI:
23510 if (comparison == UNKNOWN)
23511 return ix86_expand_binop_builtin (icode, exp, target);
/* Comparisons whose operands must be exchanged before emission.  */
23514 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23515 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23516 gcc_assert (comparison != UNKNOWN);
/* Shift/rotate forms: the last argument is a count, not a vector.  */
23520 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23521 case V8HI_FTYPE_V8HI_SI_COUNT:
23522 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23523 case V4SI_FTYPE_V4SI_SI_COUNT:
23524 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23525 case V4HI_FTYPE_V4HI_SI_COUNT:
23526 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23527 case V2DI_FTYPE_V2DI_SI_COUNT:
23528 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23529 case V2SI_FTYPE_V2SI_SI_COUNT:
23530 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23531 case V1DI_FTYPE_V1DI_SI_COUNT:
23533 last_arg_count = true;
23535 case UINT64_FTYPE_UINT64_UINT64:
23536 case UINT_FTYPE_UINT_UINT:
23537 case UINT_FTYPE_UINT_USHORT:
23538 case UINT_FTYPE_UINT_UCHAR:
/* Result-mode-override form: pattern output is TImode even though the
   builtin returns V2DI.  */
23541 case V2DI2TI_FTYPE_V2DI_INT:
23544 nargs_constant = 1;
/* Two arguments, last one an immediate.  */
23546 case V8HI_FTYPE_V8HI_INT:
23547 case V8SF_FTYPE_V8SF_INT:
23548 case V4SI_FTYPE_V4SI_INT:
23549 case V4SI_FTYPE_V8SI_INT:
23550 case V4HI_FTYPE_V4HI_INT:
23551 case V4DF_FTYPE_V4DF_INT:
23552 case V4SF_FTYPE_V4SF_INT:
23553 case V4SF_FTYPE_V8SF_INT:
23554 case V2DI_FTYPE_V2DI_INT:
23555 case V2DF_FTYPE_V2DF_INT:
23556 case V2DF_FTYPE_V4DF_INT:
23558 nargs_constant = 1;
/* Three-argument forms.  */
23560 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23561 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23562 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23563 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23564 case V2DF_FTYPE_V2DF_V2DF_V2DF:
/* Three arguments, last one an immediate.  */
23567 case V16QI_FTYPE_V16QI_V16QI_INT:
23568 case V8HI_FTYPE_V8HI_V8HI_INT:
23569 case V8SI_FTYPE_V8SI_V8SI_INT:
23570 case V8SI_FTYPE_V8SI_V4SI_INT:
23571 case V8SF_FTYPE_V8SF_V8SF_INT:
23572 case V8SF_FTYPE_V8SF_V4SF_INT:
23573 case V4SI_FTYPE_V4SI_V4SI_INT:
23574 case V4DF_FTYPE_V4DF_V4DF_INT:
23575 case V4DF_FTYPE_V4DF_V2DF_INT:
23576 case V4SF_FTYPE_V4SF_V4SF_INT:
23577 case V2DI_FTYPE_V2DI_V2DI_INT:
23578 case V2DF_FTYPE_V2DF_V2DF_INT:
23580 nargs_constant = 1;
23582 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23585 nargs_constant = 1;
23587 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23590 nargs_constant = 1;
23592 case V2DI_FTYPE_V2DI_UINT_UINT:
23594 nargs_constant = 2;
/* Four-argument blend forms with a trailing immediate.  */
23596 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
23597 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
23598 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
23599 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
23601 nargs_constant = 1;
23603 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23605 nargs_constant = 2;
23608 gcc_unreachable ();
23611 gcc_assert (nargs <= ARRAY_SIZE (args));
23613 if (comparison != UNKNOWN)
23615 gcc_assert (nargs == 2);
23616 return ix86_expand_sse_compare (d, exp, target, swap);
/* Pick the real pattern target: when rmode overrides tmode, emit into
   an rmode register and view it as tmode through a subreg.  */
23619 if (rmode == VOIDmode || rmode == tmode)
23623 || GET_MODE (target) != tmode
23624 || ! (*insn_p->operand[0].predicate) (target, tmode))
23625 target = gen_reg_rtx (tmode);
23626 real_target = target;
23630 target = gen_reg_rtx (rmode)
23631 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
/* Expand and coerce each argument.  */
23634 for (i = 0; i < nargs; i++)
23636 tree arg = CALL_EXPR_ARG (exp, i);
23637 rtx op = expand_normal (arg);
23638 enum machine_mode mode = insn_p->operand[i + 1].mode;
23639 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
23641 if (last_arg_count && (i + 1) == nargs)
23643 /* SIMD shift insns take either an 8-bit immediate or
23644 register as count. But builtin functions take int as
23645 count. If count doesn't match, we put it in register. */
23648 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
23649 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
23650 op = copy_to_reg (op);
/* Trailing immediates: diagnose the exact width each pattern needs.  */
23653 else if ((nargs - i) <= nargs_constant)
23658 case CODE_FOR_sse4_1_roundpd:
23659 case CODE_FOR_sse4_1_roundps:
23660 case CODE_FOR_sse4_1_roundsd:
23661 case CODE_FOR_sse4_1_roundss:
23662 case CODE_FOR_sse4_1_blendps:
23663 case CODE_FOR_avx_blendpd256:
23664 case CODE_FOR_avx_vpermilv4df:
23665 case CODE_FOR_avx_roundpd256:
23666 case CODE_FOR_avx_roundps256:
23667 error ("the last argument must be a 4-bit immediate");
23670 case CODE_FOR_sse4_1_blendpd:
23671 case CODE_FOR_avx_vpermilv2df:
23672 case CODE_FOR_avx_vpermil2v2df3:
23673 case CODE_FOR_avx_vpermil2v4sf3:
23674 case CODE_FOR_avx_vpermil2v4df3:
23675 case CODE_FOR_avx_vpermil2v8sf3:
23676 error ("the last argument must be a 2-bit immediate");
23679 case CODE_FOR_avx_vextractf128v4df:
23680 case CODE_FOR_avx_vextractf128v8sf:
23681 case CODE_FOR_avx_vextractf128v8si:
23682 case CODE_FOR_avx_vinsertf128v4df:
23683 case CODE_FOR_avx_vinsertf128v8sf:
23684 case CODE_FOR_avx_vinsertf128v8si:
23685 error ("the last argument must be a 1-bit immediate")
23688 case CODE_FOR_avx_cmpsdv2df3:
23689 case CODE_FOR_avx_cmpssv4sf3:
23690 case CODE_FOR_avx_cmppdv2df3:
23691 case CODE_FOR_avx_cmppsv4sf3:
23692 case CODE_FOR_avx_cmppdv4df3:
23693 case CODE_FOR_avx_cmppsv8sf3:
23694 error ("the last argument must be a 5-bit immediate")
23698 switch (nargs_constant)
23701 if ((nargs - i) == nargs_constant)
23703 error ("the next to last argument must be an 8-bit immediate");
23707 error ("the last argument must be an 8-bit immediate");
23710 gcc_unreachable ();
23717 if (VECTOR_MODE_P (mode))
23718 op = safe_vector_operand (op, mode);
23720 /* If we aren't optimizing, only allow one memory operand to
23722 if (memory_operand (op, mode))
/* Mode matches: force to a register when the predicate rejected it or
   when a second memory operand would break non-optimized matching.  */
23725 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
23727 if (optimize || !match || num_memory > 1)
23728 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy then reinterpret via a paradoxical subreg.  */
23732 op = copy_to_reg (op);
23733 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
23738 args[i].mode = mode;
/* Emit with the operand count determined above.  */
23744 pat = GEN_FCN (icode) (real_target, args[0].op);
23747 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
23750 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
23754 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
23755 args[2].op, args[3].op);
23758 gcc_unreachable ();
23768 /* Subroutine of ix86_expand_builtin to take care of special insns
23769 with variable number of operands. */
/* Expander for builtins that touch memory (loads/stores, masked
   moves).  D->flag classifies the builtin as a load or a store and
   records which pattern operand is the memory reference; stores use
   the first call argument as the destination address and return 0,
   loads return the result register.  NOTE(review): elided extract --
   the function header, nargs/klass/memory assignments in each switch
   arm, several `break`s, the emit of PAT and small pieces of the
   argument loop are missing between the numbered lines.  */
23772 ix86_expand_special_args_builtin (const struct builtin_description *d,
23773 tree exp, rtx target)
23777 unsigned int i, nargs, arg_adjust, memory;
23781 enum machine_mode mode;
23783 enum insn_code icode = d->icode;
23784 bool last_arg_constant = false;
23785 const struct insn_data *insn_p = &insn_data[icode];
23786 enum machine_mode tmode = insn_p->operand[0].mode;
23787 enum { load, store } klass;
23789 switch ((enum ix86_special_builtin_type) d->flag)
/* No-operand insns (e.g. fences) are emitted immediately.  */
23791 case VOID_FTYPE_VOID:
23792 emit_insn (GEN_FCN (icode) (target));
/* Plain loads: one pointer argument.  */
23794 case V2DI_FTYPE_PV2DI:
23795 case V32QI_FTYPE_PCCHAR:
23796 case V16QI_FTYPE_PCCHAR:
23797 case V8SF_FTYPE_PCV4SF:
23798 case V8SF_FTYPE_PCFLOAT:
23799 case V4SF_FTYPE_PCFLOAT:
23800 case V4DF_FTYPE_PCV2DF:
23801 case V4DF_FTYPE_PCDOUBLE:
23802 case V2DF_FTYPE_PCDOUBLE:
/* Plain stores: pointer destination plus value.  */
23807 case VOID_FTYPE_PV2SF_V4SF:
23808 case VOID_FTYPE_PV2DI_V2DI:
23809 case VOID_FTYPE_PCHAR_V32QI:
23810 case VOID_FTYPE_PCHAR_V16QI:
23811 case VOID_FTYPE_PFLOAT_V8SF:
23812 case VOID_FTYPE_PFLOAT_V4SF:
23813 case VOID_FTYPE_PDOUBLE_V4DF:
23814 case VOID_FTYPE_PDOUBLE_V2DF:
23815 case VOID_FTYPE_PDI_DI:
23816 case VOID_FTYPE_PINT_INT:
23819 /* Reserve memory operand for target. */
23820 memory = ARRAY_SIZE (args);
/* Loads that merge into an existing vector (low/high movs).  */
23822 case V4SF_FTYPE_V4SF_PCV2SF:
23823 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Masked loads: memory first, mask second.  */
23828 case V8SF_FTYPE_PCV8SF_V8SF:
23829 case V4DF_FTYPE_PCV4DF_V4DF:
23830 case V4SF_FTYPE_PCV4SF_V4SF:
23831 case V2DF_FTYPE_PCV2DF_V2DF:
/* Masked stores: destination, mask, value.  */
23836 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23837 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23838 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23839 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23842 /* Reserve memory operand for target. */
23843 memory = ARRAY_SIZE (args);
23846 gcc_unreachable ();
23849 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, argument 0 is the destination address: wrap it in a MEM
   and use it as the pattern's output operand.  */
23851 if (klass == store)
23853 arg = CALL_EXPR_ARG (exp, 0);
23854 op = expand_normal (arg);
23855 gcc_assert (target == 0);
23856 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
23864 || GET_MODE (target) != tmode
23865 || ! (*insn_p->operand[0].predicate) (target, tmode))
23866 target = gen_reg_rtx (tmode);
/* Expand the remaining arguments (arg_adjust skips the consumed
   destination for stores).  */
23869 for (i = 0; i < nargs; i++)
23871 enum machine_mode mode = insn_p->operand[i + 1].mode;
23874 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
23875 op = expand_normal (arg);
23876 match = (*insn_p->operand[i + 1].predicate) (op, mode);
23878 if (last_arg_constant && (i + 1) == nargs)
23884 error ("the last argument must be an 8-bit immediate");
/* The operand reserved as `memory` becomes the MEM reference.  */
23892 /* This must be the memory operand. */
23893 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
23894 gcc_assert (GET_MODE (op) == mode
23895 || GET_MODE (op) == VOIDmode);
23899 /* This must be register. */
23900 if (VECTOR_MODE_P (mode))
23901 op = safe_vector_operand (op, mode);
23903 gcc_assert (GET_MODE (op) == mode
23904 || GET_MODE (op) == VOIDmode);
23905 op = copy_to_mode_reg (mode, op);
23910 args[i].mode = mode;
23916 pat = GEN_FCN (icode) (target, args[0].op);
23919 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23922 gcc_unreachable ();
/* Stores have no value; loads return the result register.  */
23928 return klass == store ? 0 : target;
23931 /* Return the integer constant in ARG. Constrain it to be in the range
23932 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): elided listing -- the success/error return statements are
   not visible here; code left byte-identical, comments only added.  */
23935 get_element_number (tree vec_type, tree arg)
23937 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject non-constant selectors and constants larger than MAX.  */
23939 if (!host_integerp (arg, 1)
23940 || (elt = tree_low_cst (arg, 1), elt > max))
23942 error ("selector must be an integer constant in the range 0..%wi", max);
23949 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
23950 ix86_expand_vector_init. We DO have language-level syntax for this, in
23951 the form of (type){ init-list }. Except that since we can't place emms
23952 instructions from inside the compiler, we can't allow the use of MMX
23953 registers unless the user explicitly asks for it. So we do *not* define
23954 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
23955 we have builtins invoked by mmintrin.h that gives us license to emit
23956 these sorts of instructions. */
23959 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
23961 enum machine_mode tmode = TYPE_MODE (type);
23962 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
23963 int i, n_elt = GET_MODE_NUNITS (tmode);
23964 rtvec v = rtvec_alloc (n_elt);
/* The call must supply exactly one scalar argument per vector element.  */
23966 gcc_assert (VECTOR_MODE_P (tmode));
23967 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each element and narrow it to the vector's element mode.  */
23969 for (i = 0; i < n_elt; ++i)
23971 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
23972 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
23975 if (!target || !register_operand (target, tmode))
23976 target = gen_reg_rtx (tmode);
23978 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
23982 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
23983 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
23984 had a language-level syntax for referencing vector elements. */
23987 ix86_expand_vec_ext_builtin (tree exp, rtx target)
23989 enum machine_mode tmode, mode0;
/* arg0 is the source vector, arg1 the constant element selector.  */
23994 arg0 = CALL_EXPR_ARG (exp, 0);
23995 arg1 = CALL_EXPR_ARG (exp, 1);
23997 op0 = expand_normal (arg0);
23998 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element mode, MODE0 the vector mode of the source.  */
24000 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24001 mode0 = TYPE_MODE (TREE_TYPE (arg0));
24002 gcc_assert (VECTOR_MODE_P (mode0));
24004 op0 = force_reg (mode0, op0);
24006 if (optimize || !target || !register_operand (target, tmode))
24007 target = gen_reg_rtx (tmode);
24009 ix86_expand_vector_extract (true, target, op0, elt);
24014 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24015 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
24016 a language-level syntax for referencing vector elements. */
24019 ix86_expand_vec_set_builtin (tree exp)
24021 enum machine_mode tmode, mode1;
24022 tree arg0, arg1, arg2;
24024 rtx op0, op1, target;
/* arg0 = source vector, arg1 = scalar value, arg2 = element selector.  */
24026 arg0 = CALL_EXPR_ARG (exp, 0);
24027 arg1 = CALL_EXPR_ARG (exp, 1);
24028 arg2 = CALL_EXPR_ARG (exp, 2);
24030 tmode = TYPE_MODE (TREE_TYPE (arg0));
24031 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24032 gcc_assert (VECTOR_MODE_P (tmode));
24034 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24035 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
24036 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Widen/narrow the scalar to the element mode if expansion chose another.  */
24038 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24039 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24041 op0 = force_reg (tmode, op0);
24042 op1 = force_reg (mode1, op1);
24044 /* OP0 is the source of these builtin functions and shouldn't be
24045 modified. Create a copy, use it and return it as target. */
24046 target = gen_reg_rtx (tmode);
24047 emit_move_insn (target, op0);
24048 ix86_expand_vector_set (true, target, op1, elt);
24053 /* Expand an expression EXP that calls a built-in function,
24054 with result going to TARGET if that's convenient
24055 (and in mode MODE if that's convenient).
24056 SUBTARGET may be used as the target for computing one of EXP's operands.
24057 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): elided listing -- embedded numbers are original file line
   numbers; `break`s, `return`s and some statements between them are missing.
   Code left byte-identical, comments only added.  */
24060 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
24061 enum machine_mode mode ATTRIBUTE_UNUSED,
24062 int ignore ATTRIBUTE_UNUSED)
24064 const struct builtin_description *d;
24066 enum insn_code icode;
24067 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24068 tree arg0, arg1, arg2;
24069 rtx op0, op1, op2, pat;
24070 enum machine_mode mode0, mode1, mode2;
24071 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24073 /* Determine whether the builtin function is available under the current ISA.
24074 Originally the builtin was not created if it wasn't applicable to the
24075 current ISA based on the command line switches. With function specific
24076 options, we need to check in the context of the function making the call
24077 whether it is supported. */
24078 if (ix86_builtins_isa[fcode].isa
24079 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags)
24081 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
24082 NULL, NULL, false);
/* Report which -m option would enable the builtin, if one is known.  */
24085 error ("%qE needs unknown isa option", fndecl);
24088 gcc_assert (opts != NULL);
24089 error ("%qE needs isa option %s", fndecl, opts);
/* Builtins that need hand-written expansion come first; the generic
   tables (bdesc_*) are scanned at the bottom.  */
24097 case IX86_BUILTIN_MASKMOVQ:
24098 case IX86_BUILTIN_MASKMOVDQU:
24099 icode = (fcode == IX86_BUILTIN_MASKMOVQ
24100 ? CODE_FOR_mmx_maskmovq
24101 : CODE_FOR_sse2_maskmovdqu);
24102 /* Note the arg order is different from the operand order. */
24103 arg1 = CALL_EXPR_ARG (exp, 0);
24104 arg2 = CALL_EXPR_ARG (exp, 1);
24105 arg0 = CALL_EXPR_ARG (exp, 2);
24106 op0 = expand_normal (arg0);
24107 op1 = expand_normal (arg1);
24108 op2 = expand_normal (arg2);
24109 mode0 = insn_data[icode].operand[0].mode;
24110 mode1 = insn_data[icode].operand[1].mode;
24111 mode2 = insn_data[icode].operand[2].mode;
/* The destination is a memory reference through the pointer argument.  */
24113 op0 = force_reg (Pmode, op0);
24114 op0 = gen_rtx_MEM (mode1, op0);
24116 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
24117 op0 = copy_to_mode_reg (mode0, op0);
24118 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
24119 op1 = copy_to_mode_reg (mode1, op1);
24120 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
24121 op2 = copy_to_mode_reg (mode2, op2);
24122 pat = GEN_FCN (icode) (op0, op1, op2);
24128 case IX86_BUILTIN_LDMXCSR:
/* LDMXCSR loads the MXCSR control register from a stack temporary.  */
24129 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
24130 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24131 emit_move_insn (target, op0);
24132 emit_insn (gen_sse_ldmxcsr (target));
24135 case IX86_BUILTIN_STMXCSR:
24136 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24137 emit_insn (gen_sse_stmxcsr (target));
24138 return copy_to_mode_reg (SImode, target);
24140 case IX86_BUILTIN_CLFLUSH:
24141 arg0 = CALL_EXPR_ARG (exp, 0);
24142 op0 = expand_normal (arg0);
24143 icode = CODE_FOR_sse2_clflush;
24144 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
24145 op0 = copy_to_mode_reg (Pmode, op0);
24147 emit_insn (gen_sse2_clflush (op0));
24150 case IX86_BUILTIN_MONITOR:
24151 arg0 = CALL_EXPR_ARG (exp, 0);
24152 arg1 = CALL_EXPR_ARG (exp, 1);
24153 arg2 = CALL_EXPR_ARG (exp, 2);
24154 op0 = expand_normal (arg0);
24155 op1 = expand_normal (arg1);
24156 op2 = expand_normal (arg2);
24158 op0 = copy_to_mode_reg (Pmode, op0);
24160 op1 = copy_to_mode_reg (SImode, op1);
24162 op2 = copy_to_mode_reg (SImode, op2);
/* ix86_gen_monitor selects the 32- vs 64-bit monitor pattern.  */
24163 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
24166 case IX86_BUILTIN_MWAIT:
24167 arg0 = CALL_EXPR_ARG (exp, 0);
24168 arg1 = CALL_EXPR_ARG (exp, 1);
24169 op0 = expand_normal (arg0);
24170 op1 = expand_normal (arg1);
24172 op0 = copy_to_mode_reg (SImode, op0);
24174 op1 = copy_to_mode_reg (SImode, op1);
24175 emit_insn (gen_sse3_mwait (op0, op1));
/* vec_init/vec_ext/vec_set are delegated to their helpers above.  */
24178 case IX86_BUILTIN_VEC_INIT_V2SI:
24179 case IX86_BUILTIN_VEC_INIT_V4HI:
24180 case IX86_BUILTIN_VEC_INIT_V8QI:
24181 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
24183 case IX86_BUILTIN_VEC_EXT_V2DF:
24184 case IX86_BUILTIN_VEC_EXT_V2DI:
24185 case IX86_BUILTIN_VEC_EXT_V4SF:
24186 case IX86_BUILTIN_VEC_EXT_V4SI:
24187 case IX86_BUILTIN_VEC_EXT_V8HI:
24188 case IX86_BUILTIN_VEC_EXT_V2SI:
24189 case IX86_BUILTIN_VEC_EXT_V4HI:
24190 case IX86_BUILTIN_VEC_EXT_V16QI:
24191 return ix86_expand_vec_ext_builtin (exp, target);
24193 case IX86_BUILTIN_VEC_SET_V2DI:
24194 case IX86_BUILTIN_VEC_SET_V4SF:
24195 case IX86_BUILTIN_VEC_SET_V4SI:
24196 case IX86_BUILTIN_VEC_SET_V8HI:
24197 case IX86_BUILTIN_VEC_SET_V4HI:
24198 case IX86_BUILTIN_VEC_SET_V16QI:
24199 return ix86_expand_vec_set_builtin (exp);
24201 case IX86_BUILTIN_INFQ:
/* Materialize __float128 infinity from the constant pool.  */
24203 REAL_VALUE_TYPE inf;
24207 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
24209 tmp = validize_mem (force_const_mem (mode, tmp));
24212 target = gen_reg_rtx (mode);
24214 emit_move_insn (target, tmp);
/* Fall through to the table-driven expanders for everything else.  */
24222 for (i = 0, d = bdesc_special_args;
24223 i < ARRAY_SIZE (bdesc_special_args);
24225 if (d->code == fcode)
24226 return ix86_expand_special_args_builtin (d, exp, target);
24228 for (i = 0, d = bdesc_args;
24229 i < ARRAY_SIZE (bdesc_args);
24231 if (d->code == fcode)
24234 case IX86_BUILTIN_FABSQ:
24235 case IX86_BUILTIN_COPYSIGNQ:
24237 /* Emit a normal call if SSE2 isn't available. */
24238 return expand_call (exp, target, ignore);
24240 return ix86_expand_args_builtin (d, exp, target);
24243 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24244 if (d->code == fcode)
24245 return ix86_expand_sse_comi (d, exp, target);
24247 for (i = 0, d = bdesc_pcmpestr;
24248 i < ARRAY_SIZE (bdesc_pcmpestr);
24250 if (d->code == fcode)
24251 return ix86_expand_sse_pcmpestr (d, exp, target);
24253 for (i = 0, d = bdesc_pcmpistr;
24254 i < ARRAY_SIZE (bdesc_pcmpistr);
24256 if (d->code == fcode)
24257 return ix86_expand_sse_pcmpistr (d, exp, target);
24259 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24260 if (d->code == fcode)
24261 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
24262 (enum multi_arg_type)d->flag,
/* Every valid fcode must have been handled by now.  */
24265 gcc_unreachable ();
24268 /* Returns a function decl for a vectorized version of the builtin function
24269 with builtin function code FN and the result vector type TYPE, or NULL_TREE
24270 if it is not available. */
24273 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
24276 enum machine_mode in_mode, out_mode;
/* Both sides of the mapping must be vector types.  */
24279 if (TREE_CODE (type_out) != VECTOR_TYPE
24280 || TREE_CODE (type_in) != VECTOR_TYPE)
24283 out_mode = TYPE_MODE (TREE_TYPE (type_out));
24284 out_n = TYPE_VECTOR_SUBPARTS (type_out);
24285 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24286 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Each case accepts exactly one element-mode/width combination and maps
   the scalar builtin to its SSE packed counterpart.  */
24290 case BUILT_IN_SQRT:
24291 if (out_mode == DFmode && out_n == 2
24292 && in_mode == DFmode && in_n == 2)
24293 return ix86_builtins[IX86_BUILTIN_SQRTPD];
24296 case BUILT_IN_SQRTF:
24297 if (out_mode == SFmode && out_n == 4
24298 && in_mode == SFmode && in_n == 4)
24299 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
24302 case BUILT_IN_LRINT:
24303 if (out_mode == SImode && out_n == 4
24304 && in_mode == DFmode && in_n == 2)
24305 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
24308 case BUILT_IN_LRINTF:
24309 if (out_mode == SImode && out_n == 4
24310 && in_mode == SFmode && in_n == 4)
24311 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
24318 /* Dispatch to a handler for a vectorization library. */
24319 if (ix86_veclib_handler)
24320 return (*ix86_veclib_handler)(fn, type_out, type_in);
24325 /* Handler for an SVML-style interface to
24326 a library with vectorized intrinsics. */
24329 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
24332 tree fntype, new_fndecl, args;
24335 enum machine_mode el_mode, in_mode;
24338 /* The SVML is suitable for unsafe math only. */
24339 if (!flag_unsafe_math_optimizations)
24342 el_mode = TYPE_MODE (TREE_TYPE (type_out));
24343 n = TYPE_VECTOR_SUBPARTS (type_out);
24344 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24345 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode/width must agree.  */
24346 if (el_mode != in_mode
/* Double-precision functions are vectorized 2-wide (V2DF)...  */
24354 case BUILT_IN_LOG10:
24356 case BUILT_IN_TANH:
24358 case BUILT_IN_ATAN:
24359 case BUILT_IN_ATAN2:
24360 case BUILT_IN_ATANH:
24361 case BUILT_IN_CBRT:
24362 case BUILT_IN_SINH:
24364 case BUILT_IN_ASINH:
24365 case BUILT_IN_ASIN:
24366 case BUILT_IN_COSH:
24368 case BUILT_IN_ACOSH:
24369 case BUILT_IN_ACOS:
24370 if (el_mode != DFmode || n != 2)
/* ...single-precision ones 4-wide (V4SF).  */
24374 case BUILT_IN_EXPF:
24375 case BUILT_IN_LOGF:
24376 case BUILT_IN_LOG10F:
24377 case BUILT_IN_POWF:
24378 case BUILT_IN_TANHF:
24379 case BUILT_IN_TANF:
24380 case BUILT_IN_ATANF:
24381 case BUILT_IN_ATAN2F:
24382 case BUILT_IN_ATANHF:
24383 case BUILT_IN_CBRTF:
24384 case BUILT_IN_SINHF:
24385 case BUILT_IN_SINF:
24386 case BUILT_IN_ASINHF:
24387 case BUILT_IN_ASINF:
24388 case BUILT_IN_COSHF:
24389 case BUILT_IN_COSF:
24390 case BUILT_IN_ACOSHF:
24391 case BUILT_IN_ACOSF:
24392 if (el_mode != SFmode || n != 4)
/* Build the SVML symbol name from the scalar builtin's name:
   "__builtin_" prefix (10 chars) is stripped, "vmls"/"vmld" prefix and a
   width suffix are added; log/logf get hand-spelled names.  */
24400 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24402 if (fn == BUILT_IN_LOGF)
24403 strcpy (name, "vmlsLn4");
24404 else if (fn == BUILT_IN_LOG)
24405 strcpy (name, "vmldLn2");
24408 sprintf (name, "vmls%s", bname+10);
24409 name[strlen (name)-1] = '4';
24412 sprintf (name, "vmld%s2", bname+10);
24414 /* Convert to uppercase. */
/* One or two vector arguments, mirroring the scalar prototype.  */
24418 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24419 args = TREE_CHAIN (args))
24423 fntype = build_function_type_list (type_out, type_in, NULL);
24425 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24427 /* Build a function declaration for the vectorized function. */
24428 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
24429 TREE_PUBLIC (new_fndecl) = 1;
24430 DECL_EXTERNAL (new_fndecl) = 1;
24431 DECL_IS_NOVOPS (new_fndecl) = 1;
24432 TREE_READONLY (new_fndecl) = 1;
24437 /* Handler for an ACML-style interface to
24438 a library with vectorized intrinsics. */
24441 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Name template: the ".." is patched with the width/precision code and
   the scalar builtin's name is appended at offset 7 below.  */
24443 char name[20] = "__vr.._";
24444 tree fntype, new_fndecl, args;
24447 enum machine_mode el_mode, in_mode;
24450 /* The ACML is 64bits only and suitable for unsafe math only as
24451 it does not correctly support parts of IEEE with the required
24452 precision such as denormals. */
24454 || !flag_unsafe_math_optimizations)
24457 el_mode = TYPE_MODE (TREE_TYPE (type_out));
24458 n = TYPE_VECTOR_SUBPARTS (type_out);
24459 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24460 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode/width must agree.  */
24461 if (el_mode != in_mode
24471 case BUILT_IN_LOG2:
24472 case BUILT_IN_LOG10:
24475 if (el_mode != DFmode
24480 case BUILT_IN_SINF:
24481 case BUILT_IN_COSF:
24482 case BUILT_IN_EXPF:
24483 case BUILT_IN_POWF:
24484 case BUILT_IN_LOGF:
24485 case BUILT_IN_LOG2F:
24486 case BUILT_IN_LOG10F:
24489 if (el_mode != SFmode
/* Append the scalar name with its "__builtin_" prefix (10 chars) removed.  */
24498 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24499 sprintf (name + 7, "%s", bname+10);
24502 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24503 args = TREE_CHAIN (args))
24507 fntype = build_function_type_list (type_out, type_in, NULL);
24509 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24511 /* Build a function declaration for the vectorized function. */
24512 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
24513 TREE_PUBLIC (new_fndecl) = 1;
24514 DECL_EXTERNAL (new_fndecl) = 1;
24515 DECL_IS_NOVOPS (new_fndecl) = 1;
24516 TREE_READONLY (new_fndecl) = 1;
24522 /* Returns a decl of a function that implements conversion of an integer vector
24523 into a floating-point vector, or vice-versa. TYPE is the type of the integer
24524 side of the conversion.
24525 Return NULL_TREE if it is not available. */
24528 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
24530 if (TREE_CODE (type) != VECTOR_TYPE)
/* int -> float conversions (CVTDQ2PS for V4SI).  */
24536 switch (TYPE_MODE (type))
24539 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int truncating conversions (CVTTPS2DQ for V4SI).  */
24544 case FIX_TRUNC_EXPR:
24545 switch (TYPE_MODE (type))
24548 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
24558 /* Returns a code for a target-specific builtin that implements
24559 reciprocal of the function, or NULL_TREE if not available. */
24562 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
24563 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations are only valid under fast/unsafe math with
   SSE math enabled, and not when optimizing for size.  */
24565 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
24566 && flag_finite_math_only && !flag_trapping_math
24567 && flag_unsafe_math_optimizations))
24571 /* Machine dependent builtins. */
24574 /* Vectorized version of sqrt to rsqrt conversion. */
24575 case IX86_BUILTIN_SQRTPS_NR:
24576 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
24582 /* Normal builtins. */
24585 /* Sqrt to rsqrt conversion. */
24586 case BUILT_IN_SQRTF:
24587 return ix86_builtins[IX86_BUILTIN_RSQRTF];
24594 /* Store OPERAND to the memory after reload is completed. This means
24595 that we can't easily use assign_stack_local. */
24597 ix86_force_to_memory (enum machine_mode mode, rtx operand)
24601 gcc_assert (reload_completed);
/* With a red zone we can store below the stack pointer without
   adjusting it.  */
24602 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
24604 result = gen_rtx_MEM (mode,
24605 gen_rtx_PLUS (Pmode,
24607 GEN_INT (-RED_ZONE_SIZE)));
24608 emit_move_insn (result, operand);
/* 64-bit without red zone: push the value (widened to DImode).  */
24610 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
24616 operand = gen_lowpart (DImode, operand);
24620 gen_rtx_SET (VOIDmode,
24621 gen_rtx_MEM (DImode,
24622 gen_rtx_PRE_DEC (DImode,
24623 stack_pointer_rtx)),
24627 gcc_unreachable ();
24629 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit: push DImode as two SImode halves, everything else as SImode.  */
24638 split_di (&operand, 1, operands, operands + 1);
24640 gen_rtx_SET (VOIDmode,
24641 gen_rtx_MEM (SImode,
24642 gen_rtx_PRE_DEC (Pmode,
24643 stack_pointer_rtx)),
24646 gen_rtx_SET (VOIDmode,
24647 gen_rtx_MEM (SImode,
24648 gen_rtx_PRE_DEC (Pmode,
24649 stack_pointer_rtx)),
24654 /* Store HImodes as SImodes. */
24655 operand = gen_lowpart (SImode, operand);
24659 gen_rtx_SET (VOIDmode,
24660 gen_rtx_MEM (GET_MODE (operand),
24661 gen_rtx_PRE_DEC (SImode,
24662 stack_pointer_rtx)),
24666 gcc_unreachable ();
24668 result = gen_rtx_MEM (mode, stack_pointer_rtx);
24673 /* Free operand from the memory. */
/* Counterpart of ix86_force_to_memory: pops the stack slot allocated
   there (no-op when the red zone was used).  */
24675 ix86_free_from_memory (enum machine_mode mode)
24677 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
24681 if (mode == DImode || TARGET_64BIT)
24685 /* Use LEA to deallocate stack space. In peephole2 it will be converted
24686 to pop or add instruction if registers are available. */
24687 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24688 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
24693 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
24694 QImode must go into class Q_REGS.
24695 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
24696 movdf to do mem-to-mem moves through integer regs. */
24698 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
24700 enum machine_mode mode = GET_MODE (x);
24702 /* We're only allowed to return a subclass of CLASS. Many of the
24703 following checks fail for NO_REGS, so eliminate that early. */
24704 if (regclass == NO_REGS)
24707 /* All classes can load zeros. */
24708 if (x == CONST0_RTX (mode))
24711 /* Force constants into memory if we are loading a (nonzero) constant into
24712 an MMX or SSE register. This is because there are no MMX/SSE instructions
24713 to load from a constant. */
24715 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
24718 /* Prefer SSE regs only, if we can use them for math. */
24719 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
24720 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
24722 /* Floating-point constants need more complex checks. */
24723 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
24725 /* General regs can load everything. */
24726 if (reg_class_subset_p (regclass, GENERAL_REGS))
24729 /* Floats can load 0 and 1 plus some others. Note that we eliminated
24730 zero above. We only want to wind up preferring 80387 registers if
24731 we plan on doing computation with them. */
24733 && standard_80387_constant_p (x))
24735 /* Limit class to non-sse. */
24736 if (regclass == FLOAT_SSE_REGS)
24738 if (regclass == FP_TOP_SSE_REGS)
24740 if (regclass == FP_SECOND_SSE_REGS)
24741 return FP_SECOND_REG;
24742 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
24749 /* Generally when we see PLUS here, it's the function invariant
24750 (plus soft-fp const_int). Which can only be computed into general
24752 if (GET_CODE (x) == PLUS)
24753 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
24755 /* QImode constants are easy to load, but non-constant QImode data
24756 must go into Q_REGS. */
24757 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
24759 if (reg_class_subset_p (regclass, Q_REGS))
24761 if (reg_class_subset_p (Q_REGS, regclass))
24769 /* Discourage putting floating-point values in SSE registers unless
24770 SSE math is being used, and likewise for the 387 registers. */
24772 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
24774 enum machine_mode mode = GET_MODE (x);
24776 /* Restrict the output reload class to the register bank that we are doing
24777 math on. If we would like not to return a subset of CLASS, reject this
24778 alternative: if reload cannot do this, it will still use its choice. */
24779 mode = GET_MODE (x);
24780 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
24781 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* For x87-handled float modes, narrow mixed classes to their x87 part.  */
24783 if (X87_FLOAT_MODE_P (mode))
24785 if (regclass == FP_TOP_SSE_REGS)
24787 else if (regclass == FP_SECOND_SSE_REGS)
24788 return FP_SECOND_REG;
24790 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* TARGET_SECONDARY_RELOAD hook: decide whether a reload of X into/out of
   class RCLASS in MODE needs an intermediate register class.  */
static enum reg_class
24797 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
24798 enum machine_mode mode,
24799 secondary_reload_info *sri ATTRIBUTE_UNUSED)
24801 /* QImode spills from non-QI registers require
24802 intermediate register on 32bit targets. */
24803 if (!in_p && mode == QImode && !TARGET_64BIT
24804 && (rclass == GENERAL_REGS
24805 || rclass == LEGACY_REGS
24806 || rclass == INDEX_REGS)
/* Look through SUBREGs/pseudos to the underlying hard register.  */
24815 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
24816 regno = true_regnum (x);
24818 /* Return Q_REGS if the operand is in memory. */
24826 /* If we are copying between general and FP registers, we need a memory
24827 location. The same is true for SSE and MMX registers.
24829 To optimize register_move_cost performance, allow inline variant.
24831 The macro can't work reliably when one of the CLASSES is class containing
24832 registers from multiple units (SSE, MMX, integer). We avoid this by never
24833 combining those units in single alternative in the machine description.
24834 Ensure that this constraint holds to avoid unexpected surprises.
24836 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
24837 enforce these sanity checks. */
24840 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
24841 enum machine_mode mode, int strict)
/* Mixed-unit classes are only tolerated in the non-strict (cost) path.  */
24843 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
24844 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
24845 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
24846 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
24847 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
24848 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
24850 gcc_assert (!strict);
/* x87 <-> anything-else moves always go through memory.  */
24854 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
24857 /* ??? This is a lie. We do have moves between mmx/general, and for
24858 mmx/sse2. But by saying we need secondary memory we discourage the
24859 register allocator from using the mmx registers unless needed. */
24860 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
24863 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
24865 /* SSE1 doesn't have any direct moves from other classes. */
24869 /* If the target says that inter-unit moves are more expensive
24870 than moving through memory, then don't generate them. */
24871 if (!TARGET_INTER_UNIT_MOVES)
24874 /* Between SSE and general, we have moves no larger than word size. */
24875 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper used by the SECONDARY_MEMORY_NEEDED macro; the work
   is done by the inline variant above.  */
24883 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
24884 enum machine_mode mode, int strict)
24886 return inline_secondary_memory_needed (class1, class2, mode, strict);
24889 /* Return true if the registers in CLASS cannot represent the change from
24890 modes FROM to TO. */
24893 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
24894 enum reg_class regclass)
24899 /* x87 registers can't do subreg at all, as all values are reformatted
24900 to extended precision. */
24901 if (MAYBE_FLOAT_CLASS_P (regclass))
24904 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
24906 /* Vector registers do not support QI or HImode loads. If we don't
24907 disallow a change to these modes, reload will assume it's ok to
24908 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
24909 the vec_dupv4hi pattern. */
24910 if (GET_MODE_SIZE (from) < 4)
24913 /* Vector registers do not support subreg with nonzero offsets, which
24914 are otherwise valid for integer registers. Since we can't see
24915 whether we have a nonzero offset from here, prohibit all
24916 nonparadoxical subregs changing size. */
24917 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
24924 /* Return the cost of moving data of mode M between a
24925 register and memory. A value of 2 is the default; this cost is
24926 relative to those in `REGISTER_MOVE_COST'.
24928 This function is used extensively by register_move_cost that is used to
24929 build tables at startup. Make it inline in this case.
24930 When IN is 2, return maximum of in and out move cost.
24932 If moving between registers and memory is more expensive than
24933 between two registers, you should define this macro to express the
24936 Model also increased moving costs of QImode registers in non
24940 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* x87 load/store costs, indexed by MODE_INDEX of the mode.  */
24944 if (FLOAT_CLASS_P (regclass))
24962 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
24963 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE costs, indexed by operand size.  */
24965 if (SSE_CLASS_P (regclass))
24968 switch (GET_MODE_SIZE (mode))
24983 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
24984 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX costs, indexed by operand size.  */
24986 if (MMX_CLASS_P (regclass))
24989 switch (GET_MODE_SIZE (mode))
25001 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
25002 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers; QImode is penalized when the class has no
   byte-addressable registers (uses movzbl instead of a plain load).  */
25004 switch (GET_MODE_SIZE (mode))
25007 if (Q_CLASS_P (regclass) || TARGET_64BIT)
25010 return ix86_cost->int_store[0];
25011 if (TARGET_PARTIAL_REG_DEPENDENCY
25012 && optimize_function_for_speed_p (cfun))
25013 cost = ix86_cost->movzbl_load;
25015 cost = ix86_cost->int_load[0];
25017 return MAX (cost, ix86_cost->int_store[0]);
25023 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
25025 return ix86_cost->movzbl_load;
25027 return ix86_cost->int_store[0] + 4;
25032 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
25033 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
25035 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
25036 if (mode == TFmode)
25039 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
25041 cost = ix86_cost->int_load[2];
25043 cost = ix86_cost->int_store[2];
25044 return (cost * (((int) GET_MODE_SIZE (mode)
25045 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper used by the MEMORY_MOVE_COST macro; the work is done
   by the inline variant above.  */
25050 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
25052 return inline_memory_move_cost (mode, regclass, in);
25056 /* Return the cost of moving data from a register in class CLASS1 to
25057 one in class CLASS2.
25059 It is not required that the cost always equal 2 when FROM is the same as TO;
25060 on some machines it is expensive to move between registers if they are not
25061 general registers. */
25064 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
25065 enum reg_class class2)
25067 /* In case we require secondary memory, compute cost of the store followed
25068 by load. In order to avoid bad register allocation choices, we need
25069 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
25071 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* in == 2 -> max of load and store cost for each side.  */
25075 cost += inline_memory_move_cost (mode, class1, 2);
25076 cost += inline_memory_move_cost (mode, class2, 2);
25078 /* In case of copying from general_purpose_register we may emit multiple
25079 stores followed by single load causing memory size mismatch stall.
25080 Count this as arbitrarily high cost of 20. */
25081 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
25084 /* In the case of FP/MMX moves, the registers actually overlap, and we
25085 have to switch modes in order to treat them differently. */
25086 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
25087 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25093 /* Moves between SSE/MMX and integer unit are expensive. */
25094 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25095 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
25097 /* ??? By keeping returned value relatively high, we limit the number
25098 of moves between integer and MMX/SSE registers for all targets.
25099 Additionally, high value prevents problem with x86_modes_tieable_p(),
25100 where integer modes in MMX/SSE registers are not tieable
25101 because of missing QImode and HImode moves to, from or between
25102 MMX/SSE registers. */
25103 return MAX (8, ix86_cost->mmxsse_to_integer);
25105 if (MAYBE_FLOAT_CLASS_P (class1))
25106 return ix86_cost->fp_move;
25107 if (MAYBE_SSE_CLASS_P (class1))
25108 return ix86_cost->sse_move;
25109 if (MAYBE_MMX_CLASS_P (class1))
25110 return ix86_cost->mmx_move;
25114 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
25117 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
25119 /* Flags and only flags can only hold CCmode values. */
25120 if (CC_REGNO_P (regno))
25121 return GET_MODE_CLASS (mode) == MODE_CC;
25122 if (GET_MODE_CLASS (mode) == MODE_CC
25123 || GET_MODE_CLASS (mode) == MODE_RANDOM
25124 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
25126 if (FP_REGNO_P (regno))
25127 return VALID_FP_MODE_P (mode);
25128 if (SSE_REGNO_P (regno))
25130 /* We implement the move patterns for all vector modes into and
25131 out of SSE registers, even when no operation instructions
25132 are available. OImode move is available only when AVX is
25134 return ((TARGET_AVX && mode == OImode)
25135 || VALID_AVX256_REG_MODE (mode)
25136 || VALID_SSE_REG_MODE (mode)
25137 || VALID_SSE2_REG_MODE (mode)
25138 || VALID_MMX_REG_MODE (mode)
25139 || VALID_MMX_REG_MODE_3DNOW (mode));
25141 if (MMX_REGNO_P (regno))
25143 /* We implement the move patterns for 3DNOW modes even in MMX mode,
25144 so if the register is available at all, then we can move data of
25145 the given mode into or out of it. */
25146 return (VALID_MMX_REG_MODE (mode)
25147 || VALID_MMX_REG_MODE_3DNOW (mode));
25150 if (mode == QImode)
25152 /* Take care for QImode values - they can be in non-QI regs,
25153 but then they do cause partial register stalls. */
/* Only eax/ebx/ecx/edx (regno < 4) have byte subregisters in 32-bit mode.  */
25154 if (regno < 4 || TARGET_64BIT)
25156 if (!TARGET_PARTIAL_REG_STALL)
25158 return reload_in_progress || reload_completed;
25160 /* We handle both integer and floats in the general purpose registers. */
25161 else if (VALID_INT_MODE_P (mode))
25163 else if (VALID_FP_MODE_P (mode))
25165 else if (VALID_DFP_MODE_P (mode))
25167 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
25168 on to use that value in smaller contexts, this can easily force a
25169 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
25170 supporting DImode, allow it. */
25171 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
25177 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
25178 tieable integer mode. */
/* NOTE(review): the mode dispatch (presumably a switch) is elided in this
   view; only two of its result expressions are visible.  */
25181 ix86_tieable_integer_mode_p (enum machine_mode mode)
/* Visible result: tieable when 64-bit, or when partial-register stalls
   are not a concern (applies to some sub-word mode case).  */
25190 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
/* Visible result: some (wider) mode case is tieable only in 64-bit mode.  */
25193 return TARGET_64BIT;
25200 /* Return true if MODE1 is accessible in a register that can hold MODE2
25201 without copying. That is, all register classes that can hold MODE2
25202 can also hold MODE1. */
25205 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
/* Identical modes always tie (return elided in this view).  */
25207 if (mode1 == mode2)
/* Two tieable integer modes tie with each other.  */
25210 if (ix86_tieable_integer_mode_p (mode1)
25211 && ix86_tieable_integer_mode_p (mode2))
25214 /* MODE2 being XFmode implies fp stack or general regs, which means we
25215 can tie any smaller floating point modes to it. Note that we do not
25216 tie this with TFmode. */
25217 if (mode2 == XFmode)
25218 return mode1 == SFmode || mode1 == DFmode;
25220 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
25221 that we can tie it with SFmode. */
25222 if (mode2 == DFmode)
25223 return mode1 == SFmode;
25225 /* If MODE2 is only appropriate for an SSE register, then tie with
25226 any other mode acceptable to SSE registers. */
25227 if (GET_MODE_SIZE (mode2) == 16
25228 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
25229 return (GET_MODE_SIZE (mode1) == 16
25230 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
25232 /* If MODE2 is appropriate for an MMX register, then tie
25233 with any other mode acceptable to MMX registers. */
25234 if (GET_MODE_SIZE (mode2) == 8
25235 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
25236 return (GET_MODE_SIZE (mode1) == 8
25237 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
25242 /* Compute a (partial) cost for rtx X. Return true if the complete
25243 cost has been computed, and false if subexpressions should be
25244 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): the big `switch (code)` skeleton (case labels, braces,
   break/return statements) is elided in this view; only the per-case
   cost computations are visible.  Inline numbers are original-file
   line numbers.  */
25247 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
25249 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
25250 enum machine_mode mode = GET_MODE (x);
/* Constant operands: cost depends on whether the immediate fits the
   64-bit encodings and, for PIC, whether the symbol is local.  */
25258 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
25260 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
25262 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): `!GET_CODE (x) != LABEL_REF` looks like a typo for
   `GET_CODE (x) != LABEL_REF` -- as written the `!` makes this
   comparison always true.  Known historical wart; confirm against the
   upstream fix before touching.  */
25264 || (!GET_CODE (x) != LABEL_REF
25265 && (GET_CODE (x) != SYMBOL_REF
25266 || !SYMBOL_REF_LOCAL_P (x)))))
25273 if (mode == VOIDmode)
/* FP constants loadable by fld1/fldz etc. are cheap.  */
25276 switch (standard_80387_constant_p (x))
25281 default: /* Other constants */
25286 /* Start with (MEM (SYMBOL_REF)), since that's where
25287 it'll probably end up. Add a penalty for size. */
25288 *total = (COSTS_N_INSNS (1)
25289 + (flag_pic != 0 && !TARGET_64BIT)
25290 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
25296 /* The zero extensions is often completely free on x86_64, so make
25297 it as cheap as possible. */
25298 if (TARGET_64BIT && mode == DImode
25299 && GET_MODE (XEXP (x, 0)) == SImode)
25301 else if (TARGET_ZERO_EXTEND_WITH_AND)
25302 *total = ix86_cost->add;
25304 *total = ix86_cost->movzx;
/* SIGN_EXTEND case.  */
25308 *total = ix86_cost->movsx;
/* ASHIFT by a constant: may be done with add (shift by 1) or lea
   (shift by 2 or 3) when those are cheaper.  */
25312 if (CONST_INT_P (XEXP (x, 1))
25313 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
25315 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25318 *total = ix86_cost->add;
25321 if ((value == 2 || value == 3)
25322 && ix86_cost->lea <= ix86_cost->shift_const)
25324 *total = ix86_cost->lea;
/* Shifts/rotates of DImode on 32-bit need multi-insn sequences.  */
25334 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
25336 if (CONST_INT_P (XEXP (x, 1)))
25338 if (INTVAL (XEXP (x, 1)) > 32)
25339 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
25341 *total = ix86_cost->shift_const * 2;
25345 if (GET_CODE (XEXP (x, 1)) == AND)
25346 *total = ix86_cost->shift_var * 2;
25348 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
25353 if (CONST_INT_P (XEXP (x, 1)))
25354 *total = ix86_cost->shift_const;
25356 *total = ix86_cost->shift_var;
/* MULT case: FP multiplies first, then integer multiply below.  */
25361 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25363 /* ??? SSE scalar cost should be used here. */
25364 *total = ix86_cost->fmul;
25367 else if (X87_FLOAT_MODE_P (mode))
25369 *total = ix86_cost->fmul;
25372 else if (FLOAT_MODE_P (mode))
25374 /* ??? SSE vector cost should be used here. */
25375 *total = ix86_cost->fmul;
25380 rtx op0 = XEXP (x, 0);
25381 rtx op1 = XEXP (x, 1);
/* Integer multiply by a constant: cost scales with the popcount of
   the multiplier (nbits), per mult_bit in the cost tables.  */
25383 if (CONST_INT_P (XEXP (x, 1)))
25385 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25386 for (nbits = 0; value != 0; value &= value - 1)
25390 /* This is arbitrary. */
25393 /* Compute costs correctly for widening multiplication. */
25394 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
25395 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
25396 == GET_MODE_SIZE (mode))
25398 int is_mulwiden = 0;
25399 enum machine_mode inner_mode = GET_MODE (op0);
25401 if (GET_CODE (op0) == GET_CODE (op1))
25402 is_mulwiden = 1, op1 = XEXP (op1, 0);
25403 else if (CONST_INT_P (op1))
25405 if (GET_CODE (op0) == SIGN_EXTEND)
25406 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
25409 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
/* A recognized widening multiply is costed at the narrower mode.  */
25413 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
25416 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
25417 + nbits * ix86_cost->mult_bit
25418 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
/* DIV/MOD case.  */
25427 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25428 /* ??? SSE cost should be used here. */
25429 *total = ix86_cost->fdiv;
25430 else if (X87_FLOAT_MODE_P (mode))
25431 *total = ix86_cost->fdiv;
25432 else if (FLOAT_MODE_P (mode))
25433 /* ??? SSE vector cost should be used here. */
25434 *total = ix86_cost->fdiv;
25436 *total = ix86_cost->divide[MODE_INDEX (mode)];
/* PLUS case: shapes matching lea (base + index*scale + disp) are
   costed as a single lea.  */
25440 if (GET_MODE_CLASS (mode) == MODE_INT
25441 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
25443 if (GET_CODE (XEXP (x, 0)) == PLUS
25444 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
25445 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
25446 && CONSTANT_P (XEXP (x, 1)))
25448 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
25449 if (val == 2 || val == 4 || val == 8)
25451 *total = ix86_cost->lea;
25452 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
25453 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
25455 *total += rtx_cost (XEXP (x, 1), outer_code);
25459 else if (GET_CODE (XEXP (x, 0)) == MULT
25460 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
25462 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
25463 if (val == 2 || val == 4 || val == 8)
25465 *total = ix86_cost->lea;
25466 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
25467 *total += rtx_cost (XEXP (x, 1), outer_code);
25471 else if (GET_CODE (XEXP (x, 0)) == PLUS)
25473 *total = ix86_cost->lea;
25474 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
25475 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
25476 *total += rtx_cost (XEXP (x, 1), outer_code);
/* PLUS/MINUS fall through to FP add costs.  */
25483 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25485 /* ??? SSE cost should be used here. */
25486 *total = ix86_cost->fadd;
25489 else if (X87_FLOAT_MODE_P (mode))
25491 *total = ix86_cost->fadd;
25494 else if (FLOAT_MODE_P (mode))
25496 /* ??? SSE vector cost should be used here. */
25497 *total = ix86_cost->fadd;
/* AND/IOR/XOR of DImode on 32-bit takes two insns; operands that are
   not already DImode are costed double (the shift below).  */
25505 if (!TARGET_64BIT && mode == DImode)
25507 *total = (ix86_cost->add * 2
25508 + (rtx_cost (XEXP (x, 0), outer_code)
25509 << (GET_MODE (XEXP (x, 0)) != DImode))
25510 + (rtx_cost (XEXP (x, 1), outer_code)
25511 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG: FP negation costs.  */
25517 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25519 /* ??? SSE cost should be used here. */
25520 *total = ix86_cost->fchs;
25523 else if (X87_FLOAT_MODE_P (mode))
25525 *total = ix86_cost->fchs;
25528 else if (FLOAT_MODE_P (mode))
25530 /* ??? SSE vector cost should be used here. */
25531 *total = ix86_cost->fchs;
/* NOT (integer): two adds for 32-bit DImode, one otherwise.  */
25537 if (!TARGET_64BIT && mode == DImode)
25538 *total = ix86_cost->add * 2;
25540 *total = ix86_cost->add;
/* COMPARE of a single extracted bit against zero is a test insn.  */
25544 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
25545 && XEXP (XEXP (x, 0), 1) == const1_rtx
25546 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
25547 && XEXP (x, 1) == const0_rtx)
25549 /* This kind of construct is implemented using test[bwl].
25550 Treat it as if we had an AND. */
25551 *total = (ix86_cost->add
25552 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
25553 + rtx_cost (const1_rtx, outer_code));
/* FLOAT_EXTEND: free for SSE math (presumably), per the guard below.  */
25559 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS costs.  */
25564 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25565 /* ??? SSE cost should be used here. */
25566 *total = ix86_cost->fabs;
25567 else if (X87_FLOAT_MODE_P (mode))
25568 *total = ix86_cost->fabs;
25569 else if (FLOAT_MODE_P (mode))
25570 /* ??? SSE vector cost should be used here. */
25571 *total = ix86_cost->fabs;
/* SQRT costs.  */
25575 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25576 /* ??? SSE cost should be used here. */
25577 *total = ix86_cost->fsqrt;
25578 else if (X87_FLOAT_MODE_P (mode))
25579 *total = ix86_cost->fsqrt;
25580 else if (FLOAT_MODE_P (mode))
25581 /* ??? SSE vector cost should be used here. */
25582 *total = ix86_cost->fsqrt;
/* UNSPEC: thread-pointer reference is special-cased.  */
25586 if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique local labels for Mach-O stubs.  */
25597 static int current_machopic_label_num;
25599 /* Given a symbol name and its associated stub, write out the
25600 definition of the stub. */
/* NOTE(review): the #if MACHOPIC/PIC conditionals selecting between the
   PIC and non-PIC stub bodies are elided in this view -- the fprintf
   sequences below belong to alternative branches, not straight-line code.  */
25603 machopic_output_stub (FILE *file, const char *symb, const char *stub)
25605 unsigned int length;
25606 char *binder_name, *symbol_name, lazy_ptr_name[32];
25607 int label = ++current_machopic_label_num;
25609 /* For 64-bit we shouldn't get here. */
25610 gcc_assert (!TARGET_64BIT);
25612 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
25613 symb = (*targetm.strip_name_encoding) (symb);
25615 length = strlen (stub);
25616 binder_name = XALLOCAVEC (char, length + 32);
25617 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
25619 length = strlen (symb);
25620 symbol_name = XALLOCAVEC (char, length + 32);
25621 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
25623 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the stub section (picsymbol vs. plain) -- selector elided.  */
25626 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
25628 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
25630 fprintf (file, "%s:\n", stub);
25631 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC variant: materialize PC in %eax, load the lazy pointer via it.  */
25635 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
25636 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
25637 fprintf (file, "\tjmp\t*%%edx\n");
/* Non-PIC variant: jump through the lazy pointer directly.  */
25640 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
25642 fprintf (file, "%s:\n", binder_name);
/* Binder: push the lazy pointer address, then enter dyld's helper.  */
25646 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
25647 fprintf (file, "\tpushl\t%%eax\n");
25650 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
25652 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Emit the lazy pointer itself, initially pointing at the binder.  */
25654 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
25655 fprintf (file, "%s:\n", lazy_ptr_name);
25656 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
25657 fprintf (file, "\t.long %s\n", binder_name);
/* TARGET_ASM_FILE_END hook for Darwin/x86: delegate to the generic
   Darwin end-of-file handling.  */
25661 darwin_x86_file_end (void)
25663 darwin_file_end ();
25666 #endif /* TARGET_MACHO */
25668 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[] so the allocator prefers, in order:
   call-clobbered GPRs, call-saved GPRs, then x87 or SSE depending on
   whether x87 is doing FP math, then MMX; unused slots are zeroed.  */
25671 x86_order_regs_for_local_alloc (void)
25676 /* First allocate the local general purpose registers. */
25677 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
25678 if (GENERAL_REGNO_P (i) && call_used_regs[i])
25679 reg_alloc_order [pos++] = i;
25681 /* Global general purpose registers. */
25682 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
25683 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
25684 reg_alloc_order [pos++] = i;
25686 /* x87 registers come first in case we are doing FP math
25688 if (!TARGET_SSE_MATH)
25689 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
25690 reg_alloc_order [pos++] = i;
25692 /* SSE registers. */
25693 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
25694 reg_alloc_order [pos++] = i;
25695 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
25696 reg_alloc_order [pos++] = i;
25698 /* x87 registers. */
/* When SSE does the FP math, x87 regs are deprioritized to here.  */
25699 if (TARGET_SSE_MATH)
25700 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
25701 reg_alloc_order [pos++] = i;
25703 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
25704 reg_alloc_order [pos++] = i;
25706 /* Initialize the rest of array as we do not allocate some registers
25708 while (pos < FIRST_PSEUDO_REGISTER)
25709 reg_alloc_order [pos++] = 0;
25712 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
25713 struct attribute_spec.handler. */
/* Rejects the attribute (setting *no_add_attrs) when applied to a
   non-function, when not targeting 64-bit, or when combined with the
   opposite ABI attribute.  Some returns/braces elided in this view.  */
25715 ix86_handle_abi_attribute (tree *node, tree name,
25716 tree args ATTRIBUTE_UNUSED,
25717 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
25719 if (TREE_CODE (*node) != FUNCTION_TYPE
25720 && TREE_CODE (*node) != METHOD_TYPE
25721 && TREE_CODE (*node) != FIELD_DECL
25722 && TREE_CODE (*node) != TYPE_DECL)
25724 warning (OPT_Wattributes, "%qs attribute only applies to functions",
25725 IDENTIFIER_POINTER (name));
25726 *no_add_attrs = true;
/* Guard (elided): these ABI attributes are 64-bit only.  */
25731 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
25732 IDENTIFIER_POINTER (name));
25733 *no_add_attrs = true;
25737 /* Can combine regparm with all attributes but fastcall. */
25738 if (is_attribute_p ("ms_abi", name))
25740 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
25742 error ("ms_abi and sysv_abi attributes are not compatible");
25747 else if (is_attribute_p ("sysv_abi", name))
25749 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
25751 error ("ms_abi and sysv_abi attributes are not compatible");
25760 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
25761 struct attribute_spec.handler. */
/* Accepts the attribute only on RECORD/UNION types (or decls thereof),
   and rejects it when the opposite layout attribute is already present.  */
25763 ix86_handle_struct_attribute (tree *node, tree name,
25764 tree args ATTRIBUTE_UNUSED,
25765 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Resolve the type to check: for decls, look through to the type.  */
25768 if (DECL_P (*node))
25770 if (TREE_CODE (*node) == TYPE_DECL)
25771 type = &TREE_TYPE (*node);
25776 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
25777 || TREE_CODE (*type) == UNION_TYPE)))
25779 warning (OPT_Wattributes, "%qs attribute ignored",
25780 IDENTIFIER_POINTER (name));
25781 *no_add_attrs = true;
25784 else if ((is_attribute_p ("ms_struct", name)
25785 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
25786 || ((is_attribute_p ("gcc_struct", name)
25787 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
25789 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
25790 IDENTIFIER_POINTER (name));
25791 *no_add_attrs = true;
/* Return true if RECORD_TYPE should use the MS bitfield layout:
   either the target default says so and "gcc_struct" does not override
   it, or the type is explicitly marked "ms_struct".  */
25798 ix86_ms_bitfield_layout_p (const_tree record_type)
25800 return (TARGET_MS_BITFIELD_LAYOUT &&
25801 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
25802 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
25805 /* Returns an expression indicating where the this parameter is
25806 located on entry to the FUNCTION. */
/* 64-bit: `this' is in the first integer parameter register of the
   function's ABI (shifted by one slot when the return is aggregate).
   32-bit: in a register for regparm/fastcall functions, otherwise on
   the stack just above the return address.  Some branches elided.  */
25809 x86_this_parameter (tree function)
25811 tree type = TREE_TYPE (function);
/* aggr: nonzero when the value is returned in memory, which pushes
   `this' to the second parameter slot.  */
25812 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
25817 const int *parm_regs;
25819 if (ix86_function_type_abi (type) == MS_ABI)
25820 parm_regs = x86_64_ms_abi_int_parameter_registers;
25822 parm_regs = x86_64_int_parameter_registers;
25823 return gen_rtx_REG (DImode, parm_regs[aggr]);
25826 nregs = ix86_function_regparm (type, function);
25828 if (nregs > 0 && !stdarg_p (type))
25832 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
25833 regno = aggr ? DX_REG : CX_REG;
/* Fallback (elided branch): `this' spilled to the stack slot at sp+4.  */
25841 return gen_rtx_MEM (SImode,
25842 plus_constant (stack_pointer_rtx, 4));
25845 return gen_rtx_REG (SImode, regno);
/* Default 32-bit case: stack slot, skipping the hidden aggregate
   return pointer when present.  */
25848 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
25851 /* Determine whether x86_output_mi_thunk can succeed. */
/* Returns true unless the 32-bit thunk would need a scratch register
   (for vcall_offset or PIC GOT access) and all three regparm registers
   are taken.  Intervening returns elided in this view.  */
25854 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
25855 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
25856 HOST_WIDE_INT vcall_offset, const_tree function)
25858 /* 64-bit can handle anything. */
25862 /* For 32-bit, everything's fine if we have one free register. */
25863 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
25866 /* Need a free register for vcall_offset. */
25870 /* Need a free register for GOT references. */
25871 if (flag_pic && !(*targetm.binds_local_p) (function))
25874 /* Otherwise ok. */
25878 /* Output the assembler code for a thunk function. THUNK_DECL is the
25879 declaration for the thunk function itself, FUNCTION is the decl for
25880 the target function. DELTA is an immediate constant offset to be
25881 added to THIS. If VCALL_OFFSET is nonzero, the word at
25882 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): many TARGET_64BIT/TARGET_MACHO conditionals and braces
   are elided in this view; adjacent asm emissions below may belong to
   mutually exclusive branches.  */
25885 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
25886 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
25887 HOST_WIDE_INT vcall_offset, tree function)
25890 rtx this_param = x86_this_parameter (function);
25893 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
25894 pull it in now and let DELTA benefit. */
25895 if (REG_P (this_param))
25896 this_reg = this_param;
25897 else if (vcall_offset)
25899 /* Put the this parameter into %eax. */
25900 xops[0] = this_param;
25901 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
25902 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
25905 this_reg = NULL_RTX;
25907 /* Adjust the this parameter by a fixed constant. */
25910 xops[0] = GEN_INT (delta);
25911 xops[1] = this_reg ? this_reg : this_param;
/* 64-bit: DELTA may not fit an immediate; stage it through %r10.  */
25914 if (!x86_64_general_operand (xops[0], DImode))
25916 tmp = gen_rtx_REG (DImode, R10_REG);
25918 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
25920 xops[1] = this_param;
25922 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
25925 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
25928 /* Adjust the this parameter by a value stored in the vtable. */
/* Scratch register: %r10 on 64-bit; %ecx (or %eax for fastcall,
   since fastcall already uses %ecx) on 32-bit.  */
25932 tmp = gen_rtx_REG (DImode, R10_REG);
25935 int tmp_regno = CX_REG;
25936 if (lookup_attribute ("fastcall",
25937 TYPE_ATTRIBUTES (TREE_TYPE (function))))
25938 tmp_regno = AX_REG;
25939 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer from *this.  */
25942 xops[0] = gen_rtx_MEM (Pmode, this_reg);
25944 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
25946 /* Adjust the this parameter. */
25947 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* 64-bit: vcall_offset may not fit a displacement; use %r11.  */
25948 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
25950 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
25951 xops[0] = GEN_INT (vcall_offset);
25953 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
25954 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
25956 xops[1] = this_reg;
25957 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
25960 /* If necessary, drop THIS back to its stack slot. */
25961 if (this_reg && this_reg != this_param)
25963 xops[0] = this_reg;
25964 xops[1] = this_param;
25965 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
/* Finally, tail-jump to the real function.  */
25968 xops[0] = XEXP (DECL_RTL (function), 0);
25971 if (!flag_pic || (*targetm.binds_local_p) (function))
25972 output_asm_insn ("jmp\t%P0", xops);
25973 /* All thunks should be in the same object as their target,
25974 and thus binds_local_p should be true. */
25975 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
25976 gcc_unreachable ();
/* 64-bit PIC: indirect jump through the GOT entry.  */
25979 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
25980 tmp = gen_rtx_CONST (Pmode, tmp);
25981 tmp = gen_rtx_MEM (QImode, tmp);
25983 output_asm_insn ("jmp\t%A0", xops);
25988 if (!flag_pic || (*targetm.binds_local_p) (function))
25989 output_asm_insn ("jmp\t%P0", xops);
/* Darwin PIC: jump through the machopic stub.  */
25994 rtx sym_ref = XEXP (DECL_RTL (function), 0);
25995 tmp = (gen_rtx_SYMBOL_REF
25997 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
25998 tmp = gen_rtx_MEM (QImode, tmp);
26000 output_asm_insn ("jmp\t%0", xops);
26003 #endif /* TARGET_MACHO */
/* 32-bit ELF PIC: set up the GOT pointer in %ecx, then jump
   through the function's GOT slot.  */
26005 tmp = gen_rtx_REG (SImode, CX_REG);
26006 output_set_got (tmp, NULL_RTX);
26009 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
26010 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: standard prologue plus the target's
   optional .version/.global __fltused/Intel-syntax directives.
   (The Darwin call is under an elided TARGET_MACHO conditional.)  */
26016 x86_file_start (void)
26018 default_file_start ();
26020 darwin_file_start ();
26022 if (X86_FILE_START_VERSION_DIRECTIVE)
26023 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
26024 if (X86_FILE_START_FLTUSED)
26025 fputs ("\t.global\t__fltused\n", asm_out_file);
26026 if (ix86_asm_dialect == ASM_INTEL)
26027 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: on 32-bit without -malign-double, cap the
   alignment of double/complex-double and integer fields at 32 bits
   (the traditional i386 struct layout).  Early return elided.  */
26031 x86_field_alignment (tree field, int computed)
26033 enum machine_mode mode;
26034 tree type = TREE_TYPE (field);
26036 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* Look through arrays so an array of doubles is capped like a double.  */
26038 mode = TYPE_MODE (strip_array_types (type));
26039 if (mode == DFmode || mode == DCmode
26040 || GET_MODE_CLASS (mode) == MODE_INT
26041 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
26042 return MIN (32, computed);
26046 /* Output assembler code to FILE to increment profiler label # LABELNO
26047 for profiling a function entry. */
/* NOTE(review): the 64-bit/PIC/non-PIC branch structure is elided; the
   three emission sequences below are alternatives, not sequential.  */
26049 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* 64-bit variant.  */
26053 #ifndef NO_PROFILE_COUNTERS
26054 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
26057 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
26058 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
26060 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC variant: counter and mcount reached through the GOT.  */
26064 #ifndef NO_PROFILE_COUNTERS
26065 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
26066 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
26068 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC variant: absolute addresses.  */
26072 #ifndef NO_PROFILE_COUNTERS
26073 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
26074 PROFILE_COUNT_REGISTER);
26076 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26080 /* We don't have exact information about the insn sizes, but we may assume
26081 quite safely that we are informed about all 1 byte insns and memory
26082 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound on INSN's encoded size in bytes;
   used by ix86_avoid_jump_misspredicts.  Several returns elided.  */
26086 min_insn_size (rtx insn)
26090 if (!INSN_P (insn) || !active_insn_p (insn))
26093 /* Discard alignments we've emit and jump instructions. */
26094 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26095 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
/* Jump tables (elided condition is presumably JUMP_P).  */
26098 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
26099 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
26102 /* Important case - calls are always 5 bytes.
26103 It is common to have many calls in the row. */
26105 && symbolic_reference_mentioned_p (PATTERN (insn))
26106 && !SIBLING_CALL_P (insn))
26108 if (get_attr_length (insn) <= 1)
26111 /* For normal instructions we may rely on the sizes of addresses
26112 and the presence of symbol to require 4 bytes of encoding.
26113 This is not the case for jumps where references are PC relative. */
26114 if (!JUMP_P (insn))
26116 l = get_attr_length_address (insn);
26117 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
26126 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Scan the insn stream with a sliding window [START, INSN] and insert
   alignment padding so no 16-byte code window holds 4 jumps.
   Some braces and window-advance conditions elided in this view.  */
26130 ix86_avoid_jump_misspredicts (void)
26132 rtx insn, start = get_insns ();
26133 int nbytes = 0, njumps = 0;
26136 /* Look for all minimal intervals of instructions containing 4 jumps.
26137 The intervals are bounded by START and INSN. NBYTES is the total
26138 size of instructions in the interval including INSN and not including
26139 START. When the NBYTES is smaller than 16 bytes, it is possible
26140 that the end of START and INSN ends up in the same 16byte page.
26142 The smallest offset in the page INSN can start is the case where START
26143 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
26144 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
26146 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26149 nbytes += min_insn_size (insn);
26151 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
26152 INSN_UID (insn), min_insn_size (insn));
/* Count real jumps (not jump tables) entering the window.  */
26154 && GET_CODE (PATTERN (insn)) != ADDR_VEC
26155 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Shrink the window from the front while it holds too many jumps
   or too many bytes.  */
26163 start = NEXT_INSN (start);
26164 if ((JUMP_P (start)
26165 && GET_CODE (PATTERN (start)) != ADDR_VEC
26166 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
26168 njumps--, isjump = 1;
26171 nbytes -= min_insn_size (start);
26173 gcc_assert (njumps >= 0);
26175 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
26176 INSN_UID (start), INSN_UID (insn), nbytes);
/* Fourth jump would land in the same 16-byte window: pad before it.  */
26178 if (njumps == 3 && isjump && nbytes < 16)
26180 int padsize = 15 - nbytes + min_insn_size (insn);
26183 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
26184 INSN_UID (insn), padsize);
26185 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
26190 /* AMD Athlon works faster
26191 when RET is not destination of conditional jump or directly preceded
26192 by other jump instruction. We avoid the penalty by inserting NOP just
26193 before the RET instructions in such cases. */
/* Walks every predecessor edge of the exit block; when the block ends
   in a bare RETURN reached via a jump, replaces/precedes it with the
   long return form.  Several branch bodies elided in this view.  */
26195 ix86_pad_returns (void)
26200 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
26202 basic_block bb = e->src;
26203 rtx ret = BB_END (bb);
26205 bool replace = false;
/* Skip blocks not ending in RETURN, and cold blocks (size matters).  */
26207 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
26208 || optimize_bb_for_size_p (bb))
/* Find the nearest active insn or label before the return.  */
26210 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
26211 if (active_insn_p (prev) || LABEL_P (prev))
/* Return directly after a label: penalized when any predecessor
   reaches it by a (non-fallthru) jump.  */
26213 if (prev && LABEL_P (prev))
26218 FOR_EACH_EDGE (e, ei, bb->preds)
26219 if (EDGE_FREQUENCY (e) && e->src->index >= 0
26220 && !(e->flags & EDGE_FALLTHRU))
/* Return directly preceded by a conditional jump: also penalized.  */
26225 prev = prev_active_insn (ret);
26227 && ((JUMP_P (prev) && any_condjump_p (prev))
26230 /* Empty functions get branch mispredict even when the jump destination
26231 is not visible to us. */
26232 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
26237 emit_insn_before (gen_return_internal_long (), ret);
26243 /* Implement machine specific optimizations. We implement padding of returns
26244 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function signature line (presumably
   `static void ix86_reorg (void)`) is elided in this view.
   Both sub-passes run only when optimizing for speed.  */
26248 if (TARGET_PAD_RETURNS && optimize
26249 && optimize_function_for_speed_p (cfun))
26250 ix86_pad_returns ();
26251 if (TARGET_FOUR_JUMP_LIMIT && optimize
26252 && optimize_function_for_speed_p (cfun))
26253 ix86_avoid_jump_misspredicts ();
26256 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the cached extracted operands of INSN; register operands with
   REGNO >= 4 are QImode registers reachable only with a REX prefix
   (spl/bpl/sil/dil and above).  The return statements are elided.  */
26259 x86_extended_QIreg_mentioned_p (rtx insn)
26262 extract_insn_cached (insn);
26263 for (i = 0; i < recog_data.n_operands; i++)
26264 if (REG_P (recog_data.operand[i])
26265 && REGNO (recog_data.operand[i]) >= 4)
26270 /* Return nonzero when P points to register encoded via REX prefix.
26271 Called via for_each_rtx. */
/* Non-register rtxes fall through to the elided `return 0'.  */
26273 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
26275 unsigned int regno;
26278 regno = REGNO (*p);
26279 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
26282 /* Return true when INSN mentions register that must be encoded using REX
/* Walks INSN's pattern (or INSN itself when it is a bare rtx) with
   extended_reg_mentioned_1.  */
26285 x86_extended_reg_mentioned_p (rtx insn)
26287 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
26288 extended_reg_mentioned_1, NULL);
26291 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
26292 optabs would emit if we didn't have TFmode patterns. */
/* Strategy: if the input is non-negative, a plain signed conversion is
   correct.  Otherwise convert (in >> 1) | (in & 1) -- which halves the
   value while preserving the rounding bit -- and double the result.  */
26295 x86_emit_floatuns (rtx operands[2])
26297 rtx neglab, donelab, i0, i1, f0, in, out;
26298 enum machine_mode mode, inmode;
26300 inmode = GET_MODE (operands[1]);
26301 gcc_assert (inmode == SImode || inmode == DImode);
26304 in = force_reg (inmode, operands[1]);
26305 mode = GET_MODE (out);
26306 neglab = gen_label_rtx ();
26307 donelab = gen_label_rtx ();
26308 f0 = gen_reg_rtx (mode);
/* Branch to NEGLAB when the sign bit is set (value >= 2^(N-1)).  */
26310 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
/* Fast path: signed conversion is exact for non-negative inputs.  */
26312 expand_float (out, in, 0);
26314 emit_jump_insn (gen_jump (donelab));
26317 emit_label (neglab);
/* Slow path: i0 = (in >> 1) | (in & 1), then out = 2 * float(i0).  */
26319 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
26321 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
26323 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
26325 expand_float (f0, i0, 0);
26327 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
26329 emit_label (donelab);
26332 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26333 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): the `switch (mode)` skeleton (case labels, braces,
   returns) is elided in this view; the strategies below belong to
   different vector-mode cases.  */
26336 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
26337 rtx target, rtx val)
26339 enum machine_mode hmode, smode, wsmode, wvmode;
/* Simple case: modes with a native vec_duplicate pattern.  */
26354 val = force_reg (GET_MODE_INNER (mode), val);
26355 x = gen_rtx_VEC_DUPLICATE (mode, val);
26356 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V4HI-style case: duplicate via a truncated SImode value
   (pshufw territory, needs SSE or 3DNow!A).  */
26362 if (TARGET_SSE || TARGET_3DNOW_A)
26364 val = gen_lowpart (SImode, val);
26365 x = gen_rtx_TRUNCATE (HImode, val);
26366 x = gen_rtx_VEC_DUPLICATE (mode, x);
26367 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V8HImode case: build the splat with punpcklwd + pshufd.  */
26389 /* Extend HImode to SImode using a paradoxical SUBREG. */
26390 tmp1 = gen_reg_rtx (SImode);
26391 emit_move_insn (tmp1, gen_lowpart (SImode, val));
26392 /* Insert the SImode value as low element of V4SImode vector. */
26393 tmp2 = gen_reg_rtx (V4SImode);
26394 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
26395 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
26396 CONST0_RTX (V4SImode),
26398 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
26399 /* Cast the V4SImode vector back to a V8HImode vector. */
26400 tmp1 = gen_reg_rtx (V8HImode);
26401 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
26402 /* Duplicate the low short through the whole low SImode word. */
26403 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
26404 /* Cast the V8HImode vector back to a V4SImode vector. */
26405 tmp2 = gen_reg_rtx (V4SImode);
26406 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
26407 /* Replicate the low element of the V4SImode vector. */
26408 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
26409 /* Cast the V2SImode back to V8HImode, and store in target. */
26410 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
/* V16QImode case: same idea, with two punpcklbw to spread the byte.  */
26421 /* Extend QImode to SImode using a paradoxical SUBREG. */
26422 tmp1 = gen_reg_rtx (SImode);
26423 emit_move_insn (tmp1, gen_lowpart (SImode, val));
26424 /* Insert the SImode value as low element of V4SImode vector. */
26425 tmp2 = gen_reg_rtx (V4SImode);
26426 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
26427 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
26428 CONST0_RTX (V4SImode),
26430 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
26431 /* Cast the V4SImode vector back to a V16QImode vector. */
26432 tmp1 = gen_reg_rtx (V16QImode);
26433 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
26434 /* Duplicate the low byte through the whole low SImode word. */
26435 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
26436 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
26437 /* Cast the V16QImode vector back to a V4SImode vector. */
26438 tmp2 = gen_reg_rtx (V4SImode);
26439 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
26440 /* Replicate the low element of the V4SImode vector. */
26441 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
26442 /* Cast the V2SImode back to V16QImode, and store in target. */
26443 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
/* Generic fallback: widen the scalar (val | val << bits) and recurse
   in the wider vector mode.  */
26451 /* Replicate the value once into the next wider mode and recurse. */
26452 val = convert_modes (wsmode, smode, val, true);
26453 x = expand_simple_binop (wsmode, ASHIFT, val,
26454 GEN_INT (GET_MODE_BITSIZE (smode)),
26455 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26456 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
26458 x = gen_reg_rtx (wvmode);
26459 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
26460 gcc_unreachable ();
26461 emit_move_insn (target, gen_lowpart (mode, x));
/* Wide (e.g. AVX 256-bit) case: splat into the half-width mode, then
   concatenate the half with itself.  */
26484 rtx tmp = gen_reg_rtx (hmode);
26485 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
26486 emit_insn (gen_rtx_SET (VOIDmode, target,
26487 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
26496 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
26497    whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */
26501 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
26502 				     rtx target, rtx var, int one_var)
26504 enum machine_mode vsimode;
26507 bool use_vector_set = false;
/* Decide, per MODE, whether a zero-then-vec_set sequence is available on
   the current ISA (SSE2 / SSE4.1 / AVX / 3DNow!A depending on the case).  */
26512 /* For SSE4.1, we normally use vector set. But if the second
26513 element is zero and inter-unit moves are OK, we use movq
   instead.  */
26515 use_vector_set = (TARGET_64BIT
26517 && !(TARGET_INTER_UNIT_MOVES
26523 use_vector_set = TARGET_SSE4_1;
26526 use_vector_set = TARGET_SSE2;
26529 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
26537 use_vector_set = TARGET_AVX;
26543 if (use_vector_set)
/* Zero the whole vector, then insert VAR at position ONE_VAR.  */
26545 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
26546 var = force_reg (GET_MODE_INNER (mode), var);
26547 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element case: concatenate VAR with a zero scalar.  */
26563 var = force_reg (GET_MODE_INNER (mode), var);
26564 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
26565 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Build in a fresh pseudo when TARGET is not a pseudo register.  */
26570 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
26571 new_target = gen_reg_rtx (mode);
26573 new_target = target;
/* Broadcast VAR, then merge it with zero so only element 0 is VAR.  */
26574 var = force_reg (GET_MODE_INNER (mode), var);
26575 x = gen_rtx_VEC_DUPLICATE (mode, var);
26576 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
26577 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
26580 /* We need to shuffle the value to the correct position, so
26581 create a new pseudo to store the intermediate result. */
26583 /* With SSE2, we can use the integer shuffle insns. */
26584 if (mode != V4SFmode && TARGET_SSE2)
26586 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
26588 GEN_INT (one_var == 1 ? 0 : 1),
26589 GEN_INT (one_var == 2 ? 0 : 1),
26590 GEN_INT (one_var == 3 ? 0 : 1)));
26591 if (target != new_target)
26592 emit_move_insn (target, new_target);
26596 /* Otherwise convert the intermediate result to V4SFmode and
26597 use the SSE1 shuffle instructions. */
26598 if (mode != V4SFmode)
26600 tmp = gen_reg_rtx (V4SFmode);
26601 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
26606 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
26608 GEN_INT (one_var == 1 ? 0 : 1),
26609 GEN_INT (one_var == 2 ? 0+4 : 1+4),
26610 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
26612 if (mode != V4SFmode)
26613 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
26614 else if (tmp != target)
26615 emit_move_insn (target, tmp);
26617 else if (target != new_target)
26618 emit_move_insn (target, new_target);
/* Narrow integer element modes: widen the element to SImode and
   recurse on the corresponding SImode vector mode.  */
26623 vsimode = V4SImode;
26629 vsimode = V2SImode;
26635 /* Zero extend the variable element to SImode and recurse. */
26636 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
26638 x = gen_reg_rtx (vsimode);
26639 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
26641 gcc_unreachable ();
26643 emit_move_insn (target, gen_lowpart (mode, x));
26651 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
26652 consisting of the values in VALS.  It is known that all elements
26653 except ONE_VAR are constants.  Return true if successful. */
26656 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
26657 rtx target, rtx vals, int one_var)
26659 rtx var = XVECEXP (vals, 0, one_var);
26660 enum machine_mode wmode;
/* Build a copy of VALS with the variable slot zeroed; that constant
   vector can be materialized from the constant pool.  */
26663 const_vec = copy_rtx (vals);
26664 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
26665 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
26673 /* For the two element vectors, it's just as easy to use
26674 the general case. */
26698 /* There's no way to set one QImode entry easily.  Combine
26699 the variable value with its adjacent constant value, and
26700 promote to an HImode set. */
/* X is the constant neighbor of the variable element (index one_var^1).  */
26701 x = XVECEXP (vals, 0, one_var ^ 1);
/* Variable byte is the high half of the HImode pair.  */
26704 var = convert_modes (HImode, QImode, var, true);
26705 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
26706 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26707 x = GEN_INT (INTVAL (x) & 0xff);
/* Variable byte is the low half; neighbor constant goes into the high.  */
26711 var = convert_modes (HImode, QImode, var, true);
26712 x = gen_int_mode (INTVAL (x) << 8, HImode);
26714 if (x != const0_rtx)
26715 var = expand_simple_binop (HImode, IOR, var, x, var,
26716 1, OPTAB_LIB_WIDEN);
/* Set the combined HImode value in the wider-element vector view.  */
26718 x = gen_reg_rtx (wmode);
26719 emit_move_insn (x, gen_lowpart (wmode, const_vec));
26720 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
26722 emit_move_insn (target, gen_lowpart (mode, x));
/* General path: load the mostly-constant vector, then overwrite the
   single variable element in place.  */
26729 emit_move_insn (target, const_vec);
26730 ix86_expand_vector_set (mmx_ok, target, var, one_var);
26734 /* A subroutine of ix86_expand_vector_init_general.  Use vector
26735 concatenate to handle the most general case: all values variable,
26736 and none identical. */
26739 ix86_expand_vector_init_concat (enum machine_mode mode,
26740 rtx target, rtx *ops, int n)
26742 enum machine_mode cmode, hmode = VOIDmode;
26743 rtx first[8], second[4];
26783 gcc_unreachable ();
/* N == 2: force both halves into registers and concatenate directly.  */
26786 if (!register_operand (ops[1], cmode))
26787 ops[1] = force_reg (cmode, ops[1]);
26788 if (!register_operand (ops[0], cmode))
26789 ops[0] = force_reg (cmode, ops[0]);
26790 emit_insn (gen_rtx_SET (VOIDmode, target,
26791 gen_rtx_VEC_CONCAT (mode, ops[0],
26811 gcc_unreachable ();
26827 gcc_unreachable ();
26832 /* FIXME: We process inputs backward to help RA.  PR 36222. */
/* Pair up adjacent operands into half-width vectors (built backward).  */
26835 for (; i > 0; i -= 2, j--)
26837 first[j] = gen_reg_rtx (cmode);
26838 v = gen_rtvec (2, ops[i - 1], ops[i]);
26839 ix86_expand_vector_init (false, first[j],
26840 gen_rtx_PARALLEL (cmode, v));
/* Recurse: combine the pair-vectors, then concatenate into TARGET.  */
26846 gcc_assert (hmode != VOIDmode);
26847 for (i = j = 0; i < n; i += 2, j++)
26849 second[j] = gen_reg_rtx (hmode);
26850 ix86_expand_vector_init_concat (hmode, second [j],
26854 ix86_expand_vector_init_concat (mode, target, second, n);
26857 ix86_expand_vector_init_concat (mode, target, first, n);
26861 gcc_unreachable ();
26865 /* A subroutine of ix86_expand_vector_init_general.  Use vector
26866 interleave to handle the most general case: all values variable,
26867 and none identical. */
26870 ix86_expand_vector_init_interleave (enum machine_mode mode,
26871 rtx target, rtx *ops, int n)
26873 enum machine_mode first_imode, second_imode, third_imode;
/* Generators selected per MODE: element insert, then the two levels of
   low-half interleave used to merge the intermediate vectors.  */
26876 rtx (*gen_load_even) (rtx, rtx, rtx);
26877 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
26878 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
26883 gen_load_even = gen_vec_setv8hi;
26884 gen_interleave_first_low = gen_vec_interleave_lowv4si;
26885 gen_interleave_second_low = gen_vec_interleave_lowv2di;
26886 first_imode = V4SImode;
26887 second_imode = V2DImode;
26888 third_imode = VOIDmode;
26891 gen_load_even = gen_vec_setv16qi;
26892 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
26893 gen_interleave_second_low = gen_vec_interleave_lowv4si;
26894 first_imode = V8HImode;
26895 second_imode = V4SImode;
26896 third_imode = V2DImode;
26899 gcc_unreachable ();
/* Step 1: combine each pair of scalar operands into one vector.  */
26902 for (i = 0; i < n; i++)
26904 /* Extend the odd element to SImode using a paradoxical SUBREG. */
26905 op0 = gen_reg_rtx (SImode);
26906 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
26908 /* Insert the SImode value as low element of V4SImode vector. */
26909 op1 = gen_reg_rtx (V4SImode);
26910 op0 = gen_rtx_VEC_MERGE (V4SImode,
26911 gen_rtx_VEC_DUPLICATE (V4SImode,
26913 CONST0_RTX (V4SImode),
26915 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
26917 /* Cast the V4SImode vector back to a vector in original mode. */
26918 op0 = gen_reg_rtx (mode);
26919 emit_move_insn (op0, gen_lowpart (mode, op1));
26921 /* Load even elements into the second position. */
26922 emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
26925 /* Cast vector to FIRST_IMODE vector. */
26926 ops[i] = gen_reg_rtx (first_imode);
26927 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
/* Step 2: merge pairs of intermediate vectors via low interleave.  */
26930 /* Interleave low FIRST_IMODE vectors. */
26931 for (i = j = 0; i < n; i += 2, j++)
26933 op0 = gen_reg_rtx (first_imode);
26934 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
26936 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
26937 ops[j] = gen_reg_rtx (second_imode);
26938 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
26941 /* Interleave low SECOND_IMODE vectors. */
26942 switch (second_imode)
26945 for (i = j = 0; i < n / 2; i += 2, j++)
26947 op0 = gen_reg_rtx (second_imode);
26948 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
26951 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
   vector.  */
26953 ops[j] = gen_reg_rtx (third_imode);
26954 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* Fall through to a final V2DImode interleave.  */
26956 second_imode = V2DImode;
26957 gen_interleave_second_low = gen_vec_interleave_lowv2di;
26961 op0 = gen_reg_rtx (second_imode);
26962 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
26965 /* Cast the SECOND_IMODE vector back to a vector on original
   mode.  */
26967 emit_insn (gen_rtx_SET (VOIDmode, target,
26968 gen_lowpart (mode, op0)));
26972 gcc_unreachable ();
26976 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
26977 all values variable, and none identical. */
26980 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
26981 rtx target, rtx vals)
26983 rtx ops[32], op0, op1;
26984 enum machine_mode half_mode = VOIDmode;
26991 if (!mmx_ok && !TARGET_SSE)
/* Strategy 1: recursive pairwise vector concatenation.  */
27003 n = GET_MODE_NUNITS (mode);
27004 for (i = 0; i < n; i++)
27005 ops[i] = XVECEXP (vals, 0, i);
27006 ix86_expand_vector_init_concat (mode, target, ops, n);
/* Strategy 2 (256-bit QI/HI vectors): build each 128-bit half by
   interleaving, then concatenate the halves.  */
27010 half_mode = V16QImode;
27014 half_mode = V8HImode;
27018 n = GET_MODE_NUNITS (mode);
27019 for (i = 0; i < n; i++)
27020 ops[i] = XVECEXP (vals, 0, i);
27021 op0 = gen_reg_rtx (half_mode);
27022 op1 = gen_reg_rtx (half_mode);
27023 ix86_expand_vector_init_interleave (half_mode, op0, ops,
27025 ix86_expand_vector_init_interleave (half_mode, op1,
27026 &ops [n >> 1], n >> 2);
27027 emit_insn (gen_rtx_SET (VOIDmode, target,
27028 gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* Strategy 3 (128-bit QI/HI vectors, SSE4.1): interleave in place.  */
27032 if (!TARGET_SSE4_1)
27040 n = GET_MODE_NUNITS (mode);
27041 for (i = 0; i < n; i++)
27042 ops[i] = XVECEXP (vals, 0, i);
27043 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
27051 gcc_unreachable ();
/* Fallback: assemble the vector in word_mode integer registers by
   shifting/or-ing elements together, then move the words in.  */
27055 int i, j, n_elts, n_words, n_elt_per_word;
27056 enum machine_mode inner_mode;
27057 rtx words[4], shift;
27059 inner_mode = GET_MODE_INNER (mode);
27060 n_elts = GET_MODE_NUNITS (mode);
27061 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27062 n_elt_per_word = n_elts / n_words;
27063 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
27065 for (i = 0; i < n_words; ++i)
27067 rtx word = NULL_RTX;
/* Elements are folded in from highest index down, so the first
   element of the word ends up in the low bits.  */
27069 for (j = 0; j < n_elt_per_word; ++j)
27071 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
27072 elt = convert_modes (word_mode, inner_mode, elt, true);
27078 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
27079 word, 1, OPTAB_LIB_WIDEN);
27080 word = expand_simple_binop (word_mode, IOR, word, elt,
27081 word, 1, OPTAB_LIB_WIDEN);
27089 emit_move_insn (target, gen_lowpart (mode, words[0]));
27090 else if (n_words == 2)
/* Clobber first so the low/high part writes don't look like a
   use of an uninitialized pseudo.  */
27092 rtx tmp = gen_reg_rtx (mode);
27093 emit_clobber (tmp);
27094 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27095 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27096 emit_move_insn (target, tmp);
27098 else if (n_words == 4)
/* Four words: recurse as a V4SImode build (32-bit word_mode only).  */
27100 rtx tmp = gen_reg_rtx (V4SImode);
27101 gcc_assert (word_mode == SImode);
27102 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
27103 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
27104 emit_move_insn (target, gen_lowpart (mode, tmp));
27107 gcc_unreachable ();
27111 /* Initialize vector TARGET via VALS.  Suppress the use of MMX
27112 instructions unless MMX_OK is true. */
27115 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
27117 enum machine_mode mode = GET_MODE (target);
27118 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27119 int n_elts = GET_MODE_NUNITS (mode);
27120 int n_var = 0, one_var = -1;
27121 bool all_same = true, all_const_zero = true;
/* Classify the initializer: count non-constant elements (remembering
   the index of one of them), and track whether all elements are equal
   and whether all constants are zero.  */
27125 for (i = 0; i < n_elts; ++i)
27127 x = XVECEXP (vals, 0, i);
27128 if (!(CONST_INT_P (x)
27129 || GET_CODE (x) == CONST_DOUBLE
27130 || GET_CODE (x) == CONST_FIXED))
27131 n_var++, one_var = i;
27132 else if (x != CONST0_RTX (inner_mode))
27133 all_const_zero = false;
27134 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
27138 /* Constants are best loaded from the constant pool. */
27141 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
27145 /* If all values are identical, broadcast the value. */
27147 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
27148 XVECEXP (vals, 0, 0)))
27151 /* Values where only one field is non-constant are best loaded from
27152 the pool and overwritten via move later. */
27156 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
27157 XVECEXP (vals, 0, one_var),
27161 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Otherwise fall back to the fully general expander.  */
27165 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector register TARGET, using
   the best sequence available for TARGET's mode on the current ISA.
   MMX insns are suppressed unless MMX_OK.  */
27169 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
27171 enum machine_mode mode = GET_MODE (target);
27172 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27173 enum machine_mode half_mode;
27174 bool use_vec_merge = false;
/* AVX 256-bit handling: tables of lo/hi 128-bit half extract and
   insert generators, indexed by mode row and half (0 = lo, 1 = hi).  */
27176 static rtx (*gen_extract[6][2]) (rtx, rtx)
27178 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
27179 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
27180 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
27181 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
27182 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
27183 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
27185 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
27187 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
27188 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
27189 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
27190 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
27191 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
27192 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element case: extract the untouched element, then concatenate
   it with VAL in the right order.  */
27202 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
27203 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
27205 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
27207 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
27208 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27214 use_vec_merge = TARGET_SSE4_1;
27222 /* For the two element vectors, we implement a VEC_CONCAT with
27223 the extraction of the other element. */
27225 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
27226 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
27229 op0 = val, op1 = tmp;
27231 op0 = tmp, op1 = val;
27233 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
27234 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27239 use_vec_merge = TARGET_SSE4_1;
27246 use_vec_merge = true;
/* V4SF without SSE4.1: shuffle-based insertion; the A B C D letters
   track the element positions through each step.  */
27250 /* tmp = target = A B C D */
27251 tmp = copy_to_reg (target);
27252 /* target = A A B B */
27253 emit_insn (gen_sse_unpcklps (target, target, target));
27254 /* target = X A B B */
27255 ix86_expand_vector_set (false, target, val, 0);
27256 /* target = A X C D */
27257 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27258 GEN_INT (1), GEN_INT (0),
27259 GEN_INT (2+4), GEN_INT (3+4)));
27263 /* tmp = target = A B C D */
27264 tmp = copy_to_reg (target);
27265 /* tmp = X B C D */
27266 ix86_expand_vector_set (false, tmp, val, 0);
27267 /* target = A B X D */
27268 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27269 GEN_INT (0), GEN_INT (1),
27270 GEN_INT (0+4), GEN_INT (3+4)));
27274 /* tmp = target = A B C D */
27275 tmp = copy_to_reg (target);
27276 /* tmp = X B C D */
27277 ix86_expand_vector_set (false, tmp, val, 0);
27278 /* target = A B X D */
27279 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27280 GEN_INT (0), GEN_INT (1),
27281 GEN_INT (2+4), GEN_INT (0+4)));
27285 gcc_unreachable ();
27290 use_vec_merge = TARGET_SSE4_1;
27294 /* Element 0 handled by vec_merge below. */
27297 use_vec_merge = true;
27303 /* With SSE2, use integer shuffles to swap element 0 and ELT,
27304 store into element 0, then shuffle them back. */
/* ORDER is the identity permutation with 0 and ELT exchanged; the
   same pshufd applied twice restores the original order.  */
27308 order[0] = GEN_INT (elt);
27309 order[1] = const1_rtx;
27310 order[2] = const2_rtx;
27311 order[3] = GEN_INT (3);
27312 order[elt] = const0_rtx;
27314 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27315 order[1], order[2], order[3]));
27317 ix86_expand_vector_set (false, target, val, 0);
27319 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27320 order[1], order[2], order[3]));
27324 /* For SSE1, we have to reuse the V4SF code. */
27325 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
27326 gen_lowpart (SFmode, val), elt);
27331 use_vec_merge = TARGET_SSE2;
27334 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27338 use_vec_merge = TARGET_SSE4_1;
/* AVX 256-bit modes: operate on the 128-bit half containing ELT.  */
27345 half_mode = V16QImode;
27351 half_mode = V8HImode;
27357 half_mode = V4SImode;
27363 half_mode = V2DImode;
27369 half_mode = V4SFmode;
27375 half_mode = V2DFmode;
27381 /* Compute offset. */
27385 gcc_assert (i <= 1);
27387 /* Extract the half. */
27388 tmp = gen_reg_rtx (half_mode);
27389 emit_insn ((*gen_extract[j][i]) (tmp, target));
27391 /* Put val in tmp at elt. */
27392 ix86_expand_vector_set (false, tmp, val, elt);
/* Re-insert the modified half back into TARGET.  */
27395 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
/* vec_merge path: broadcast VAL and merge it into TARGET with a
   one-hot mask selecting element ELT.  */
27404 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
27405 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
27406 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: bounce the vector through a stack slot and store the
   element by its byte offset.  */
27410 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27412 emit_move_insn (mem, target);
27414 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27415 emit_move_insn (tmp, val);
27417 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET, choosing the
   best sequence for VEC's mode on the current ISA.  MMX insns are
   suppressed unless MMX_OK.  */
27422 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
27424 enum machine_mode mode = GET_MODE (vec);
27425 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27426 bool use_vec_extr = false;
27439 use_vec_extr = true;
27443 use_vec_extr = TARGET_SSE4_1;
/* V4SF: shuffle the wanted element into position 0 first.  */
27455 tmp = gen_reg_rtx (mode);
27456 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
27457 GEN_INT (elt), GEN_INT (elt),
27458 GEN_INT (elt+4), GEN_INT (elt+4)));
27462 tmp = gen_reg_rtx (mode);
27463 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
27467 gcc_unreachable ();
27470 use_vec_extr = true;
27475 use_vec_extr = TARGET_SSE4_1;
/* V4SI: pshufd/punpckhdq the wanted element to position 0.  */
27489 tmp = gen_reg_rtx (mode);
27490 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
27491 GEN_INT (elt), GEN_INT (elt),
27492 GEN_INT (elt), GEN_INT (elt)));
27496 tmp = gen_reg_rtx (mode);
27497 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
27501 gcc_unreachable ();
27504 use_vec_extr = true;
27509 /* For SSE1, we have to reuse the V4SF code. */
27510 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
27511 gen_lowpart (V4SFmode, vec), elt);
27517 use_vec_extr = TARGET_SSE2;
27520 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27524 use_vec_extr = TARGET_SSE4_1;
27528 /* ??? Could extract the appropriate HImode element and shift. */
/* vec_select path: extract element ELT directly.  */
27535 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
27536 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
27538 /* Let the rtl optimizers know about the zero extension performed. */
27539 if (inner_mode == QImode || inner_mode == HImode)
27541 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
27542 target = gen_lowpart (SImode, target);
27545 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill the vector to the stack and read the element
   back at its byte offset.  */
27549 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27551 emit_move_insn (mem, vec);
27553 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27554 emit_move_insn (target, tmp);
27558 /* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
27559 pattern to reduce; DEST is the destination; IN is the input vector. */
27562 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
27564 rtx tmp1, tmp2, tmp3;
27566 tmp1 = gen_reg_rtx (V4SFmode);
27567 tmp2 = gen_reg_rtx (V4SFmode);
27568 tmp3 = gen_reg_rtx (V4SFmode);
/* Fold the high pair onto the low pair: tmp2 = fn (hi(in), in).  */
27570 emit_insn (gen_sse_movhlps (tmp1, in, in));
27571 emit_insn (fn (tmp2, tmp1, in));
/* Broadcast element 1 of tmp2 and fold it with element 0, leaving the
   full reduction in element 0 of DEST.  */
27573 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
27574 GEN_INT (1), GEN_INT (1),
27575 GEN_INT (1+4), GEN_INT (1+4)));
27576 emit_insn (fn (dest, tmp2, tmp3));
27579 /* Target hook for scalar_mode_supported_p. */
/* Accept decimal float and TFmode specially; defer the rest to the
   target-independent default.  */
27581 ix86_scalar_mode_supported_p (enum machine_mode mode)
27583 if (DECIMAL_FLOAT_MODE_P (mode))
27585 else if (mode == TFmode)
27588 return default_scalar_mode_supported_p (mode);
27591 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when the strongest enabled ISA that can
   hold it validates the mode (SSE/SSE2/AVX-256/MMX/3DNow!).  */
27593 ix86_vector_mode_supported_p (enum machine_mode mode)
27595 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
27597 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
27599 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
27601 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
27603 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
27608 /* Target hook for c_mode_for_suffix. */
/* Map a constant-suffix character to a machine mode; presumably
   handles the 'q'/'w' extended-float suffixes -- body not shown here.  */
27609 static enum machine_mode
27610 ix86_c_mode_for_suffix (char suffix)
27620 /* Worker function for TARGET_MD_ASM_CLOBBERS.
27622 We do this in the new i386 backend to maintain source compatibility
27623 with the old cc0-based compiler. */
27626 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
27627 tree inputs ATTRIBUTE_UNUSED,
/* Implicitly clobber the flags register and the x87 status word for
   every asm statement.  */
27630 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
27632 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
27637 /* Implements target vector targetm.asm.encode_section_info.  This
27638 is not used by netware. */
27640 static void ATTRIBUTE_UNUSED
27641 ix86_encode_section_info (tree decl, rtx rtl, int first)
27643 default_encode_section_info (decl, rtl, first);
/* Mark static/external variables placed in the large data sections so
   they are addressed with far (64-bit) addressing.  */
27645 if (TREE_CODE (decl) == VAR_DECL
27646 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
27647 && ix86_in_large_data_p (decl))
27648 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
27651 /* Worker function for REVERSE_CONDITION. */
/* FP compares (CCFPmode/CCFPUmode) must use the maybe-unordered
   reversal so NaN operands keep their semantics.  */
27654 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
27656 return (mode != CCFPmode && mode != CCFPUmode
27657 ? reverse_condition (code)
27658 : reverse_condition_maybe_unordered (code));
27661 /* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */
27665 output_387_reg_move (rtx insn, rtx *operands)
27667 if (REG_P (operands[0]))
/* If the source register dies here, pop it off the x87 stack.  */
27669 if (REG_P (operands[1])
27670 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
27672 if (REGNO (operands[0]) == FIRST_STACK_REG)
27673 return output_387_ffreep (operands, 0);
27674 return "fstp\t%y0";
27676 if (STACK_TOP_P (operands[0]))
27677 return "fld%z1\t%y1";
27680 else if (MEM_P (operands[0]))
27682 gcc_assert (REG_P (operands[1]));
/* Popping store when the source dies; otherwise keep it live.  */
27683 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
27684 return "fstp%z0\t%y0";
27687 /* There is no non-popping store to memory for XFmode.
27688 So if we need one, follow the store with a load. */
27689 if (GET_MODE (operands[0]) == XFmode)
27690 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
27692 return "fst%z0\t%y0";
27699 /* Output code to perform a conditional jump to LABEL, if C2 flag in
27700 FP status register is set. */
27703 ix86_emit_fp_unordered_jump (rtx label)
27705 rtx reg = gen_reg_rtx (HImode);
/* Fetch the x87 status word into REG.  */
27708 emit_insn (gen_x86_fnstsw_1 (reg));
27710 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
/* sahf path: move the status flags into EFLAGS and test UNORDERED.  */
27712 emit_insn (gen_x86_sahf_1 (reg));
27714 temp = gen_rtx_REG (CCmode, FLAGS_REG);
27715 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* Fallback: test bit 0x04 (C2) in the high byte of the status word.  */
27719 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
27721 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27722 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
27725 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
27726 gen_rtx_LABEL_REF (VOIDmode, label),
27728 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
27730 emit_jump_insn (temp);
/* Mark the branch as taken only 10% of the time.  */
27731 predict_jump (REG_BR_PROB_BASE * 10 / 100);
27734 /* Output code to perform a log1p XFmode calculation. */
27736 void ix86_emit_i387_log1p (rtx op0, rtx op1)
27738 rtx label1 = gen_label_rtx ();
27739 rtx label2 = gen_label_rtx ();
27741 rtx tmp = gen_reg_rtx (XFmode);
27742 rtx tmp2 = gen_reg_rtx (XFmode);
/* Compare |op1| against the threshold (approx. 1 - sqrt(2)/2); small
   arguments use fyl2xp1 directly for accuracy near zero.  */
27744 emit_insn (gen_absxf2 (tmp, op1));
27745 emit_insn (gen_cmpxf (tmp,
27746 CONST_DOUBLE_FROM_REAL_VALUE (
27747 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
27749 emit_jump_insn (gen_bge (label1));
/* Small |op1|: op0 = ln(2) * log2(1 + op1) via fyl2xp1.  */
27751 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
27752 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
27753 emit_jump (label2);
/* Large |op1|: op0 = ln(2) * log2(1 + op1) computed as fyl2x(op1 + 1).  */
27755 emit_label (label1);
27756 emit_move_insn (tmp, CONST1_RTX (XFmode));
27757 emit_insn (gen_addxf3 (tmp, op1, tmp));
27758 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
27759 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
27761 emit_label (label2);
27764 /* Output code to perform a Newton-Raphson approximation of a single precision
27765 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
27767 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
27769 rtx x0, x1, e0, e1, two;
27771 x0 = gen_reg_rtx (mode);
27772 e0 = gen_reg_rtx (mode);
27773 e1 = gen_reg_rtx (mode);
27774 x1 = gen_reg_rtx (mode);
27776 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
/* For vector modes, broadcast the constant 2.0 across all lanes.  */
27778 if (VECTOR_MODE_P (mode))
27779 two = ix86_build_const_vector (SFmode, true, two);
27781 two = force_reg (mode, two);
27783 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
27785 /* x0 = rcp(b) estimate */
27786 emit_insn (gen_rtx_SET (VOIDmode, x0,
27787 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0 */
27790 emit_insn (gen_rtx_SET (VOIDmode, e0,
27791 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 */
27793 emit_insn (gen_rtx_SET (VOIDmode, e1,
27794 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1  (refined reciprocal of b) */
27796 emit_insn (gen_rtx_SET (VOIDmode, x1,
27797 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 */
27799 emit_insn (gen_rtx_SET (VOIDmode, res,
27800 gen_rtx_MULT (mode, a, x1)));
27803 /* Output code to perform a Newton-Raphson approximation of a
27804 single precision floating point [reciprocal] square root. */
27806 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
27809 rtx x0, e0, e1, e2, e3, mthree, mhalf;
27812 x0 = gen_reg_rtx (mode);
27813 e0 = gen_reg_rtx (mode);
27814 e1 = gen_reg_rtx (mode);
27815 e2 = gen_reg_rtx (mode);
27816 e3 = gen_reg_rtx (mode);
/* mthree = -3.0, mhalf = -0.5 -- constants of the NR iteration.  */
27818 real_from_integer (&r, VOIDmode, -3, -1, 0);
27819 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
27821 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
27822 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
27824 if (VECTOR_MODE_P (mode))
27826 mthree = ix86_build_const_vector (SFmode, true, mthree);
27827 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
27830 /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
27831 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
27833 /* x0 = rsqrt(a) estimate */
27834 emit_insn (gen_rtx_SET (VOIDmode, x0,
27835 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
27838 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
27843 zero = gen_reg_rtx (mode);
27844 mask = gen_reg_rtx (mode);
/* mask = (a != 0) ? all-ones : 0; zero out x0 where a == 0 so the
   infinite rsqrt estimate never propagates.  */
27846 zero = force_reg (mode, CONST0_RTX(mode));
27847 emit_insn (gen_rtx_SET (VOIDmode, mask,
27848 gen_rtx_NE (mode, zero, a)));
27850 emit_insn (gen_rtx_SET (VOIDmode, x0,
27851 gen_rtx_AND (mode, x0, mask)));
/* e0 = a * x0, e1 = e0 * x0  (= a * x0^2) */
27855 emit_insn (gen_rtx_SET (VOIDmode, e0,
27856 gen_rtx_MULT (mode, x0, a)));
27858 emit_insn (gen_rtx_SET (VOIDmode, e1,
27859 gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3.0 */
27862 mthree = force_reg (mode, mthree);
27863 emit_insn (gen_rtx_SET (VOIDmode, e2,
27864 gen_rtx_PLUS (mode, e1, mthree)));
27866 mhalf = force_reg (mode, mhalf);
27868 /* e3 = -.5 * x0 */
27869 emit_insn (gen_rtx_SET (VOIDmode, e3,
27870 gen_rtx_MULT (mode, x0, mhalf)));
27872 /* e3 = -.5 * e0 */
27873 emit_insn (gen_rtx_SET (VOIDmode, e3,
27874 gen_rtx_MULT (mode, e0, mhalf)));
27875 /* ret = e2 * e3 */
27876 emit_insn (gen_rtx_SET (VOIDmode, res,
27877 gen_rtx_MULT (mode, e2, e3)));
27880 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
27882 static void ATTRIBUTE_UNUSED
27883 i386_solaris_elf_named_section (const char *name, unsigned int flags,
27886 /* With Binutils 2.15, the "@unwind" marker must be specified on
27887 every occurrence of the ".eh_frame" section, not just the first
   one.  */
27890 && strcmp (name, ".eh_frame") == 0)
27892 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
27893 flags & SECTION_WRITE ? "aw" : "a");
/* All other sections use the generic ELF directive.  */
27896 default_elf_asm_named_section (name, flags, decl);
27899 /* Return the mangling of TYPE if it is an extended fundamental type. */
27901 static const char *
27902 ix86_mangle_type (const_tree type)
27904 type = TYPE_MAIN_VARIANT (type);
/* Only scalar arithmetic types can be extended fundamental types.  */
27906 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
27907 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
27910 switch (TYPE_MODE (type))
27913 /* __float128 is "g". */
27916 /* "long double" or __float80 is "e". */
27923 /* For 32-bit code we can save PIC register setup by using
27924 __stack_chk_fail_local hidden function instead of calling
27925 __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
27926 register, so it is better to call __stack_chk_fail directly. */
27929 ix86_stack_protect_fail (void)
27931 return TARGET_64BIT
27932 ? default_external_stack_protect_fail ()
27933 : default_hidden_stack_protect_fail ();
27936 /* Select a format to encode pointers in exception handling data.  CODE
27937 is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
27938 true if the symbol may be affected by dynamic relocations.
27940 ??? All x86 object file formats are capable of representing this.
27941 After all, the relocation needed is the same as for the call insn.
27942 Whether or not a particular assembler allows us to enter such, I
27943 guess we'll have to see. */
27945 asm_preferred_eh_data_format (int code, int global)
/* PIC: use pc-relative signed data (4 bytes for small models, 8
   otherwise), indirect for symbols subject to dynamic relocation.  */
27949 int type = DW_EH_PE_sdata8;
27951 || ix86_cmodel == CM_SMALL_PIC
27952 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
27953 type = DW_EH_PE_sdata4;
27954 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: small/medium code models fit absolute 4-byte data.  */
27956 if (ix86_cmodel == CM_SMALL
27957 || (ix86_cmodel == CM_MEDIUM && code))
27958 return DW_EH_PE_udata4;
27959 return DW_EH_PE_absptr;
27962 /* Expand copysign from SIGN to the positive value ABS_VALUE
27963 storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit.  */
27966 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
27968 enum machine_mode mode = GET_MODE (sign);
27969 rtx sgn = gen_reg_rtx (mode);
27970 if (mask == NULL_RTX)
/* Build a sign-bit mask ourselves (inverted: sign bit clear).  */
27972 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
27973 if (!VECTOR_MODE_P (mode))
27975 /* We need to generate a scalar mode mask in this case. */
27976 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
27977 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
27978 mask = gen_reg_rtx (mode);
27979 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* A caller-supplied MASK has the sign bit set, so complement it.  */
27983 mask = gen_rtx_NOT (mode, mask);
/* sgn = sign bit of SIGN; result = ABS_VALUE | sgn.  */
27984 emit_insn (gen_rtx_SET (VOIDmode, sgn,
27985 gen_rtx_AND (mode, mask, sign)));
27986 emit_insn (gen_rtx_SET (VOIDmode, result,
27987 gen_rtx_IOR (mode, abs_value, sgn)));
27990 /* Expand fabs (OP0) and return a new rtx that holds the result.  The
27991 mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
27994 ix86_expand_sse_fabs (rtx op0, rtx *smask)
27996 enum machine_mode mode = GET_MODE (op0);
27999 xa = gen_reg_rtx (mode);
/* MASK has all bits set except the sign bit.  */
28000 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
28001 if (!VECTOR_MODE_P (mode))
28003 /* We need to generate a scalar mode mask in this case. */
28004 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28005 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28006 mask = gen_reg_rtx (mode);
28007 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* xa = op0 & mask  (clears the sign bit).  */
28009 emit_insn (gen_rtx_SET (VOIDmode, xa,
28010 gen_rtx_AND (mode, op0, mask)));
28018 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
28019 swapping the operands if SWAP_OPERANDS is true. The expanded
28020 code is a forward jump to a newly created label in case the
28021 comparison is true. The generated label rtx is returned. */
/* Implementation: compare in CCFPUmode via the FLAGS register, then
   emit a conditional jump (IF_THEN_ELSE on pc_rtx) to a new label.
   NOTE(review): the operand-swap code implied by SWAP_OPERANDS and the
   `return label;' line are missing from this extract.  */
28023 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
28024 bool swap_operands)
28035 label = gen_label_rtx ();
28036 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
28037 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28038 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
28039 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
28040 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28041 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
28042 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
28043 JUMP_LABEL (tmp) = label;
28048 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
28049 using comparison code CODE. Operands are swapped for the comparison if
28050 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
/* Dispatches on scalar FP mode: cmpsd-style for DFmode, cmpss-style
   otherwise, producing an all-ones/all-zeros mask in a fresh pseudo.
   NOTE(review): the operand-swap code and the `return mask;' line are
   missing from this extract.  */
28052 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
28053 bool swap_operands)
28055 enum machine_mode mode = GET_MODE (op0);
28056 rtx mask = gen_reg_rtx (mode);
28065 if (mode == DFmode)
28066 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
28067 gen_rtx_fmt_ee (code, mode, op0, op1)));
28069 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
28070 gen_rtx_fmt_ee (code, mode, op0, op1)));
28075 /* Generate and return a rtx of mode MODE for 2**n where n is the number
28076 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
/* 2**52 for DFmode, 2**23 for SFmode: adding and subtracting this
   constant rounds a magnitude-bounded FP value to an integer.
   NOTE(review): the `return TWO52;' line is missing from this
   extract.  */
28078 ix86_gen_TWO52 (enum machine_mode mode)
28080 REAL_VALUE_TYPE TWO52r;
28083 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
28084 TWO52 = const_double_from_real_value (TWO52r, mode);
28085 TWO52 = force_reg (mode, TWO52);
28090 /* Expand SSE sequence for computing lround from OP1 storing
/* lround (x) == (long) (x + copysign (nextafter (0.5, 0.0), x)).
   nextafter (0.5, 0.0) == 0.5 - 2**(-p-1) is used instead of 0.5 so
   that values exactly halfway round away from zero without the
   addition itself rounding up first.  */
28093 ix86_expand_lround (rtx op0, rtx op1)
28095 /* C code for the stuff we're doing below:
28096 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
28099 enum machine_mode mode = GET_MODE (op1);
28100 const struct real_format *fmt;
28101 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28104 /* load nextafter (0.5, 0.0) */
28105 fmt = REAL_MODE_FORMAT (mode);
28106 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28107 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28109 /* adj = copysign (0.5, op1) */
28110 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
28111 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
28113 /* adj = op1 + adj */
28114 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
28116 /* op0 = (imode)adj */
28117 expand_fix (op0, adj, 0);
28120 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
/* lfloor/lceil: truncate to integer, convert back, and compensate by
   +/-1 when the round-trip moved in the wrong direction.  The
   compensation is skipped via a forward jump when no correction is
   needed (comparison UNLE, operands swapped for the ceil case).  */
28123 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
28125 /* C code for the stuff we're doing below (for do_floor):
28127 xi -= (double)xi > op1 ? 1 : 0;
28130 enum machine_mode fmode = GET_MODE (op1);
28131 enum machine_mode imode = GET_MODE (op0);
28132 rtx ireg, freg, label, tmp;
28134 /* reg = (long)op1 */
28135 ireg = gen_reg_rtx (imode);
28136 expand_fix (ireg, op1, 0);
28138 /* freg = (double)reg */
28139 freg = gen_reg_rtx (fmode);
28140 expand_float (freg, ireg, 0);
28142 /* ireg = (freg > op1) ? ireg - 1 : ireg */
28143 label = ix86_expand_sse_compare_and_jump (UNLE,
28144 freg, op1, !do_floor);
28145 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
28146 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
28147 emit_move_insn (ireg, tmp);
28149 emit_label (label);
28150 LABEL_NUSES (label) = 1;
28152 emit_move_insn (op0, ireg);
28155 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
28156 result in OPERAND0. */
/* Classic TWO52 trick: for |x| < 2**52 (2**23 for SFmode), x + TWO52
   - TWO52 rounds to the nearest integer in the current rounding mode;
   larger magnitudes are already integral and are skipped via the
   forward jump.  The sign is copied back to preserve -0.0.  */
28158 ix86_expand_rint (rtx operand0, rtx operand1)
28160 /* C code for the stuff we're doing below:
28161 xa = fabs (operand1);
28162 if (!isless (xa, 2**52))
28164 xa = xa + 2**52 - 2**52;
28165 return copysign (xa, operand1);
28167 enum machine_mode mode = GET_MODE (operand0);
28168 rtx res, xa, label, TWO52, mask;
28170 res = gen_reg_rtx (mode);
28171 emit_move_insn (res, operand1);
28173 /* xa = abs (operand1) */
28174 xa = ix86_expand_sse_fabs (res, &mask);
28176 /* if (!isless (xa, TWO52)) goto label; */
28177 TWO52 = ix86_gen_TWO52 (mode);
28178 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28180 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28181 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28183 ix86_sse_copysign_to_positive (res, xa, res, mask);
28185 emit_label (label);
28186 LABEL_NUSES (label) = 1;
28188 emit_move_insn (operand0, res);
28191 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* 32-bit-safe floor/ceil: avoids DImode fix/float by using the TWO52
   add/sub trick on |x| and compensating by -1.0 (floor) or +1.0
   (ceil, via the -1.0 constant and the mask AND) when the rounded
   value overshot.  Sign is copied back to preserve signed zero.  */
28194 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
28196 /* C code for the stuff we expand below.
28197 double xa = fabs (x), x2;
28198 if (!isless (xa, TWO52))
28200 xa = xa + TWO52 - TWO52;
28201 x2 = copysign (xa, x);
28210 enum machine_mode mode = GET_MODE (operand0);
28211 rtx xa, TWO52, tmp, label, one, res, mask;
28213 TWO52 = ix86_gen_TWO52 (mode);
28215 /* Temporary for holding the result, initialized to the input
28216 operand to ease control flow. */
28217 res = gen_reg_rtx (mode);
28218 emit_move_insn (res, operand1);
28220 /* xa = abs (operand1) */
28221 xa = ix86_expand_sse_fabs (res, &mask);
28223 /* if (!isless (xa, TWO52)) goto label; */
28224 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28226 /* xa = xa + TWO52 - TWO52; */
28227 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28228 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28230 /* xa = copysign (xa, operand1) */
28231 ix86_sse_copysign_to_positive (xa, xa, res, mask);
28233 /* generate 1.0 or -1.0 */
28234 one = force_reg (mode,
28235 const_double_from_real_value (do_floor
28236 ? dconst1 : dconstm1, mode));
28238 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28239 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28240 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28241 gen_rtx_AND (mode, one, tmp)));
28242 /* We always need to subtract here to preserve signed zero. */
28243 tmp = expand_simple_binop (mode, MINUS,
28244 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28245 emit_move_insn (res, tmp);
28247 emit_label (label);
28248 LABEL_NUSES (label) = 1;
28250 emit_move_insn (operand0, res);
28253 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* floor/ceil via fix/float round-trip (truncation toward zero), then
   compensate with +/-1.0 masked by an UNGT comparison.  Requires a
   DImode fix for DFmode, hence the 64-bit-only nature of this variant
   (cf. ix86_expand_floorceildf_32 above).  */
28256 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
28258 /* C code for the stuff we expand below.
28259 double xa = fabs (x), x2;
28260 if (!isless (xa, TWO52))
28262 x2 = (double)(long)x;
28269 if (HONOR_SIGNED_ZEROS (mode))
28270 return copysign (x2, x);
28273 enum machine_mode mode = GET_MODE (operand0);
28274 rtx xa, xi, TWO52, tmp, label, one, res, mask;
28276 TWO52 = ix86_gen_TWO52 (mode);
28278 /* Temporary for holding the result, initialized to the input
28279 operand to ease control flow. */
28280 res = gen_reg_rtx (mode);
28281 emit_move_insn (res, operand1);
28283 /* xa = abs (operand1) */
28284 xa = ix86_expand_sse_fabs (res, &mask);
28286 /* if (!isless (xa, TWO52)) goto label; */
28287 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28289 /* xa = (double)(long)x */
28290 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28291 expand_fix (xi, res, 0);
28292 expand_float (xa, xi, 0);
28295 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28297 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28298 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28299 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28300 gen_rtx_AND (mode, one, tmp)));
28301 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
28302 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28303 emit_move_insn (res, tmp);
28305 if (HONOR_SIGNED_ZEROS (mode))
28306 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28308 emit_label (label);
28309 LABEL_NUSES (label) = 1;
28311 emit_move_insn (operand0, res);
28314 /* Expand SSE sequence for computing round from OPERAND1 storing
28315 into OPERAND0. Sequence that works without relying on DImode truncation
28316 via cvttsd2siq that is only available on 64bit targets. */
/* Round-half-away-from-zero on |x| using the TWO52 trick, then adjust
   by +/-1 depending on whether the rounding error dxa = rounded - |x|
   exceeds 0.5 or is <= -0.5, and finally copy the sign back.  */
28318 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
28320 /* C code for the stuff we expand below.
28321 double xa = fabs (x), xa2, x2;
28322 if (!isless (xa, TWO52))
28324 Using the absolute value and copying back sign makes
28325 -0.0 -> -0.0 correct.
28326 xa2 = xa + TWO52 - TWO52;
28331 else if (dxa > 0.5)
28333 x2 = copysign (xa2, x);
28336 enum machine_mode mode = GET_MODE (operand0);
28337 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
28339 TWO52 = ix86_gen_TWO52 (mode);
28341 /* Temporary for holding the result, initialized to the input
28342 operand to ease control flow. */
28343 res = gen_reg_rtx (mode);
28344 emit_move_insn (res, operand1);
28346 /* xa = abs (operand1) */
28347 xa = ix86_expand_sse_fabs (res, &mask);
28349 /* if (!isless (xa, TWO52)) goto label; */
28350 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28352 /* xa2 = xa + TWO52 - TWO52; */
28353 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28354 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
28356 /* dxa = xa2 - xa; */
28357 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
28359 /* generate 0.5, 1.0 and -0.5 */
28360 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
28361 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
28362 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
/* NOTE(review): the tmp below is immediately overwritten by the
   compare-mask call -- the gen_reg_rtx result is unused.  */
28366 tmp = gen_reg_rtx (mode);
28367 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
28368 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
28369 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28370 gen_rtx_AND (mode, one, tmp)));
28371 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28372 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
28373 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
28374 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28375 gen_rtx_AND (mode, one, tmp)));
28376 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28378 /* res = copysign (xa2, operand1) */
28379 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
28381 emit_label (label);
28382 LABEL_NUSES (label) = 1;
28384 emit_move_insn (operand0, res);
28387 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* trunc via fix (truncation toward zero) and float back; the large
   |x| >= TWO52 case is already integral and skipped via the jump.
   Sign is restored only when signed zeros must be honored.  */
28390 ix86_expand_trunc (rtx operand0, rtx operand1)
28392 /* C code for SSE variant we expand below.
28393 double xa = fabs (x), x2;
28394 if (!isless (xa, TWO52))
28396 x2 = (double)(long)x;
28397 if (HONOR_SIGNED_ZEROS (mode))
28398 return copysign (x2, x);
28401 enum machine_mode mode = GET_MODE (operand0);
28402 rtx xa, xi, TWO52, label, res, mask;
28404 TWO52 = ix86_gen_TWO52 (mode);
28406 /* Temporary for holding the result, initialized to the input
28407 operand to ease control flow. */
28408 res = gen_reg_rtx (mode);
28409 emit_move_insn (res, operand1);
28411 /* xa = abs (operand1) */
28412 xa = ix86_expand_sse_fabs (res, &mask);
28414 /* if (!isless (xa, TWO52)) goto label; */
28415 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28417 /* x = (double)(long)x */
28418 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28419 expand_fix (xi, res, 0);
28420 expand_float (res, xi, 0);
28422 if (HONOR_SIGNED_ZEROS (mode))
28423 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28425 emit_label (label);
28426 LABEL_NUSES (label) = 1;
28428 emit_move_insn (operand0, res);
28431 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* 32-bit-safe trunc: TWO52 add/sub rounds |x| to nearest; when that
   rounded up past |x| (res > xa), subtract 1.0 to get truncation.
   Sign is copied back unconditionally to restore negative inputs and
   preserve -0.0.  */
28434 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
28436 enum machine_mode mode = GET_MODE (operand0);
28437 rtx xa, mask, TWO52, label, one, res, smask, tmp;
28439 /* C code for SSE variant we expand below.
28440 double xa = fabs (x), x2;
28441 if (!isless (xa, TWO52))
28443 xa2 = xa + TWO52 - TWO52;
28447 x2 = copysign (xa2, x);
28451 TWO52 = ix86_gen_TWO52 (mode);
28453 /* Temporary for holding the result, initialized to the input
28454 operand to ease control flow. */
28455 res = gen_reg_rtx (mode);
28456 emit_move_insn (res, operand1);
28458 /* xa = abs (operand1) */
28459 xa = ix86_expand_sse_fabs (res, &smask);
28461 /* if (!isless (xa, TWO52)) goto label; */
28462 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28464 /* res = xa + TWO52 - TWO52; */
28465 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28466 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
28467 emit_move_insn (res, tmp);
28470 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28472 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
28473 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
28474 emit_insn (gen_rtx_SET (VOIDmode, mask,
28475 gen_rtx_AND (mode, mask, one)));
28476 tmp = expand_simple_binop (mode, MINUS,
28477 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
28478 emit_move_insn (res, tmp);
28480 /* res = copysign (res, operand1) */
28481 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
28483 emit_label (label);
28484 LABEL_NUSES (label) = 1;
28486 emit_move_insn (operand0, res);
28489 /* Expand SSE sequence for computing round from OPERAND1 storing
/* round (half away from zero) on |x|: add nextafter (0.5, 0.0) --
   just under one half, so exact halfway cases still round up without
   the addition itself tipping other values over -- truncate via
   fix/float, and copy the sign back.  Needs DImode fix for DFmode,
   so it is the 64-bit counterpart of ix86_expand_rounddf_32.  */
28492 ix86_expand_round (rtx operand0, rtx operand1)
28494 /* C code for the stuff we're doing below:
28495 double xa = fabs (x);
28496 if (!isless (xa, TWO52))
28498 xa = (double)(long)(xa + nextafter (0.5, 0.0));
28499 return copysign (xa, x);
28501 enum machine_mode mode = GET_MODE (operand0);
28502 rtx res, TWO52, xa, label, xi, half, mask;
28503 const struct real_format *fmt;
28504 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28506 /* Temporary for holding the result, initialized to the input
28507 operand to ease control flow. */
28508 res = gen_reg_rtx (mode);
28509 emit_move_insn (res, operand1);
28511 TWO52 = ix86_gen_TWO52 (mode);
28512 xa = ix86_expand_sse_fabs (res, &mask);
28513 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28515 /* load nextafter (0.5, 0.0) */
28516 fmt = REAL_MODE_FORMAT (mode);
28517 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28518 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28520 /* xa = xa + 0.5 */
28521 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
28522 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
28524 /* xa = (double)(int64_t)xa */
28525 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28526 expand_fix (xi, xa, 0);
28527 expand_float (xa, xi, 0);
28529 /* res = copysign (xa, operand1) */
28530 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
28532 emit_label (label);
28533 LABEL_NUSES (label) = 1;
28535 emit_move_insn (operand0, res);
28539 /* Validate whether a SSE5 instruction is valid or not.
28540 OPERANDS is the array of operands.
28541 NUM is the number of operands.
28542 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
28543 NUM_MEMORY is the maximum number of memory operands to accept.
28544 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
/* Classifies each operand as register/memory into MEM_MASK (bit i set
   when operands[i] is a memory op) and then checks the mask against
   the operand formats the SSE5 encodings allow.
   NOTE(review): interior lines of this extract are missing (mem_count
   declaration/increment, braces, several else-arms); only the visible
   tokens are touched here.  */
28547 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
28548 bool uses_oc0, int num_memory, bool commutative)
28554 /* Count the number of memory arguments */
28557 for (i = 0; i < num; i++)
28559 enum machine_mode mode = GET_MODE (operands[i]);
28560 if (register_operand (operands[i], mode))
28563 else if (memory_operand (operands[i], mode))
28565 mem_mask |= (1 << i);
28571 rtx pattern = PATTERN (insn);
28573 /* allow 0 for pcmov */
28574 if (GET_CODE (pattern) != SET
28575 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
28577 || operands[i] != CONST0_RTX (mode))
28582 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
28583 a memory operation. */
28584 if (num_memory < 0)
28586 num_memory = -num_memory;
28587 if ((mem_mask & (1 << (num-1))) != 0)
28589 mem_mask &= ~(1 << (num-1));
28594 /* If there were no memory operations, allow the insn */
28598 /* Do not allow the destination register to be a memory operand. */
28599 else if (mem_mask & (1 << 0))
28602 /* If there are too many memory operations, disallow the instruction. While
28603 the hardware only allows 1 memory reference, before register allocation
28604 for some insns, we allow two memory operations sometimes in order to allow
28605 code like the following to be optimized:
28607 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
28609 or similar cases that are vectorized into using the fmaddss
28611 else if (mem_count > num_memory)
28614 /* Don't allow more than one memory operation if not optimizing. */
28615 else if (mem_count > 1 && !optimize)
28618 else if (num == 4 && mem_count == 1)
28620 /* formats (destination is the first argument), example fmaddss:
28621 xmm1, xmm1, xmm2, xmm3/mem
28622 xmm1, xmm1, xmm2/mem, xmm3
28623 xmm1, xmm2, xmm3/mem, xmm1
28624 xmm1, xmm2/mem, xmm3, xmm1 */
28626 return ((mem_mask == (1 << 1))
28627 || (mem_mask == (1 << 2))
28628 || (mem_mask == (1 << 3)));
28630 /* format, example pmacsdd:
28631 xmm1, xmm2, xmm3/mem, xmm1 */
28633 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
28635 return (mem_mask == (1 << 2));
28638 else if (num == 4 && num_memory == 2)
28640 /* If there are two memory operations, we can load one of the memory ops
28641 into the destination register. This is for optimizing the
28642 multiply/add ops, which the combiner has optimized both the multiply
28643 and the add insns to have a memory operation. We have to be careful
28644 that the destination doesn't overlap with the inputs. */
28645 rtx op0 = operands[0];
28647 if (reg_mentioned_p (op0, operands[1])
28648 || reg_mentioned_p (op0, operands[2])
28649 || reg_mentioned_p (op0, operands[3]))
28652 /* formats (destination is the first argument), example fmaddss:
28653 xmm1, xmm1, xmm2, xmm3/mem
28654 xmm1, xmm1, xmm2/mem, xmm3
28655 xmm1, xmm2, xmm3/mem, xmm1
28656 xmm1, xmm2/mem, xmm3, xmm1
28658 For the oc0 case, we will load either operands[1] or operands[3] into
28659 operands[0], so any combination of 2 memory operands is ok. */
28663 /* format, example pmacsdd:
28664 xmm1, xmm2, xmm3/mem, xmm1
28666 For the integer multiply/add instructions be more restrictive and
28667 require operands[2] and operands[3] to be the memory operands. */
/* FIX(review): the original read
     (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)))
   where the second || operand is a nonzero constant, making the whole
   expression always true.  The intended second comparison is spelled
   out below.  */
28669 return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
28671 return (mem_mask == ((1 << 2) | (1 << 3)));
28674 else if (num == 3 && num_memory == 1)
28676 /* formats, example protb:
28677 xmm1, xmm2, xmm3/mem
28678 xmm1, xmm2/mem, xmm3 */
28680 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
28682 /* format, example comeq:
28683 xmm1, xmm2, xmm3/mem */
28685 return (mem_mask == (1 << 2));
28689 gcc_unreachable ();
28695 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
28696 hardware will allow by using the destination register to load one of the
28697 memory operations. Presently this is used by the multiply/add routines to
28698 allow 2 memory references. */
/* Asserts (via gcc_unreachable) that the destination is a register
   not overlapping any input, then loads whichever of operands[1] or
   operands[3] is a memory reference into it.
   NOTE(review): the lines rewriting operands[1]/operands[3] to op0
   after each move, and the leading sanity condition, are missing from
   this extract.  */
28701 ix86_expand_sse5_multiple_memory (rtx operands[],
28703 enum machine_mode mode)
28705 rtx op0 = operands[0];
28707 || memory_operand (op0, mode)
28708 || reg_mentioned_p (op0, operands[1])
28709 || reg_mentioned_p (op0, operands[2])
28710 || reg_mentioned_p (op0, operands[3]))
28711 gcc_unreachable ();
28713 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
28714 the destination register. */
28715 if (memory_operand (operands[1], mode))
28717 emit_move_insn (op0, operands[1]);
28720 else if (memory_operand (operands[3], mode))
28722 emit_move_insn (op0, operands[3]);
28726 gcc_unreachable ();
28732 /* Table of valid machine attributes. */
/* Terminated by the all-NULL sentinel entry; conditional entries are
   guarded by TARGET_DLLIMPORT_DECL_ATTRIBUTES and
   SUBTARGET_ATTRIBUTE_TABLE.  NOTE(review): the matching #endif lines
   are missing from this extract.  */
28733 static const struct attribute_spec ix86_attribute_table[] =
28735 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
28736 /* Stdcall attribute says callee is responsible for popping arguments
28737 if they are not variable. */
28738 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28739 /* Fastcall attribute says callee is responsible for popping arguments
28740 if they are not variable. */
28741 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28742 /* Cdecl attribute says the callee is a normal C declaration */
28743 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28744 /* Regparm attribute specifies how many integer arguments are to be
28745 passed in registers. */
28746 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
28747 /* Sseregparm attribute says we are using x86_64 calling conventions
28748 for FP arguments. */
28749 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28750 /* force_align_arg_pointer says this function realigns the stack at entry. */
28751 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
28752 false, true, true, ix86_handle_cconv_attribute },
28753 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28754 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
28755 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
28756 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
28758 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
28759 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
28760 #ifdef SUBTARGET_ATTRIBUTE_TABLE
28761 SUBTARGET_ATTRIBUTE_TABLE,
28763 /* ms_abi and sysv_abi calling convention function attributes. */
28764 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
28765 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
28767 { NULL, 0, 0, false, false, false, NULL }
28770 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* Returns a (negative) adjustment for the runtime-test branch cost;
   NOTE(review): the function head, the `if (runtime_test)' guard and
   the non-runtime-test return are missing from this extract.  */
28772 x86_builtin_vectorization_cost (bool runtime_test)
28774 /* If the branch of the runtime test is taken - i.e. - the vectorized
28775 version is skipped - this incurs a misprediction cost (because the
28776 vectorized version is expected to be the fall-through). So we subtract
28777 the latency of a mispredicted branch from the costs that are incured
28778 when the vectorized version is executed.
28780 TODO: The values in individual target tables have to be tuned or new
28781 fields may be needed. For eg. on K8, the default branch path is the
28782 not-taken path. If the taken path is predicted correctly, the minimum
28783 penalty of going down the taken-path is 1 cycle. If the taken-path is
28784 not predicted correctly, then the minimum penalty is 10 cycles. */
28788 return (-(ix86_cost->cond_taken_branch_cost));
28794 /* This function returns the calling abi specific va_list type node.
28795 It returns the FNDECL specific va_list type. */
/* Chooses between ms_va_list_type_node and sysv_va_list_type_node
   based on the function's ABI; NOTE(review): the 32-bit early-out
   condition before `return va_list_type_node;' and the abi comparison
   before the ms return are missing from this extract.  */
28798 ix86_fn_abi_va_list (tree fndecl)
28803 return va_list_type_node;
28804 gcc_assert (fndecl != NULL_TREE);
28805 abi = ix86_function_abi ((const_tree) fndecl);
28808 return ms_va_list_type_node;
28810 return sysv_va_list_type_node;
28813 /* Returns the canonical va_list type specified by TYPE. If there
28814 is no valid TYPE provided, it return NULL_TREE. */
/* Tries the default va_list, then the SysV and MS variants, unwrapping
   decayed array types before the TYPE_MAIN_VARIANT comparison each
   time; falls back to std_canonical_va_list_type.
   NOTE(review): the `htype = type;' initializations and several guard
   conditions are missing from this extract.  The three unwrap/compare
   stanzas are verbatim copies -- a candidate for a small helper once
   the full file is in view.  */
28817 ix86_canonical_va_list_type (tree type)
28821 /* Resolve references and pointers to va_list type. */
28822 if (INDIRECT_REF_P (type))
28823 type = TREE_TYPE (type);
28824 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
28825 type = TREE_TYPE (type);
28829 wtype = va_list_type_node;
28830 gcc_assert (wtype != NULL_TREE);
28832 if (TREE_CODE (wtype) == ARRAY_TYPE)
28834 /* If va_list is an array type, the argument may have decayed
28835 to a pointer type, e.g. by being passed to another function.
28836 In that case, unwrap both types so that we can compare the
28837 underlying records. */
28838 if (TREE_CODE (htype) == ARRAY_TYPE
28839 || POINTER_TYPE_P (htype))
28841 wtype = TREE_TYPE (wtype);
28842 htype = TREE_TYPE (htype);
28845 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
28846 return va_list_type_node;
28847 wtype = sysv_va_list_type_node;
28848 gcc_assert (wtype != NULL_TREE);
28850 if (TREE_CODE (wtype) == ARRAY_TYPE)
28852 /* If va_list is an array type, the argument may have decayed
28853 to a pointer type, e.g. by being passed to another function.
28854 In that case, unwrap both types so that we can compare the
28855 underlying records. */
28856 if (TREE_CODE (htype) == ARRAY_TYPE
28857 || POINTER_TYPE_P (htype))
28859 wtype = TREE_TYPE (wtype);
28860 htype = TREE_TYPE (htype);
28863 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
28864 return sysv_va_list_type_node;
28865 wtype = ms_va_list_type_node;
28866 gcc_assert (wtype != NULL_TREE);
28868 if (TREE_CODE (wtype) == ARRAY_TYPE)
28870 /* If va_list is an array type, the argument may have decayed
28871 to a pointer type, e.g. by being passed to another function.
28872 In that case, unwrap both types so that we can compare the
28873 underlying records. */
28874 if (TREE_CODE (htype) == ARRAY_TYPE
28875 || POINTER_TYPE_P (htype))
28877 wtype = TREE_TYPE (wtype);
28878 htype = TREE_TYPE (htype);
28881 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
28882 return ms_va_list_type_node;
28885 return std_canonical_va_list_type (type);
28888 /* Iterate through the target-specific builtin types for va_list.
28889 IDX denotes the iterator, *PTREE is set to the result type of
28890 the va_list builtin, and *PNAME to its internal type.
28891 Returns zero if there is no element for this index, otherwise
28892 IDX should be increased upon the next call.
28893 Note, do not iterate a base builtin's name like __builtin_va_list.
28894 Used from c_common_nodes_and_builtins. */
/* NOTE(review): the switch/case skeleton around the two entries and
   the return statements are missing from this extract; only the two
   (name, type) pairs are visible.  */
28897 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
28903 *ptree = ms_va_list_type_node;
28904 *pname = "__builtin_ms_va_list";
28907 *ptree = sysv_va_list_type_node;
28908 *pname = "__builtin_sysv_va_list";
28916 /* Initialize the GCC target structure. */
28917 #undef TARGET_RETURN_IN_MEMORY
28918 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
28920 #undef TARGET_ATTRIBUTE_TABLE
28921 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
28922 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28923 # undef TARGET_MERGE_DECL_ATTRIBUTES
28924 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
28927 #undef TARGET_COMP_TYPE_ATTRIBUTES
28928 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
28930 #undef TARGET_INIT_BUILTINS
28931 #define TARGET_INIT_BUILTINS ix86_init_builtins
28932 #undef TARGET_EXPAND_BUILTIN
28933 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
28935 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
28936 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
28937 ix86_builtin_vectorized_function
28939 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
28940 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
28942 #undef TARGET_BUILTIN_RECIPROCAL
28943 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
28945 #undef TARGET_ASM_FUNCTION_EPILOGUE
28946 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
28948 #undef TARGET_ENCODE_SECTION_INFO
28949 #ifndef SUBTARGET_ENCODE_SECTION_INFO
28950 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
28952 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
28955 #undef TARGET_ASM_OPEN_PAREN
28956 #define TARGET_ASM_OPEN_PAREN ""
28957 #undef TARGET_ASM_CLOSE_PAREN
28958 #define TARGET_ASM_CLOSE_PAREN ""
28960 #undef TARGET_ASM_ALIGNED_HI_OP
28961 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
28962 #undef TARGET_ASM_ALIGNED_SI_OP
28963 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
28965 #undef TARGET_ASM_ALIGNED_DI_OP
28966 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
28969 #undef TARGET_ASM_UNALIGNED_HI_OP
28970 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
28971 #undef TARGET_ASM_UNALIGNED_SI_OP
28972 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
28973 #undef TARGET_ASM_UNALIGNED_DI_OP
28974 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
28976 #undef TARGET_SCHED_ADJUST_COST
28977 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
28978 #undef TARGET_SCHED_ISSUE_RATE
28979 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
28980 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
28981 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
28982 ia32_multipass_dfa_lookahead
28984 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
28985 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
28988 #undef TARGET_HAVE_TLS
28989 #define TARGET_HAVE_TLS true
28991 #undef TARGET_CANNOT_FORCE_CONST_MEM
28992 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
28993 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
28994 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
28996 #undef TARGET_DELEGITIMIZE_ADDRESS
28997 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
28999 #undef TARGET_MS_BITFIELD_LAYOUT_P
29000 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
29003 #undef TARGET_BINDS_LOCAL_P
29004 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
29006 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29007 #undef TARGET_BINDS_LOCAL_P
29008 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
29011 #undef TARGET_ASM_OUTPUT_MI_THUNK
29012 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
29013 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
29014 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
29016 #undef TARGET_ASM_FILE_START
29017 #define TARGET_ASM_FILE_START x86_file_start
29019 #undef TARGET_DEFAULT_TARGET_FLAGS
29020 #define TARGET_DEFAULT_TARGET_FLAGS \
29022 | TARGET_SUBTARGET_DEFAULT \
29023 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
/* Command-line option processing hook.  */
29025 #undef TARGET_HANDLE_OPTION
29026 #define TARGET_HANDLE_OPTION ix86_handle_option
/* RTX and addressing-mode cost estimation used by the optimizers.  */
29028 #undef TARGET_RTX_COSTS
29029 #define TARGET_RTX_COSTS ix86_rtx_costs
29030 #undef TARGET_ADDRESS_COST
29031 #define TARGET_ADDRESS_COST ix86_address_cost
/* Condition-code register handling: the fixed CC regs and which CC modes
   may be merged by the compare-elimination passes.  */
29033 #undef TARGET_FIXED_CONDITION_CODE_REGS
29034 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
29035 #undef TARGET_CC_MODES_COMPATIBLE
29036 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
/* Machine-dependent reorganization pass run late in compilation.  */
29038 #undef TARGET_MACHINE_DEPENDENT_REORG
29039 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
/* Variadic-argument (va_list) support: building the va_list type,
   selecting it per function ABI, canonicalizing it, expanding va_start,
   and gimplifying va_arg.  */
29041 #undef TARGET_BUILD_BUILTIN_VA_LIST
29042 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
29044 #undef TARGET_FN_ABI_VA_LIST
29045 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
29047 #undef TARGET_CANONICAL_VA_LIST_TYPE
29048 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
29050 #undef TARGET_EXPAND_BUILTIN_VA_START
29051 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
/* Extra registers implicitly clobbered by asm statements.  */
29053 #undef TARGET_MD_ASM_CLOBBERS
29054 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
/* Calling-convention hooks: argument promotion, aggregate returns,
   incoming varargs setup, and pass-by-reference/stack decisions.  */
29056 #undef TARGET_PROMOTE_PROTOTYPES
29057 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
29058 #undef TARGET_STRUCT_VALUE_RTX
29059 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
29060 #undef TARGET_SETUP_INCOMING_VARARGS
29061 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
29062 #undef TARGET_MUST_PASS_IN_STACK
29063 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
29064 #undef TARGET_PASS_BY_REFERENCE
29065 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
/* Stack-realignment support: the internal argument pointer, the stack
   boundary update hook, and the dynamic realign argument pointer (DRAP).  */
29066 #undef TARGET_INTERNAL_ARG_POINTER
29067 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
29068 #undef TARGET_UPDATE_STACK_BOUNDARY
29069 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
29070 #undef TARGET_GET_DRAP_RTX
29071 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
/* DWARF call-frame info for target-specific frame unspecs.  */
29072 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
29073 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
/* All arguments are named on x86 (no anonymous-argument special case).  */
29074 #undef TARGET_STRICT_ARGUMENT_NAMING
29075 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
29077 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
29078 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
/* Mode-support queries for scalar and vector modes, and the machine mode
   used for constant suffixes (e.g. __float128 'q'/'w' suffixes).  */
29080 #undef TARGET_SCALAR_MODE_SUPPORTED_P
29081 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
29083 #undef TARGET_VECTOR_MODE_SUPPORTED_P
29084 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
29086 #undef TARGET_C_MODE_FOR_SUFFIX
29087 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
/* Emit DWARF DTP-relative (thread-local) data relocations.  */
29090 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
29091 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
/* Let the subtarget (OS port) inject extra attributes onto declarations,
   when it defines SUBTARGET_INSERT_ATTRIBUTES.
   NOTE(review): the closing #endif (original line 29097) was missing from
   this chunk, leaving the conditional unterminated — restored; confirm
   against the upstream file.  */
29094 #ifdef SUBTARGET_INSERT_ATTRIBUTES
29095 #undef TARGET_INSERT_ATTRIBUTES
29096 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
29097 #endif
/* C++ name mangling for target-specific types (e.g. vector types).  */
29099 #undef TARGET_MANGLE_TYPE
29100 #define TARGET_MANGLE_TYPE ix86_mangle_type
/* Code emitted when a stack-protector canary check fails.  */
29102 #undef TARGET_STACK_PROTECT_FAIL
29103 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
/* Where function return values live, per type and ABI.  */
29105 #undef TARGET_FUNCTION_VALUE
29106 #define TARGET_FUNCTION_VALUE ix86_function_value
/* Extra reload handling for register classes reload can't manage alone.  */
29108 #undef TARGET_SECONDARY_RELOAD
29109 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
/* Cost model consulted by the tree vectorizer.  */
29111 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
29112 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
/* Per-function target-option support: switch target state when cfun
   changes, validate __attribute__((target(...))), and save/restore/print
   and inline-compatibility-check function-specific option state.  */
29114 #undef TARGET_SET_CURRENT_FUNCTION
29115 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
29117 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
29118 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
29120 #undef TARGET_OPTION_SAVE
29121 #define TARGET_OPTION_SAVE ix86_function_specific_save
29123 #undef TARGET_OPTION_RESTORE
29124 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
29126 #undef TARGET_OPTION_PRINT
29127 #define TARGET_OPTION_PRINT ix86_function_specific_print
29129 #undef TARGET_OPTION_CAN_INLINE_P
29130 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
/* Instantiate the target hooks vector from the TARGET_* macros defined
   above; TARGET_INITIALIZER expands to a full struct initializer using
   each hook's current definition.  This is the single definition the rest
   of the compiler calls through as targetm.<hook>.  */
29132 struct gcc_target targetm = TARGET_INITIALIZER;
/* Garbage-collector root tables for this file, generated by gengtype.  */
29134 #include "gt-i386.h"