/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
53 #include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Default value for CHECK_STACK_LIMIT when the target headers have not
   already defined it; -1 means no limit is known.
   (The matching #endif was missing in this excerpt and is restored.)  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The
   cost arrays below have five entries -- QI, HI, SI, DI and "other" --
   so any unlisted mode maps to the final "other" slot (index 4).
   (The trailing ": 4)" operand was missing in this excerpt and is
   restored to terminate the conditional chain.)  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   COSTS_N_BYTES therefore expresses a *size* cost on the same scale:
   COSTS_N_BYTES (1) == 2, i.e. one addition-sized instruction.  */
#define COSTS_N_BYTES(N) ((N) * 2)
76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
79 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
/* Processor costs (relative to an add) */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
/* Cost table tuned for Pentium 4 (NetBurst): high-latency shifts,
   multiplies and divides, and expensive SSE register moves.
   NOTE(review): relative to the sibling tables, the MOVE_RATIO entry,
   the branch-cost entry, one stringop line and the closing "};" appear
   to be missing from this listing -- verify against the full source.  */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
/* Cost table tuned for Nocona (64-bit Prescott-class P4): uniform
   one-insn simple ops but very expensive divides (66) and slow
   MMX/SSE loads/stores (12).
   NOTE(review): the MOVE_RATIO entry, the branch-cost entry, one
   stringop line and the closing "};" appear to be missing from this
   listing -- verify against the full source.  */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
/* Cost table tuned for Core 2: cheap ALU ops, moderate divide cost,
   and stringop strategies that favor rep prefixes for mid-size blocks.
   NOTE(review): the MOVE_RATIO entry and the closing "};" appear to be
   missing from this listing -- verify against the full source.  */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
995 in SFmode, DFmode and XFmode */
996 2, /* cost of moving MMX register */
997 {6, 6}, /* cost of loading MMX registers
998 in SImode and DImode */
999 {4, 4}, /* cost of storing MMX registers
1000 in SImode and DImode */
1001 2, /* cost of moving SSE register */
1002 {6, 6, 6}, /* cost of loading SSE registers
1003 in SImode, DImode and TImode */
1004 {4, 4, 4}, /* cost of storing SSE registers
1005 in SImode, DImode and TImode */
1006 2, /* MMX or SSE register to integer */
1007 32, /* size of l1 cache. */
1008 2048, /* size of l2 cache. */
1009 128, /* size of prefetch block */
1010 8, /* number of parallel prefetches */
1011 3, /* Branch cost */
1012 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1013 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1014 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1015 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1016 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1017 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1018 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1019 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 {{libcall, {{8, loop}, {15, unrolled_loop},
1022 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1023 {libcall, {{24, loop}, {32, unrolled_loop},
1024 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1025 1, /* scalar_stmt_cost. */
1026 1, /* scalar load_cost. */
1027 1, /* scalar_store_cost. */
1028 1, /* vec_stmt_cost. */
1029 1, /* vec_to_scalar_cost. */
1030 1, /* scalar_to_vec_cost. */
1031 1, /* vec_align_load_cost. */
1032 2, /* vec_unalign_load_cost. */
1033 1, /* vec_store_cost. */
1034 3, /* cond_taken_branch_cost. */
1035 1, /* cond_not_taken_branch_cost. */
/* Cost table tuned for Atom (in-order): note the mode-dependent
   divide costs (QI cheapest, DI most expensive) and relatively high
   x87 FADD/FMUL costs.
   NOTE(review): the closing "};" appears to be missing from this
   listing -- verify against the full source.  */
1039 struct processor_costs atom_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1042 COSTS_N_INSNS (1), /* variable shift costs */
1043 COSTS_N_INSNS (1), /* constant shift costs */
1044 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1045 COSTS_N_INSNS (4), /* HI */
1046 COSTS_N_INSNS (3), /* SI */
1047 COSTS_N_INSNS (4), /* DI */
1048 COSTS_N_INSNS (2)}, /* other */
1049 0, /* cost of multiply per each bit set */
1050 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1051 COSTS_N_INSNS (26), /* HI */
1052 COSTS_N_INSNS (42), /* SI */
1053 COSTS_N_INSNS (74), /* DI */
1054 COSTS_N_INSNS (74)}, /* other */
1055 COSTS_N_INSNS (1), /* cost of movsx */
1056 COSTS_N_INSNS (1), /* cost of movzx */
1057 8, /* "large" insn */
1058 17, /* MOVE_RATIO */
1059 2, /* cost for loading QImode using movzbl */
1060 {4, 4, 4}, /* cost of loading integer registers
1061 in QImode, HImode and SImode.
1062 Relative to reg-reg move (2). */
1063 {4, 4, 4}, /* cost of storing integer registers */
1064 4, /* cost of reg,reg fld/fst */
1065 {12, 12, 12}, /* cost of loading fp registers
1066 in SFmode, DFmode and XFmode */
1067 {6, 6, 8}, /* cost of storing fp registers
1068 in SFmode, DFmode and XFmode */
1069 2, /* cost of moving MMX register */
1070 {8, 8}, /* cost of loading MMX registers
1071 in SImode and DImode */
1072 {8, 8}, /* cost of storing MMX registers
1073 in SImode and DImode */
1074 2, /* cost of moving SSE register */
1075 {8, 8, 8}, /* cost of loading SSE registers
1076 in SImode, DImode and TImode */
1077 {8, 8, 8}, /* cost of storing SSE registers
1078 in SImode, DImode and TImode */
1079 5, /* MMX or SSE register to integer */
1080 32, /* size of l1 cache. */
1081 256, /* size of l2 cache. */
1082 64, /* size of prefetch block */
1083 6, /* number of parallel prefetches */
1084 3, /* Branch cost */
1085 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1086 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1087 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1088 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1089 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1090 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1091 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1092 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1093 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1094 {{libcall, {{8, loop}, {15, unrolled_loop},
1095 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1096 {libcall, {{24, loop}, {32, unrolled_loop},
1097 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
/* Generic 64-bit tuning table (compromise between Nocona and K8, per
   the original comment below).  Stringops use only the 64-bit slot;
   the 32-bit slot is DUMMY_STRINGOP_ALGS.
   NOTE(review): the closing "};" appears to be missing from this
   listing -- verify against the full source.  */
1111 /* Generic64 should produce code tuned for Nocona and K8. */
1113 struct processor_costs generic64_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 /* On all chips taken into consideration lea is 2 cycles and more. With
1116 this cost however our current implementation of synth_mult results in
1117 use of unnecessary temporary registers causing regression on several
1118 SPECfp benchmarks. */
1119 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1120 COSTS_N_INSNS (1), /* variable shift costs */
1121 COSTS_N_INSNS (1), /* constant shift costs */
1122 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1123 COSTS_N_INSNS (4), /* HI */
1124 COSTS_N_INSNS (3), /* SI */
1125 COSTS_N_INSNS (4), /* DI */
1126 COSTS_N_INSNS (2)}, /* other */
1127 0, /* cost of multiply per each bit set */
1128 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1129 COSTS_N_INSNS (26), /* HI */
1130 COSTS_N_INSNS (42), /* SI */
1131 COSTS_N_INSNS (74), /* DI */
1132 COSTS_N_INSNS (74)}, /* other */
1133 COSTS_N_INSNS (1), /* cost of movsx */
1134 COSTS_N_INSNS (1), /* cost of movzx */
1135 8, /* "large" insn */
1136 17, /* MOVE_RATIO */
1137 4, /* cost for loading QImode using movzbl */
1138 {4, 4, 4}, /* cost of loading integer registers
1139 in QImode, HImode and SImode.
1140 Relative to reg-reg move (2). */
1141 {4, 4, 4}, /* cost of storing integer registers */
1142 4, /* cost of reg,reg fld/fst */
1143 {12, 12, 12}, /* cost of loading fp registers
1144 in SFmode, DFmode and XFmode */
1145 {6, 6, 8}, /* cost of storing fp registers
1146 in SFmode, DFmode and XFmode */
1147 2, /* cost of moving MMX register */
1148 {8, 8}, /* cost of loading MMX registers
1149 in SImode and DImode */
1150 {8, 8}, /* cost of storing MMX registers
1151 in SImode and DImode */
1152 2, /* cost of moving SSE register */
1153 {8, 8, 8}, /* cost of loading SSE registers
1154 in SImode, DImode and TImode */
1155 {8, 8, 8}, /* cost of storing SSE registers
1156 in SImode, DImode and TImode */
1157 5, /* MMX or SSE register to integer */
1158 32, /* size of l1 cache. */
1159 512, /* size of l2 cache. */
1160 64, /* size of prefetch block */
1161 6, /* number of parallel prefetches */
1162 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1163 is increased to perhaps more appropriate value of 5. */
1164 3, /* Branch cost */
1165 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1166 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1167 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1168 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1169 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1170 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1171 {DUMMY_STRINGOP_ALGS,
1172 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1173 {DUMMY_STRINGOP_ALGS,
1174 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1175 1, /* scalar_stmt_cost. */
1176 1, /* scalar load_cost. */
1177 1, /* scalar_store_cost. */
1178 1, /* vec_stmt_cost. */
1179 1, /* vec_to_scalar_cost. */
1180 1, /* scalar_to_vec_cost. */
1181 1, /* vec_align_load_cost. */
1182 2, /* vec_unalign_load_cost. */
1183 1, /* vec_store_cost. */
1184 3, /* cond_taken_branch_cost. */
1185 1, /* cond_not_taken_branch_cost. */
/* Generic 32-bit tuning table.  Mirror of generic64_cost except for
   the l2 cache size and the stringops, which here fill only the
   32-bit slot (64-bit slot is DUMMY_STRINGOP_ALGS).
   NOTE(review): the closing "};" appears to be missing from this
   listing -- verify against the full source.  */
1188 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1190 struct processor_costs generic32_cost = {
1191 COSTS_N_INSNS (1), /* cost of an add instruction */
1192 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1193 COSTS_N_INSNS (1), /* variable shift costs */
1194 COSTS_N_INSNS (1), /* constant shift costs */
1195 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1196 COSTS_N_INSNS (4), /* HI */
1197 COSTS_N_INSNS (3), /* SI */
1198 COSTS_N_INSNS (4), /* DI */
1199 COSTS_N_INSNS (2)}, /* other */
1200 0, /* cost of multiply per each bit set */
1201 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1202 COSTS_N_INSNS (26), /* HI */
1203 COSTS_N_INSNS (42), /* SI */
1204 COSTS_N_INSNS (74), /* DI */
1205 COSTS_N_INSNS (74)}, /* other */
1206 COSTS_N_INSNS (1), /* cost of movsx */
1207 COSTS_N_INSNS (1), /* cost of movzx */
1208 8, /* "large" insn */
1209 17, /* MOVE_RATIO */
1210 4, /* cost for loading QImode using movzbl */
1211 {4, 4, 4}, /* cost of loading integer registers
1212 in QImode, HImode and SImode.
1213 Relative to reg-reg move (2). */
1214 {4, 4, 4}, /* cost of storing integer registers */
1215 4, /* cost of reg,reg fld/fst */
1216 {12, 12, 12}, /* cost of loading fp registers
1217 in SFmode, DFmode and XFmode */
1218 {6, 6, 8}, /* cost of storing fp registers
1219 in SFmode, DFmode and XFmode */
1220 2, /* cost of moving MMX register */
1221 {8, 8}, /* cost of loading MMX registers
1222 in SImode and DImode */
1223 {8, 8}, /* cost of storing MMX registers
1224 in SImode and DImode */
1225 2, /* cost of moving SSE register */
1226 {8, 8, 8}, /* cost of loading SSE registers
1227 in SImode, DImode and TImode */
1228 {8, 8, 8}, /* cost of storing SSE registers
1229 in SImode, DImode and TImode */
1230 5, /* MMX or SSE register to integer */
1231 32, /* size of l1 cache. */
1232 256, /* size of l2 cache. */
1233 64, /* size of prefetch block */
1234 6, /* number of parallel prefetches */
1235 3, /* Branch cost */
1236 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1237 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1238 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1239 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1240 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1241 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1242 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1243 DUMMY_STRINGOP_ALGS},
1244 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1245 DUMMY_STRINGOP_ALGS},
1246 1, /* scalar_stmt_cost. */
1247 1, /* scalar load_cost. */
1248 1, /* scalar_store_cost. */
1249 1, /* vec_stmt_cost. */
1250 1, /* vec_to_scalar_cost. */
1251 1, /* scalar_to_vec_cost. */
1252 1, /* vec_align_load_cost. */
1253 2, /* vec_unalign_load_cost. */
1254 1, /* vec_store_cost. */
1255 3, /* cond_taken_branch_cost. */
1256 1, /* cond_not_taken_branch_cost. */
1259 const struct processor_costs *ix86_cost = &pentium_cost;
1261 /* Processor feature/optimization bitmasks. */
/* Each m_* macro is a one-bit mask formed from the corresponding
   PROCESSOR_* enumerator; the composite masks (m_K6_GEODE,
   m_ATHLON_K8, m_AMD_MULTIPLE, m_GENERIC) group related CPUs so the
   tuning tables below can name whole families at once.  */
1262 #define m_386 (1<<PROCESSOR_I386)
1263 #define m_486 (1<<PROCESSOR_I486)
1264 #define m_PENT (1<<PROCESSOR_PENTIUM)
1265 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1266 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1267 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1268 #define m_CORE2 (1<<PROCESSOR_CORE2)
1269 #define m_ATOM (1<<PROCESSOR_ATOM)
1271 #define m_GEODE (1<<PROCESSOR_GEODE)
1272 #define m_K6 (1<<PROCESSOR_K6)
1273 #define m_K6_GEODE (m_K6 | m_GEODE)
1274 #define m_K8 (1<<PROCESSOR_K8)
1275 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1276 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1277 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1278 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1280 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1281 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1283 /* Generic instruction choice should be common subset of supported CPUs
1284 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1285 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1287 /* Feature tests against the various tunings. */
1288 unsigned char ix86_tune_features[X86_TUNE_LAST];
1290 /* Feature tests against the various tunings used to create ix86_tune_features
1291 based on the processor mask. */
/* One mask per X86_TUNE_* enumerator, in enum order; a set bit enables
   the tuning for that processor.
   NOTE(review): many entries in this listing show only their comment
   with the value line missing (e.g. X86_TUNE_ZERO_EXTEND_WITH_AND,
   X86_TUNE_USE_MOV0, X86_TUNE_SHIFT1, ...), and the closing "};" is
   absent -- verify against the full source before relying on the
   comment/value pairing here.  */
1292 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1293 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1294 negatively, so enabling for Generic64 seems like good code size
1295 tradeoff. We can't enable it for 32bit generic because it does not
1296 work well with PPro base chips. */
1297 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1299 /* X86_TUNE_PUSH_MEMORY */
1300 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1301 | m_NOCONA | m_CORE2 | m_GENERIC,
1303 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1306 /* X86_TUNE_UNROLL_STRLEN */
1307 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1308 | m_CORE2 | m_GENERIC,
1310 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1311 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1313 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1314 on simulation result. But after P4 was made, no performance benefit
1315 was observed with branch hints. It also increases the code size.
1316 As a result, icc never generates branch hints. */
1319 /* X86_TUNE_DOUBLE_WITH_ADD */
1322 /* X86_TUNE_USE_SAHF */
1323 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1324 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1327 partial dependencies. */
1328 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1329 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1331 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1332 register stalls on Generic32 compilation setting as well. However
1333 in current implementation the partial register stalls are not eliminated
1334 very well - they can be introduced via subregs synthesized by combine
1335 and can happen in caller/callee saving sequences. Because this option
1336 pays back little on PPro based chips and is in conflict with partial reg
1337 dependencies used by Athlon/P4 based chips, it is better to leave it off
1338 for generic32 for now. */
1341 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1342 m_CORE2 | m_GENERIC,
1344 /* X86_TUNE_USE_HIMODE_FIOP */
1345 m_386 | m_486 | m_K6_GEODE,
1347 /* X86_TUNE_USE_SIMODE_FIOP */
1348 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1350 /* X86_TUNE_USE_MOV0 */
1353 /* X86_TUNE_USE_CLTD */
1354 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1356 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1359 /* X86_TUNE_SPLIT_LONG_MOVES */
1362 /* X86_TUNE_READ_MODIFY_WRITE */
1365 /* X86_TUNE_READ_MODIFY */
1368 /* X86_TUNE_PROMOTE_QIMODE */
1369 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1370 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1372 /* X86_TUNE_FAST_PREFIX */
1373 ~(m_PENT | m_486 | m_386),
1375 /* X86_TUNE_SINGLE_STRINGOP */
1376 m_386 | m_PENT4 | m_NOCONA,
1378 /* X86_TUNE_QIMODE_MATH */
1381 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1382 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1383 might be considered for Generic32 if our scheme for avoiding partial
1384 stalls was more effective. */
1387 /* X86_TUNE_PROMOTE_QI_REGS */
1390 /* X86_TUNE_PROMOTE_HI_REGS */
1393 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1394 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1395 | m_CORE2 | m_GENERIC,
1397 /* X86_TUNE_ADD_ESP_8 */
1398 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1399 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_SUB_ESP_4 */
1402 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1405 /* X86_TUNE_SUB_ESP_8 */
1406 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1407 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1409 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1410 for DFmode copies */
1411 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1412 | m_GENERIC | m_GEODE),
1414 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1415 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1417 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1418 conflict here in between PPro/Pentium4 based chips that thread 128bit
1419 SSE registers as single units versus K8 based chips that divide SSE
1420 registers to two 64bit halves. This knob promotes all store destinations
1421 to be 128bit to allow register renaming on 128bit SSE units, but usually
1422 results in one extra microop on 64bit SSE units. Experimental results
1423 shows that disabling this option on P4 brings over 20% SPECfp regression,
1424 while enabling it on K8 brings roughly 2.4% regression that can be partly
1425 masked by careful scheduling of moves. */
1426 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1429 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1432 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1433 are resolved on SSE register parts instead of whole registers, so we may
1434 maintain just lower part of scalar values in proper format leaving the
1435 upper part undefined. */
1438 /* X86_TUNE_SSE_TYPELESS_STORES */
1441 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1442 m_PPRO | m_PENT4 | m_NOCONA,
1444 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1445 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1447 /* X86_TUNE_PROLOGUE_USING_MOVE */
1448 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1450 /* X86_TUNE_EPILOGUE_USING_MOVE */
1451 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1453 /* X86_TUNE_SHIFT1 */
1456 /* X86_TUNE_USE_FFREEP */
1459 /* X86_TUNE_INTER_UNIT_MOVES */
1460 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1462 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1465 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1466 than 4 branch instructions in the 16 byte window. */
1467 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1470 /* X86_TUNE_SCHEDULE */
1471 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1474 /* X86_TUNE_USE_BT */
1475 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1477 /* X86_TUNE_USE_INCDEC */
1478 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1480 /* X86_TUNE_PAD_RETURNS */
1481 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1483 /* X86_TUNE_EXT_80387_CONSTANTS */
1484 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_SHORTEN_X87_SSE */
1490 /* X86_TUNE_AVOID_VECTOR_DECODE */
1493 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1494 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1497 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1498 vector path on AMD machines. */
1499 m_K8 | m_GENERIC64 | m_AMDFAM10,
1501 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1503 m_K8 | m_GENERIC64 | m_AMDFAM10,
1505 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1509 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1510 but one byte longer. */
1513 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1514 operand that cannot be represented using a modRM byte. The XOR
1515 replacement is long decoded, so this split helps here as well. */
1518 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1520 m_AMDFAM10 | m_GENERIC,
1522 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1523 from integer to FP. */
1526 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1527 with a subsequent conditional jump instruction into a single
1528 compare-and-branch uop. */
1531 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1532 will impact LEA instruction selection. */
1536 /* Feature tests against the various architecture variations. */
1537 unsigned char ix86_arch_features[X86_ARCH_LAST];
1539 /* Feature tests against the various architecture variations, used to create
1540 ix86_arch_features based on the processor mask. */
/* One mask per X86_ARCH_* enumerator, in enum order.
   NOTE(review): only the first entry's value line is present in this
   listing; the values for CMPXCHG/CMPXCHG8B/XADD/BSWAP and the closing
   "};" appear to be missing -- verify against the full source.  */
1541 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1542 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1543 ~(m_386 | m_486 | m_PENT | m_K6),
1545 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1548 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1551 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1554 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* CPU masks that prefer accumulating outgoing args / always using
   387 math.  NOTE(review): the first initializer is truncated in this
   listing (no terminating ";" visible after m_CORE2) -- verify the
   missing tail against the full source.  */
1558 static const unsigned int x86_accumulate_outgoing_args
1559 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
/* CPUs for which x87 math is always "fancy" (full 80-bit) regardless
   of options.  */
1562 static const unsigned int x86_arch_always_fancy_math_387
1563 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1564 | m_NOCONA | m_CORE2 | m_GENERIC;
/* String-operation algorithm forced by -mstringop-strategy; no_stringop
   means no override.  */
1566 static enum stringop_alg stringop_alg = no_stringop;
1568 /* In case the average insn count for single function invocation is
1569 lower than this constant, emit fast (but longer) prologue and
/* NOTE(review): the end of the comment above (presumably "...epilogue.")
   is missing from this listing.  */
1571 #define FAST_PROLOGUE_INSN_COUNT 20
1573 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* Initializer macros (QI_REGISTER_NAMES etc.) are defined in the
   target headers, not in this file.  */
1574 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1575 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1576 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1578 /* Array of the smallest class containing reg number REGNO, indexed by
1579 REGNO. Used by REGNO_REG_CLASS in i386.h. */
/* NOTE(review): the opening "{" of the initializer and the closing
   "};" are missing from this listing, and some group-header comments
   (fp regs, SSE, MMX, REX integer) appear to be absent -- verify
   against the full source.  */
1581 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1583 /* ax, dx, cx, bx */
1584 AREG, DREG, CREG, BREG,
1585 /* si, di, bp, sp */
1586 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1588 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1589 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1592 /* flags, fpsr, fpcr, frame */
1593 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1595 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1598 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1601 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 /* SSE REX registers */
1604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1608 /* The "default" register map used in 32bit mode. */
/* Maps GCC register numbers to DBX/stabs numbers; -1 marks registers
   with no debug-info number.  NOTE(review): the opening "{" and
   closing "};" of the initializer are missing from this listing.  */
1610 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1612 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1613 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1614 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1615 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1616 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1617 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1621 /* The "default" register map used in 64bit mode. */
/* 64-bit DBX numbering: REX integer and extended SSE registers get
   real numbers here, unlike the 32-bit map above.  NOTE(review): the
   opening "{" and closing "};" are missing from this listing.  */
1623 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1625 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1626 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1627 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1628 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1629 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1630 8,9,10,11,12,13,14,15, /* extended integer registers */
1631 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1634 /* Define the register numbers to be used in Dwarf debugging information.
1635 The SVR4 reference port C compiler uses the following register numbers
1636 in its Dwarf output code:
1637 0 for %eax (gcc regno = 0)
1638 1 for %ecx (gcc regno = 2)
1639 2 for %edx (gcc regno = 1)
1640 3 for %ebx (gcc regno = 3)
1641 4 for %esp (gcc regno = 7)
1642 5 for %ebp (gcc regno = 6)
1643 6 for %esi (gcc regno = 4)
1644 7 for %edi (gcc regno = 5)
1645 The following three DWARF register numbers are never generated by
1646 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1647 believes these numbers have these meanings.
1648 8 for %eip (no gcc equivalent)
1649 9 for %eflags (gcc regno = 17)
1650 10 for %trapno (no gcc equivalent)
1651 It is not at all clear how we should number the FP stack registers
1652 for the x86 architecture. If the version of SDB on x86/svr4 were
1653 a bit less brain dead with respect to floating-point then we would
1654 have a precedent to follow with respect to DWARF register numbers
1655 for x86 FP registers, but the SDB on x86/svr4 is so completely
1656 broken with respect to FP registers that it is hardly worth thinking
1657 of it as something to strive for compatibility with.
1658 The version of x86/svr4 SDB I have at the moment does (partially)
1659 seem to believe that DWARF register number 11 is associated with
1660 the x86 register %st(0), but that's about all. Higher DWARF
1661 register numbers don't seem to be associated with anything in
1662 particular, and even for DWARF regno 11, SDB only seems to under-
1663 stand that it should say that a variable lives in %st(0) (when
1664 asked via an `=' command) if we said it was in DWARF regno 11,
1665 but SDB still prints garbage when asked for the value of the
1666 variable in question (via a `/' command).
1667 (Also note that the labels SDB prints for various FP stack regs
1668 when doing an `x' command are all wrong.)
1669 Note that these problems generally don't affect the native SVR4
1670 C compiler because it doesn't allow the use of -O with -g and
1671 because when it is *not* optimizing, it allocates a memory
1672 location for each floating-point variable, and the memory
1673 location is what gets described in the DWARF AT_location
1674 attribute for the variable in question.
1675 Regardless of the severe mental illness of the x86/svr4 SDB, we
1676 do something sensible here and we use the following DWARF
1677 register numbers. Note that these are all stack-top-relative
1679 11 for %st(0) (gcc regno = 8)
1680 12 for %st(1) (gcc regno = 9)
1681 13 for %st(2) (gcc regno = 10)
1682 14 for %st(3) (gcc regno = 11)
1683 15 for %st(4) (gcc regno = 12)
1684 16 for %st(5) (gcc regno = 13)
1685 17 for %st(6) (gcc regno = 14)
1686 18 for %st(7) (gcc regno = 15)
/* Map GCC hard-register numbers to the DWARF register numbers emitted for
   svr4-style debugging (stack-top-relative FP numbering per the comment
   above).  An entry of -1 means the register has no DWARF number.  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
1699 /* Test and compare insns in i386.md store the information needed to
1700 generate branch and scc insns here. */
1702 rtx ix86_compare_op0 = NULL_RTX;
1703 rtx ix86_compare_op1 = NULL_RTX;
/* Define parameter passing and return registers.  */

/* Integer argument registers for the SysV x86-64 ABI, in argument order.  */
static int const x86_64_int_parameter_registers[6] =
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG

/* Integer argument registers for the Microsoft x64 ABI, in argument
   order.  */
static int const x86_64_ms_abi_int_parameter_registers[4] =
  CX_REG, DX_REG, R8_REG, R9_REG

/* Integer registers used for return values.  NOTE(review): the DI/SI
   entries go beyond the usual AX/DX value-return pair — confirm against
   the callers which slots they serve.  */
static int const x86_64_int_return_registers[4] =
  AX_REG, DX_REG, DI_REG, SI_REG
/* Define the structure for the machine field in struct function.  */

/* Linked-list node describing one cached stack-local slot; slots are
   distinguished by machine mode (stored narrowed to unsigned short).  */
struct GTY(()) stack_local_entry {
  unsigned short mode;	/* machine mode of the slot */
  struct stack_local_entry *next;	/* next entry in the per-function list */
1731 /* Structure describing stack frame layout.
1732 Stack grows downward:
1738 saved frame pointer if frame_pointer_needed
1739 <- HARD_FRAME_POINTER
1748 [va_arg registers] (
1749 > to_allocate <- FRAME_POINTER
1761 HOST_WIDE_INT frame;
1763 int outgoing_arguments_size;
1766 HOST_WIDE_INT to_allocate;
1767 /* The offsets relative to ARG_POINTER. */
1768 HOST_WIDE_INT frame_pointer_offset;
1769 HOST_WIDE_INT hard_frame_pointer_offset;
1770 HOST_WIDE_INT stack_pointer_offset;
1772 /* When save_regs_using_mov is set, emit prologue using
1773 move instead of push instructions. */
1774 bool save_regs_using_mov;
1777 /* Code model option. */
1778 enum cmodel ix86_cmodel;
1780 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1782 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1784 /* Which unit we are generating floating point math for. */
1785 enum fpmath_unit ix86_fpmath;
1787 /* Which cpu are we scheduling for. */
1788 enum attr_cpu ix86_schedule;
1790 /* Which cpu are we optimizing for. */
1791 enum processor_type ix86_tune;
1793 /* Which instruction set architecture to use. */
1794 enum processor_type ix86_arch;
1796 /* true if sse prefetch instruction is not NOOP. */
1797 int x86_prefetch_sse;
1799 /* ix86_regparm_string as a number */
1800 static int ix86_regparm;
1802 /* -mstackrealign option */
1803 extern int ix86_force_align_arg_pointer;
1804 static const char ix86_force_align_arg_pointer_string[]
1805 = "force_align_arg_pointer";
1807 static rtx (*ix86_gen_leave) (void);
1808 static rtx (*ix86_gen_pop1) (rtx);
1809 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1810 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1811 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1812 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1813 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1814 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1816 /* Preferred alignment for stack boundary in bits. */
1817 unsigned int ix86_preferred_stack_boundary;
1819 /* Alignment for incoming stack boundary in bits specified at
1821 static unsigned int ix86_user_incoming_stack_boundary;
1823 /* Default alignment for incoming stack boundary in bits. */
1824 static unsigned int ix86_default_incoming_stack_boundary;
1826 /* Alignment for incoming stack boundary in bits. */
1827 unsigned int ix86_incoming_stack_boundary;
1829 /* The abi used by target. */
1830 enum calling_abi ix86_abi;
1832 /* Values 1-5: see jump.c */
1833 int ix86_branch_cost;
1835 /* Calling abi specific va_list type nodes. */
1836 static GTY(()) tree sysv_va_list_type_node;
1837 static GTY(()) tree ms_va_list_type_node;
1839 /* Variables which are this size or smaller are put in the data/bss
1840 or ldata/lbss sections. */
1842 int ix86_section_threshold = 65536;
1844 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1845 char internal_label_prefix[16];
1846 int internal_label_prefix_len;
1848 /* Fence to use after loop using movnt. */
1851 /* Register class used for passing given 64bit part of the argument.
1852 These represent classes as documented by the PS ABI, with the exception
1853 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1854 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1856 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1857 whenever possible (upper half does contain padding). */
1858 enum x86_64_reg_class
1861 X86_64_INTEGER_CLASS,
1862 X86_64_INTEGERSI_CLASS,
1869 X86_64_COMPLEX_X87_CLASS,
1873 #define MAX_CLASSES 4
1875 /* Table of constants used by fldpi, fldln2, etc.... */
1876 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1877 static bool ext_80387_constants_init = 0;
1880 static struct machine_function * ix86_init_machine_status (void);
1881 static rtx ix86_function_value (const_tree, const_tree, bool);
1882 static int ix86_function_regparm (const_tree, const_tree);
1883 static void ix86_compute_frame_layout (struct ix86_frame *);
1884 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1886 static void ix86_add_new_builtins (int);
/* Indices of the option strings handled by the function-specific
   target-option routines declared below (save/restore/print); _MAX is
   the count.  */
enum ix86_function_specific_strings
  IX86_FUNCTION_SPECIFIC_ARCH,		/* -march= string */
  IX86_FUNCTION_SPECIFIC_TUNE,		/* -mtune= string */
  IX86_FUNCTION_SPECIFIC_FPMATH,	/* -mfpmath= string */
  IX86_FUNCTION_SPECIFIC_MAX
1896 static char *ix86_target_string (int, int, const char *, const char *,
1897 const char *, bool);
1898 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1899 static void ix86_function_specific_save (struct cl_target_option *);
1900 static void ix86_function_specific_restore (struct cl_target_option *);
1901 static void ix86_function_specific_print (FILE *, int,
1902 struct cl_target_option *);
1903 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1904 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1905 static bool ix86_can_inline_p (tree, tree);
1906 static void ix86_set_current_function (tree);
1908 static enum calling_abi ix86_function_abi (const_tree);
1911 /* The svr4 ABI for the i386 says that records and unions are returned
1913 #ifndef DEFAULT_PCC_STRUCT_RETURN
1914 #define DEFAULT_PCC_STRUCT_RETURN 1
1917 /* Whether -mtune= or -march= were specified */
1918 static int ix86_tune_defaulted;
1919 static int ix86_arch_specified;
1921 /* Bit flags that specify the ISA we are compiling for. */
1922 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1924 /* A mask of ix86_isa_flags that includes bit X if X
1925 was set or cleared on the command line. */
1926 static int ix86_isa_flags_explicit;
1928 /* Define a set of ISAs which are available when a given ISA is
1929 enabled. MMX and SSE ISAs are handled separately. */
1931 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1932 #define OPTION_MASK_ISA_3DNOW_SET \
1933 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1935 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1936 #define OPTION_MASK_ISA_SSE2_SET \
1937 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1938 #define OPTION_MASK_ISA_SSE3_SET \
1939 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1940 #define OPTION_MASK_ISA_SSSE3_SET \
1941 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1942 #define OPTION_MASK_ISA_SSE4_1_SET \
1943 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1944 #define OPTION_MASK_ISA_SSE4_2_SET \
1945 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1946 #define OPTION_MASK_ISA_AVX_SET \
1947 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1948 #define OPTION_MASK_ISA_FMA_SET \
1949 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1951 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1953 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1955 #define OPTION_MASK_ISA_SSE4A_SET \
1956 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1957 #define OPTION_MASK_ISA_SSE5_SET \
1958 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1960 /* AES and PCLMUL need SSE2 because they use xmm registers */
1961 #define OPTION_MASK_ISA_AES_SET \
1962 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1963 #define OPTION_MASK_ISA_PCLMUL_SET \
1964 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1966 #define OPTION_MASK_ISA_ABM_SET \
1967 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1969 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1970 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1971 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1972 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
1973 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
1975 /* Define a set of ISAs which aren't available when a given ISA is
1976 disabled. MMX and SSE ISAs are handled separately. */
1978 #define OPTION_MASK_ISA_MMX_UNSET \
1979 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1980 #define OPTION_MASK_ISA_3DNOW_UNSET \
1981 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1982 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1984 #define OPTION_MASK_ISA_SSE_UNSET \
1985 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1986 #define OPTION_MASK_ISA_SSE2_UNSET \
1987 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1988 #define OPTION_MASK_ISA_SSE3_UNSET \
1989 (OPTION_MASK_ISA_SSE3 \
1990 | OPTION_MASK_ISA_SSSE3_UNSET \
1991 | OPTION_MASK_ISA_SSE4A_UNSET )
1992 #define OPTION_MASK_ISA_SSSE3_UNSET \
1993 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1994 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1995 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1996 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1997 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1998 #define OPTION_MASK_ISA_AVX_UNSET \
1999 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
2000 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
2004 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2006 #define OPTION_MASK_ISA_SSE4A_UNSET \
2007 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2008 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2009 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2010 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2011 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2012 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2013 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2014 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2015 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2016 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2018 /* Vectorization library interface and handlers. */
2019 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2020 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2021 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */

/* Fields of struct ptt: per-processor costs plus default alignment
   parameters used when the user does not override them.  */
const struct processor_costs *cost;		/* Processor costs */
const int align_loop;				/* Default alignments.  */
const int align_loop_max_skip;
const int align_jump;
const int align_jump_max_skip;
const int align_func;

/* One entry per PROCESSOR_* value, in enum order.  Field order matches
   struct ptt above.  NOTE(review): a 0 alignment appears to mean "use the
   compiler's default" — confirm where these values are consumed.  */
static const struct ptt processor_target_table[PROCESSOR_max] =
  {&i386_cost, 4, 3, 4, 3, 4},
  {&i486_cost, 16, 15, 16, 15, 16},
  {&pentium_cost, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 16, 15, 16, 10, 16},
  {&geode_cost, 0, 0, 0, 0, 0},
  {&k6_cost, 32, 7, 32, 7, 32},
  {&athlon_cost, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0},
  {&k8_cost, 16, 7, 16, 7, 16},
  {&nocona_cost, 0, 0, 0, 0, 0},
  {&core2_cost, 16, 10, 16, 10, 16},
  {&generic32_cost, 16, 7, 16, 7, 16},
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&amdfam10_cost, 32, 24, 32, 7, 32},
  {&atom_cost, 16, 7, 16, 7, 16}
2053 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2079 /* Implement TARGET_HANDLE_OPTION. */
2082 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2089 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2090 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2094 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2095 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2102 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2103 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2107 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2108 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2118 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2119 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2123 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2124 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2131 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2132 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2136 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2137 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2144 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2145 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2149 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2150 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2157 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2158 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2162 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2163 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2170 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2171 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2175 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2176 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2183 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2184 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2188 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2189 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2196 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2197 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2201 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2202 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2209 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2210 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2214 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2215 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2220 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2221 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2225 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2226 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2232 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2233 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2237 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2238 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2245 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2246 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2250 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2251 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2258 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2259 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2263 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2264 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2271 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2272 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2276 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2277 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2284 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2285 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2289 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2290 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2297 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2298 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2302 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2303 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2310 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2311 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2315 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2316 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2323 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2324 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2328 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2329 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2336 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2337 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2341 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2342 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2349 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2350 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2354 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2355 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2364 /* Return a string the documents the current -m options. The caller is
2365 responsible for freeing the string. */
2368 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2369 const char *fpmath, bool add_nl_p)
2371 struct ix86_target_opts
2373 const char *option; /* option string */
2374 int mask; /* isa mask options */
2377 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2378 preceding options while match those first. */
2379 static struct ix86_target_opts isa_opts[] =
2381 { "-m64", OPTION_MASK_ISA_64BIT },
2382 { "-msse5", OPTION_MASK_ISA_SSE5 },
2383 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2384 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2385 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2386 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2387 { "-msse3", OPTION_MASK_ISA_SSE3 },
2388 { "-msse2", OPTION_MASK_ISA_SSE2 },
2389 { "-msse", OPTION_MASK_ISA_SSE },
2390 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2391 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2392 { "-mmmx", OPTION_MASK_ISA_MMX },
2393 { "-mabm", OPTION_MASK_ISA_ABM },
2394 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2395 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2396 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2397 { "-maes", OPTION_MASK_ISA_AES },
2398 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2402 static struct ix86_target_opts flag_opts[] =
2404 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2405 { "-m80387", MASK_80387 },
2406 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2407 { "-malign-double", MASK_ALIGN_DOUBLE },
2408 { "-mcld", MASK_CLD },
2409 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2410 { "-mieee-fp", MASK_IEEE_FP },
2411 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2412 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2413 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2414 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2415 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2416 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2417 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2418 { "-mno-red-zone", MASK_NO_RED_ZONE },
2419 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2420 { "-mrecip", MASK_RECIP },
2421 { "-mrtd", MASK_RTD },
2422 { "-msseregparm", MASK_SSEREGPARM },
2423 { "-mstack-arg-probe", MASK_STACK_PROBE },
2424 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2427 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2430 char target_other[40];
2439 memset (opts, '\0', sizeof (opts));
2441 /* Add -march= option. */
2444 opts[num][0] = "-march=";
2445 opts[num++][1] = arch;
2448 /* Add -mtune= option. */
2451 opts[num][0] = "-mtune=";
2452 opts[num++][1] = tune;
2455 /* Pick out the options in isa options. */
2456 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2458 if ((isa & isa_opts[i].mask) != 0)
2460 opts[num++][0] = isa_opts[i].option;
2461 isa &= ~ isa_opts[i].mask;
2465 if (isa && add_nl_p)
2467 opts[num++][0] = isa_other;
2468 sprintf (isa_other, "(other isa: 0x%x)", isa);
2471 /* Add flag options. */
2472 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2474 if ((flags & flag_opts[i].mask) != 0)
2476 opts[num++][0] = flag_opts[i].option;
2477 flags &= ~ flag_opts[i].mask;
2481 if (flags && add_nl_p)
2483 opts[num++][0] = target_other;
2484 sprintf (target_other, "(other flags: 0x%x)", isa);
2487 /* Add -fpmath= option. */
2490 opts[num][0] = "-mfpmath=";
2491 opts[num++][1] = fpmath;
2498 gcc_assert (num < ARRAY_SIZE (opts));
2500 /* Size the string. */
2502 sep_len = (add_nl_p) ? 3 : 1;
2503 for (i = 0; i < num; i++)
2506 for (j = 0; j < 2; j++)
2508 len += strlen (opts[i][j]);
2511 /* Build the string. */
2512 ret = ptr = (char *) xmalloc (len);
2515 for (i = 0; i < num; i++)
2519 for (j = 0; j < 2; j++)
2520 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2527 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2535 for (j = 0; j < 2; j++)
2538 memcpy (ptr, opts[i][j], len2[j]);
2540 line_len += len2[j];
2545 gcc_assert (ret + len >= ptr);
2550 /* Function that is callable from the debugger to print the current
2553 ix86_debug_options (void)
2555 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2556 ix86_arch_string, ix86_tune_string,
2557 ix86_fpmath_string, true);
2561 fprintf (stderr, "%s\n\n", opts);
2565 fprintf (stderr, "<no options>\n\n");
2570 /* Sometimes certain combinations of command options do not make
2571 sense on a particular target machine. You can define a macro
2572 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2573 defined, is executed once just after all the command options have
2576 Don't use this macro to turn on various extra optimizations for
2577 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2580 override_options (bool main_args_p)
2583 unsigned int ix86_arch_mask, ix86_tune_mask;
2588 /* Comes from final.c -- no real reason to change it. */
2589 #define MAX_CODE_ALIGN 16
2597 PTA_PREFETCH_SSE = 1 << 4,
2599 PTA_3DNOW_A = 1 << 6,
2603 PTA_POPCNT = 1 << 10,
2605 PTA_SSE4A = 1 << 12,
2606 PTA_NO_SAHF = 1 << 13,
2607 PTA_SSE4_1 = 1 << 14,
2608 PTA_SSE4_2 = 1 << 15,
2611 PTA_PCLMUL = 1 << 18,
2619 const char *const name; /* processor name or nickname. */
2620 const enum processor_type processor;
2621 const enum attr_cpu schedule;
2622 const unsigned /*enum pta_flags*/ flags;
2624 const processor_alias_table[] =
2626 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2627 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2628 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2629 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2630 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2631 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2632 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2633 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2634 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2635 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2636 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2637 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2638 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2640 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2642 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2643 PTA_MMX | PTA_SSE | PTA_SSE2},
2644 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2645 PTA_MMX |PTA_SSE | PTA_SSE2},
2646 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2647 PTA_MMX | PTA_SSE | PTA_SSE2},
2648 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2649 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2650 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2651 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2652 | PTA_CX16 | PTA_NO_SAHF},
2653 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2654 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2655 | PTA_SSSE3 | PTA_CX16},
2656 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2657 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2658 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2659 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2660 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2661 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2662 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2663 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2664 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2665 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2666 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2667 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2668 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2669 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2670 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2671 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2672 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2673 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2674 {"x86-64", PROCESSOR_K8, CPU_K8,
2675 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2676 {"k8", PROCESSOR_K8, CPU_K8,
2677 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2678 | PTA_SSE2 | PTA_NO_SAHF},
2679 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2680 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2681 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2682 {"opteron", PROCESSOR_K8, CPU_K8,
2683 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2684 | PTA_SSE2 | PTA_NO_SAHF},
2685 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2686 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2687 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2688 {"athlon64", PROCESSOR_K8, CPU_K8,
2689 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2690 | PTA_SSE2 | PTA_NO_SAHF},
2691 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2692 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2693 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2694 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2695 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2696 | PTA_SSE2 | PTA_NO_SAHF},
2697 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2698 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2699 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2700 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2701 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2702 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2703 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2704 0 /* flags are only used for -march switch. */ },
2705 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2706 PTA_64BIT /* flags are only used for -march switch. */ },
2709 int const pta_size = ARRAY_SIZE (processor_alias_table);
2711 /* Set up prefix/suffix so the error messages refer to either the command
2712 line argument, or the attribute(target). */
2721 prefix = "option(\"";
2726 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2727 SUBTARGET_OVERRIDE_OPTIONS;
2730 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2731 SUBSUBTARGET_OVERRIDE_OPTIONS;
2734 /* -fPIC is the default for x86_64. */
2735 if (TARGET_MACHO && TARGET_64BIT)
2738 /* Set the default values for switches whose default depends on TARGET_64BIT
2739 in case they weren't overwritten by command line options. */
2742 /* Mach-O doesn't support omitting the frame pointer for now. */
2743 if (flag_omit_frame_pointer == 2)
2744 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2745 if (flag_asynchronous_unwind_tables == 2)
2746 flag_asynchronous_unwind_tables = 1;
2747 if (flag_pcc_struct_return == 2)
2748 flag_pcc_struct_return = 0;
2752 if (flag_omit_frame_pointer == 2)
2753 flag_omit_frame_pointer = 0;
2754 if (flag_asynchronous_unwind_tables == 2)
2755 flag_asynchronous_unwind_tables = 0;
2756 if (flag_pcc_struct_return == 2)
2757 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2760 /* Need to check -mtune=generic first. */
2761 if (ix86_tune_string)
2763 if (!strcmp (ix86_tune_string, "generic")
2764 || !strcmp (ix86_tune_string, "i686")
2765 /* As special support for cross compilers we read -mtune=native
2766 as -mtune=generic. With native compilers we won't see the
2767 -mtune=native, as it was changed by the driver. */
2768 || !strcmp (ix86_tune_string, "native"))
2771 ix86_tune_string = "generic64";
2773 ix86_tune_string = "generic32";
2775 /* If this call is for setting the option attribute, allow the
2776 generic32/generic64 that was previously set. */
2777 else if (!main_args_p
2778 && (!strcmp (ix86_tune_string, "generic32")
2779 || !strcmp (ix86_tune_string, "generic64")))
2781 else if (!strncmp (ix86_tune_string, "generic", 7))
2782 error ("bad value (%s) for %stune=%s %s",
2783 ix86_tune_string, prefix, suffix, sw);
2787 if (ix86_arch_string)
2788 ix86_tune_string = ix86_arch_string;
2789 if (!ix86_tune_string)
2791 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2792 ix86_tune_defaulted = 1;
2795 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2796 need to use a sensible tune option. */
2797 if (!strcmp (ix86_tune_string, "generic")
2798 || !strcmp (ix86_tune_string, "x86-64")
2799 || !strcmp (ix86_tune_string, "i686"))
2802 ix86_tune_string = "generic64";
2804 ix86_tune_string = "generic32";
2807 if (ix86_stringop_string)
2809 if (!strcmp (ix86_stringop_string, "rep_byte"))
2810 stringop_alg = rep_prefix_1_byte;
2811 else if (!strcmp (ix86_stringop_string, "libcall"))
2812 stringop_alg = libcall;
2813 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2814 stringop_alg = rep_prefix_4_byte;
2815 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2817 /* rep; movq isn't available in 32-bit code. */
2818 stringop_alg = rep_prefix_8_byte;
2819 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2820 stringop_alg = loop_1_byte;
2821 else if (!strcmp (ix86_stringop_string, "loop"))
2822 stringop_alg = loop;
2823 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2824 stringop_alg = unrolled_loop;
2826 error ("bad value (%s) for %sstringop-strategy=%s %s",
2827 ix86_stringop_string, prefix, suffix, sw);
2829 if (!strcmp (ix86_tune_string, "x86-64"))
2830 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2831 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2832 prefix, suffix, prefix, suffix, prefix, suffix);
2834 if (!ix86_arch_string)
2835 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2837 ix86_arch_specified = 1;
2839 if (!strcmp (ix86_arch_string, "generic"))
2840 error ("generic CPU can be used only for %stune=%s %s",
2841 prefix, suffix, sw);
2842 if (!strncmp (ix86_arch_string, "generic", 7))
2843 error ("bad value (%s) for %sarch=%s %s",
2844 ix86_arch_string, prefix, suffix, sw);
2846 /* Validate -mabi= value. */
2847 if (ix86_abi_string)
2849 if (strcmp (ix86_abi_string, "sysv") == 0)
2850 ix86_abi = SYSV_ABI;
2851 else if (strcmp (ix86_abi_string, "ms") == 0)
2854 error ("unknown ABI (%s) for %sabi=%s %s",
2855 ix86_abi_string, prefix, suffix, sw);
2858 ix86_abi = DEFAULT_ABI;
2860 if (ix86_cmodel_string != 0)
2862 if (!strcmp (ix86_cmodel_string, "small"))
2863 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2864 else if (!strcmp (ix86_cmodel_string, "medium"))
2865 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2866 else if (!strcmp (ix86_cmodel_string, "large"))
2867 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2869 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2870 else if (!strcmp (ix86_cmodel_string, "32"))
2871 ix86_cmodel = CM_32;
2872 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2873 ix86_cmodel = CM_KERNEL;
2875 error ("bad value (%s) for %scmodel=%s %s",
2876 ix86_cmodel_string, prefix, suffix, sw);
2880 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2881 use of rip-relative addressing. This eliminates fixups that
2882 would otherwise be needed if this object is to be placed in a
2883 DLL, and is essentially just as efficient as direct addressing. */
2884 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2885 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2886 else if (TARGET_64BIT)
2887 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2889 ix86_cmodel = CM_32;
2891 if (ix86_asm_string != 0)
2894 && !strcmp (ix86_asm_string, "intel"))
2895 ix86_asm_dialect = ASM_INTEL;
2896 else if (!strcmp (ix86_asm_string, "att"))
2897 ix86_asm_dialect = ASM_ATT;
2899 error ("bad value (%s) for %sasm=%s %s",
2900 ix86_asm_string, prefix, suffix, sw);
2902 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2903 error ("code model %qs not supported in the %s bit mode",
2904 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2905 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2906 sorry ("%i-bit mode not compiled in",
2907 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2909 for (i = 0; i < pta_size; i++)
2910 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2912 ix86_schedule = processor_alias_table[i].schedule;
2913 ix86_arch = processor_alias_table[i].processor;
2914 /* Default cpu tuning to the architecture. */
2915 ix86_tune = ix86_arch;
2917 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2918 error ("CPU you selected does not support x86-64 "
2921 if (processor_alias_table[i].flags & PTA_MMX
2922 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2923 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2924 if (processor_alias_table[i].flags & PTA_3DNOW
2925 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2926 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2927 if (processor_alias_table[i].flags & PTA_3DNOW_A
2928 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2929 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2930 if (processor_alias_table[i].flags & PTA_SSE
2931 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2932 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2933 if (processor_alias_table[i].flags & PTA_SSE2
2934 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2935 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2936 if (processor_alias_table[i].flags & PTA_SSE3
2937 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2938 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2939 if (processor_alias_table[i].flags & PTA_SSSE3
2940 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2941 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2942 if (processor_alias_table[i].flags & PTA_SSE4_1
2943 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2944 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2945 if (processor_alias_table[i].flags & PTA_SSE4_2
2946 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2947 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2948 if (processor_alias_table[i].flags & PTA_AVX
2949 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2950 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2951 if (processor_alias_table[i].flags & PTA_FMA
2952 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2953 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2954 if (processor_alias_table[i].flags & PTA_SSE4A
2955 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2956 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2957 if (processor_alias_table[i].flags & PTA_SSE5
2958 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2959 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2960 if (processor_alias_table[i].flags & PTA_ABM
2961 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2962 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2963 if (processor_alias_table[i].flags & PTA_CX16
2964 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2965 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2966 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2967 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2968 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2969 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2970 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2971 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2972 if (processor_alias_table[i].flags & PTA_MOVBE
2973 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
2974 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
2975 if (processor_alias_table[i].flags & PTA_AES
2976 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2977 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2978 if (processor_alias_table[i].flags & PTA_PCLMUL
2979 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2980 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2981 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2982 x86_prefetch_sse = true;
2988 error ("bad value (%s) for %sarch=%s %s",
2989 ix86_arch_string, prefix, suffix, sw);
2991 ix86_arch_mask = 1u << ix86_arch;
2992 for (i = 0; i < X86_ARCH_LAST; ++i)
2993 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2995 for (i = 0; i < pta_size; i++)
2996 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2998 ix86_schedule = processor_alias_table[i].schedule;
2999 ix86_tune = processor_alias_table[i].processor;
3000 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3002 if (ix86_tune_defaulted)
3004 ix86_tune_string = "x86-64";
3005 for (i = 0; i < pta_size; i++)
3006 if (! strcmp (ix86_tune_string,
3007 processor_alias_table[i].name))
3009 ix86_schedule = processor_alias_table[i].schedule;
3010 ix86_tune = processor_alias_table[i].processor;
3013 error ("CPU you selected does not support x86-64 "
3016 /* Intel CPUs have always interpreted SSE prefetch instructions as
3017 NOPs; so, we can enable SSE prefetch instructions even when
3018 -mtune (rather than -march) points us to a processor that has them.
3019 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3020 higher processors. */
3022 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3023 x86_prefetch_sse = true;
3027 error ("bad value (%s) for %stune=%s %s",
3028 ix86_tune_string, prefix, suffix, sw);
3030 ix86_tune_mask = 1u << ix86_tune;
3031 for (i = 0; i < X86_TUNE_LAST; ++i)
3032 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3035 ix86_cost = &ix86_size_cost;
3037 ix86_cost = processor_target_table[ix86_tune].cost;
3039 /* Arrange to set up i386_stack_locals for all functions. */
3040 init_machine_status = ix86_init_machine_status;
3042 /* Validate -mregparm= value. */
3043 if (ix86_regparm_string)
3046 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3047 i = atoi (ix86_regparm_string);
3048 if (i < 0 || i > REGPARM_MAX)
3049 error ("%sregparm=%d%s is not between 0 and %d",
3050 prefix, i, suffix, REGPARM_MAX);
3055 ix86_regparm = REGPARM_MAX;
3057 /* If the user has provided any of the -malign-* options,
3058 warn and use that value only if -falign-* is not set.
3059 Remove this code in GCC 3.2 or later. */
3060 if (ix86_align_loops_string)
3062 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3063 prefix, suffix, suffix);
3064 if (align_loops == 0)
3066 i = atoi (ix86_align_loops_string);
3067 if (i < 0 || i > MAX_CODE_ALIGN)
3068 error ("%salign-loops=%d%s is not between 0 and %d",
3069 prefix, i, suffix, MAX_CODE_ALIGN);
3071 align_loops = 1 << i;
3075 if (ix86_align_jumps_string)
3077 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3078 prefix, suffix, suffix);
3079 if (align_jumps == 0)
3081 i = atoi (ix86_align_jumps_string);
3082 if (i < 0 || i > MAX_CODE_ALIGN)
3083 error ("%salign-loops=%d%s is not between 0 and %d",
3084 prefix, i, suffix, MAX_CODE_ALIGN);
3086 align_jumps = 1 << i;
3090 if (ix86_align_funcs_string)
3092 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3093 prefix, suffix, suffix);
3094 if (align_functions == 0)
3096 i = atoi (ix86_align_funcs_string);
3097 if (i < 0 || i > MAX_CODE_ALIGN)
3098 error ("%salign-loops=%d%s is not between 0 and %d",
3099 prefix, i, suffix, MAX_CODE_ALIGN);
3101 align_functions = 1 << i;
3105 /* Default align_* from the processor table. */
3106 if (align_loops == 0)
3108 align_loops = processor_target_table[ix86_tune].align_loop;
3109 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3111 if (align_jumps == 0)
3113 align_jumps = processor_target_table[ix86_tune].align_jump;
3114 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3116 if (align_functions == 0)
3118 align_functions = processor_target_table[ix86_tune].align_func;
3121 /* Validate -mbranch-cost= value, or provide default. */
3122 ix86_branch_cost = ix86_cost->branch_cost;
3123 if (ix86_branch_cost_string)
3125 i = atoi (ix86_branch_cost_string);
3127 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3129 ix86_branch_cost = i;
3131 if (ix86_section_threshold_string)
3133 i = atoi (ix86_section_threshold_string);
3135 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3137 ix86_section_threshold = i;
3140 if (ix86_tls_dialect_string)
3142 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3143 ix86_tls_dialect = TLS_DIALECT_GNU;
3144 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3145 ix86_tls_dialect = TLS_DIALECT_GNU2;
3146 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3147 ix86_tls_dialect = TLS_DIALECT_SUN;
3149 error ("bad value (%s) for %stls-dialect=%s %s",
3150 ix86_tls_dialect_string, prefix, suffix, sw);
3153 if (ix87_precision_string)
3155 i = atoi (ix87_precision_string);
3156 if (i != 32 && i != 64 && i != 80)
3157 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3162 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3164 /* Enable by default the SSE and MMX builtins. Do allow the user to
3165 explicitly disable any of these. In particular, disabling SSE and
3166 MMX for kernel code is extremely useful. */
3167 if (!ix86_arch_specified)
3169 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3170 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3173 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3177 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3179 if (!ix86_arch_specified)
3181 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3183 /* i386 ABI does not specify red zone. It still makes sense to use it
3184    when the programmer takes care to prevent the stack from being destroyed. */
3185 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3186 target_flags |= MASK_NO_RED_ZONE;
3189 /* Keep nonleaf frame pointers. */
3190 if (flag_omit_frame_pointer)
3191 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3192 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3193 flag_omit_frame_pointer = 1;
3195 /* If we're doing fast math, we don't care about comparison order
3196 wrt NaNs. This lets us use a shorter comparison sequence. */
3197 if (flag_finite_math_only)
3198 target_flags &= ~MASK_IEEE_FP;
3200 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3201 since the insns won't need emulation. */
3202 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3203 target_flags &= ~MASK_NO_FANCY_MATH_387;
3205 /* Likewise, if the target doesn't have a 387, or we've specified
3206 software floating point, don't use 387 inline intrinsics. */
3208 target_flags |= MASK_NO_FANCY_MATH_387;
3210 /* Turn on MMX builtins for -msse. */
3213 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3214 x86_prefetch_sse = true;
3217 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3218 if (TARGET_SSE4_2 || TARGET_ABM)
3219 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3221 /* Validate -mpreferred-stack-boundary= value or default it to
3222 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3223 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3224 if (ix86_preferred_stack_boundary_string)
3226 i = atoi (ix86_preferred_stack_boundary_string);
3227 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3228 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3229 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3231 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3234 /* Set the default value for -mstackrealign. */
3235 if (ix86_force_align_arg_pointer == -1)
3236 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3238 /* Validate -mincoming-stack-boundary= value or default it to
3239 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3240 if (ix86_force_align_arg_pointer)
3241 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3243 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3244 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3245 if (ix86_incoming_stack_boundary_string)
3247 i = atoi (ix86_incoming_stack_boundary_string);
3248 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3249 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3250 i, TARGET_64BIT ? 4 : 2);
3253 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3254 ix86_incoming_stack_boundary
3255 = ix86_user_incoming_stack_boundary;
3259 /* Accept -msseregparm only if at least SSE support is enabled. */
3260 if (TARGET_SSEREGPARM
3262 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3264 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3265 if (ix86_fpmath_string != 0)
3267 if (! strcmp (ix86_fpmath_string, "387"))
3268 ix86_fpmath = FPMATH_387;
3269 else if (! strcmp (ix86_fpmath_string, "sse"))
3273 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3274 ix86_fpmath = FPMATH_387;
3277 ix86_fpmath = FPMATH_SSE;
3279 else if (! strcmp (ix86_fpmath_string, "387,sse")
3280 || ! strcmp (ix86_fpmath_string, "387+sse")
3281 || ! strcmp (ix86_fpmath_string, "sse,387")
3282 || ! strcmp (ix86_fpmath_string, "sse+387")
3283 || ! strcmp (ix86_fpmath_string, "both"))
3287 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3288 ix86_fpmath = FPMATH_387;
3290 else if (!TARGET_80387)
3292 warning (0, "387 instruction set disabled, using SSE arithmetics");
3293 ix86_fpmath = FPMATH_SSE;
3296 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3299 error ("bad value (%s) for %sfpmath=%s %s",
3300 ix86_fpmath_string, prefix, suffix, sw);
3303 /* If the i387 is disabled, then do not return values in it. */
3305 target_flags &= ~MASK_FLOAT_RETURNS;
3307 /* Use external vectorized library in vectorizing intrinsics. */
3308 if (ix86_veclibabi_string)
3310 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3311 ix86_veclib_handler = ix86_veclibabi_svml;
3312 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3313 ix86_veclib_handler = ix86_veclibabi_acml;
3315 error ("unknown vectorization library ABI type (%s) for "
3316 "%sveclibabi=%s %s", ix86_veclibabi_string,
3317 prefix, suffix, sw);
3320 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3321 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3323 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3325 /* ??? Unwind info is not correct around the CFG unless either a frame
3326 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3327 unwind info generation to be aware of the CFG and propagating states
3329 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3330 || flag_exceptions || flag_non_call_exceptions)
3331 && flag_omit_frame_pointer
3332 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3334 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3335 warning (0, "unwind tables currently require either a frame pointer "
3336 "or %saccumulate-outgoing-args%s for correctness",
3338 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3341 /* If stack probes are required, the space used for large function
3342 arguments on the stack must also be probed, so enable
3343 -maccumulate-outgoing-args so this happens in the prologue. */
3344 if (TARGET_STACK_PROBE
3345 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3347 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3348 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3349 "for correctness", prefix, suffix);
3350 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3353 /* For sane SSE instruction set generation we need fcomi instruction.
3354 It is safe to enable all CMOVE instructions. */
3358 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3361 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3362 p = strchr (internal_label_prefix, 'X');
3363 internal_label_prefix_len = p - internal_label_prefix;
3367 /* When scheduling description is not available, disable scheduler pass
3368 so it won't slow down the compilation and make x87 code slower. */
3369 if (!TARGET_SCHEDULE)
3370 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3372 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3373 set_param_value ("simultaneous-prefetches",
3374 ix86_cost->simultaneous_prefetches);
3375 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3376 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3377 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3378 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3379 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3380 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3382 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3383 can be optimized to ap = __builtin_next_arg (0). */
3385 targetm.expand_builtin_va_start = NULL;
3389 ix86_gen_leave = gen_leave_rex64;
3390 ix86_gen_pop1 = gen_popdi1;
3391 ix86_gen_add3 = gen_adddi3;
3392 ix86_gen_sub3 = gen_subdi3;
3393 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3394 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3395 ix86_gen_monitor = gen_sse3_monitor64;
3396 ix86_gen_andsp = gen_anddi3;
3400 ix86_gen_leave = gen_leave;
3401 ix86_gen_pop1 = gen_popsi1;
3402 ix86_gen_add3 = gen_addsi3;
3403 ix86_gen_sub3 = gen_subsi3;
3404 ix86_gen_sub3_carry = gen_subsi3_carry;
3405 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3406 ix86_gen_monitor = gen_sse3_monitor;
3407 ix86_gen_andsp = gen_andsi3;
3411 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3413 target_flags |= MASK_CLD & ~target_flags_explicit;
3416 /* Save the initial options in case the user does function specific options */
3418 target_option_default_node = target_option_current_node
3419 = build_target_option_node ();
3422 /* Save the current options */
/* Implements the TARGET_OPTION_SAVE hook: snapshot the current global
   ix86 option state into PTR so ix86_function_specific_restore can
   reinstate it later.  NOTE(review): this excerpt elides some original
   lines (braces etc.); only the visible statements are documented.  */
3425 ix86_function_specific_save (struct cl_target_option *ptr)
/* Each of these values is asserted to fit in 0..255 — presumably the
   corresponding cl_target_option fields are byte-sized; confirm against
   the structure definition.  */
3427 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3428 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3429 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3430 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3431 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
/* Copy architecture, scheduling model, tuning target, fpmath/branch-cost
   settings, the tune/arch "was defaulted" markers, and the masks of
   options the user set explicitly on the command line.  */
3433 ptr->arch = ix86_arch;
3434 ptr->schedule = ix86_schedule;
3435 ptr->tune = ix86_tune;
3436 ptr->fpmath = ix86_fpmath;
3437 ptr->branch_cost = ix86_branch_cost;
3438 ptr->tune_defaulted = ix86_tune_defaulted;
3439 ptr->arch_specified = ix86_arch_specified;
3440 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3441 ptr->target_flags_explicit = target_flags_explicit;
3444 /* Restore the current options */
/* Implements the TARGET_OPTION_RESTORE hook: reinstate the global ix86
   option state previously captured by ix86_function_specific_save.
   NOTE(review): some original lines (braces etc.) are elided in this
   excerpt.  */
3447 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Remember the incoming arch/tune so the derived feature arrays are
   rebuilt only when the values actually change.  */
3449 enum processor_type old_tune = ix86_tune;
3450 enum processor_type old_arch = ix86_arch;
3451 unsigned int ix86_arch_mask, ix86_tune_mask;
/* Copy every saved field back into its global, casting the narrowed
   byte values back to their enum types.  */
3454 ix86_arch = (enum processor_type) ptr->arch;
3455 ix86_schedule = (enum attr_cpu) ptr->schedule;
3456 ix86_tune = (enum processor_type) ptr->tune;
3457 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3458 ix86_branch_cost = ptr->branch_cost;
3459 ix86_tune_defaulted = ptr->tune_defaulted;
3460 ix86_arch_specified = ptr->arch_specified;
3461 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3462 target_flags_explicit = ptr->target_flags_explicit;
3464 /* Recreate the arch feature tests if the arch changed.  */
3465 if (old_arch != ix86_arch)
3467 ix86_arch_mask = 1u << ix86_arch;
3468 for (i = 0; i < X86_ARCH_LAST; ++i)
3469 ix86_arch_features[i]
3470 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3473 /* Recreate the tune optimization tests.  */
3474 if (old_tune != ix86_tune)
3476 ix86_tune_mask = 1u << ix86_tune;
3477 for (i = 0; i < X86_TUNE_LAST; ++i)
3478 ix86_tune_features[i]
3479 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3483 /* Print the current options */
/* Implements the TARGET_OPTION_PRINT hook: dump the target options in
   PTR to FILE, indented by INDENT columns, for debugging purposes.
   NOTE(review): some original lines are elided in this excerpt.  */
3486 ix86_function_specific_print (FILE *file, int indent,
3487 struct cl_target_option *ptr)
/* Build a human-readable "-m..." style summary of the saved ISA and
   target flags; printed (and freed) at the end.  */
3490 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3491 NULL, NULL, NULL, false);
/* Print arch as both the raw index and its name when the index is a
   known processor (bounds-checked against TARGET_CPU_DEFAULT_max).  */
3493 fprintf (file, "%*sarch = %d (%s)\n",
3496 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3497 ? cpu_names[ptr->arch]
/* Likewise for the tuning target.  */
3500 fprintf (file, "%*stune = %d (%s)\n",
3503 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3504 ? cpu_names[ptr->tune]
/* fpmath is a bitmask; print ", 387" and/or ", sse" as applicable.  */
3507 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3508 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3509 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3510 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
/* ix86_target_string returned heap memory; release it after printing.  */
3514 fprintf (file, "%*s%s\n", indent, "", target_string);
3515 free (target_string);
3520 /* Inner function to process the attribute((target(...))), take an argument and
3521 set the current options from the argument. If we have a list, recursively go
/* Worker for attribute ((target ("..."))): parse ARGS (a TREE_LIST or a
   STRING_CST of comma-separated options) and apply each option to the
   current target state.  Recognized string-valued options (arch=, tune=,
   fpmath=) are stored into P_STRINGS, indexed by IX86_FUNCTION_SPECIFIC_*,
   as freshly xstrdup'ed strings the caller must free.
   NOTE(review): many original lines (braces, returns) are elided in this
   excerpt; comments cover only the visible statements.  */
3525 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Table-entry constructors: S is the option name, O the OPT_* enumerator;
   YES/NO entries also carry a target_flags mask M to set or clear.
   sizeof (S)-1 precomputes the name length for the matcher below.  */
3530 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3531 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3532 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3533 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3548 enum ix86_opt_type type;
/* ISA options, routed through ix86_handle_option just like -m flags.  */
3553 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3554 IX86_ATTR_ISA ("abm", OPT_mabm),
3555 IX86_ATTR_ISA ("aes", OPT_maes),
3556 IX86_ATTR_ISA ("avx", OPT_mavx),
3557 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3558 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3559 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3560 IX86_ATTR_ISA ("sse", OPT_msse),
3561 IX86_ATTR_ISA ("sse2", OPT_msse2),
3562 IX86_ATTR_ISA ("sse3", OPT_msse3),
3563 IX86_ATTR_ISA ("sse4", OPT_msse4),
3564 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3565 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3566 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3567 IX86_ATTR_ISA ("sse5", OPT_msse5),
3568 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3570 /* string options */
3571 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3572 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3573 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Flag options: YES entries set the mask when enabled, NO entries set
   an inverted ("no-") mask.  */
3576 IX86_ATTR_YES ("cld",
3580 IX86_ATTR_NO ("fancy-math-387",
3581 OPT_mfancy_math_387,
3582 MASK_NO_FANCY_MATH_387),
3584 IX86_ATTR_NO ("fused-madd",
3586 MASK_NO_FUSED_MADD),
3588 IX86_ATTR_YES ("ieee-fp",
3592 IX86_ATTR_YES ("inline-all-stringops",
3593 OPT_minline_all_stringops,
3594 MASK_INLINE_ALL_STRINGOPS),
3596 IX86_ATTR_YES ("inline-stringops-dynamically",
3597 OPT_minline_stringops_dynamically,
3598 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3600 IX86_ATTR_NO ("align-stringops",
3601 OPT_mno_align_stringops,
3602 MASK_NO_ALIGN_STRINGOPS),
3604 IX86_ATTR_YES ("recip",
3610 /* If this is a list, recurse to get the options.  */
3611 if (TREE_CODE (args) == TREE_LIST)
3615 for (; args; args = TREE_CHAIN (args))
3616 if (TREE_VALUE (args)
3617 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
/* A non-list argument must be a string constant.  */
3623 else if (TREE_CODE (args) != STRING_CST)
3626 /* Handle multiple arguments separated by commas.  ASTRDUP makes a
   scratch copy on the obstack so we can split it in place.  */
3627 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3629 while (next_optstr && *next_optstr != '\0')
3631 char *p = next_optstr;
3633 char *comma = strchr (next_optstr, ',');
3634 const char *opt_string;
3635 size_t len, opt_len;
3640 enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the next comma-separated token.  */
3646 len = comma - next_optstr;
3647 next_optstr = comma + 1;
3655 /* Recognize no-xxx.  */
3656 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3665 /* Find the option.  */
3668 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3670 type = attrs[i].type;
3671 opt_len = attrs[i].len;
/* Match: first char, then length (exact for flag options, strictly
   longer for "name=value" string options), then the full name.  */
3672 if (ch == attrs[i].string[0]
3673 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3674 && memcmp (p, attrs[i].string, opt_len) == 0)
3677 mask = attrs[i].mask;
3678 opt_string = attrs[i].string;
3683 /* Process the option.  */
3686 error ("attribute(target(\"%s\")) is unknown", orig_p);
/* ISA options share the command-line handler so attribute and -m
   spellings behave identically.  */
3690 else if (type == ix86_opt_isa)
3691 ix86_handle_option (opt, p, opt_set_p);
3693 else if (type == ix86_opt_yes || type == ix86_opt_no)
/* A NO entry's mask has inverted sense; flip the set/clear request.  */
3695 if (type == ix86_opt_no)
3696 opt_set_p = !opt_set_p;
3699 target_flags |= mask;
3701 target_flags &= ~mask;
/* String options may appear at most once; stash the value (the text
   after "name=") for the caller.  */
3704 else if (type == ix86_opt_str)
3708 error ("option(\"%s\") was already specified", opt_string);
3712 p_strings[opt] = xstrdup (p + opt_len);
3722 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Build and return a TARGET_OPTION_NODE tree for the target options in
   ARGS, or NULL on failure.  Temporarily redirects the global option
   strings, reruns override_options, snapshots the result, and then puts
   the globals back.  NOTE(review): some original lines (braces, early
   returns) are elided in this excerpt.  */
3725 ix86_valid_target_attribute_tree (tree args)
/* Save the global option strings/markers so they can be restored after
   override_options has been rerun with the attribute's values.  */
3727 const char *orig_arch_string = ix86_arch_string;
3728 const char *orig_tune_string = ix86_tune_string;
3729 const char *orig_fpmath_string = ix86_fpmath_string;
3730 int orig_tune_defaulted = ix86_tune_defaulted;
3731 int orig_arch_specified = ix86_arch_specified;
3732 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3735 struct cl_target_option *def
3736 = TREE_TARGET_OPTION (target_option_default_node);
3738 /* Process each of the options on the chain.  */
3739 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3742 /* If the changed options are different from the default, rerun override_options,
3743 and then save the options away.  The string options are attribute options,
3744 and will be undone when we copy the save structure.  */
3745 if (ix86_isa_flags != def->ix86_isa_flags
3746 || target_flags != def->target_flags
3747 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3748 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3749 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3751 /* If we are using the default tune= or arch=, undo the string assigned,
3752 and use the default.  */
3753 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3754 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3755 else if (!orig_arch_specified)
3756 ix86_arch_string = NULL;
3758 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3759 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3760 else if (orig_tune_defaulted)
3761 ix86_tune_string = NULL;
3763 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
3764 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3765 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3766 else if (!TARGET_64BIT && TARGET_SSE)
3767 ix86_fpmath_string = "sse,387";
3769 /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
3770 override_options (false);
3772 /* Add any builtin functions with the new isa if any.  */
3773 ix86_add_new_builtins (ix86_isa_flags);
3775 /* Save the current options unless we are validating options for
3777 t = build_target_option_node ();
/* Restore the global strings that were redirected above.  */
3779 ix86_arch_string = orig_arch_string;
3780 ix86_tune_string = orig_tune_string;
3781 ix86_fpmath_string = orig_fpmath_string;
3783 /* Free up memory allocated to hold the strings.
   NOTE(review): free (NULL) is a no-op, so the guard below is
   redundant; harmless as-is.  */
3784 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3785 if (option_strings[i])
3786 free (option_strings[i]);
3792 /* Hook to validate attribute((target("string"))). */
/* Hook to validate attribute ((target ("string"))) on FNDECL.  Builds
   the corresponding target-option tree, attaches it (and any changed
   optimization node) to the decl, then restores the caller's state.
   NOTE(review): some original lines (braces, the ARGS parameter line,
   the return) are elided in this excerpt.  */
3795 ix86_valid_target_attribute_p (tree fndecl,
3796 tree ARG_UNUSED (name),
3798 int ARG_UNUSED (flags))
3800 struct cl_target_option cur_target;
3802 tree old_optimize = build_optimization_node ();
3803 tree new_target, new_optimize;
3804 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3806 /* If the function changed the optimization levels as well as setting target
3807 options, start with the optimizations specified.  */
3808 if (func_optimize && func_optimize != old_optimize)
3809 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3811 /* The target attributes may also change some optimization flags, so update
3812 the optimization options if necessary.  */
3813 cl_target_option_save (&cur_target);
3814 new_target = ix86_valid_target_attribute_tree (args);
3815 new_optimize = build_optimization_node ();
/* Record the parsed target (and optimization, if it changed) nodes on
   the function declaration.  */
3822 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3824 if (old_optimize != new_optimize)
3825 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Put the caller's target and optimization state back; this hook must
   not leave the attribute's options globally in effect.  */
3828 cl_target_option_restore (&cur_target);
3830 if (old_optimize != new_optimize)
3831 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3837 /* Hook to determine if one function can safely inline another. */
/* Hook to determine whether it is safe to inline CALLEE into CALLER,
   given their respective attribute ((target)) option sets.
   NOTE(review): the return statements and some braces are elided in
   this excerpt; only the visible comparisons are documented.  */
3840 ix86_can_inline_p (tree caller, tree callee)
3843 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3844 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3846 /* If callee has no option attributes, then it is ok to inline.  */
3850 /* If caller has no option attributes, but callee does then it is not ok to
3852 else if (!caller_tree)
3857 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3858 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3860 /* Callee's isa options should be a subset of the caller's, i.e. a SSE5 function
3861 can inline a SSE2 function but a SSE2 function can't inline a SSE5
3863 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3864 != callee_opts->ix86_isa_flags)
3867 /* See if we have the same non-isa options.  */
3868 else if (caller_opts->target_flags != callee_opts->target_flags)
3871 /* See if arch, tune, etc. are the same.  */
3872 else if (caller_opts->arch != callee_opts->arch)
3875 else if (caller_opts->tune != callee_opts->tune)
3878 else if (caller_opts->fpmath != callee_opts->fpmath)
3881 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3892 /* Remember the last target of ix86_set_current_function. */
3893 static GTY(()) tree ix86_previous_fndecl;
3895 /* Establish appropriate back-end context for processing the function
3896 FNDECL. The argument might be NULL to indicate processing at top
3897 level, outside of any function scope. */
/* Establish appropriate back-end context for processing FNDECL; may be
   called many times per function, so it caches the last decl seen in
   ix86_previous_fndecl and only switches option state on a change.
   NOTE(review): some original lines (braces, the target_reinit calls
   implied by the comments) are elided in this excerpt.  */
3899 ix86_set_current_function (tree fndecl)
3901 /* Only change the context if the function changes.  This hook is called
3902 several times in the course of compiling a function, and we don't want to
3903 slow things down too much or call target_reinit when it isn't safe.  */
3904 if (fndecl && fndecl != ix86_previous_fndecl)
/* Fetch the per-function target-option trees of the previous and the
   new function (NULL_TREE handling elided in this excerpt).  */
3906 tree old_tree = (ix86_previous_fndecl
3907 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3910 tree new_tree = (fndecl
3911 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3914 ix86_previous_fndecl = fndecl;
/* Same option tree as before: nothing to switch.  */
3915 if (old_tree == new_tree)
/* New function carries its own options: make them current.  */
3920 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* Otherwise fall back to the options saved in
   target_option_current_node.  */
3926 struct cl_target_option *def
3927 = TREE_TARGET_OPTION (target_option_current_node);
3929 cl_target_option_restore (def);
3936 /* Return true if this goes in large data/bss. */
/* Return true if EXP should be placed in the large data/bss sections
   (.ldata/.lbss) used by the x86-64 medium code model.
   NOTE(review): return statements and some braces are elided in this
   excerpt.  */
3939 ix86_in_large_data_p (tree exp)
/* Large sections only exist for the medium code model (PIC or not).  */
3941 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3944 /* Functions are never large data.  */
3945 if (TREE_CODE (exp) == FUNCTION_DECL)
/* A variable with an explicit section attribute is "large" exactly when
   that section is .ldata or .lbss.  */
3948 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3950 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3951 if (strcmp (section, ".ldata") == 0
3952 || strcmp (section, ".lbss") == 0)
/* Otherwise decide by size against -mlarge-data-threshold.  */
3958 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3960 /* If this is an incomplete type with size 0, then we can't put it
3961 in data because it might be too big when completed.  */
3962 if (!size || size > ix86_section_threshold)
3969 /* Switch to the appropriate section for output of DECL.
3970 DECL is either a `VAR_DECL' node or a constant of some sort.
3971 RELOC indicates whether forming the initial value of DECL requires
3972 link-time relocations. */
3974 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
/* TARGET_ASM_SELECT_SECTION for x86-64 ELF: route medium-model large
   data into the .ldata*/.lbss family of sections; everything else goes
   through the default ELF selection.  RELOC indicates whether forming
   DECL's initial value requires link-time relocations.
   NOTE(review): break statements, several case labels, and some braces
   are elided in this excerpt.  */
3978 x86_64_elf_select_section (tree decl, int reloc,
3979 unsigned HOST_WIDE_INT align)
3981 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3982 && ix86_in_large_data_p (decl))
3984 const char *sname = NULL;
3985 unsigned int flags = SECTION_WRITE;
/* Map the generic section category to the corresponding large-model
   section name, adjusting FLAGS where needed (e.g. BSS).  */
3986 switch (categorize_decl_for_section (decl, reloc))
3991 case SECCAT_DATA_REL:
3992 sname = ".ldata.rel";
3994 case SECCAT_DATA_REL_LOCAL:
3995 sname = ".ldata.rel.local";
3997 case SECCAT_DATA_REL_RO:
3998 sname = ".ldata.rel.ro";
4000 case SECCAT_DATA_REL_RO_LOCAL:
4001 sname = ".ldata.rel.ro.local";
4005 flags |= SECTION_BSS;
4008 case SECCAT_RODATA_MERGE_STR:
4009 case SECCAT_RODATA_MERGE_STR_INIT:
4010 case SECCAT_RODATA_MERGE_CONST:
4014 case SECCAT_SRODATA:
4021 /* We don't split these for medium model.  Place them into
4022 default sections and hope for best.  */
4024 case SECCAT_EMUTLS_VAR:
4025 case SECCAT_EMUTLS_TMPL:
4030 /* We might get called with string constants, but get_named_section
4031 doesn't like them as they are not DECLs.  Also, we need to set
4032 flags in that case.  */
4034 return get_section (sname, flags, NULL);
4035 return get_named_section (decl, sname, reloc);
/* Not medium-model large data: defer to the generic ELF logic.  */
4038 return default_elf_select_section (decl, reloc, align);
4041 /* Build up a unique section name, expressed as a
4042 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4043 RELOC indicates whether the initial value of EXP requires
4044 link-time relocations. */
/* Implements TARGET_ASM_UNIQUE_SECTION for x86-64 ELF: for large-model
   data, synthesize a per-decl section name with a ".l" prefix (and a
   .gnu.linkonce prefix when COMDAT groups are unavailable); otherwise
   fall back to default_unique_section.
   NOTE(review): interior lines (break statements, some cases, brace
   lines) are elided in this view.  */
4046 static void ATTRIBUTE_UNUSED
4047 x86_64_elf_unique_section (tree decl, int reloc)
4049 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4050 && ix86_in_large_data_p (decl))
4052 const char *prefix = NULL;
4053 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4054 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
/* Choose the section-name prefix based on the decl's category.  */
4056 switch (categorize_decl_for_section (decl, reloc))
4059 case SECCAT_DATA_REL:
4060 case SECCAT_DATA_REL_LOCAL:
4061 case SECCAT_DATA_REL_RO:
4062 case SECCAT_DATA_REL_RO_LOCAL:
4063 prefix = one_only ? ".ld" : ".ldata";
4066 prefix = one_only ? ".lb" : ".lbss";
4069 case SECCAT_RODATA_MERGE_STR:
4070 case SECCAT_RODATA_MERGE_STR_INIT:
4071 case SECCAT_RODATA_MERGE_CONST:
4072 prefix = one_only ? ".lr" : ".lrodata";
4074 case SECCAT_SRODATA:
4081 /* We don't split these for medium model. Place them into
4082 default sections and hope for best. */
4084 case SECCAT_EMUTLS_VAR:
4085 prefix = targetm.emutls.var_section;
4087 case SECCAT_EMUTLS_TMPL:
4088 prefix = targetm.emutls.tmpl_section;
4093 const char *name, *linkonce;
/* Use the assembler-level (mangled, encoding-stripped) decl name.  */
4096 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4097 name = targetm.strip_name_encoding (name);
4099 /* If we're using one_only, then there needs to be a .gnu.linkonce
4100 prefix to the section name. */
4101 linkonce = one_only ? ".gnu.linkonce" : "";
4103 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4105 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
/* Not large data: generic unique-section naming.  */
4109 default_unique_section (decl, reloc);
4112 #ifdef COMMON_ASM_OP
4113 /* This says how to output assembler code to declare an
4114 uninitialized external linkage data object.
4116 For medium model x86-64 we need to use .largecomm opcode for
/* Emit a common symbol directive: ".largecomm" for medium-model objects
   over the large-data threshold, otherwise the target's COMMON_ASM_OP,
   followed by "name,size,alignment-in-bytes".  */
4119 x86_elf_aligned_common (FILE *file,
4120 const char *name, unsigned HOST_WIDE_INT size,
4123 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4124 && size > (unsigned int)ix86_section_threshold)
4125 fprintf (file, ".largecomm\t");
4127 fprintf (file, "%s", COMMON_ASM_OP);
4128 assemble_name (file, name);
/* align is in bits here; convert to bytes for the directive.  */
4129 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4130 size, align / BITS_PER_UNIT);
4134 /* Utility function for targets to use in implementing
4135 ASM_OUTPUT_ALIGNED_BSS. */
/* Place the object in .lbss when it qualifies as medium-model large
   data, else in the normal bss section; then emit alignment, the
   object's label, and reserve SIZE bytes (at least 1 so the label
   refers to real storage).  */
4138 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4139 const char *name, unsigned HOST_WIDE_INT size,
4142 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4143 && size > (unsigned int)ix86_section_threshold)
4144 switch_to_section (get_named_section (decl, ".lbss", 0));
4146 switch_to_section (bss_section);
4147 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4148 #ifdef ASM_DECLARE_OBJECT_NAME
4149 last_assemble_variable_decl = decl;
4150 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4152 /* Standard thing is just output label for the object. */
4153 ASM_OUTPUT_LABEL (file, name);
4154 #endif /* ASM_DECLARE_OBJECT_NAME */
4155 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Implements OPTIMIZATION_OPTIONS: set i386-specific defaults for a
   given -O LEVEL before command-line flags are applied.  Several flags
   are set to the sentinel 2 ("unset") so override_options can later pick
   a default that depends on TARGET_64BIT.  */
4159 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4161 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4162 make the problem with not enough registers even worse. */
4163 #ifdef INSN_SCHEDULING
4165 flag_schedule_insns = 0;
4169 /* The Darwin libraries never set errno, so we might as well
4170 avoid calling them when that's the only reason we would. */
4171 flag_errno_math = 0;
4173 /* The default values of these switches depend on the TARGET_64BIT
4174 that is not known at this moment. Mark these values with 2 and
4175 let user the to override these. In case there is no command line option
4176 specifying them, we will set the defaults in override_options. */
4178 flag_omit_frame_pointer = 2;
4179 flag_pcc_struct_return = 2;
4180 flag_asynchronous_unwind_tables = 2;
4181 flag_vect_cost_model = 1;
/* Allow subtargets (e.g. specific OS ports) to adjust further.  */
4182 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4183 SUBTARGET_OPTIMIZATION_OPTIONS;
4187 /* Decide whether we can make a sibling call to a function. DECL is the
4188 declaration of the function being targeted by the call and EXP is the
4189 CALL_EXPR representing the call. */
/* Implements TARGET_FUNCTION_OK_FOR_SIBCALL.  Rejects sibcalls whenever
   PIC register use, mismatched return-value locations, register-pressure
   from regparm'd indirect calls, dllimport indirection, or stack
   realignment would make a tail call unsafe.
   NOTE(review): the early "return false" statements between the visible
   conditions are elided in this view.  */
4192 ix86_function_ok_for_sibcall (tree decl, tree exp)
4197 /* If we are generating position-independent code, we cannot sibcall
4198 optimize any indirect call, or a direct call to a global function,
4199 as the PLT requires %ebx be live. */
4200 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Find the called function's type, looking through the pointer.  */
4207 func = TREE_TYPE (CALL_EXPR_FN (exp));
4208 if (POINTER_TYPE_P (func))
4209 func = TREE_TYPE (func);
4212 /* Check that the return value locations are the same. Like
4213 if we are returning floats on the 80387 register stack, we cannot
4214 make a sibcall from a function that doesn't return a float to a
4215 function that does or, conversely, from a function that does return
4216 a float to a function that doesn't; the necessary stack adjustment
4217 would not be executed. This is also the place we notice
4218 differences in the return value ABI. Note that it is ok for one
4219 of the functions to have void return type as long as the return
4220 value of the other is passed in a register. */
4221 a = ix86_function_value (TREE_TYPE (exp), func, false);
4222 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
/* x87 stack registers demand an exact match; otherwise a void-returning
   caller is tolerated as long as the callee's value is in a register.  */
4224 if (STACK_REG_P (a) || STACK_REG_P (b))
4226 if (!rtx_equal_p (a, b))
4229 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4231 else if (!rtx_equal_p (a, b))
4234 /* If this call is indirect, we'll need to be able to use a call-clobbered
4235 register for the address of the target function. Make sure that all
4236 such registers are not used for passing parameters. */
4237 if (!decl && !TARGET_64BIT)
4241 /* We're looking at the CALL_EXPR, we need the type of the function. */
4242 type = CALL_EXPR_FN (exp); /* pointer expression */
4243 type = TREE_TYPE (type); /* pointer type */
4244 type = TREE_TYPE (type); /* function type */
4246 if (ix86_function_regparm (type, NULL) >= 3)
4248 /* ??? Need to count the actual number of registers to be used,
4249 not the possible number of registers. Fix later. */
4254 /* Dllimport'd functions are also called indirectly. */
4255 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4257 && decl && DECL_DLLIMPORT_P (decl)
4258 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4261 /* If we need to align the outgoing stack, then sibcalling would
4262 unalign the stack, which may break the called function. */
4263 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4266 /* Otherwise okay. That also includes certain types of indirect calls. */
4270 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4271 calling convention attributes;
4272 arguments as in struct attribute_spec.handler. */
/* Attribute handler: validates that the calling-convention attribute is
   applied to a function type and is not combined with an incompatible
   convention; diagnoses bad "regparm" arguments.  Sets *no_add_attrs on
   rejection.  NOTE(review): returns and some brace lines are elided in
   this view.  */
4275 ix86_handle_cconv_attribute (tree *node, tree name,
4277 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function (or method) types.  */
4280 if (TREE_CODE (*node) != FUNCTION_TYPE
4281 && TREE_CODE (*node) != METHOD_TYPE
4282 && TREE_CODE (*node) != FIELD_DECL
4283 && TREE_CODE (*node) != TYPE_DECL)
4285 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4287 *no_add_attrs = true;
4291 /* Can combine regparm with all attributes but fastcall. */
4292 if (is_attribute_p ("regparm", name))
4296 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4298 error ("fastcall and regparm attributes are not compatible");
/* regparm takes one integer constant argument, bounded by REGPARM_MAX.  */
4301 cst = TREE_VALUE (args);
4302 if (TREE_CODE (cst) != INTEGER_CST)
4304 warning (OPT_Wattributes,
4305 "%qE attribute requires an integer constant argument",
4307 *no_add_attrs = true;
4309 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4311 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4313 *no_add_attrs = true;
4321 /* Do not warn when emulating the MS ABI. */
4322 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4323 warning (OPT_Wattributes, "%qE attribute ignored",
4325 *no_add_attrs = true;
4329 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4330 if (is_attribute_p ("fastcall", name))
4332 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4334 error ("fastcall and cdecl attributes are not compatible");
4336 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4338 error ("fastcall and stdcall attributes are not compatible");
4340 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4342 error ("fastcall and regparm attributes are not compatible");
4346 /* Can combine stdcall with fastcall (redundant), regparm and
4348 else if (is_attribute_p ("stdcall", name))
4350 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4352 error ("stdcall and cdecl attributes are not compatible");
4354 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4356 error ("stdcall and fastcall attributes are not compatible");
4360 /* Can combine cdecl with regparm and sseregparm. */
4361 else if (is_attribute_p ("cdecl", name))
4363 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4365 error ("stdcall and cdecl attributes are not compatible");
4367 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4369 error ("fastcall and cdecl attributes are not compatible");
4373 /* Can combine sseregparm with all attributes. */
4378 /* Return 0 if the attributes for two types are incompatible, 1 if they
4379 are compatible, and 2 if they are nearly compatible (which causes a
4380 warning to be generated). */
/* Implements TARGET_COMP_TYPE_ATTRIBUTES: two function types are
   incompatible if they differ in fastcall, effective regparm count,
   sseregparm, or cdecl-vs-stdcall return convention (with -mrtd
   flipping which one is the default).
   NOTE(review): the return statements are elided in this view.  */
4383 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4385 /* Check for mismatch of non-default calling convention. */
4386 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Non-function types carry no calling-convention attributes.  */
4388 if (TREE_CODE (type1) != FUNCTION_TYPE
4389 && TREE_CODE (type1) != METHOD_TYPE)
4392 /* Check for mismatched fastcall/regparm types. */
4393 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4394 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4395 || (ix86_function_regparm (type1, NULL)
4396 != ix86_function_regparm (type2, NULL)))
4399 /* Check for mismatched sseregparm types. */
4400 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4401 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4404 /* Check for mismatched return types (cdecl vs stdcall). */
4405 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4406 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4412 /* Return the regparm value for a function with the indicated TYPE and DECL.
4413 DECL may be NULL when calling function indirectly
4414 or considering a libcall. */
/* Compute how many integer registers are used to pass arguments:
   the ABI maximum on 64-bit, otherwise -mregparm / the "regparm"
   attribute / fastcall, optionally raised for local functions whose
   callers are all visible.  NOTE(review): several lines (returns,
   braces, parts of conditions) are elided in this view.  */
4417 ix86_function_regparm (const_tree type, const_tree decl)
/* Latch so the nested-function diagnostic is only issued once.  */
4422 static bool error_issued;
/* 64-bit: regparm is fixed by the function's ABI.  */
4425 return (ix86_function_type_abi (type) == SYSV_ABI
4426 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4428 regparm = ix86_regparm;
4429 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4433 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4435 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4437 /* We can't use regparm(3) for nested functions because
4438 these pass static chain pointer in %ecx register. */
4439 if (!error_issued && regparm == 3
4440 && decl_function_context (decl)
4441 && !DECL_NO_STATIC_CHAIN (decl))
4443 error ("nested functions are limited to 2 register parameters");
4444 error_issued = true;
/* fastcall implies a fixed register convention.  */
4452 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4455 /* Use register calling convention for local functions when possible. */
4457 && TREE_CODE (decl) == FUNCTION_DECL
4461 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4462 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4465 int local_regparm, globals = 0, regno;
4468 /* Make sure no regparm register is taken by a
4469 fixed register variable. */
4470 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4471 if (fixed_regs[local_regparm])
4474 /* We can't use regparm(3) for nested functions as these use
4475 static chain pointer in third argument. */
4476 if (local_regparm == 3
4477 && decl_function_context (decl)
4478 && !DECL_NO_STATIC_CHAIN (decl))
4481 /* If the function realigns its stackpointer, the prologue will
4482 clobber %ecx. If we've already generated code for the callee,
4483 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4484 scanning the attributes for the self-realigning property. */
4485 f = DECL_STRUCT_FUNCTION (decl);
4486 /* Since current internal arg pointer won't conflict with
4487 parameter passing regs, so no need to change stack
4488 realignment and adjust regparm number.
4490 Each fixed register usage increases register pressure,
4491 so less registers should be used for argument passing.
4492 This functionality can be overriden by an explicit
/* Count global (fixed) registers among the argument registers.  */
4494 for (regno = 0; regno <= DI_REG; regno++)
4495 if (fixed_regs[regno])
4499 = globals < local_regparm ? local_regparm - globals : 0;
/* Only ever raise regparm for local functions, never lower it.  */
4501 if (local_regparm > regparm)
4502 regparm = local_regparm;
4509 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4510 DFmode (2) arguments in SSE registers for a function with the
4511 indicated TYPE and DECL. DECL may be NULL when calling function
4512 indirectly or considering a libcall. Otherwise return 0. */
/* 32-bit only (asserted below).  Honors -msseregparm / the
   "sseregparm" attribute (erroring when SSE is unavailable, if WARN),
   and otherwise enables SSE argument passing for local functions
   compiled with SSE math.  NOTE(review): return statements and parts
   of the conditions are elided in this view.  */
4515 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4517 gcc_assert (!TARGET_64BIT);
4519 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4520 by the sseregparm attribute. */
4521 if (TARGET_SSEREGPARM
4522 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE support is a hard error (when WARN is set).  */
4529 error ("Calling %qD with attribute sseregparm without "
4530 "SSE/SSE2 enabled", decl);
4532 error ("Calling %qT with attribute sseregparm without "
4533 "SSE/SSE2 enabled", type);
4541 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4542 (and DFmode for SSE2) arguments in SSE registers. */
4543 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4545 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4546 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
/* 2 = SFmode and DFmode in SSE regs; 1 = SFmode only.  */
4548 return TARGET_SSE2 ? 2 : 1;
4554 /* Return true if EAX is live at the start of the function. Used by
4555 ix86_expand_prologue to determine if we need special help before
4556 calling allocate_stack_worker. */
4559 ix86_eax_live_at_start_p (void)
4561 /* Cheat. Don't bother working forward from ix86_function_regparm
4562 to the function type to whether an actual argument is located in
4563 eax. Instead just look at cfg info, which is still close enough
4564 to correct at this point. This gives false positives for broken
4565 functions that might use uninitialized data that happens to be
4566 allocated in eax, but who cares? */
/* Register 0 is AX; query liveness at the CFG entry block.  */
4567 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4570 /* Value is the number of bytes of arguments automatically
4571 popped when returning from a subroutine call.
4572 FUNDECL is the declaration node of the function (as a tree),
4573 FUNTYPE is the data type of the function (as a tree),
4574 or for a library call it is an identifier node for the subroutine name.
4575 SIZE is the number of bytes of arguments passed on the stack.
4577 On the 80386, the RTD insn may be used to pop them if the number
4578 of args is fixed, but if the number is variable then the caller
4579 must pop them all. RTD can't be used for library calls now
4580 because the library is compiled with the Unix compiler.
4581 Use of RTD is a selectable option, since it is incompatible with
4582 standard Unix calling sequences. If the option is not selected,
4583 the caller must always pop the args.
4585 The attribute stdcall is equivalent to RTD on a per module basis. */
/* Implements RETURN_POPS_ARGS.  NOTE(review): the "return size" and
   "return 0" statements between the visible conditions are elided in
   this view.  */
4588 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4592 /* None of the 64-bit ABIs pop arguments. */
/* -mrtd applies to real functions only, never library-call idents.  */
4596 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4598 /* Cdecl functions override -mrtd, and never pop the stack. */
4599 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4601 /* Stdcall and fastcall functions will pop the stack if not
4603 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4604 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* RTD cannot be used for variadic functions.  */
4607 if (rtd && ! stdarg_p (funtype))
4611 /* Lose any fake structure return argument if it is passed on the stack. */
4612 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4613 && !KEEP_AGGREGATE_RETURN_POINTER)
4615 int nregs = ix86_function_regparm (funtype, fundecl);
/* The callee pops the hidden struct-return pointer word.  */
4617 return GET_MODE_SIZE (Pmode);
4623 /* Argument support functions. */
4625 /* Return true when register may be used to pass function parameters. */
/* Checks REGNO against the integer, SSE and MMX argument-register sets
   for the current target (32-bit regparm/SSE/MMX conventions, or the
   64-bit SysV/MS parameter registers).  NOTE(review): several lines,
   including the surrounding conditionals and returns, are elided in
   this view.  */
4627 ix86_function_arg_regno_p (int regno)
4630 const int *parm_regs;
4635 return (regno < REGPARM_MAX
4636 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4638 return (regno < REGPARM_MAX
4639 || (TARGET_MMX && MMX_REGNO_P (regno)
4640 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4641 || (TARGET_SSE && SSE_REGNO_P (regno)
4642 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4647 if (SSE_REGNO_P (regno) && TARGET_SSE)
4652 if (TARGET_SSE && SSE_REGNO_P (regno)
4653 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4657 /* TODO: The function should depend on current function ABI but
4658 builtins.c would need updating then. Therefore we use the
4661 /* RAX is used as hidden argument to va_arg functions. */
4662 if (ix86_abi == SYSV_ABI && regno == AX_REG)
/* Pick the parameter-register table for the default 64-bit ABI.  */
4665 if (ix86_abi == MS_ABI)
4666 parm_regs = x86_64_ms_abi_int_parameter_registers;
4668 parm_regs = x86_64_int_parameter_registers;
4669 for (i = 0; i < (ix86_abi == MS_ABI
4670 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
4671 if (regno == parm_regs[i])
4676 /* Return if we do not know how to pass TYPE solely in registers. */
/* Implements TARGET_MUST_PASS_IN_STACK: defer to the generic
   variable-size/padding check, plus a 32-bit special case for TImode
   aggregates.  */
4679 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4681 if (must_pass_in_stack_var_size_or_pad (mode, type))
4684 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4685 The layout_type routine is crafty and tries to trick us into passing
4686 currently unsupported vector types on the stack by using TImode. */
4687 return (!TARGET_64BIT && mode == TImode
4688 && type && TREE_CODE (type) != VECTOR_TYPE);
4691 /* It returns the size, in bytes, of the area reserved for arguments passed
4692 in registers for the function represented by fndecl dependent to the used
/* Determine the callee's ABI from its decl (or directly from a function
   type) and return the MS-ABI "home" area size when applicable.
   NOTE(review): the returned values themselves are on elided lines.  */
4695 ix86_reg_parm_stack_space (const_tree fndecl)
4697 enum calling_abi call_abi = SYSV_ABI;
4698 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4699 call_abi = ix86_function_abi (fndecl);
4701 call_abi = ix86_function_type_abi (fndecl);
/* Only the MS ABI reserves register-parameter stack space.  */
4702 if (call_abi == MS_ABI)
4707 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* Start from the default ABI (ix86_abi) and flip it if the function
   type carries the opposite "ms_abi"/"sysv_abi" attribute.  Only
   meaningful on 64-bit targets.  NOTE(review): the assignments selected
   by the attribute checks are on elided lines.  */
4710 ix86_function_type_abi (const_tree fntype)
4712 if (TARGET_64BIT && fntype != NULL)
4714 enum calling_abi abi = ix86_abi;
4715 if (abi == SYSV_ABI)
4717 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4720 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* Return the calling ABI of FNDECL by delegating to the type-based
   check on its function type.  */
4727 static enum calling_abi
4728 ix86_function_abi (const_tree fndecl)
4732 return ix86_function_type_abi (TREE_TYPE (fndecl));
4735 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* The per-function ABI is cached in cfun->machine->call_abi by
   ix86_call_abi_override; only valid for 64-bit with a current cfun.  */
4738 ix86_cfun_abi (void)
4740 if (! cfun || ! TARGET_64BIT)
4742 return cfun->machine->call_abi;
4746 extern void init_regs (void);
4748 /* Implementation of call abi switching target hook. Specific to FNDECL
4749 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4750 for more details. */
/* Record the active calling ABI for the function being compiled:
   the global default when FNDECL is absent, otherwise the ABI derived
   from FNDECL's type.  */
4752 ix86_call_abi_override (const_tree fndecl)
4754 if (fndecl == NULL_TREE)
4755 cfun->machine->call_abi = ix86_abi;
4757 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4760 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4761 re-initialization of init_regs each time we switch function context since
4762 this is needed only during RTL expansion. */
4764 ix86_maybe_switch_abi (void)
/* %esi is call-used under SysV but call-saved under MS, so its current
   call_used_regs entry tells us which ABI the register tables reflect;
   reinitialize only on a mismatch.  NOTE(review): the enclosing
   condition and the init_regs call are partly on elided lines.  */
4767 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4771 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4772 for a call to a function whose data type is FNTYPE.
4773 For a library call, FNTYPE is 0. */
/* Implements INIT_CUMULATIVE_ARGS: zero CUM, record the callee's ABI,
   and set the available integer/SSE/MMX argument-register counts,
   adjusted for variadic functions, fastcall/regparm attributes, and
   cross-ABI calls.  NOTE(review): many lines (braces, some conditions
   and assignments) are elided in this view.  */
4776 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4777 tree fntype, /* tree ptr for function decl */
4778 rtx libname, /* SYMBOL_REF of library name or 0 */
4781 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4782 memset (cum, 0, sizeof (*cum));
/* Prefer the decl's ABI when available, else the type's.  */
4785 cum->call_abi = ix86_function_abi (fndecl);
4787 cum->call_abi = ix86_function_type_abi (fntype);
4788 /* Set up the number of registers to use for passing arguments. */
4790 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4791 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it");
4792 cum->nregs = ix86_regparm;
/* Cross-ABI call: use the other ABI's integer-register count.  */
4795 if (cum->call_abi != ix86_abi)
4796 cum->nregs = (ix86_abi != SYSV_ABI
4797 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4801 cum->sse_nregs = SSE_REGPARM_MAX;
/* Likewise for the SSE argument registers.  */
4804 if (cum->call_abi != ix86_abi)
4805 cum->sse_nregs = (ix86_abi != SYSV_ABI
4806 ? X86_64_SSE_REGPARM_MAX
4807 : X86_64_MS_SSE_REGPARM_MAX);
4811 cum->mmx_nregs = MMX_REGPARM_MAX;
4812 cum->warn_avx = true;
4813 cum->warn_sse = true;
4814 cum->warn_mmx = true;
4816 /* Because type might mismatch in between caller and callee, we need to
4817 use actual type of function for local calls.
4818 FIXME: cgraph_analyze can be told to actually record if function uses
4819 va_start so for local functions maybe_vaarg can be made aggressive
4821 FIXME: once typesytem is fixed, we won't need this code anymore. */
4823 fntype = TREE_TYPE (fndecl);
/* Unprototyped or stdarg functions may take variable arguments.  */
4824 cum->maybe_vaarg = (fntype
4825 ? (!prototype_p (fntype) || stdarg_p (fntype))
4830 /* If there are variable arguments, then we won't pass anything
4831 in registers in 32-bit mode. */
4832 if (stdarg_p (fntype))
4843 /* Use ecx and edx registers if function has fastcall attribute,
4844 else look for regparm information. */
4847 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4853 cum->nregs = ix86_function_regparm (fntype, fndecl);
4856 /* Set up the number of SSE registers used for passing SFmode
4857 and DFmode arguments. Warn for mismatching ABI. */
4858 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4862 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4863 But in the case of vector types, it is some vector mode.
4865 When we have only some of our vector isa extensions enabled, then there
4866 are some modes for which vector_mode_supported_p is false. For these
4867 modes, the generic vector support in gcc will choose some non-vector mode
4868 in order to implement the type. By computing the natural mode, we'll
4869 select the proper ABI location for the operand and not depend on whatever
4870 the middle-end decides to do with these vector types.
4872 The midde-end can't deal with the vector types > 16 bytes. In this
4873 case, we return the original mode and warn ABI change if CUM isn't
/* NOTE(review): interior lines (loop body details, returns) are elided
   in this view.  */
4876 static enum machine_mode
4877 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4879 enum machine_mode mode = TYPE_MODE (type);
/* Only reconsider vector types whose chosen mode is not a vector mode.  */
4881 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4883 HOST_WIDE_INT size = int_size_in_bytes (type);
4884 if ((size == 8 || size == 16 || size == 32)
4885 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4886 && TYPE_VECTOR_SUBPARTS (type) > 1)
4888 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Scan the float or integer vector-mode class, as appropriate.  */
4890 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4891 mode = MIN_MODE_VECTOR_FLOAT;
4893 mode = MIN_MODE_VECTOR_INT;
4895 /* Get the mode which has this inner mode and number of units. */
4896 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4897 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4898 && GET_MODE_INNER (mode) == innermode)
/* 32-byte vectors without AVX: keep TYPE_MODE and warn once.  */
4900 if (size == 32 && !TARGET_AVX)
4902 static bool warnedavx;
4909 warning (0, "AVX vector argument without AVX "
4910 "enabled changes the ABI");
4912 return TYPE_MODE (type);
4925 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4926 this may not agree with the mode that the type system has chosen for the
4927 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4928 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4931 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
/* Simple case: the type system's mode is usable directly.  */
4936 if (orig_mode != BLKmode)
4937 tmp = gen_rtx_REG (orig_mode, regno)
/* BLKmode: wrap the natural-mode register in a one-element PARALLEL
   at offset 0.  */
4940 tmp = gen_rtx_REG (mode, regno);
4941 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4942 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4948 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4949 of this code is to classify each 8bytes of incoming argument by the register
4950 class and assign registers accordingly. */
4952 /* Return the union class of CLASS1 and CLASS2.
4953 See the x86-64 PS ABI for details. */
/* Implements the psABI "merge" rules for two eightbyte classes.  Rules
   are applied in numbered order below; the returns for rules #1 and #2
   fall on elided lines.  */
4955 static enum x86_64_reg_class
4956 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4958 /* Rule #1: If both classes are equal, this is the resulting class. */
4959 if (class1 == class2)
4962 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4964 if (class1 == X86_64_NO_CLASS)
4966 if (class2 == X86_64_NO_CLASS)
4969 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4970 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4971 return X86_64_MEMORY_CLASS;
4973 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF keeps the 32-bit INTEGERSI classification.  */
4974 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4975 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4976 return X86_64_INTEGERSI_CLASS;
4977 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4978 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4979 return X86_64_INTEGER_CLASS;
4981 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4983 if (class1 == X86_64_X87_CLASS
4984 || class1 == X86_64_X87UP_CLASS
4985 || class1 == X86_64_COMPLEX_X87_CLASS
4986 || class2 == X86_64_X87_CLASS
4987 || class2 == X86_64_X87UP_CLASS
4988 || class2 == X86_64_COMPLEX_X87_CLASS)
4989 return X86_64_MEMORY_CLASS;
4991 /* Rule #6: Otherwise class SSE is used. */
4992 return X86_64_SSE_CLASS;
4995 /* Classify the argument of type TYPE and mode MODE.
4996 CLASSES will be filled by the register class used to pass each word
4997 of the operand. The number of words is returned. In case the parameter
4998 should be passed in memory, 0 is returned. As a special case for zero
4999 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5001 BIT_OFFSET is used internally for handling records and specifies offset
5002 of the offset in bits modulo 256 to avoid overflow cases.
5004 See the x86-64 PS ABI for details.
5008 classify_argument (enum machine_mode mode, const_tree type,
5009 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5011 HOST_WIDE_INT bytes =
5012 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5013 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5015 /* Variable sized entities are always passed/returned in memory. */
5019 if (mode != VOIDmode
5020 && targetm.calls.must_pass_in_stack (mode, type))
5023 if (type && AGGREGATE_TYPE_P (type))
5027 enum x86_64_reg_class subclasses[MAX_CLASSES];
5029 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5033 for (i = 0; i < words; i++)
5034 classes[i] = X86_64_NO_CLASS;
5036 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5037 signalize memory class, so handle it as special case. */
5040 classes[0] = X86_64_NO_CLASS;
5044 /* Classify each field of record and merge classes. */
5045 switch (TREE_CODE (type))
5048 /* And now merge the fields of structure. */
5049 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5051 if (TREE_CODE (field) == FIELD_DECL)
5055 if (TREE_TYPE (field) == error_mark_node)
5058 /* Bitfields are always classified as integer. Handle them
5059 early, since later code would consider them to be
5060 misaligned integers. */
5061 if (DECL_BIT_FIELD (field))
5063 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5064 i < ((int_bit_position (field) + (bit_offset % 64))
5065 + tree_low_cst (DECL_SIZE (field), 0)
5068 merge_classes (X86_64_INTEGER_CLASS,
5075 type = TREE_TYPE (field);
5077 /* Flexible array member is ignored. */
5078 if (TYPE_MODE (type) == BLKmode
5079 && TREE_CODE (type) == ARRAY_TYPE
5080 && TYPE_SIZE (type) == NULL_TREE
5081 && TYPE_DOMAIN (type) != NULL_TREE
5082 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5087 if (!warned && warn_psabi)
5090 inform (input_location,
5091 "The ABI of passing struct with"
5092 " a flexible array member has"
5093 " changed in GCC 4.4");
5097 num = classify_argument (TYPE_MODE (type), type,
5099 (int_bit_position (field)
5100 + bit_offset) % 256);
5103 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5104 for (i = 0; i < num && (i + pos) < words; i++)
5106 merge_classes (subclasses[i], classes[i + pos]);
5113 /* Arrays are handled as small records. */
5116 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5117 TREE_TYPE (type), subclasses, bit_offset);
5121 /* The partial classes are now full classes. */
5122 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5123 subclasses[0] = X86_64_SSE_CLASS;
5124 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5125 && !((bit_offset % 64) == 0 && bytes == 4))
5126 subclasses[0] = X86_64_INTEGER_CLASS;
5128 for (i = 0; i < words; i++)
5129 classes[i] = subclasses[i % num];
5134 case QUAL_UNION_TYPE:
5135 /* Unions are similar to RECORD_TYPE but offset is always 0.
5137 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5139 if (TREE_CODE (field) == FIELD_DECL)
5143 if (TREE_TYPE (field) == error_mark_node)
5146 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5147 TREE_TYPE (field), subclasses,
5151 for (i = 0; i < num; i++)
5152 classes[i] = merge_classes (subclasses[i], classes[i]);
5163 /* When size > 16 bytes, if the first one isn't
5164 X86_64_SSE_CLASS or any other ones aren't
5165 X86_64_SSEUP_CLASS, everything should be passed in
5167 if (classes[0] != X86_64_SSE_CLASS)
5170 for (i = 1; i < words; i++)
5171 if (classes[i] != X86_64_SSEUP_CLASS)
5175 /* Final merger cleanup. */
5176 for (i = 0; i < words; i++)
5178 /* If one class is MEMORY, everything should be passed in
5180 if (classes[i] == X86_64_MEMORY_CLASS)
5183 /* The X86_64_SSEUP_CLASS should be always preceded by
5184 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5185 if (classes[i] == X86_64_SSEUP_CLASS
5186 && classes[i - 1] != X86_64_SSE_CLASS
5187 && classes[i - 1] != X86_64_SSEUP_CLASS)
5189 /* The first one should never be X86_64_SSEUP_CLASS. */
5190 gcc_assert (i != 0);
5191 classes[i] = X86_64_SSE_CLASS;
5194 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5195 everything should be passed in memory. */
5196 if (classes[i] == X86_64_X87UP_CLASS
5197 && (classes[i - 1] != X86_64_X87_CLASS))
5201 /* The first one should never be X86_64_X87UP_CLASS. */
5202 gcc_assert (i != 0);
5203 if (!warned && warn_psabi)
5206 inform (input_location,
5207 "The ABI of passing union with long double"
5208 " has changed in GCC 4.4");
5216 /* Compute alignment needed. We align all types to natural boundaries with
5217 exception of XFmode that is aligned to 64bits. */
5218 if (mode != VOIDmode && mode != BLKmode)
5220 int mode_alignment = GET_MODE_BITSIZE (mode);
5223 mode_alignment = 128;
5224 else if (mode == XCmode)
5225 mode_alignment = 256;
5226 if (COMPLEX_MODE_P (mode))
5227 mode_alignment /= 2;
5228 /* Misaligned fields are always returned in memory. */
5229 if (bit_offset % mode_alignment)
5233 /* for V1xx modes, just use the base mode */
5234 if (VECTOR_MODE_P (mode) && mode != V1DImode
5235 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5236 mode = GET_MODE_INNER (mode);
5238 /* Classification of atomic types. */
5243 classes[0] = X86_64_SSE_CLASS;
5246 classes[0] = X86_64_SSE_CLASS;
5247 classes[1] = X86_64_SSEUP_CLASS;
5257 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5261 classes[0] = X86_64_INTEGERSI_CLASS;
5264 else if (size <= 64)
5266 classes[0] = X86_64_INTEGER_CLASS;
5269 else if (size <= 64+32)
5271 classes[0] = X86_64_INTEGER_CLASS;
5272 classes[1] = X86_64_INTEGERSI_CLASS;
5275 else if (size <= 64+64)
5277 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5285 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5289 /* OImode shouldn't be used directly. */
5294 if (!(bit_offset % 64))
5295 classes[0] = X86_64_SSESF_CLASS;
5297 classes[0] = X86_64_SSE_CLASS;
5300 classes[0] = X86_64_SSEDF_CLASS;
5303 classes[0] = X86_64_X87_CLASS;
5304 classes[1] = X86_64_X87UP_CLASS;
5307 classes[0] = X86_64_SSE_CLASS;
5308 classes[1] = X86_64_SSEUP_CLASS;
5311 classes[0] = X86_64_SSE_CLASS;
5312 if (!(bit_offset % 64))
5318 if (!warned && warn_psabi)
5321 inform (input_location,
5322 "The ABI of passing structure with complex float"
5323 " member has changed in GCC 4.4");
5325 classes[1] = X86_64_SSESF_CLASS;
5329 classes[0] = X86_64_SSEDF_CLASS;
5330 classes[1] = X86_64_SSEDF_CLASS;
5333 classes[0] = X86_64_COMPLEX_X87_CLASS;
5336 /* This modes is larger than 16 bytes. */
5344 classes[0] = X86_64_SSE_CLASS;
5345 classes[1] = X86_64_SSEUP_CLASS;
5346 classes[2] = X86_64_SSEUP_CLASS;
5347 classes[3] = X86_64_SSEUP_CLASS;
5355 classes[0] = X86_64_SSE_CLASS;
5356 classes[1] = X86_64_SSEUP_CLASS;
5363 classes[0] = X86_64_SSE_CLASS;
5369 gcc_assert (VECTOR_MODE_P (mode));
5374 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5376 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5377 classes[0] = X86_64_INTEGERSI_CLASS;
5379 classes[0] = X86_64_INTEGER_CLASS;
5380 classes[1] = X86_64_INTEGER_CLASS;
5381 return 1 + (bytes > 8);
5385 /* Examine the argument and return set number of register required in each
5386 class. Return 0 iff parameter should be passed in memory. */
/* NOTE(review): elided dump -- interior lines (tallies for each class,
   final returns) are missing; comments describe only the visible code.
   Counts integer/SSE registers needed for MODE/TYPE via classify_argument;
   a 0 result presumably means "pass in memory" -- confirm in full file.  */
5388 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5389 int *int_nregs, int *sse_nregs)
5391 enum x86_64_reg_class regclass[MAX_CLASSES];
5392 int n = classify_argument (mode, type, regclass, 0);
/* Walk the class list backwards, dispatching on each word's class.  */
5398 for (n--; n >= 0; n--)
5399 switch (regclass[n])
5401 case X86_64_INTEGER_CLASS:
5402 case X86_64_INTEGERSI_CLASS:
5405 case X86_64_SSE_CLASS:
5406 case X86_64_SSESF_CLASS:
5407 case X86_64_SSEDF_CLASS:
5410 case X86_64_NO_CLASS:
5411 case X86_64_SSEUP_CLASS:
5413 case X86_64_X87_CLASS:
5414 case X86_64_X87UP_CLASS:
/* x87/complex-x87 classes are only valid for return values, so as an
   argument class they force memory (return 0) here.  */
5418 case X86_64_COMPLEX_X87_CLASS:
5419 return in_return ? 2 : 0;
5420 case X86_64_MEMORY_CLASS:
5426 /* Construct container for the argument used by GCC interface. See
5427 FUNCTION_ARG for the detailed description. */
/* NOTE(review): elided dump -- several statements (declarations of i, n,
   nexps, ret; early memory-return paths; SSE tmpmode selection) are not
   visible here.  Builds the RTL describing where an argument/return value
   of MODE/TYPE lives: a single REG for simple cases, or a PARALLEL of
   EXPR_LISTs for multi-register aggregates.  Returns NULL (per the visible
   comment in function_value_64) when the value goes in memory or is empty.  */
5430 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5431 const_tree type, int in_return, int nintregs, int nsseregs,
5432 const int *intreg, int sse_regno)
5434 /* The following variables hold the static issued_error state. */
5435 static bool issued_sse_arg_error;
5436 static bool issued_sse_ret_error;
5437 static bool issued_x87_ret_error;
5439 enum machine_mode tmpmode;
5441 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5442 enum x86_64_reg_class regclass[MAX_CLASSES];
5446 int needed_sseregs, needed_intregs;
5447 rtx exp[MAX_CLASSES];
5450 n = classify_argument (mode, type, regclass, 0);
5453 if (!examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of the required kind: pass in memory.  */
5456 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5459 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5460 some less clueful developer tries to use floating-point anyway. */
5461 if (needed_sseregs && !TARGET_SSE)
5465 if (!issued_sse_ret_error)
5467 error ("SSE register return with SSE disabled");
5468 issued_sse_ret_error = true;
5471 else if (!issued_sse_arg_error)
5473 error ("SSE register argument with SSE disabled");
5474 issued_sse_arg_error = true;
5479 /* Likewise, error if the ABI requires us to return values in the
5480 x87 registers and the user specified -mno-80387. */
5481 if (!TARGET_80387 && in_return)
5482 for (i = 0; i < n; i++)
5483 if (regclass[i] == X86_64_X87_CLASS
5484 || regclass[i] == X86_64_X87UP_CLASS
5485 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5487 if (!issued_x87_ret_error)
5489 error ("x87 register return with x87 disabled");
5490 issued_x87_ret_error = true;
5495 /* First construct simple cases. Avoid SCmode, since we want to use
5496 single register to pass this type. */
5497 if (n == 1 && mode != SCmode)
5498 switch (regclass[0])
5500 case X86_64_INTEGER_CLASS:
5501 case X86_64_INTEGERSI_CLASS:
5502 return gen_rtx_REG (mode, intreg[0]);
5503 case X86_64_SSE_CLASS:
5504 case X86_64_SSESF_CLASS:
5505 case X86_64_SSEDF_CLASS:
5506 if (mode != BLKmode)
5507 return gen_reg_or_parallel (mode, orig_mode,
5508 SSE_REGNO (sse_regno));
5510 case X86_64_X87_CLASS:
5511 case X86_64_COMPLEX_X87_CLASS:
5512 return gen_rtx_REG (mode, FIRST_STACK_REG);
5513 case X86_64_NO_CLASS:
5514 /* Zero sized array, struct or class. */
/* Two-word SSE value (e.g. __m128/TFmode) lives in a single XMM reg.  */
5519 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5520 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5521 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5523 && regclass[0] == X86_64_SSE_CLASS
5524 && regclass[1] == X86_64_SSEUP_CLASS
5525 && regclass[2] == X86_64_SSEUP_CLASS
5526 && regclass[3] == X86_64_SSEUP_CLASS
5528 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5531 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5532 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* Two consecutive integer registers holding a 16-byte scalar.  */
5533 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5534 && regclass[1] == X86_64_INTEGER_CLASS
5535 && (mode == CDImode || mode == TImode || mode == TFmode)
5536 && intreg[0] + 1 == intreg[1])
5537 return gen_rtx_REG (mode, intreg[0]);
5539 /* Otherwise figure out the entries of the PARALLEL. */
5540 for (i = 0; i < n; i++)
5544 switch (regclass[i])
5546 case X86_64_NO_CLASS:
5548 case X86_64_INTEGER_CLASS:
5549 case X86_64_INTEGERSI_CLASS:
5550 /* Merge TImodes on aligned occasions here too. */
5551 if (i * 8 + 8 > bytes)
5552 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5553 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5557 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5558 if (tmpmode == BLKmode)
5560 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5561 gen_rtx_REG (tmpmode, *intreg),
5565 case X86_64_SSESF_CLASS:
5566 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5567 gen_rtx_REG (SFmode,
5568 SSE_REGNO (sse_regno)),
5572 case X86_64_SSEDF_CLASS:
5573 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5574 gen_rtx_REG (DFmode,
5575 SSE_REGNO (sse_regno)),
5579 case X86_64_SSE_CLASS:
5587 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5597 && regclass[1] == X86_64_SSEUP_CLASS
5598 && regclass[2] == X86_64_SSEUP_CLASS
5599 && regclass[3] == X86_64_SSEUP_CLASS);
5606 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5607 gen_rtx_REG (tmpmode,
5608 SSE_REGNO (sse_regno)),
5617 /* Empty aligned struct, union or class. */
/* Assemble the collected EXPR_LIST entries into the result PARALLEL.  */
5621 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5622 for (i = 0; i < nexps; i++)
5623 XVECEXP (ret, 0, i) = exp [i];
5627 /* Update the data in CUM to advance over an argument of mode MODE
5628 and data type TYPE. (TYPE is null for libcalls where that information
5629 may not be available.) */
/* NOTE(review): elided dump -- the switch over MODE and the resets when a
   register file is exhausted are partly missing.  Advances CUM past one
   32-bit-ABI argument: words go to the general-register bookkeeping, SSE
   vectors to sse_*, MMX vectors to mmx_* (each only for non-aggregates).  */
5632 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5633 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5649 cum->words += words;
5650 cum->nregs -= words;
5651 cum->regno += words;
5653 if (cum->nregs <= 0)
5661 /* OImode shouldn't be used directly. */
5665 if (cum->float_in_sse < 2)
5668 if (cum->float_in_sse < 1)
/* SSE-register candidates: only scalar/vector values, never aggregates.  */
5685 if (!type || !AGGREGATE_TYPE_P (type))
5687 cum->sse_words += words;
5688 cum->sse_nregs -= 1;
5689 cum->sse_regno += 1;
5690 if (cum->sse_nregs <= 0)
5703 if (!type || !AGGREGATE_TYPE_P (type))
5705 cum->mmx_words += words;
5706 cum->mmx_nregs -= 1;
5707 cum->mmx_regno += 1;
5708 if (cum->mmx_nregs <= 0)
/* Advance CUM past one SysV x86-64 argument.  If the value fits in the
   remaining integer + SSE registers, consume them; otherwise it goes on
   the stack and only cum->words advances.  */
5719 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5720 tree type, HOST_WIDE_INT words, int named)
5722 int int_nregs, sse_nregs;
5724 /* Unnamed 256bit vector mode parameters are passed on stack. */
5725 if (!named && VALID_AVX256_REG_MODE (mode))
5728 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5729 cum->words += words;
5730 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5732 cum->nregs -= int_nregs;
5733 cum->sse_nregs -= sse_nregs;
5734 cum->regno += int_nregs;
5735 cum->sse_regno += sse_nregs;
/* NOTE(review): elided dump -- an else branch likely precedes this final
   stack-advance; confirm against full file.  */
5738 cum->words += words;
/* Advance CUM past one Win64-ABI argument.  Win64 passes everything in one
   64-bit slot; larger values were already forced indirect (by-reference).
   NOTE(review): elided dump -- register bookkeeping lines are missing.  */
5742 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5743 HOST_WIDE_INT words)
5745 /* Otherwise, this should be passed indirect. */
5746 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5748 cum->words += words;
/* Top-level FUNCTION_ARG_ADVANCE: compute size in bytes/words, then
   dispatch to the Win64, SysV-64 or 32-bit helper based on target/ABI.  */
5757 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5758 tree type, int named)
5760 HOST_WIDE_INT bytes, words;
5762 if (mode == BLKmode)
5763 bytes = int_size_in_bytes (type);
5765 bytes = GET_MODE_SIZE (mode);
5766 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* NOTE(review): a guard (likely "if (type)") is elided before this line.  */
5769 mode = type_natural_mode (type, NULL);
5771 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5772 function_arg_advance_ms_64 (cum, bytes, words);
5773 else if (TARGET_64BIT)
5774 function_arg_advance_64 (cum, mode, type, words, named);
5776 function_arg_advance_32 (cum, mode, type, bytes, words);
5779 /* Define where to put the arguments to a function.
5780 Value is zero to push the argument on the stack,
5781 or a hard register in which to store the argument.
5783 MODE is the argument's machine mode.
5784 TYPE is the data type of the argument (as a tree).
5785 This is null for libcalls where that information may
5787 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5788 the preceding args and about the function being called.
5789 NAMED is nonzero if this argument is a named parameter
5790 (otherwise it is an extra parameter matching an ellipsis). */
/* Pick the register (or NULL => stack) for one 32-bit-ABI argument.
   Handles regparm/fastcall integer registers, SSE registers for vectors
   and (with float_in_sse) scalar floats, and MMX registers for 8-byte
   vectors.  NOTE(review): elided dump -- the switch skeleton and several
   fall-through/return paths are missing from this view.  */
5793 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5794 enum machine_mode orig_mode, tree type,
5795 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* One-shot warning flags shared across all calls.  */
5797 static bool warnedsse, warnedmmx;
5799 /* Avoid the AL settings for the Unix64 ABI. */
5800 if (mode == VOIDmode)
5816 if (words <= cum->nregs)
5818 int regno = cum->regno;
5820 /* Fastcall allocates the first two DWORD (SImode) or
5821 smaller arguments to ECX and EDX if it isn't an
5827 || (type && AGGREGATE_TYPE_P (type)))
5830 /* ECX not EAX is the first allocated register. */
5831 if (regno == AX_REG)
5834 return gen_rtx_REG (mode, regno);
5839 if (cum->float_in_sse < 2)
5842 if (cum->float_in_sse < 1)
5846 /* In 32bit, we pass TImode in xmm registers. */
5853 if (!type || !AGGREGATE_TYPE_P (type))
5855 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5858 warning (0, "SSE vector argument without SSE enabled "
5862 return gen_reg_or_parallel (mode, orig_mode,
5863 cum->sse_regno + FIRST_SSE_REG)
5868 /* OImode shouldn't be used directly. */
5877 if (!type || !AGGREGATE_TYPE_P (type))
5880 return gen_reg_or_parallel (mode, orig_mode,
5881 cum->sse_regno + FIRST_SSE_REG);
5890 if (!type || !AGGREGATE_TYPE_P (type))
5892 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5895 warning (0, "MMX vector argument without MMX enabled "
5899 return gen_reg_or_parallel (mode, orig_mode,
5900 cum->mmx_regno + FIRST_MMX_REG);
/* Pick the location for one SysV x86-64 argument by delegating to
   construct_container.  A VOIDmode "argument" is the hidden AL value
   telling varargs callees how many SSE registers were used.  */
5909 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5910 enum machine_mode orig_mode, tree type, int named)
5912 /* Handle a hidden AL argument containing number of registers
5913 for varargs x86-64 functions. */
5914 if (mode == VOIDmode)
5915 return GEN_INT (cum->maybe_vaarg
5916 ? (cum->sse_nregs < 0
5917 ? (cum->call_abi == ix86_abi
5919 : (ix86_abi != SYSV_ABI
5920 ? X86_64_SSE_REGPARM_MAX
5921 : X86_64_MS_SSE_REGPARM_MAX))
5936 /* Unnamed 256bit vector mode parameters are passed on stack. */
/* NOTE(review): elided dump -- the AVX named-check and the remaining
   construct_container arguments are missing here.  */
5942 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5944 &x86_64_int_parameter_registers [cum->regno],
/* Pick the location for one Win64-ABI argument.  Each argument consumes
   one slot; floats go in SSE regs (mirrored into the integer reg for
   unnamed/varargs arguments), small aggregates are retyped to SImode or
   DImode.  NOTE(review): elided dump -- declarations of regno/t1/t2 and
   the unprototyped-call check are missing from this view.  */
5949 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5950 enum machine_mode orig_mode, int named,
5951 HOST_WIDE_INT bytes)
5955 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5956 We use value of -2 to specify that current function call is MSABI. */
5957 if (mode == VOIDmode)
5958 return GEN_INT (-2);
5960 /* If we've run out of registers, it goes on the stack. */
5961 if (cum->nregs == 0)
5964 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5966 /* Only floating point modes are passed in anything but integer regs. */
5967 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5970 regno = cum->regno + FIRST_SSE_REG;
5975 /* Unnamed floating parameters are passed in both the
5976 SSE and integer registers. */
5977 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5978 t2 = gen_rtx_REG (mode, regno);
5979 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5980 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5981 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5984 /* Handle aggregated types passed in register. */
5985 if (orig_mode == BLKmode)
5987 if (bytes > 0 && bytes <= 8)
5988 mode = (bytes > 4 ? DImode : SImode);
5989 if (mode == BLKmode)
5993 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Top-level FUNCTION_ARG: compute the argument size, canonicalize vector
   types to a vector mode, then dispatch to the Win64, SysV-64 or 32-bit
   helper.  Returns a REG/PARALLEL, or NULL to pass on the stack.  */
5997 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5998 tree type, int named)
6000 enum machine_mode mode = omode;
6001 HOST_WIDE_INT bytes, words;
6003 if (mode == BLKmode)
6004 bytes = int_size_in_bytes (type);
6006 bytes = GET_MODE_SIZE (mode);
6007 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6009 /* To simplify the code below, represent vector types with a vector mode
6010 even if MMX/SSE are not active. */
6011 if (type && TREE_CODE (type) == VECTOR_TYPE)
6012 mode = type_natural_mode (type, cum);
6014 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6015 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6016 else if (TARGET_64BIT)
6017 return function_arg_64 (cum, mode, omode, type, named);
6019 return function_arg_32 (cum, mode, omode, type, bytes, words);
6022 /* A C expression that indicates when an argument must be passed by
6023 reference. If nonzero for an argument, a copy of that argument is
6024 made in memory and a pointer to the argument is passed instead of
6025 the argument itself. The pointer is passed in whatever way is
6026 appropriate for passing a pointer to that type. */
/* TARGET_PASS_BY_REFERENCE hook: true when the argument must be copied to
   memory and a pointer passed instead.  Win64: arrays, aggregates whose
   size is not 1/2/4/8 bytes, and __m128.  SysV-64: variable-sized types.
   NOTE(review): elided dump -- the switch body and return statements are
   partly missing from this view.  */
6029 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6030 enum machine_mode mode ATTRIBUTE_UNUSED,
6031 const_tree type, bool named ATTRIBUTE_UNUSED)
6033 /* See Windows x64 Software Convention. */
6034 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6036 int msize = (int) GET_MODE_SIZE (mode);
6039 /* Arrays are passed by reference. */
6040 if (TREE_CODE (type) == ARRAY_TYPE)
6043 if (AGGREGATE_TYPE_P (type))
6045 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6046 are passed by reference. */
6047 msize = int_size_in_bytes (type);
6051 /* __m128 is passed by reference. */
6053 case 1: case 2: case 4: case 8:
/* int_size_in_bytes == -1 means variable-sized type: pass indirectly.  */
6059 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6065 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* Return true when TYPE (or any field/element of it, recursively) is a
   value that wants 128-bit alignment for 32-bit argument passing.
   NOTE(review): elided dump -- some conditions and the case labels before
   QUAL_UNION_TYPE are missing from this view.  */
6068 contains_aligned_value_p (tree type)
6070 enum machine_mode mode = TYPE_MODE (type);
6071 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6075 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6077 if (TYPE_ALIGN (type) < 128)
6080 if (AGGREGATE_TYPE_P (type))
6082 /* Walk the aggregates recursively. */
6083 switch (TREE_CODE (type))
6087 case QUAL_UNION_TYPE:
6091 /* Walk all the structure fields. */
6092 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6094 if (TREE_CODE (field) == FIELD_DECL
6095 && contains_aligned_value_p (TREE_TYPE (field)))
6102 /* Just for use if some languages passes arrays by value. */
6103 if (contains_aligned_value_p (TREE_TYPE (type)))
6114 /* Gives the alignment boundary, in bits, of an argument with the
6115 specified mode and type. */
/* Return the stack alignment (in bits) required for an argument of
   MODE/TYPE.  Starts from the type or mode alignment, clamps to at least
   PARM_BOUNDARY, and on 32-bit targets demotes everything except SSE
   modes and 128-bit-aligned aggregates back to PARM_BOUNDARY.  */
6118 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6123 /* Since canonical type is used for call, we convert it to
6124 canonical type if needed. */
6125 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6126 type = TYPE_CANONICAL (type);
6127 align = TYPE_ALIGN (type);
6130 align = GET_MODE_ALIGNMENT (mode);
6131 if (align < PARM_BOUNDARY)
6132 align = PARM_BOUNDARY;
6133 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6134 natural boundaries. */
6135 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6137 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6138 make an exception for SSE modes since these require 128bit
6141 The handling here differs from field_alignment. ICC aligns MMX
6142 arguments to 4 byte boundaries, while structure fields are aligned
6143 to 8 byte boundaries. */
6146 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6147 align = PARM_BOUNDARY;
6151 if (!contains_aligned_value_p (type))
6152 align = PARM_BOUNDARY;
/* Never exceed the target's maximum supported alignment.  */
6155 if (align > BIGGEST_ALIGNMENT)
6156 align = BIGGEST_ALIGNMENT;
6160 /* Return true if N is a possible register number of function value. */
/* Return true if REGNO can hold a function return value.
   NOTE(review): elided dump -- the switch head and the AX/other cases are
   missing; only the FIRST_FLOAT_REG case and a later branch are visible.  */
6163 ix86_function_value_regno_p (int regno)
6170 case FIRST_FLOAT_REG:
6171 /* TODO: The function should depend on current function ABI but
6172 builtins.c would need updating then. Therefore we use the
6174 if (TARGET_64BIT && ix86_abi == MS_ABI)
6176 return TARGET_FLOAT_RETURNS_IN_80387;
6182 if (TARGET_MACHO || TARGET_64BIT)
6190 /* Define how to find the value returned by a function.
6191 VALTYPE is the data type of the value (as a tree).
6192 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6193 otherwise, FUNC is 0. */
/* Choose the return register for a 32-bit-ABI value: MM0 for 8-byte
   vectors, XMM0 for 16/32-byte vectors and TImode, st(0) for x87 floats,
   otherwise %eax -- overridden to %xmm0 for SF/DF when sseregparm/SSE
   math applies.  NOTE(review): elided dump -- the regno declaration and
   the plain "%eax" assignment line are missing from this view.  */
6196 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6197 const_tree fntype, const_tree fn)
6201 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6202 we normally prevent this case when mmx is not available. However
6203 some ABIs may require the result to be returned like DImode. */
6204 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6205 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6207 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6208 we prevent this case when sse is not available. However some ABIs
6209 may require the result to be returned like integer TImode. */
6210 else if (mode == TImode
6211 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6212 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6214 /* 32-byte vector modes in %ymm0. */
6215 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6216 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6218 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6219 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6220 regno = FIRST_FLOAT_REG;
6222 /* Most things go in %eax. */
6225 /* Override FP return register with %xmm0 for local functions when
6226 SSE math is enabled or for functions with sseregparm attribute. */
6227 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6229 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6230 if ((sse_level >= 1 && mode == SFmode)
6231 || (sse_level == 2 && mode == DFmode))
6232 regno = FIRST_SSE_REG;
6235 /* OImode shouldn't be used directly. */
6236 gcc_assert (mode != OImode);
6238 return gen_rtx_REG (orig_mode, regno);
/* Choose the return location for a SysV x86-64 value via
   construct_container; libcalls (no type node) get hard-wired registers.
   NOTE(review): elided dump -- the valtype parameter, ret declaration and
   the libcall mode dispatch are partly missing from this view.  */
6242 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6247 /* Handle libcalls, which don't provide a type node. */
6248 if (valtype == NULL)
6260 return gen_rtx_REG (mode, FIRST_SSE_REG);
6263 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6267 return gen_rtx_REG (mode, AX_REG);
6271 ret = construct_container (mode, orig_mode, valtype, 1,
6272 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6273 x86_64_int_return_registers, 0);
6275 /* For zero sized structures, construct_container returns NULL, but we
6276 need to keep rest of compiler happy by returning meaningful value. */
6278 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Choose the return register under the Win64 ABI: default %rax; 16-byte
   non-complex scalar/vector values and SF/DF floats come back in %xmm0.
   NOTE(review): elided dump -- the size-switch case labels are missing.  */
6284 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6286 unsigned int regno = AX_REG;
6290 switch (GET_MODE_SIZE (mode))
6293 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6294 && !COMPLEX_MODE_P (mode))
6295 regno = FIRST_SSE_REG;
6299 if (mode == SFmode || mode == DFmode)
6300 regno = FIRST_SSE_REG;
6306 return gen_rtx_REG (orig_mode, regno);
/* Shared worker for function/libcall return values: resolve the callee's
   type node (FNTYPE_OR_DECL may be a decl, a type, or NULL for libcalls)
   and dispatch on target/ABI.  */
6310 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6311 enum machine_mode orig_mode, enum machine_mode mode)
6313 const_tree fn, fntype;
6316 if (fntype_or_decl && DECL_P (fntype_or_decl))
6317 fn = fntype_or_decl;
6318 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6320 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6321 return function_value_ms_64 (orig_mode, mode);
6322 else if (TARGET_64BIT)
6323 return function_value_64 (orig_mode, mode, valtype);
6325 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: normalize VALTYPE's mode (vectors get their
   natural mode) and delegate to ix86_function_value_1.  */
6329 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6330 bool outgoing ATTRIBUTE_UNUSED)
6332 enum machine_mode mode, orig_mode;
6334 orig_mode = TYPE_MODE (valtype);
6335 mode = type_natural_mode (valtype, NULL);
6336 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* LIBCALL_VALUE: no type node available, so pass NULLs and use MODE as-is.  */
6340 ix86_libcall_value (enum machine_mode mode)
6342 return ix86_function_value_1 (NULL, NULL, mode, mode);
6345 /* Return true iff type is returned in memory. */
/* 32-bit ABI: nonzero when TYPE/MODE must be returned in memory.  Small
   MS-style aggregates (<= 8 bytes) and vectors with an available register
   file (MMX/SSE/AVX) are returned in registers.  NOTE(review): elided
   dump -- several size checks between the visible lines are missing.  */
6347 static int ATTRIBUTE_UNUSED
6348 return_in_memory_32 (const_tree type, enum machine_mode mode)
6352 if (mode == BLKmode)
6355 size = int_size_in_bytes (type);
6357 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6360 if (VECTOR_MODE_P (mode) || mode == TImode)
6362 /* User-created vectors small enough to fit in EAX. */
6366 /* MMX/3dNow values are returned in MM0,
6367 except when it doesn't exits. */
6369 return (TARGET_MMX ? 0 : 1);
6371 /* SSE values are returned in XMM0, except when it doesn't exist. */
6373 return (TARGET_SSE ? 0 : 1);
6375 /* AVX values are returned in YMM0, except when it doesn't exist. */
6377 return TARGET_AVX ? 0 : 1;
6386 /* OImode shouldn't be used directly. */
6387 gcc_assert (mode != OImode);
/* SysV x86-64: in memory iff examine_argument classifies the value as
   unpassable in registers (returns 0).  */
6392 static int ATTRIBUTE_UNUSED
6393 return_in_memory_64 (const_tree type, enum machine_mode mode)
6395 int needed_intregs, needed_sseregs;
6396 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Win64: values are returned in registers only when 16-byte non-complex
   scalar/vector (xmm0) or exactly 1/2/4/8 bytes; everything else goes
   through memory.  */
6399 static int ATTRIBUTE_UNUSED
6400 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6402 HOST_WIDE_INT size = int_size_in_bytes (type);
6404 /* __m128 is returned in xmm0. */
6405 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6406 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6409 /* Otherwise, the size must be exactly in [1248]. */
6410 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: subtarget override if defined, otherwise
   dispatch to the Win64 / SysV-64 / 32-bit helper on the natural mode.  */
6414 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6416 #ifdef SUBTARGET_RETURN_IN_MEMORY
6417 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6419 const enum machine_mode mode = type_natural_mode (type, NULL);
6423 if (ix86_function_type_abi (fntype) == MS_ABI)
6424 return return_in_memory_ms_64 (type, mode);
6426 return return_in_memory_64 (type, mode);
6429 return return_in_memory_32 (type, mode);
6433 /* Return false iff TYPE is returned in memory. This version is used
6434 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6435 but differs notably in that when MMX is available, 8-byte vectors
6436 are returned in memory, rather than in MMX registers. */
/* Solaris 10 variant of return-in-memory (see comment above): unlike the
   generic version, 8-byte vectors go to memory when MMX *is* available.
   NOTE(review): elided dump -- the tail of this function (return values
   for the TImode/XFmode branches and the default) is not visible here.  */
6439 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6442 enum machine_mode mode = type_natural_mode (type, NULL);
6445 return return_in_memory_64 (type, mode);
6447 if (mode == BLKmode)
6450 size = int_size_in_bytes (type);
6452 if (VECTOR_MODE_P (mode))
6454 /* Return in memory only if MMX registers *are* available. This
6455 seems backwards, but it is consistent with the existing
6462 else if (mode == TImode)
6464 else if (mode == XFmode)
6470 /* When returning SSE vector types, we have a choice of either
6471 (1) being abi incompatible with a -march switch, or
6472 (2) generating an error.
6473 Given no good solution, I think the safest thing is one warning.
6474 The user won't be able to use -Werror, but....
6476 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6477 called in response to actually generating a caller or callee that
6478 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6479 via aggregate_value_p for general type probing from tree-ssa. */
/* STRUCT_VALUE_RTX hook, also used to emit one-time warnings (see comment
   above) when a function returns an SSE/MMX vector type but the matching
   instruction set is disabled.  NOTE(review): elided dump -- the warning
   bodies and the final return are missing from this view.  */
6482 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6484 static bool warnedsse, warnedmmx;
6486 if (!TARGET_64BIT && type)
6488 /* Look at the return type of the function, not the function type. */
6489 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6491 if (!TARGET_SSE && !warnedsse)
6494 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6497 warning (0, "SSE vector return without SSE enabled "
6502 if (!TARGET_MMX && !warnedmmx)
6504 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6507 warning (0, "MMX vector return without MMX enabled "
6517 /* Create the va_list data type. */
6519 /* Returns the calling convention specific va_list date type.
6520 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
/* Build the va_list type for ABI: plain char* for i386 and Win64;
   for SysV x86-64, a one-element array of the four-field __va_list_tag
   record (gp_offset, fp_offset, overflow_arg_area, reg_save_area).  */
6523 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6525 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6527 /* For i386 we use plain pointer to argument area. */
6528 if (!TARGET_64BIT || abi == MS_ABI)
6529 return build_pointer_type (char_type_node);
6531 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6532 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6534 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6535 unsigned_type_node);
6536 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6537 unsigned_type_node);
6538 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6540 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so the va_list-usage pass can find them.  */
6543 va_list_gpr_counter_field = f_gpr;
6544 va_list_fpr_counter_field = f_fpr;
6546 DECL_FIELD_CONTEXT (f_gpr) = record;
6547 DECL_FIELD_CONTEXT (f_fpr) = record;
6548 DECL_FIELD_CONTEXT (f_ovf) = record;
6549 DECL_FIELD_CONTEXT (f_sav) = record;
6551 TREE_CHAIN (record) = type_decl;
6552 TYPE_NAME (record) = type_decl;
6553 TYPE_FIELDS (record) = f_gpr;
6554 TREE_CHAIN (f_gpr) = f_fpr;
6555 TREE_CHAIN (f_fpr) = f_ovf;
6556 TREE_CHAIN (f_ovf) = f_sav;
6558 layout_type (record);
6560 /* The correct type is an array type of one element. */
6561 return build_array_type (record, build_index_type (size_zero_node));
6564 /* Setup the builtin va_list data type and for 64-bit the additional
6565 calling convention specific va_list data types. */
/* Build the default va_list type for the current ABI and also populate
   sysv_va_list_type_node / ms_va_list_type_node for cross-ABI calls.
   NOTE(review): elided dump -- the 64-bit guard and some else branches
   around the duplicated copy blocks are missing from this view.  */
6568 ix86_build_builtin_va_list (void)
6570 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6572 /* Initialize abi specific va_list builtin types. */
6576 if (ix86_abi == MS_ABI)
6578 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
/* Non-record va_lists get a variant copy so each ABI node is distinct.  */
6579 if (TREE_CODE (t) != RECORD_TYPE)
6580 t = build_variant_type_copy (t);
6581 sysv_va_list_type_node = t;
6586 if (TREE_CODE (t) != RECORD_TYPE)
6587 t = build_variant_type_copy (t);
6588 sysv_va_list_type_node = t;
6590 if (ix86_abi != MS_ABI)
6592 t = ix86_build_builtin_va_list_abi (MS_ABI);
6593 if (TREE_CODE (t) != RECORD_TYPE)
6594 t = build_variant_type_copy (t);
6595 ms_va_list_type_node = t;
6600 if (TREE_CODE (t) != RECORD_TYPE)
6601 t = build_variant_type_copy (t);
6602 ms_va_list_type_node = t;
6609 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* Worker for TARGET_SETUP_INCOMING_VARARGS on SysV x86-64: spill the
   unused named-parameter GPRs into the register save area, then emit the
   sse_prologue_save computed-jump sequence that saves only the SSE regs
   actually used (count arrives in AL).  NOTE(review): elided dump --
   declarations (save_area, mem, label, nsse_reg, tmp_reg, set, i) and a
   few statements between the visible lines are missing.  */
6612 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6621 int regparm = ix86_regparm;
6623 if (cum->call_abi != ix86_abi)
6624 regparm = (ix86_abi != SYSV_ABI
6625 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6627 /* GPR size of varargs save area. */
6628 if (cfun->va_list_gpr_size)
6629 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6631 ix86_varargs_gpr_size = 0;
6633 /* FPR size of varargs save area. We don't need it if we don't pass
6634 anything in SSE registers. */
6635 if (cum->sse_nregs && cfun->va_list_fpr_size)
6636 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6638 ix86_varargs_fpr_size = 0;
/* Nothing to save at all: bail out early.  */
6640 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6643 save_area = frame_pointer_rtx;
6644 set = get_varargs_alias_set ();
6646 for (i = cum->regno;
6648 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6651 mem = gen_rtx_MEM (Pmode,
6652 plus_constant (save_area, i * UNITS_PER_WORD));
6653 MEM_NOTRAP_P (mem) = 1;
6654 set_mem_alias_set (mem, set);
6655 emit_move_insn (mem, gen_rtx_REG (Pmode,
6656 x86_64_int_parameter_registers[i]));
6659 if (ix86_varargs_fpr_size)
6661 /* Now emit code to save SSE registers. The AX parameter contains number
6662 of SSE parameter registers used to call this function. We use
6663 sse_prologue_save insn template that produces computed jump across
6664 SSE saves. We need some preparation work to get this working. */
6666 label = gen_label_rtx ();
6667 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6669 /* Compute address to jump to :
6670 label - eax*4 + nnamed_sse_arguments*4 Or
6671 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6672 tmp_reg = gen_reg_rtx (Pmode);
6673 nsse_reg = gen_reg_rtx (Pmode);
6674 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6675 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6676 gen_rtx_MULT (Pmode, nsse_reg,
6679 /* vmovaps is one byte longer than movaps. */
6681 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6682 gen_rtx_PLUS (Pmode, tmp_reg,
6688 gen_rtx_CONST (DImode,
6689 gen_rtx_PLUS (DImode,
6691 GEN_INT (cum->sse_regno
6692 * (TARGET_AVX ? 5 : 4)))));
6694 emit_move_insn (nsse_reg, label_ref);
6695 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6697 /* Compute address of memory block we save into. We always use pointer
6698 pointing 127 bytes after first byte to store - this is needed to keep
6699 instruction size limited by 4 bytes (5 bytes for AVX) with one
6700 byte displacement. */
6701 tmp_reg = gen_reg_rtx (Pmode);
6702 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6703 plus_constant (save_area,
6704 ix86_varargs_gpr_size + 127)));
6705 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6706 MEM_NOTRAP_P (mem) = 1;
6707 set_mem_alias_set (mem, set);
6708 set_mem_align (mem, BITS_PER_WORD);
6710 /* And finally do the dirty job! */
6711 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6712 GEN_INT (cum->sse_regno), label));
/* Win64 varargs setup: spill each remaining parameter register into its
   caller-allocated home slot above the return address.  NOTE(review):
   elided dump -- declarations of i/mem/reg are missing from this view.  */
6717 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6719 alias_set_type set = get_varargs_alias_set ();
6722 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
6726 mem = gen_rtx_MEM (Pmode,
6727 plus_constant (virtual_incoming_args_rtx,
6728 i * UNITS_PER_WORD));
6729 MEM_NOTRAP_P (mem) = 1;
6730 set_mem_alias_set (mem, set);
6732 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6733 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: copy CUM, skip past the last named
   argument for stdarg functions, then dispatch to the per-ABI worker.
   NOTE(review): elided dump -- the 64-bit early-return and the next_cum
   initialization are missing from this view.  */
6738 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6739 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6742 CUMULATIVE_ARGS next_cum;
6745 /* This argument doesn't appear to be used anymore. Which is good,
6746 because the old code here didn't suppress rtl generation. */
6747 gcc_assert (!no_rtl);
6752 fntype = TREE_TYPE (current_function_decl);
6754 /* For varargs, we do not want to skip the dummy va_dcl argument.
6755 For stdargs, we do want to skip the last named argument. */
6757 if (stdarg_p (fntype))
6758 function_arg_advance (&next_cum, mode, type, 1);
6760 if (cum->call_abi == MS_ABI)
6761 setup_incoming_varargs_ms_64 (&next_cum);
6763 setup_incoming_varargs_64 (&next_cum);
/* Checks if TYPE is of kind va_list char *.  A char-pointer va_list needs no
   special 64-bit handling, so callers fall back to the generic expanders.  */
6769 is_va_list_char_pointer (tree type)
6773 /* For 32-bit it is always true.  */
6776 canonic = ix86_canonical_va_list_type (type);
/* The MS-ABI va_list is a plain pointer; also true when the default ABI is
   MS and TYPE canonicalizes to the generic va_list.  */
6777 return (canonic == ms_va_list_type_node
6778 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
/* Implement va_start.  Initialize the four fields of the SysV x86-64
   va_list (gp_offset, fp_offset, overflow_arg_area, reg_save_area) from the
   counts of argument registers already consumed by named arguments.
   NOTE(review): line-sampled listing; braces and a few statements of this
   function are not visible here.  */
6784 ix86_va_start (tree valist, rtx nextarg)
6786 HOST_WIDE_INT words, n_gpr, n_fpr;
6787 tree f_gpr, f_fpr, f_ovf, f_sav;
6788 tree gpr, fpr, ovf, sav, t;
6791 /* Only 64bit target needs something special.  */
6792 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6794 std_expand_builtin_va_start (valist, nextarg);
/* Walk the field chain of the SysV va_list record type.  */
6798 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6799 f_fpr = TREE_CHAIN (f_gpr);
6800 f_ovf = TREE_CHAIN (f_fpr);
6801 f_sav = TREE_CHAIN (f_ovf);
6803 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6804 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6805 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6806 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6807 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6809 /* Count number of gp and fp argument registers used.  */
6810 words = crtl->args.info.words;
6811 n_gpr = crtl->args.info.regno;
6812 n_fpr = crtl->args.info.sse_regno;
/* gp_offset = n_gpr * 8: each GP register slot is 8 bytes.  */
6814 if (cfun->va_list_gpr_size)
6816 type = TREE_TYPE (gpr);
6817 t = build2 (MODIFY_EXPR, type,
6818 gpr, build_int_cst (type, n_gpr * 8));
6819 TREE_SIDE_EFFECTS (t) = 1;
6820 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset starts after all GP slots; each SSE slot is 16 bytes.  */
6823 if (TARGET_SSE && cfun->va_list_fpr_size)
6825 type = TREE_TYPE (fpr);
6826 t = build2 (MODIFY_EXPR, type, fpr,
6827 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6828 TREE_SIDE_EFFECTS (t) = 1;
6829 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6832 /* Find the overflow area.  */
6833 type = TREE_TYPE (ovf);
6834 t = make_tree (type, crtl->args.internal_arg_pointer);
6836 t = build2 (POINTER_PLUS_EXPR, type, t,
6837 size_int (words * UNITS_PER_WORD));
6838 t = build2 (MODIFY_EXPR, type, ovf, t);
6839 TREE_SIDE_EFFECTS (t) = 1;
6840 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6842 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6844 /* Find the register save area.
6845 Prologue of the function save it right above stack frame.  */
6846 type = TREE_TYPE (sav);
6847 t = make_tree (type, frame_pointer_rtx);
/* If no GP registers were saved, the save area holds only SSE slots and
   sits below where the GP slots would have been.  */
6848 if (!ix86_varargs_gpr_size)
6849 t = build2 (POINTER_PLUS_EXPR, type, t,
6850 size_int (-8 * X86_64_REGPARM_MAX));
6851 t = build2 (MODIFY_EXPR, type, sav, t);
6852 TREE_SIDE_EFFECTS (t) = 1;
6853 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* Implement va_arg.  Gimplify fetching the next argument either from the
   register save area (when enough GP/SSE register slots remain) or from the
   stack overflow area.  NOTE(review): line-sampled listing; braces, some
   declarations and several statements of this function are not visible, so
   comments below describe only the visible code.  */
6860 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6863 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6864 tree f_gpr, f_fpr, f_ovf, f_sav;
6865 tree gpr, fpr, ovf, sav, t;
6867 tree lab_false, lab_over = NULL_TREE;
6872 enum machine_mode nat_mode;
6875 /* Only 64bit target needs something special.  */
6876 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6877 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6879 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6880 f_fpr = TREE_CHAIN (f_gpr);
6881 f_ovf = TREE_CHAIN (f_fpr);
6882 f_sav = TREE_CHAIN (f_ovf);
6884 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6885 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6886 valist = build_va_arg_indirect_ref (valist);
6887 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6888 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6889 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Pass-by-reference arguments are fetched as a pointer to the value.  */
6891 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6893 type = build_pointer_type (type);
6894 size = int_size_in_bytes (type);
6895 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6897 nat_mode = type_natural_mode (type, NULL);
6906 /* Unnamed 256bit vector mode parameters are passed on stack.  */
6907 if (ix86_cfun_abi () == SYSV_ABI)
6914 container = construct_container (nat_mode, TYPE_MODE (type),
6915 type, 0, X86_64_REGPARM_MAX,
6916 X86_64_SSE_REGPARM_MAX, intreg,
6921 /* Pull the value out of the saved registers.  */
6923 addr = create_tmp_var (ptr_type_node, "addr");
6924 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6928 int needed_intregs, needed_sseregs;
6930 tree int_addr, sse_addr;
6932 lab_false = create_artificial_label ();
6933 lab_over = create_artificial_label ();
6935 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary is needed when the value is split across registers and
   cannot be addressed directly in the save area.  */
6937 need_temp = (!REG_P (container)
6938 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6939 || TYPE_ALIGN (type) > 128));
6941 /* In case we are passing structure, verify that it is consecutive block
6942 on the register save area.  If not we need to do moves.  */
6943 if (!need_temp && !REG_P (container))
6945 /* Verify that all registers are strictly consecutive  */
6946 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6950 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6952 rtx slot = XVECEXP (container, 0, i);
6953 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6954 || INTVAL (XEXP (slot, 1)) != i * 16)
6962 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6964 rtx slot = XVECEXP (container, 0, i);
6965 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6966 || INTVAL (XEXP (slot, 1)) != i * 8)
6978 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6979 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6980 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6981 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6984 /* First ensure that we fit completely in registers.  */
6987 t = build_int_cst (TREE_TYPE (gpr),
6988 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6989 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6990 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6991 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6992 gimplify_and_add (t, pre_p);
6996 t = build_int_cst (TREE_TYPE (fpr),
6997 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6998 + X86_64_REGPARM_MAX * 8);
6999 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7000 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7001 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7002 gimplify_and_add (t, pre_p);
7005 /* Compute index to start of area used for integer regs.  */
7008 /* int_addr = gpr + sav; */
7009 t = fold_convert (sizetype, gpr);
7010 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7011 gimplify_assign (int_addr, t, pre_p);
7015 /* sse_addr = fpr + sav; */
7016 t = fold_convert (sizetype, fpr);
7017 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7018 gimplify_assign (sse_addr, t, pre_p);
/* Assemble a scattered value into a stack temporary piece by piece.  */
7023 tree temp = create_tmp_var (type, "va_arg_tmp");
7026 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7027 gimplify_assign (addr, t, pre_p);
7029 for (i = 0; i < XVECLEN (container, 0); i++)
7031 rtx slot = XVECEXP (container, 0, i);
7032 rtx reg = XEXP (slot, 0);
7033 enum machine_mode mode = GET_MODE (reg);
7034 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
7035 tree addr_type = build_pointer_type (piece_type);
7036 tree daddr_type = build_pointer_type_for_mode (piece_type,
7040 tree dest_addr, dest;
7042 if (SSE_REGNO_P (REGNO (reg)))
7044 src_addr = sse_addr;
7045 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7049 src_addr = int_addr;
7050 src_offset = REGNO (reg) * 8;
7052 src_addr = fold_convert (addr_type, src_addr);
7053 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7054 size_int (src_offset));
7055 src = build_va_arg_indirect_ref (src_addr);
7057 dest_addr = fold_convert (daddr_type, addr);
7058 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7059 size_int (INTVAL (XEXP (slot, 1))));
7060 dest = build_va_arg_indirect_ref (dest_addr);
7062 gimplify_assign (dest, src, pre_p);
/* Consume the register slots we used.  */
7068 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7069 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7070 gimplify_assign (gpr, t, pre_p);
7075 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7076 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7077 gimplify_assign (fpr, t, pre_p);
7080 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7082 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7085 /* ... otherwise out of the overflow area.  */
7087 /* When we align parameter on stack for caller, if the parameter
7088 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7089 aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
7090 here with caller.  */
7091 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7092 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7093 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7095 /* Care for on-stack alignment if needed.  */
7096 if (arg_boundary <= 64
7097 || integer_zerop (TYPE_SIZE (type)))
/* Round the overflow pointer up to the argument's alignment.  */
7101 HOST_WIDE_INT align = arg_boundary / 8;
7102 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7103 size_int (align - 1));
7104 t = fold_convert (sizetype, t);
7105 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7107 t = fold_convert (TREE_TYPE (ovf), t);
7109 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7110 gimplify_assign (addr, t, pre_p);
/* Advance the overflow pointer past the consumed words.  */
7112 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7113 size_int (rsize * UNITS_PER_WORD));
7114 gimplify_assign (unshare_expr (ovf), t, pre_p);
7117 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7119 ptrtype = build_pointer_type (type);
7120 addr = fold_convert (ptrtype, addr);
/* For pass-by-reference arguments we fetched a pointer; add an extra
   dereference.  */
7123 addr = build_va_arg_indirect_ref (addr);
7124 return build_va_arg_indirect_ref (addr);
/* Return nonzero if OPNUM's MEM should be matched
   in movabs* patterns.  Looks through a PARALLEL wrapper and any SUBREGs,
   then rejects volatile MEMs unless volatile_ok is set.  */
7131 ix86_check_movabs (rtx insn, int opnum)
7135 set = PATTERN (insn);
7136 if (GET_CODE (set) == PARALLEL)
7137 set = XVECEXP (set, 0, 0);
7138 gcc_assert (GET_CODE (set) == SET);
7139 mem = XEXP (set, opnum);
7140 while (GET_CODE (mem) == SUBREG)
7141 mem = SUBREG_REG (mem);
7142 gcc_assert (MEM_P (mem));
7143 return (volatile_ok || !MEM_VOLATILE_P (mem));
/* Initialize the table of extra 80387 mathematical constants.  Parses each
   decimal string, rounds it to XFmode precision, and sets the init flag so
   this runs at most once (callers check ext_80387_constants_init).  */
7149 init_ext_80387_constants (void)
7151 static const char * cst[5] =
7153 "0.3010299956639811952256464283594894482", /* 0: fldlg2  */
7154 "0.6931471805599453094286904741849753009", /* 1: fldln2  */
7155 "1.4426950408889634073876517827983434472", /* 2: fldl2e  */
7156 "3.3219280948873623478083405569094566090", /* 3: fldl2t  */
7157 "3.1415926535897932385128089594061862044", /* 4: fldpi  */
7161 for (i = 0; i < 5; i++)
7163 real_from_string (&ext_80387_constants_table[i], cst[i]);
7164 /* Ensure each constant is rounded to XFmode precision.  */
7165 real_convert (&ext_80387_constants_table[i],
7166 XFmode, &ext_80387_constants_table[i]);
7169 ext_80387_constants_init = 1;
/* Return true if the constant is something that can be loaded with
   a special instruction.  NOTE(review): line-sampled listing — the return
   statements carrying the opcode indices are not visible here; the indices
   presumably match standard_80387_constant_opcode (verify against full
   source).  */
7176 standard_80387_constant_p (rtx x)
7178 enum machine_mode mode = GET_MODE (x);
7182 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7185 if (x == CONST0_RTX (mode))
7187 if (x == CONST1_RTX (mode))
7190 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7192 /* For XFmode constants, try to find a special 80387 instruction when
7193 optimizing for size or on those CPUs that benefit from them.  */
7195 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
/* Lazily build the extended-constant table before searching it.  */
7199 if (! ext_80387_constants_init)
7200 init_ext_80387_constants ();
7202 for (i = 0; i < 5; i++)
7203 if (real_identical (&r, &ext_80387_constants_table[i]))
7207 /* Load of the constant -0.0 or -1.0 will be split as
7208 fldz;fchs or fld1;fchs sequence.  */
7209 if (real_isnegzero (&r))
7211 if (real_identical (&r, &dconstm1))
/* Return the opcode of the special instruction to be used to load
   the given 80387 constant.  NOTE(review): line-sampled listing — the
   switch cases mapping standard_80387_constant_p results to opcode strings
   are not visible here.  */
7221 standard_80387_constant_opcode (rtx x)
7223 switch (standard_80387_constant_p (x))
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.
   NOTE(review): line-sampled listing — the IDX-to-table-index mapping is
   not visible here.  */
7252 standard_80387_constant_rtx (int idx)
/* Make sure the table has been built before indexing it.  */
7256 if (! ext_80387_constants_init)
7257 init_ext_80387_constants ();
7273 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
/* Return 1 if mode is a valid mode for sse.  NOTE(review): line-sampled
   listing — the body (presumably a switch over SSE vector modes) is not
   visible here.  */
7279 standard_sse_mode_p (enum machine_mode mode)
/* Return 1 if X is all 0s.  For all 1s, return 2 if X is in 128bit
   SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
   modes and AVX is enabled.  Negative values flag the all-ones case when
   the required ISA extension is not enabled.  */
7301 standard_sse_constant_p (rtx x)
7303 enum machine_mode mode = GET_MODE (x);
7305 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7307 if (vector_all_ones_operand (x, mode))
7309 if (standard_sse_mode_p (mode))
7310 return TARGET_SSE2 ? 2 : -2;
7311 else if (VALID_AVX256_REG_MODE (mode))
7312 return TARGET_AVX ? 3 : -3;
/* Return the opcode of the special instruction to be used to load
   the given SSE constant: an xor idiom for all-zeros and a pcmpeqd idiom
   for all-ones, picking AVX (v-prefixed, three-operand) forms when AVX is
   enabled.  NOTE(review): line-sampled listing — case labels and the
   default paths are not visible here.  */
7322 standard_sse_constant_opcode (rtx insn, rtx x)
7324 switch (standard_sse_constant_p (x))
7327 switch (get_attr_mode (insn))
7330 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7332 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7334 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7336 return "vxorps\t%x0, %x0, %x0";
7338 return "vxorpd\t%x0, %x0, %x0";
7340 return "vpxor\t%x0, %x0, %x0";
7346 switch (get_attr_mode (insn))
7351 return "vpcmpeqd\t%0, %0, %0";
7357 return "pcmpeqd\t%0, %0";
/* Returns 1 if OP contains a symbol reference.  Recursively walks the RTL
   expression via its format string, descending into vector ('E') and
   expression ('e') operands.  */
7365 symbolic_reference_mentioned_p (rtx op)
7370 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7373 fmt = GET_RTX_FORMAT (GET_CODE (op));
7374 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7380 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7381 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7385 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
/* Return 1 if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return 0 then.  Return 0 if there is no frame
   marker to de-allocate.  */
7399 ix86_can_use_return_insn_p (void)
7401 struct ix86_frame frame;
7403 if (! reload_completed || frame_pointer_needed)
7406 /* Don't allow more than 32 pop, since that's all we can do
7407 with one instruction.  */
7408 if (crtl->args.pops_args
7409 && crtl->args.size >= 32768)
/* Only a frame with nothing to allocate and no saved registers can be
   torn down by a bare `ret'.  */
7412 ix86_compute_frame_layout (&frame);
7413 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.
   NOTE(review): line-sampled listing — the `return' statements of the
   individual cases are not visible here.  */
7421 ix86_frame_pointer_required (void)
7423 /* If we accessed previous frames, then the generated code expects
7424 to be able to access the saved ebp value in our frame.  */
7425 if (cfun->machine->accesses_prev_frame)
7428 /* Several x86 os'es need a frame pointer for other reasons,
7429 usually pertaining to setjmp.  */
7430 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7433 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7434 the frame pointer by default.  Turn it back on now if we've not
7435 got a leaf function.  */
7436 if (TARGET_OMIT_LEAF_FRAME_POINTER
7437 && (!current_function_is_leaf
7438 || ix86_current_function_calls_tls_descriptor))
/* Record that the current function accesses previous call frames.
   Checked by ix86_frame_pointer_required to force a frame pointer.  */
7450 ix86_setup_frame_addresses (void)
7452 cfun->machine->accesses_prev_frame = 1;
/* USE_HIDDEN_LINKONCE: whether pc-thunks may be emitted as hidden,
   link-once (COMDAT) definitions rather than plain internal labels.  */
7455 #ifndef USE_HIDDEN_LINKONCE
7456 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7457 # define USE_HIDDEN_LINKONCE 1
7459 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of registers for which a pc thunk has been requested; consumed
   by ix86_file_end when emitting the thunks.  */
7463 static int pic_labels_used;
7465 /* Fills in the label name that should be used for a pc thunk for
7466 the given register.  */
7469 get_pc_thunk_name (char name[32], unsigned int regno)
7471 gcc_assert (!TARGET_64BIT);
7473 if (USE_HIDDEN_LINKONCE)
7474 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7476 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
/* This function generates code for -fpic that loads the PIC register with
   the return address of the caller and then returns.  Emits one thunk per
   register flagged in pic_labels_used, choosing Darwin coalesced sections,
   hidden link-once sections, or the plain text section as appropriate.
   NOTE(review): line-sampled listing — several statements (e.g. the Darwin
   #ifdef structure) are not visible here.  */
7484 ix86_file_end (void)
7489 for (regno = 0; regno < 8; ++regno)
7493 if (! ((pic_labels_used >> regno) & 1))
7496 get_pc_thunk_name (name, regno);
/* Darwin: weak, private-extern definition in the coalesced text section.  */
7501 switch_to_section (darwin_sections[text_coal_section]);
7502 fputs ("\t.weak_definition\t", asm_out_file);
7503 assemble_name (asm_out_file, name);
7504 fputs ("\n\t.private_extern\t", asm_out_file);
7505 assemble_name (asm_out_file, name);
7506 fputs ("\n", asm_out_file);
7507 ASM_OUTPUT_LABEL (asm_out_file, name);
7511 if (USE_HIDDEN_LINKONCE)
7515 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7517 TREE_PUBLIC (decl) = 1;
7518 TREE_STATIC (decl) = 1;
7519 DECL_ONE_ONLY (decl) = 1;
7521 (*targetm.asm_out.unique_section) (decl, 0);
7522 switch_to_section (get_named_section (decl, NULL, 0));
7524 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7525 fputs ("\t.hidden\t", asm_out_file);
7526 assemble_name (asm_out_file, name);
7527 fputc ('\n', asm_out_file);
7528 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7532 switch_to_section (text_section);
7533 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address (at the top of the stack) into the
   target register and return.  */
7536 xops[0] = gen_rtx_REG (Pmode, regno);
7537 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7538 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7539 output_asm_insn ("ret", xops);
7542 if (NEED_INDICATE_EXEC_STACK)
7543 file_end_indicate_exec_stack ();
/* Emit code for the SET_GOT patterns: load the GOT base address into DEST.
   Handles VxWorks RTP, the call/pop sequence, and the pc-thunk call used
   with deep branch prediction.  NOTE(review): line-sampled listing — some
   statements and the #ifdef structure (e.g. TARGET_MACHO) are not visible
   here.  */
7549 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7555 if (TARGET_VXWORKS_RTP && flag_pic)
7557 /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
7558 xops[2] = gen_rtx_MEM (Pmode,
7559 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7560 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7562 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7563 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7564 an unadorned address.  */
7565 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7566 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7567 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7571 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7573 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
/* call/pop sequence: the call pushes the next address, which the pop
   retrieves into DEST.  */
7575 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7578 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7580 output_asm_insn ("call\t%a2", xops);
7583 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7584 is what will be referenced by the Mach-O PIC subsystem.  */
7586 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7589 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7590 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7593 output_asm_insn ("pop%z0\t%0", xops);
/* Otherwise call a per-register pc thunk; record that it must be emitted
   at end of file.  */
7598 get_pc_thunk_name (name, REGNO (dest));
7599 pic_labels_used |= 1 << REGNO (dest);
7601 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7602 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7603 output_asm_insn ("call\t%X2", xops);
7604 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7605 is what will be referenced by the Mach-O PIC subsystem.  */
7608 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7610 targetm.asm_out.internal_label (asm_out_file, "L",
7611 CODE_LABEL_NUMBER (label));
/* Finally add the GOT symbol offset to complete the GOT base address.  */
7618 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7619 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7621 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
/* Generate an "push" pattern for input ARG, and track the CFA offset when
   the CFA is still based on the stack pointer.  NOTE(review): line-sampled
   listing — the function signature line and part of the SET body are not
   visible here.  */
7631 if (ix86_cfa_state->reg == stack_pointer_rtx)
7632 ix86_cfa_state->offset += UNITS_PER_WORD;
7634 return gen_rtx_SET (VOIDmode,
7636 gen_rtx_PRE_DEC (Pmode,
7637 stack_pointer_rtx)),
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  Only considered for leaf functions without
   profiling or TLS-descriptor calls; returns INVALID_REGNUM otherwise.  */
7645 ix86_select_alt_pic_regnum (void)
7647 if (current_function_is_leaf && !crtl->profile
7648 && !ix86_current_function_calls_tls_descriptor)
7651 /* Can't use the same register for both PIC and DRAP.  */
7653 drap = REGNO (crtl->drap_reg);
/* Scan eax/ecx/edx (regnos 2..0) for one that is never live.  */
7656 for (i = 2; i >= 0; --i)
7657 if (i != drap && !df_regs_ever_live_p (i))
7661 return INVALID_REGNUM;
/* Return 1 if we need to save REGNO in the prologue.  Handles the PIC
   register, EH return data registers (when MAYBE_EH_RETURN), the DRAP
   register, and finally the generic live/call-used/fixed test.  */
7666 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7668 if (pic_offset_table_rtx
7669 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7670 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7672 || crtl->calls_eh_return
7673 || crtl->uses_const_pool))
/* If an alternate call-clobbered register can hold the PIC base, the
   canonical PIC register need not be saved.  */
7675 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7680 if (crtl->calls_eh_return && maybe_eh_return)
7685 unsigned test = EH_RETURN_DATA_REGNO (i);
7686 if (test == INVALID_REGNUM)
7693 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
7696 return (df_regs_ever_live_p (regno)
7697 && !call_used_regs[regno]
7698 && !fixed_regs[regno]
7699 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
/* Return number of saved general purpose registers.  */
7705 ix86_nsaved_regs (void)
7710 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7711 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
/* Return number of saved SSE registers.  Only the MS ABI has
   callee-saved SSE registers; other ABIs always yield zero.  */
7719 ix86_nsaved_sseregs (void)
7724 if (ix86_cfun_abi () != MS_ABI)
7726 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7727 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */
7739 ix86_can_eliminate (int from, int to)
7741 if (stack_realign_fp)
7742 return ((from == ARG_POINTER_REGNUM
7743 && to == HARD_FRAME_POINTER_REGNUM)
7744 || (from == FRAME_POINTER_REGNUM
7745 && to == STACK_POINTER_REGNUM));
7747 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  Offsets are read from the
   frame layout computed by ix86_compute_frame_layout.  */
7754 ix86_initial_elimination_offset (int from, int to)
7756 struct ix86_frame frame;
7757 ix86_compute_frame_layout (&frame);
7759 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7760 return frame.hard_frame_pointer_offset;
7761 else if (from == FRAME_POINTER_REGNUM
7762 && to == HARD_FRAME_POINTER_REGNUM)
7763 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases eliminate to the stack pointer.  */
7766 gcc_assert (to == STACK_POINTER_REGNUM);
7768 if (from == ARG_POINTER_REGNUM)
7769 return frame.stack_pointer_offset;
7771 gcc_assert (from == FRAME_POINTER_REGNUM);
7772 return frame.stack_pointer_offset - frame.frame_pointer_offset;
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
7782 ix86_builtin_setjmp_frame_value (void)
7784 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
/* Fill structure ix86_frame about frame of currently computed function.
   Lays out, from the return address downward: saved GP registers, padding
   and saved SSE registers, the varargs register save area, alignment
   padding, locals, outgoing arguments, and final alignment — then derives
   the elimination offsets and the amount the prologue must allocate,
   shrinking by the red zone where permitted.  NOTE(review): line-sampled
   listing — braces and a few statements (e.g. the debug-print guard) are
   not visible here.  */
7790 ix86_compute_frame_layout (struct ix86_frame *frame)
7792 HOST_WIDE_INT total_size;
7793 unsigned int stack_alignment_needed;
7794 HOST_WIDE_INT offset;
7795 unsigned int preferred_alignment;
7796 HOST_WIDE_INT size = get_frame_size ();
7798 frame->nregs = ix86_nsaved_regs ();
7799 frame->nsseregs = ix86_nsaved_sseregs ();
7802 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7803 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7805 /* MS ABI seem to require stack alignment to be always 16 except for function
7807 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7809 preferred_alignment = 16;
7810 stack_alignment_needed = 16;
7811 crtl->preferred_stack_boundary = 128;
7812 crtl->stack_alignment_needed = 128;
7815 gcc_assert (!size || stack_alignment_needed);
7816 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7817 gcc_assert (preferred_alignment <= stack_alignment_needed);
7819 /* During reload iteration the amount of registers saved can change.
7820 Recompute the value as needed.  Do not recompute when amount of registers
7821 didn't change as reload does multiple calls to the function and does not
7822 expect the decision to change within single iteration.  */
7823 if (!optimize_function_for_size_p (cfun)
7824 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7826 int count = frame->nregs;
7828 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7829 /* The fast prologue uses move instead of push to save registers.  This
7830 is significantly longer, but also executes faster as modern hardware
7831 can execute the moves in parallel, but can't do that for push/pop.
7833 Be careful about choosing what prologue to emit:  When function takes
7834 many instructions to execute we may use slow version as well as in
7835 case function is known to be outside hot spot (this is known with
7836 feedback only).  Weight the size of function by number of registers
7837 to save as it is cheap to use one or two push instructions but very
7838 slow to use many of them.  */
7840 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7841 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7842 || (flag_branch_probabilities
7843 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7844 cfun->machine->use_fast_prologue_epilogue = false;
7846 cfun->machine->use_fast_prologue_epilogue
7847 = !expensive_function_p (count);
7849 if (TARGET_PROLOGUE_USING_MOVE
7850 && cfun->machine->use_fast_prologue_epilogue)
7851 frame->save_regs_using_mov = true;
7853 frame->save_regs_using_mov = false;
7856 /* Skip return address and saved base pointer.  */
7857 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7859 frame->hard_frame_pointer_offset = offset;
7861 /* Set offset to aligned because the realigned frame starts from
7863 if (stack_realign_fp)
7864 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7866 /* Register save area */
7867 offset += frame->nregs * UNITS_PER_WORD;
7869 /* Align SSE reg save area.  */
7870 if (frame->nsseregs)
7871 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7873 frame->padding0 = 0;
7875 /* SSE register save area.  */
7876 offset += frame->padding0 + frame->nsseregs * 16;
/* Varargs register save area (set up by setup_incoming_varargs_64).  */
7879 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7880 offset += frame->va_arg_size;
7882 /* Align start of frame for local function.  */
7883 frame->padding1 = ((offset + stack_alignment_needed - 1)
7884 & -stack_alignment_needed) - offset;
7886 offset += frame->padding1;
7888 /* Frame pointer points here.  */
7889 frame->frame_pointer_offset = offset;
7893 /* Add outgoing arguments area.  Can be skipped if we eliminated
7894 all the function calls as dead code.
7895 Skipping is however impossible when function calls alloca.  Alloca
7896 expander assumes that last crtl->outgoing_args_size
7897 of stack frame are unused.  */
7898 if (ACCUMULATE_OUTGOING_ARGS
7899 && (!current_function_is_leaf || cfun->calls_alloca
7900 || ix86_current_function_calls_tls_descriptor))
7902 offset += crtl->outgoing_args_size;
7903 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7906 frame->outgoing_arguments_size = 0;
7908 /* Align stack boundary.  Only needed if we're calling another function
7910 if (!current_function_is_leaf || cfun->calls_alloca
7911 || ix86_current_function_calls_tls_descriptor)
7912 frame->padding2 = ((offset + preferred_alignment - 1)
7913 & -preferred_alignment) - offset;
7915 frame->padding2 = 0;
7917 offset += frame->padding2;
7919 /* We've reached end of stack frame.  */
7920 frame->stack_pointer_offset = offset;
7922 /* Size prologue needs to allocate.  */
7923 frame->to_allocate =
7924 (size + frame->padding1 + frame->padding2
7925 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Small frames: pushes beat moves; very large 64-bit frames cannot use
   32-bit displacements in moves.  */
7927 if ((!frame->to_allocate && frame->nregs <= 1)
7928 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7929 frame->save_regs_using_mov = false;
/* Use the red zone (the area below the stack pointer that leaf functions
   may use without adjusting %rsp) to avoid allocating the frame.  */
7931 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7932 && current_function_is_leaf
7933 && !ix86_current_function_calls_tls_descriptor)
7935 frame->red_zone_size = frame->to_allocate;
7936 if (frame->save_regs_using_mov)
7937 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7938 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7939 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7942 frame->red_zone_size = 0;
7943 frame->to_allocate -= frame->red_zone_size;
7944 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout.  */
7946 fprintf (stderr, "\n");
7947 fprintf (stderr, "size: %ld\n", (long)size);
7948 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7949 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7950 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7951 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7952 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7953 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7954 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7955 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7956 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7957 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7958 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7959 (long)frame->hard_frame_pointer_offset);
7960 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7961 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7962 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7963 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
/* Emit code to save registers in the prologue using push instructions,
   highest register number first, marking each insn frame-related.  */
7970 ix86_emit_save_regs (void)
7975 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7976 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7978 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7979 RTX_FRAME_RELATED_P (insn) = 1;
/* Emit code to save registers using MOV insns.  First register
   is saved at POINTER + OFFSET; subsequent ones at word increments.  */
7986 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7991 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7992 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7994 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7996 gen_rtx_REG (Pmode, regno));
7997 RTX_FRAME_RELATED_P (insn) = 1;
7998 offset += UNITS_PER_WORD;
8002 /* Emit code to save SSE registers using MOV insns.  First register
8003 is saved at POINTER + OFFSET; each slot is 16 bytes (TImode).  */
8005 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8011 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8012 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8014 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
/* The save area is 16-byte aligned, so advertise 128-bit alignment
   on the MEM to allow aligned SSE stores.  */
8015 set_mem_align (mem, 128);
8016 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8017 RTX_FRAME_RELATED_P (insn) = 1;
/* Pending REG_CFA_RESTORE notes, accumulated until the next stack
   manipulation insn (see ix86_add_queued_cfa_restore_notes).  */
8022 static GTY(()) rtx queued_cfa_restores;
8024 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
8025 manipulation insn.  Don't add it if the previously
8026 saved value will be left untouched within stack red-zone till return,
8027 as unwinders can find the same value in the register and
8031 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
/* The slot is still inside the red zone (within RED_ZONE_SIZE below the
   entry SP) — no restore note needed for non-MS 64-bit targets with a
   small pops_args, since the saved value remains readable.  */
8034 && !TARGET_64BIT_MS_ABI
8035 && red_offset + RED_ZONE_SIZE >= 0
8036 && crtl->args.pops_args < 65536)
8041 add_reg_note (insn, REG_CFA_RESTORE, reg);
8042 RTX_FRAME_RELATED_P (insn) = 1;
/* No insn to attach to yet: chain the note onto the queue.  */
8046 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8049 /* Add queued REG_CFA_RESTORE notes, if any, to INSN.  Splices the whole
   queued chain onto INSN's REG_NOTES list and clears the queue.  */
8052 ix86_add_queued_cfa_restore_notes (rtx insn)
8055 if (!queued_cfa_restores)
/* Find the tail of the queued chain so it can be linked in front of
   INSN's existing notes.  */
8057 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8059 XEXP (last, 1) = REG_NOTES (insn);
8060 REG_NOTES (insn) = queued_cfa_restores;
8061 queued_cfa_restores = NULL_RTX;
8062 RTX_FRAME_RELATED_P (insn) = 1;
8065 /* Expand prologue or epilogue stack adjustment.
8066 The pattern exists to put a dependency on all ebp-based memory accesses.
8067 STYLE should be negative if instructions should be marked as frame related,
8068 zero if %r11 register is live and cannot be freely used and positive
8072 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8073 int style, bool set_cfa)
8078 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
8079 else if (x86_64_immediate_operand (offset, DImode))
8080 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
8084 /* r11 is used by indirect sibcall return as well, set before the
8085 epilogue and used after the epilogue.  ATM indirect sibcall
8086 shouldn't be used together with huge frame sizes in one
8087 function because of the frame_size check in sibcall.c.  */
/* Offset too large for an immediate: materialize it in r11 first.  */
8089 r11 = gen_rtx_REG (DImode, R11_REG);
8090 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8092 RTX_FRAME_RELATED_P (insn) = 1;
8093 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8098 ix86_add_queued_cfa_restore_notes (insn);
/* When this adjustment moves the CFA, update the tracked CFA state and
   emit an explicit REG_CFA_ADJUST_CFA note describing dest = src + offset.  */
8104 gcc_assert (ix86_cfa_state->reg == src);
8105 ix86_cfa_state->offset += INTVAL (offset);
8106 ix86_cfa_state->reg = dest;
8108 r = gen_rtx_PLUS (Pmode, src, offset);
8109 r = gen_rtx_SET (VOIDmode, dest, r);
8110 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8111 RTX_FRAME_RELATED_P (insn) = 1;
8114 RTX_FRAME_RELATED_P (insn) = 1;
8117 /* Find an available register to be used as dynamic realign argument
8118 pointer register.  Such a register will be written in the prologue and
8119 used at the beginning of the body, so it must not be
8120 1. a parameter passing register.
8122 We reuse the static-chain register if it is available.  Otherwise, we
8123 use DI for i386 and R13 for x86-64.  We chose R13 since it has
8126 Return: the regno of the chosen register.  */
8129 find_drap_reg (void)
8131 tree decl = cfun->decl;
8135 /* Use R13 for a nested function or a function needing a static chain.
8136 Since a function with a tail call may use any caller-saved
8137 registers in the epilogue, DRAP must not use a caller-saved
8138 register in such a case.  */
8139 if ((decl_function_context (decl)
8140 && !DECL_NO_STATIC_CHAIN (decl))
8141 || crtl->tail_call_emit)
8148 /* Use DI for a nested function or a function needing a static chain.
8149 Since a function with a tail call may use any caller-saved
8150 registers in the epilogue, DRAP must not use a caller-saved
8151 register in such a case.  */
8152 if ((decl_function_context (decl)
8153 && !DECL_NO_STATIC_CHAIN (decl))
8154 || crtl->tail_call_emit)
8157 /* Reuse the static chain register if it isn't used for parameter
/* regparm <= 2 leaves ECX free on i386, and fastcall would claim it.  */
8159 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8160 && !lookup_attribute ("fastcall",
8161 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8168 /* Update incoming stack boundary and estimated stack alignment.  */
8171 ix86_update_stack_boundary (void)
8173 /* Prefer the one specified at command line.  */
8174 ix86_incoming_stack_boundary
8175 = (ix86_user_incoming_stack_boundary
8176 ? ix86_user_incoming_stack_boundary
8177 : ix86_default_incoming_stack_boundary);
8179 /* Incoming stack alignment can be changed on individual functions
8180 via force_align_arg_pointer attribute.  We use the smallest
8181 incoming stack boundary.  */
8182 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8183 && lookup_attribute (ix86_force_align_arg_pointer_string,
8184 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8185 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8187 /* The incoming stack frame has to be aligned at least at
8188 parm_stack_boundary.  */
8189 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8190 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8192 /* Stack at entrance of main is aligned by runtime.  We use the
8193 smallest incoming stack boundary.  */
8194 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8195 && DECL_NAME (current_function_decl)
8196 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8197 && DECL_FILE_SCOPE_P (current_function_decl))
8198 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8200 /* x86_64 vararg needs 16byte stack alignment for register save
/* 128 bits = 16 bytes, the alignment the vararg register save area needs.  */
8204 && crtl->stack_alignment_estimated < 128)
8205 crtl->stack_alignment_estimated = 128;
8208 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
8209 needed or an rtx for DRAP otherwise.  */
8212 ix86_get_drap_rtx (void)
8214 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8215 crtl->need_drap = true;
8217 if (stack_realign_drap)
8219 /* Assign DRAP to vDRAP and return vDRAP.  */
8220 unsigned int regno = find_drap_reg ();
8225 arg_ptr = gen_rtx_REG (Pmode, regno);
8226 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP register into a pseudo so the body can use it.  */
8229 drap_vreg = copy_to_reg (arg_ptr);
/* Emit the copy right after function entry so vDRAP is valid everywhere.  */
8233 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8234 RTX_FRAME_RELATED_P (insn) = 1;
8241 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  Incoming arguments are
   always addressed relative to the virtual incoming-args pointer.  */
8244 ix86_internal_arg_pointer (void)
8246 return virtual_incoming_args_rtx;
8249 /* Finalize the stack_realign_needed flag, which will guide prologue/epilogue
8250 to be generated in correct form.  */
8252 ix86_finalize_stack_realign_flags (void)
8254 /* Check if stack realignment is really needed after reload, and
8255 store the result in cfun.  */
8256 unsigned int incoming_stack_boundary
8257 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8258 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary)
8259 unsigned int stack_realign = (incoming_stack_boundary
/* A leaf function only needs realignment for alignment actually used by
   stack slots; a non-leaf must honor the full required alignment.  */
8260 < (current_function_is_leaf
8261 ? crtl->max_used_stack_slot_alignment
8262 : crtl->stack_alignment_needed));
8264 if (crtl->stack_realign_finalized)
8266 /* After stack_realign_needed is finalized, we can no longer
8268 gcc_assert (crtl->stack_realign_needed == stack_realign);
8272 crtl->stack_realign_needed = stack_realign;
8273 crtl->stack_realign_finalized = true;
8277 /* Expand the prologue into a bunch of separate insns.
   NOTE(review): this extract has gaps (missing braces/else arms between
   non-consecutive original line numbers) — consult the full file.  */
8280 ix86_expand_prologue (void)
8284 struct ix86_frame frame;
8285 HOST_WIDE_INT allocate;
8287 ix86_finalize_stack_realign_flags ();
8289 /* DRAP should not coexist with stack_realign_fp.  */
8290 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8292 /* Initialize CFA state for before the prologue.  */
8293 ix86_cfa_state->reg = stack_pointer_rtx;
8294 ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
8296 ix86_compute_frame_layout (&frame);
8298 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8299 DRAP is needed and stack realignment is really needed after reload.  */
8300 if (crtl->drap_reg && crtl->stack_realign_needed)
8303 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8304 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8305 ? 0 : UNITS_PER_WORD);
8307 gcc_assert (stack_realign_drap);
8309 /* Grab the argument pointer.  */
8310 x = plus_constant (stack_pointer_rtx,
8311 (UNITS_PER_WORD + param_ptr_offset));
8314 /* Only need to push parameter pointer reg if it is caller
8316 if (!call_used_regs[REGNO (crtl->drap_reg)])
8318 /* Push arg pointer reg.  */
8319 insn = emit_insn (gen_push (y));
8320 RTX_FRAME_RELATED_P (insn) = 1;
8323 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8324 RTX_FRAME_RELATED_P (insn) = 1;
8325 ix86_cfa_state->reg = crtl->drap_reg;
8327 /* Align the stack.  */
8328 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8330 GEN_INT (-align_bytes)));
8331 RTX_FRAME_RELATED_P (insn) = 1;
8333 /* Replicate the return address on the stack so that return
8334 address can be reached via (argp - 1) slot.  This is needed
8335 to implement macro RETURN_ADDR_RTX and intrinsic function
8336 expand_builtin_return_addr etc.  */
8338 x = gen_frame_mem (Pmode,
8339 plus_constant (x, -UNITS_PER_WORD));
8340 insn = emit_insn (gen_push (x));
8341 RTX_FRAME_RELATED_P (insn) = 1;
8344 /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8345 slower on all targets.  Also sdb doesn't like it.  */
8347 if (frame_pointer_needed)
8349 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8350 RTX_FRAME_RELATED_P (insn) = 1;
8352 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8353 RTX_FRAME_RELATED_P (insn) = 1;
/* The CFA follows the frame pointer from here until the epilogue.  */
8355 if (ix86_cfa_state->reg == stack_pointer_rtx)
8356 ix86_cfa_state->reg = hard_frame_pointer_rtx;
8359 if (stack_realign_fp)
8361 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8362 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8364 /* Align the stack.  */
8365 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8367 GEN_INT (-align_bytes)));
8368 RTX_FRAME_RELATED_P (insn) = 1;
8371 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8373 if (!frame.save_regs_using_mov)
8374 ix86_emit_save_regs ();
8376 allocate += frame.nregs * UNITS_PER_WORD;
8378 /* When using red zone we may start register saving before allocating
8379 the stack frame saving one cycle of the prologue.  However I will
8380 avoid doing this if I am going to have to probe the stack since
8381 at least on x86_64 the stack probe can turn into a call that clobbers
8382 a red zone location.  */
8383 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8384 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8385 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8386 && !crtl->stack_realign_needed)
8387 ? hard_frame_pointer_rtx
8388 : stack_pointer_rtx,
8389 -frame.nregs * UNITS_PER_WORD);
8393 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8394 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8395 GEN_INT (-allocate), -1,
8396 ix86_cfa_state->reg == stack_pointer_rtx);
8399 /* Only valid for Win32.  */
8400 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8404 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8406 if (cfun->machine->call_abi == MS_ABI)
/* EAX may hold the first argument; preserve it around the probe call.  */
8409 eax_live = ix86_eax_live_at_start_p ();
8413 emit_insn (gen_push (eax));
8414 allocate -= UNITS_PER_WORD;
8417 emit_move_insn (eax, GEN_INT (allocate));
8420 insn = gen_allocate_stack_worker_64 (eax, eax);
8422 insn = gen_allocate_stack_worker_32 (eax, eax);
8423 insn = emit_insn (insn);
/* The probe worker adjusts SP internally; describe the net effect with
   an explicit REG_CFA_ADJUST_CFA note for the unwinder.  */
8425 if (ix86_cfa_state->reg == stack_pointer_rtx)
8427 ix86_cfa_state->offset += allocate;
8428 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8429 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8430 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8431 RTX_FRAME_RELATED_P (insn) = 1;
8436 if (frame_pointer_needed)
8437 t = plus_constant (hard_frame_pointer_rtx,
8440 - frame.nregs * UNITS_PER_WORD);
8442 t = plus_constant (stack_pointer_rtx, allocate);
8443 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* Register saves that were deferred until after the frame allocation.  */
8447 if (frame.save_regs_using_mov
8448 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8449 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8451 if (!frame_pointer_needed
8452 || !frame.to_allocate
8453 || crtl->stack_realign_needed)
8454 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8456 + frame.nsseregs * 16 + frame.padding0);
8458 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8459 -frame.nregs * UNITS_PER_WORD);
8461 if (!frame_pointer_needed
8462 || !frame.to_allocate
8463 || crtl->stack_realign_needed)
8464 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8467 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8468 - frame.nregs * UNITS_PER_WORD
8469 - frame.nsseregs * 16
/* Set up the PIC register if this function references the GOT.  */
8472 pic_reg_used = false;
8473 if (pic_offset_table_rtx
8474 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8477 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8479 if (alt_pic_reg_used != INVALID_REGNUM)
8480 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8482 pic_reg_used = true;
8489 if (ix86_cmodel == CM_LARGE_PIC)
8491 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8492 rtx label = gen_label_rtx ();
8494 LABEL_PRESERVE_P (label) = 1;
8495 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8496 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8497 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8498 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8499 pic_offset_table_rtx, tmp_reg));
8502 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8505 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8508 /* In the pic_reg_used case, make sure that the got load isn't deleted
8509 when mcount needs it.  Blockage to avoid call movement across mcount
8510 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8512 if (crtl->profile && pic_reg_used)
8513 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8515 if (crtl->drap_reg && !crtl->stack_realign_needed)
8517 /* vDRAP was set up, but after reload it turns out stack realignment
8518 isn't necessary; here we emit prologue code to set up DRAP
8519 without a stack realignment adjustment.  */
8520 int drap_bp_offset = UNITS_PER_WORD * 2;
8521 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8522 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8525 /* Prevent instructions from being scheduled into register save push
8526 sequence when access to the redzone area is done through frame pointer.
8527 The offset between the frame pointer and the stack pointer is calculated
8528 relative to the value of the stack pointer at the end of the function
8529 prologue, and moving instructions that access redzone area via frame
8530 pointer inside push sequence violates this assumption.  */
8531 if (frame_pointer_needed && frame.red_zone_size)
8532 emit_insn (gen_memory_blockage ());
8534 /* Emit cld instruction if stringops are used in the function.  */
8535 if (TARGET_CLD && ix86_current_function_needs_cld)
8536 emit_insn (gen_cld ());
8539 /* Emit code to restore REG using a POP insn, keeping the CFA tracking
   state and unwind notes up to date.  */
8542 ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
8544 rtx insn = emit_insn (ix86_gen_pop1 (reg));
8546 if (ix86_cfa_state->reg == crtl->drap_reg
8547 && REGNO (reg) == REGNO (crtl->drap_reg))
8549 /* Previously we'd represented the CFA as an expression
8550 like *(%ebp - 8).  We've just popped that value from
8551 the stack, which means we need to reset the CFA to
8552 the drap register.  This will remain until we restore
8553 the stack pointer.  */
8554 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8555 RTX_FRAME_RELATED_P (insn) = 1;
/* The pop moves SP up one word; record the CFA adjustment.  */
8559 if (ix86_cfa_state->reg == stack_pointer_rtx)
8561 ix86_cfa_state->offset -= UNITS_PER_WORD;
8562 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8563 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
8564 RTX_FRAME_RELATED_P (insn) = 1;
8567 /* When the frame pointer is the CFA, and we pop it, we are
8568 swapping back to the stack pointer as the CFA.  This happens
8569 for stack frames that don't allocate other data, so we assume
8570 the stack pointer is now pointing at the return address, i.e.
8571 the function entry state, which makes the offset be 1 word.  */
8572 else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
8573 && reg == hard_frame_pointer_rtx)
8575 ix86_cfa_state->reg = stack_pointer_rtx;
8576 ix86_cfa_state->offset = UNITS_PER_WORD;
8578 add_reg_note (insn, REG_CFA_DEF_CFA,
8579 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8580 GEN_INT (UNITS_PER_WORD)));
8581 RTX_FRAME_RELATED_P (insn) = 1;
8584 ix86_add_cfa_restore_note (insn, reg, red_offset);
8587 /* Emit code to restore saved integer registers using POP insns.
   RED_OFFSET tracks each slot's position relative to the entry stack
   pointer for the red-zone check in ix86_add_cfa_restore_note.  */
8590 ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
8594 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8595 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8597 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
8599 red_offset += UNITS_PER_WORD;
8603 /* Emit code and notes for the LEAVE instruction, which restores both
   the frame pointer and the stack pointer in one insn.  */
8606 ix86_emit_leave (HOST_WIDE_INT red_offset)
8608 rtx insn = emit_insn (ix86_gen_leave ());
8610 ix86_add_queued_cfa_restore_notes (insn);
/* LEAVE switches the CFA back from the frame pointer to the stack
   pointer; describe that with an explicit adjust-CFA note.  */
8612 if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
8614 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8615 copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
8616 RTX_FRAME_RELATED_P (insn) = 1;
8617 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
8621 /* Emit code to restore saved integer registers using MOV insns.  First
8622 register is restored from POINTER + OFFSET.  */
8624 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8625 HOST_WIDE_INT red_offset,
8626 int maybe_eh_return)
8629 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8632 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8633 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8635 rtx reg = gen_rtx_REG (Pmode, regno);
8637 /* Ensure that adjust_address won't be forced to produce pointer
8638 out of range allowed by x86-64 instruction set.  */
8639 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset doesn't fit a 32-bit displacement: compute the address in r11.  */
8643 r11 = gen_rtx_REG (DImode, R11_REG);
8644 emit_move_insn (r11, GEN_INT (offset));
8645 emit_insn (gen_adddi3 (r11, r11, pointer));
8646 base_address = gen_rtx_MEM (Pmode, r11);
8649 insn = emit_move_insn (reg,
8650 adjust_address (base_address, Pmode, offset));
8651 offset += UNITS_PER_WORD;
8653 if (ix86_cfa_state->reg == crtl->drap_reg
8654 && regno == REGNO (crtl->drap_reg))
8656 /* Previously we'd represented the CFA as an expression
8657 like *(%ebp - 8).  We've just reloaded that value from
8658 the stack, which means we need to reset the CFA to
8659 the drap register.  This will remain until we restore
8660 the stack pointer.  */
8661 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8662 RTX_FRAME_RELATED_P (insn) = 1;
8665 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8667 red_offset += UNITS_PER_WORD;
8671 /* Emit code to restore saved SSE registers using MOV insns.  First
8672 register is restored from POINTER + OFFSET; each slot is 16 bytes.  */
8674 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8675 HOST_WIDE_INT red_offset,
8676 int maybe_eh_return)
8679 rtx base_address = gen_rtx_MEM (TImode, pointer);
8682 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8683 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8685 rtx reg = gen_rtx_REG (TImode, regno);
8687 /* Ensure that adjust_address won't be forced to produce pointer
8688 out of range allowed by x86-64 instruction set.  */
8689 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset doesn't fit a 32-bit displacement: compute the address in r11.  */
8693 r11 = gen_rtx_REG (DImode, R11_REG);
8694 emit_move_insn (r11, GEN_INT (offset));
8695 emit_insn (gen_adddi3 (r11, r11, pointer));
8696 base_address = gen_rtx_MEM (TImode, r11);
8699 mem = adjust_address (base_address, TImode, offset);
/* The save area was stored 16-byte aligned; allow an aligned SSE load.  */
8700 set_mem_align (mem, 128);
8701 insn = emit_move_insn (reg, mem);
8704 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8710 /* Restore function stack, frame, and registers.  STYLE selects the
   epilogue variant (2 = eh_return path; sibcall epilogues skip the
   return insn).  NOTE(review): this extract has gaps (missing braces and
   else arms between non-consecutive line numbers) — consult the full file.  */
8713 ix86_expand_epilogue (int style)
8716 struct ix86_frame frame;
8717 HOST_WIDE_INT offset, red_offset;
8718 struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
8721 ix86_finalize_stack_realign_flags ();
8723 /* When stack is realigned, SP must be valid.  */
8724 sp_valid = (!frame_pointer_needed
8725 || current_function_sp_is_unchanging
8726 || stack_realign_fp);
8728 ix86_compute_frame_layout (&frame);
8730 /* See the comment about red zone and frame
8731 pointer usage in ix86_expand_prologue.  */
8732 if (frame_pointer_needed && frame.red_zone_size)
8733 emit_insn (gen_memory_blockage ());
8735 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
8736 gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
8738 /* Calculate start of saved registers relative to ebp.  Special care
8739 must be taken for the normal return case of a function using
8740 eh_return: the eax and edx registers are marked as saved, but not
8741 restored along this path.  */
8742 offset = frame.nregs;
8743 if (crtl->calls_eh_return && style != 2)
8745 offset *= -UNITS_PER_WORD;
8746 offset -= frame.nsseregs * 16 + frame.padding0;
8748 /* Calculate start of saved registers relative to esp on entry of the
8749 function.  When realigning stack, this needs to be the most negative
8750 value possible at runtime.  */
8751 red_offset = offset;
8753 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8755 else if (stack_realign_fp)
8756 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8758 if (frame_pointer_needed)
8759 red_offset -= UNITS_PER_WORD;
8761 /* If we're only restoring one register and sp is not valid then
8762 use a move instruction to restore the register, since it's
8763 less work than reloading sp and popping the register.
8765 The default code results in a stack adjustment using an add/lea insn,
8766 while this code results in a LEAVE instruction (or discrete equivalent),
8767 so it is profitable in some other cases as well.  Especially when there
8768 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
8769 and there is exactly one register to pop.  This heuristic may need some
8770 tuning in future.  */
8771 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8772 || (TARGET_EPILOGUE_USING_MOVE
8773 && cfun->machine->use_fast_prologue_epilogue
8774 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8775 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
8776 && frame.to_allocate)
8777 || (frame_pointer_needed && TARGET_USE_LEAVE
8778 && cfun->machine->use_fast_prologue_epilogue
8779 && (frame.nregs + frame.nsseregs) == 1)
8780 || crtl->calls_eh_return)
8782 /* Restore registers.  We can use ebp or esp to address the memory
8783 locations.  If both are available, default to ebp, since offsets
8784 are known to be small.  Only exception is esp pointing directly
8785 to the end of block of saved registers, where we may simplify
8788 If we are realigning stack with bp and sp, regs restore can't
8789 be addressed by bp.  sp must be used instead.  */
8791 if (!frame_pointer_needed
8792 || (sp_valid && !frame.to_allocate)
8793 || stack_realign_fp)
8795 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8796 frame.to_allocate, red_offset,
8798 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8800 + frame.nsseregs * 16
8803 + frame.nsseregs * 16
8804 + frame.padding0, style == 2);
8808 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8811 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8813 + frame.nsseregs * 16
8816 + frame.nsseregs * 16
8817 + frame.padding0, style == 2);
8820 red_offset -= offset;
8822 /* eh_return epilogues need %ecx added to the stack pointer.  */
8825 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8827 /* Stack align doesn't work with eh_return.  */
8828 gcc_assert (!crtl->stack_realign_needed);
8830 if (frame_pointer_needed)
8832 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8833 tmp = plus_constant (tmp, UNITS_PER_WORD);
8834 tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8836 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8837 tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
8839 /* Note that we use SA as a temporary CFA, as the return
8840 address is at the proper place relative to it.  We
8841 pretend this happens at the FP restore insn because
8842 prior to this insn the FP would be stored at the wrong
8843 offset relative to SA, and after this insn we have no
8844 other reasonable register to use for the CFA.  We don't
8845 bother resetting the CFA to the SP for the duration of
8847 add_reg_note (tmp, REG_CFA_DEF_CFA,
8848 plus_constant (sa, UNITS_PER_WORD));
8849 ix86_add_queued_cfa_restore_notes (tmp);
8850 add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8851 RTX_FRAME_RELATED_P (tmp) = 1;
8852 ix86_cfa_state->reg = sa;
8853 ix86_cfa_state->offset = UNITS_PER_WORD;
8855 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8856 const0_rtx, style, false);
8860 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8861 tmp = plus_constant (tmp, (frame.to_allocate
8862 + frame.nregs * UNITS_PER_WORD
8863 + frame.nsseregs * 16
8865 tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8866 ix86_add_queued_cfa_restore_notes (tmp);
8868 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
8869 if (ix86_cfa_state->offset != UNITS_PER_WORD)
8871 ix86_cfa_state->offset = UNITS_PER_WORD;
8872 add_reg_note (tmp, REG_CFA_DEF_CFA,
8873 plus_constant (stack_pointer_rtx,
8875 RTX_FRAME_RELATED_P (tmp) = 1;
8879 else if (!frame_pointer_needed)
8880 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8881 GEN_INT (frame.to_allocate
8882 + frame.nregs * UNITS_PER_WORD
8883 + frame.nsseregs * 16
8885 style, !using_drap);
8886 /* If not an i386, mov & pop is faster than "leave".  */
8887 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8888 || !cfun->machine->use_fast_prologue_epilogue)
8889 ix86_emit_leave (red_offset);
8892 pro_epilogue_adjust_stack (stack_pointer_rtx,
8893 hard_frame_pointer_rtx,
8894 const0_rtx, style, !using_drap);
8896 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
8901 /* First step is to deallocate the stack frame so that we can
8904 If we realign stack with frame pointer, then stack pointer
8905 won't be able to recover via lea $offset(%bp), %sp, because
8906 there is a padding area between bp and sp for realign.
8907 "add $to_allocate, %sp" must be used instead.  */
8910 gcc_assert (frame_pointer_needed);
8911 gcc_assert (!stack_realign_fp);
8912 pro_epilogue_adjust_stack (stack_pointer_rtx,
8913 hard_frame_pointer_rtx,
8914 GEN_INT (offset), style, false);
8915 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8916 frame.to_allocate, red_offset,
8918 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8919 GEN_INT (frame.nsseregs * 16),
8922 else if (frame.to_allocate || frame.nsseregs)
8924 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8925 frame.to_allocate, red_offset,
8927 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8928 GEN_INT (frame.to_allocate
8929 + frame.nsseregs * 16
8930 + frame.padding0), style,
8931 !using_drap && !frame_pointer_needed);
8934 ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
8936 red_offset -= offset;
8938 if (frame_pointer_needed)
8940 /* Leave results in shorter dependency chains on CPUs that are
8941 able to grok it fast.  */
8942 if (TARGET_USE_LEAVE)
8943 ix86_emit_leave (red_offset);
8946 /* If stack realignment really happened, recovering the stack
8947 pointer from the hard frame pointer is a must, if not using
8949 if (stack_realign_fp)
8950 pro_epilogue_adjust_stack (stack_pointer_rtx,
8951 hard_frame_pointer_rtx,
8952 const0_rtx, style, !using_drap);
8953 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
/* DRAP teardown: pop back to the caller's stack pointer.  */
8961 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8962 ? 0 : UNITS_PER_WORD);
8965 gcc_assert (stack_realign_drap);
8967 insn = emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8969 GEN_INT (-(UNITS_PER_WORD
8970 + param_ptr_offset))));
8972 ix86_cfa_state->reg = stack_pointer_rtx;
8973 ix86_cfa_state->offset = UNITS_PER_WORD + param_ptr_offset;
8975 add_reg_note (insn, REG_CFA_DEF_CFA,
8976 gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
8977 GEN_INT (ix86_cfa_state->offset)));
8978 RTX_FRAME_RELATED_P (insn) = 1;
8980 if (param_ptr_offset)
8981 ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
8984 /* Sibcall epilogues don't want a return instruction.  */
8987 *ix86_cfa_state = cfa_state_save;
8991 if (crtl->args.pops_args && crtl->args.size)
8993 rtx popc = GEN_INT (crtl->args.pops_args);
8995 /* i386 can only pop 64K bytes.  If asked to pop more, pop return
8996 address, do explicit add, and jump indirectly to the caller.  */
8998 if (crtl->args.pops_args >= 65536)
9000 rtx ecx = gen_rtx_REG (SImode, CX_REG);
9003 /* There is no "pascal" calling convention in any 64bit ABI.  */
9004 gcc_assert (!TARGET_64BIT);
9006 insn = emit_insn (gen_popsi1 (ecx));
9007 ix86_cfa_state->offset -= UNITS_PER_WORD;
9009 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9010 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
9011 add_reg_note (insn, REG_CFA_REGISTER,
9012 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
9013 RTX_FRAME_RELATED_P (insn) = 1;
9015 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9017 emit_jump_insn (gen_return_indirect_internal (ecx));
9020 emit_jump_insn (gen_return_pop_internal (popc));
9023 emit_jump_insn (gen_return_internal ());
9025 /* Restore the state back to the state from the prologue,
9026 so that it's correct for the next epilogue.  */
9027 *ix86_cfa_state = cfa_state_save;
9030 /* Reset from the function's potential modifications.  Called via the
   TARGET_ASM_FUNCTION_EPILOGUE hook after assembly output.  */
9033 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
9034 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
/* The prologue may have renamed the PIC register; restore the default.  */
9036 if (pic_offset_table_rtx)
9037 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9039 /* Mach-O doesn't support labels at the end of objects, so if
9040 it looks like we might want one, insert a NOP.  */
9042 rtx insn = get_last_insn ();
9045 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9046 insn = PREV_INSN (insn);
9050 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
9051 fputs ("\tnop\n", file);
9057 /* Extract the parts of an RTL expression that is a valid memory address
9058 for an instruction.  Return 0 if the structure of the address is
9059 grossly off.  Return -1 if the address contains ASHIFT, so it is not
9060 strictly valid, but still used for computing length of lea instruction.
   NOTE(review): this extract has gaps (missing case labels, braces and
   return statements between non-consecutive line numbers).  */
9063 ix86_decompose_address (rtx addr, struct ix86_address *out)
9065 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9066 rtx base_reg, index_reg;
9067 HOST_WIDE_INT scale = 1;
9068 rtx scale_rtx = NULL_RTX;
9070 enum ix86_address_seg seg = SEG_DEFAULT;
9072 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
9074 else if (GET_CODE (addr) == PLUS)
/* Flatten nested PLUS chains into an addend array, then classify each
   operand as base / index*scale / displacement / segment override.  */
9084 addends[n++] = XEXP (op, 1);
9087 while (GET_CODE (op) == PLUS);
9092 for (i = n; i >= 0; --i)
9095 switch (GET_CODE (op))
9100 index = XEXP (op, 0);
9101 scale_rtx = XEXP (op, 1);
/* A thread-pointer UNSPEC becomes an FS/GS segment override.  */
9105 if (XINT (op, 1) == UNSPEC_TP
9106 && TARGET_TLS_DIRECT_SEG_REFS
9107 && seg == SEG_DEFAULT)
9108 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
9137 else if (GET_CODE (addr) == MULT)
9139 index = XEXP (addr, 0); /* index*scale */
9140 scale_rtx = XEXP (addr, 1);
9142 else if (GET_CODE (addr) == ASHIFT)
9146 /* We're called for lea too, which implements ashift on occasion.  */
9147 index = XEXP (addr, 0);
9148 tmp = XEXP (addr, 1);
9149 if (!CONST_INT_P (tmp))
9151 scale = INTVAL (tmp);
9152 if ((unsigned HOST_WIDE_INT) scale > 3)
9158 disp = addr; /* displacement */
9160 /* Extract the integral value of scale.  */
9163 if (!CONST_INT_P (scale_rtx))
9165 scale = INTVAL (scale_rtx);
9168 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
9169 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
9171 /* Avoid useless 0 displacement.  */
9172 if (disp == const0_rtx && (base || index))
9175 /* Allow arg pointer and stack pointer as index if there is no scaling.  */
9176 if (base_reg && index_reg && scale == 1
9177 && (index_reg == arg_pointer_rtx
9178 || index_reg == frame_pointer_rtx
9179 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* Swap base and index: esp/argp/framep can't be an SIB index.  */
9182 tmp = base, base = index, index = tmp;
9183 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
9186 /* Special case: %ebp cannot be encoded as a base without a displacement.
9190 && (base_reg == hard_frame_pointer_rtx
9191 || base_reg == frame_pointer_rtx
9192 || base_reg == arg_pointer_rtx
9193 || (REG_P (base_reg)
9194 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
9195 || REGNO (base_reg) == R13_REG))))
9198 /* Special case: on K6, [%esi] makes the instruction vector decoded.
9199 Avoid this by transforming to [%esi+0].
9200 Reload calls address legitimization without cfun defined, so we need
9201 to test cfun for being non-NULL.  */
9202 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9203 && base_reg && !index_reg && !disp
9205 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
9208 /* Special case: encode reg+reg instead of reg*2.  */
9209 if (!base && index && scale == 2)
9210 base = index, base_reg = index_reg, scale = 1;
9212 /* Special case: scaling cannot be encoded without base or displacement.  */
9213 if (!base && !disp && index && scale != 1)
9225 /* Return cost of the memory address x.
9226 For i386, it is better to use a complex address than let gcc copy
9227 the address into a reg and make a new pseudo. But not if the address
9228 requires to two regs - that would mean more pseudos with longer
/* Implements the address-cost target hook.  NOTE(review): elided
   extract -- the concrete cost return statements are missing here.  */
9231 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
9233 struct ix86_address parts;
9235 int ok = ix86_decompose_address (x, &parts);
/* Look through SUBREGs so the pseudo-vs-hard-reg tests below apply
   to the underlying register.  */
9239 if (parts.base && GET_CODE (parts.base) == SUBREG)
9240 parts.base = SUBREG_REG (parts.base);
9241 if (parts.index && GET_CODE (parts.index) == SUBREG)
9242 parts.index = SUBREG_REG (parts.index);
9244 /* Attempt to minimize number of registers in the address. */
9246 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
9248 && (!REG_P (parts.index)
9249 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
/* Penalize addresses needing two distinct pseudo registers.  */
9253 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9255 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9256 && parts.base != parts.index)
9259 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
9260 since it's predecode logic can't detect the length of instructions
9261 and it degenerates to vector decoded. Increase cost of such
9262 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
9263 to split such addresses or even refuse such addresses at all.
9265 Following addressing modes are affected:
9270 The first and last case may be avoidable by explicitly coding the zero in
9271 memory address, but I don't have AMD-K6 machine handy to check this
9275 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9276 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9277 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9283 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9284 this is used for to form addresses to local data when -fPIC is in
/* Return true iff DISP is the Mach-O pic-base offset unspec.  */
9288 darwin_local_data_pic (rtx disp)
9290 return (GET_CODE (disp) == UNSPEC
9291 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9294 /* Determine if a given RTX is a valid constant. We already know this
9295 satisfies CONSTANT_P. */
/* NOTE(review): elided extract -- several case labels and return
   statements are missing from this view.  */
9298 legitimate_constant_p (rtx x)
9300 switch (GET_CODE (x))
9305 if (GET_CODE (x) == PLUS)
/* A symbolic constant plus a non-integer offset is never valid.  */
9307 if (!CONST_INT_P (XEXP (x, 1)))
9312 if (TARGET_MACHO && darwin_local_data_pic (x))
9315 /* Only some unspecs are valid as "constants". */
9316 if (GET_CODE (x) == UNSPEC)
9317 switch (XINT (x, 1))
9322 return TARGET_64BIT;
/* TLS unspecs: valid only when the wrapped symbol's TLS model
   matches the unspec kind.  */
9325 x = XVECEXP (x, 0, 0);
9326 return (GET_CODE (x) == SYMBOL_REF
9327 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9329 x = XVECEXP (x, 0, 0);
9330 return (GET_CODE (x) == SYMBOL_REF
9331 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9336 /* We must have drilled down to a symbol. */
9337 if (GET_CODE (x) == LABEL_REF)
9339 if (GET_CODE (x) != SYMBOL_REF)
9344 /* TLS symbols are never valid. */
9345 if (SYMBOL_REF_TLS_MODEL (x))
9348 /* DLLIMPORT symbols are never valid. */
9349 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9350 && SYMBOL_REF_DLLIMPORT_P (x))
9355 if (GET_MODE (x) == TImode
9356 && x != CONST0_RTX (TImode)
/* Vector constants are valid only if they are standard SSE constants
   (all-zeros / all-ones) -- TODO confirm: the case label is elided.  */
9362 if (!standard_sse_constant_p (x))
9369 /* Otherwise we handle everything else in the move patterns. */
9373 /* Determine if it's legal to put X into the constant pool. This
9374 is not possible for the address of thread-local symbols, which
9375 is checked above. */
/* Implements the cannot-force-const-mem target hook: nonzero means
   X must NOT be spilled to the constant pool.  */
9378 ix86_cannot_force_const_mem (rtx x)
9380 /* We can always put integral constants and vectors in memory. */
9381 switch (GET_CODE (x))
/* Everything else may be pooled only if legitimate_constant_p.  */
9391 return !legitimate_constant_p (x);
9395 /* Nonzero if the constant value X is a legitimate general operand
9396 when generating PIC code. It is given that flag_pic is on and
9397 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): elided extract -- some case labels and default
   returns are missing from this view.  */
9400 legitimate_pic_operand_p (rtx x)
9404 switch (GET_CODE (x))
9407 inner = XEXP (x, 0);
/* Strip a trailing integer offset so the unspec test below sees the
   core expression.  */
9408 if (GET_CODE (inner) == PLUS
9409 && CONST_INT_P (XEXP (inner, 1)))
9410 inner = XEXP (inner, 0);
9412 /* Only some unspecs are valid as "constants". */
9413 if (GET_CODE (inner) == UNSPEC)
9414 switch (XINT (inner, 1))
9419 return TARGET_64BIT;
9421 x = XVECEXP (inner, 0, 0);
9422 return (GET_CODE (x) == SYMBOL_REF
9423 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9424 case UNSPEC_MACHOPIC_OFFSET:
9425 return legitimate_pic_address_disp_p (x);
/* Symbolic operands fall back to the PIC displacement check.  */
9433 return legitimate_pic_address_disp_p (x);
9440 /* Determine if a given CONST RTX is a valid memory displacement
/* ...when generating PIC code.  NOTE(review): elided extract --
   case labels and several return statements are missing here.  */
9444 legitimate_pic_address_disp_p (rtx disp)
9448 /* In 64bit mode we can allow direct addresses of symbols and labels
9449 when they are not dynamic symbols. */
9452 rtx op0 = disp, op1;
9454 switch (GET_CODE (disp))
9460 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9462 op0 = XEXP (XEXP (disp, 0), 0);
9463 op1 = XEXP (XEXP (disp, 0), 1);
/* The symbol+offset form is only accepted for offsets within
   +/- 16MB.  */
9464 if (!CONST_INT_P (op1)
9465 || INTVAL (op1) >= 16*1024*1024
9466 || INTVAL (op1) < -16*1024*1024)
9468 if (GET_CODE (op0) == LABEL_REF)
9470 if (GET_CODE (op0) != SYMBOL_REF)
9475 /* TLS references should always be enclosed in UNSPEC. */
9476 if (SYMBOL_REF_TLS_MODEL (op0))
/* Local, near symbols can be addressed directly outside the large
   PIC model.  */
9478 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9479 && ix86_cmodel != CM_LARGE_PIC)
9487 if (GET_CODE (disp) != CONST)
9489 disp = XEXP (disp, 0);
9493 /* We are unsafe to allow PLUS expressions. This limit allowed distance
9494 of GOT tables. We should not need these anyway. */
9495 if (GET_CODE (disp) != UNSPEC
9496 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9497 && XINT (disp, 1) != UNSPEC_GOTOFF
9498 && XINT (disp, 1) != UNSPEC_PLTOFF))
9501 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9502 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip a constant offset, then require a recognized
   PIC unspec around the symbol.  */
9508 if (GET_CODE (disp) == PLUS)
9510 if (!CONST_INT_P (XEXP (disp, 1)))
9512 disp = XEXP (disp, 0);
9516 if (TARGET_MACHO && darwin_local_data_pic (disp))
9519 if (GET_CODE (disp) != UNSPEC)
9522 switch (XINT (disp, 1))
9527 /* We need to check for both symbols and labels because VxWorks loads
9528 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9530 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9531 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9533 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9534 While ABI specify also 32bit relocation but we don't produce it in
9535 small PIC model at all. */
9536 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9537 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9539 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9541 case UNSPEC_GOTTPOFF:
9542 case UNSPEC_GOTNTPOFF:
9543 case UNSPEC_INDNTPOFF:
/* TLS unspecs: the wrapped symbol's TLS model must match.  */
9546 disp = XVECEXP (disp, 0, 0);
9547 return (GET_CODE (disp) == SYMBOL_REF
9548 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9550 disp = XVECEXP (disp, 0, 0);
9551 return (GET_CODE (disp) == SYMBOL_REF
9552 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9554 disp = XVECEXP (disp, 0, 0);
9555 return (GET_CODE (disp) == SYMBOL_REF
9556 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9562 /* Recognizes RTL expressions that are valid memory addresses for an
9563 instruction. The MODE argument is the machine mode for the MEM
9564 expression that wants to use this address.
9566 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9567 convert common non-canonical forms to canonical form so that they will
/* Implements TARGET_LEGITIMATE_ADDRESS_P.  STRICT selects strict
   (post-reload) register checking.  NOTE(review): elided extract --
   the report/failure return paths and some guards are missing.  */
9571 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9572 rtx addr, bool strict)
9574 struct ix86_address parts;
9575 rtx base, index, disp;
9576 HOST_WIDE_INT scale;
/* REASON/REASON_RTX record why an address was rejected, presumably
   for debug reporting -- the reporting code is elided.  */
9577 const char *reason = NULL;
9578 rtx reason_rtx = NULL_RTX;
9580 if (ix86_decompose_address (addr, &parts) <= 0)
9582 reason = "decomposition failed";
9587 index = parts.index;
9589 scale = parts.scale;
9591 /* Validate base register.
9593 Don't allow SUBREG's that span more than a word here. It can lead to spill
9594 failures when the base is one word out of a two word structure, which is
9595 represented internally as a DImode int. */
9604 else if (GET_CODE (base) == SUBREG
9605 && REG_P (SUBREG_REG (base))
9606 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9608 reg = SUBREG_REG (base);
9611 reason = "base is not a register";
9615 if (GET_MODE (base) != Pmode)
9617 reason = "base is not in Pmode";
9621 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9622 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9624 reason = "base is not valid";
9629 /* Validate index register.
9631 Don't allow SUBREG's that span more than a word here -- same as above. */
9640 else if (GET_CODE (index) == SUBREG
9641 && REG_P (SUBREG_REG (index))
9642 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9644 reg = SUBREG_REG (index);
9647 reason = "index is not a register";
9651 if (GET_MODE (index) != Pmode)
9653 reason = "index is not in Pmode";
9657 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9658 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9660 reason = "index is not valid";
9665 /* Validate scale factor. */
9668 reason_rtx = GEN_INT (scale);
9671 reason = "scale without index";
/* Hardware only encodes scales of 1, 2, 4, 8.  */
9675 if (scale != 2 && scale != 4 && scale != 8)
9677 reason = "scale is not a valid multiplier";
9682 /* Validate displacement. */
9687 if (GET_CODE (disp) == CONST
9688 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9689 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9690 switch (XINT (XEXP (disp, 0), 1))
9692 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9693 used. While ABI specify also 32bit relocations, we don't produce
9694 them at all and use IP relative instead. */
9697 gcc_assert (flag_pic);
9699 goto is_legitimate_pic;
9700 reason = "64bit address unspec";
9703 case UNSPEC_GOTPCREL:
9704 gcc_assert (flag_pic);
9705 goto is_legitimate_pic;
9707 case UNSPEC_GOTTPOFF:
9708 case UNSPEC_GOTNTPOFF:
9709 case UNSPEC_INDNTPOFF:
9715 reason = "invalid address unspec";
9719 else if (SYMBOLIC_CONST (disp)
9723 && MACHOPIC_INDIRECT
9724 && !machopic_operand_p (disp)
9730 if (TARGET_64BIT && (index || base))
9732 /* foo@dtpoff(%rX) is ok. */
9733 if (GET_CODE (disp) != CONST
9734 || GET_CODE (XEXP (disp, 0)) != PLUS
9735 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9736 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9737 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9738 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9740 reason = "non-constant pic memory reference";
9744 else if (! legitimate_pic_address_disp_p (disp))
9746 reason = "displacement is an invalid pic construct";
9750 /* This code used to verify that a symbolic pic displacement
9751 includes the pic_offset_table_rtx register.
9753 While this is good idea, unfortunately these constructs may
9754 be created by "adds using lea" optimization for incorrect
9763 This code is nonsensical, but results in addressing
9764 GOT table with pic_offset_table_rtx base. We can't
9765 just refuse it easily, since it gets matched by
9766 "addsi3" pattern, that later gets split to lea in the
9767 case output register differs from input. While this
9768 can be handled by separate addsi pattern for this case
9769 that never results in lea, this seems to be easier and
9770 correct fix for crash to disable this test. */
/* Non-PIC: the displacement must still be a legitimate constant.  */
9772 else if (GET_CODE (disp) != LABEL_REF
9773 && !CONST_INT_P (disp)
9774 && (GET_CODE (disp) != CONST
9775 || !legitimate_constant_p (disp))
9776 && (GET_CODE (disp) != SYMBOL_REF
9777 || !legitimate_constant_p (disp)))
9779 reason = "displacement is not constant";
/* 64-bit displacements must fit a sign-extended 32-bit immediate.  */
9782 else if (TARGET_64BIT
9783 && !x86_64_immediate_operand (disp, VOIDmode))
9785 reason = "displacement is out of range";
9790 /* Everything looks valid. */
9797 /* Determine if a given RTX is a valid constant address. */
9800 constant_address_p (rtx x)
/* A constant address is a constant that passes strict legitimacy.  */
9802 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
9805 /* Return a unique alias set for the GOT. */
9807 static alias_set_type
9808 ix86_GOT_alias_set (void)
/* Lazily allocated on first call; -1 marks "not yet created".  */
9810 static alias_set_type set = -1;
9812 set = new_alias_set ();
9816 /* Return a legitimate reference for ORIG (an address) using the
9817 register REG. If REG is 0, a new pseudo is generated.
9819 There are two types of references that must be handled:
9821 1. Global data references must load the address from the GOT, via
9822 the PIC reg. An insn is emitted to do this load, and the reg is
9825 2. Static data references, constant pool addresses, and code labels
9826 compute the address as an offset from the GOT, whose base is in
9827 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9828 differentiate them from global data objects. The returned
9829 address is the PIC reg + an unspec constant.
9831 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
9832 reg also appears in the address. */
/* NOTE(review): elided extract -- brace lines, some variable
   declarations and several else-branches are missing from this view.  */
9835 legitimize_pic_address (rtx orig, rtx reg)
9842 if (TARGET_MACHO && !TARGET_64BIT)
9845 reg = gen_reg_rtx (Pmode);
9846 /* Use the generic Mach-O PIC machinery. */
9847 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* 64-bit addresses that are already valid PIC displacements need no
   transformation.  */
9851 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9853 else if (TARGET_64BIT
9854 && ix86_cmodel != CM_SMALL_PIC
9855 && gotoff_operand (addr, Pmode))
9858 /* This symbol may be referenced via a displacement from the PIC
9859 base address (@GOTOFF). */
9861 if (reload_in_progress)
9862 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9863 if (GET_CODE (addr) == CONST)
9864 addr = XEXP (addr, 0);
/* Wrap only the symbol part in UNSPEC_GOTOFF, re-adding any offset.  */
9865 if (GET_CODE (addr) == PLUS)
9867 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9869 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9872 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9873 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9875 tmpreg = gen_reg_rtx (Pmode);
9878 emit_move_insn (tmpreg, new_rtx);
9882 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9883 tmpreg, 1, OPTAB_DIRECT);
9886 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
/* 32-bit @GOTOFF path: pic_offset_table_rtx + unspec constant.  */
9888 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9890 /* This symbol may be referenced via a displacement from the PIC
9891 base address (@GOTOFF). */
9893 if (reload_in_progress)
9894 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9895 if (GET_CODE (addr) == CONST)
9896 addr = XEXP (addr, 0);
9897 if (GET_CODE (addr) == PLUS)
9899 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9901 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9904 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9905 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9906 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9910 emit_move_insn (reg, new_rtx);
/* Global symbols (and VxWorks text labels) go through the GOT.  */
9914 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9915 /* We can't use @GOTOFF for text labels on VxWorks;
9916 see gotoff_operand. */
9917 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)
9919 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9921 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9922 return legitimize_dllimport_symbol (addr, true);
9923 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9924 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9925 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9927 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9928 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* 64-bit small/medium PIC: RIP-relative GOT load (@GOTPCREL).  */
9932 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9934 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9935 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9936 new_rtx = gen_const_mem (Pmode, new_rtx);
9937 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9940 reg = gen_reg_rtx (Pmode);
9941 /* Use directly gen_movsi, otherwise the address is loaded
9942 into register for CSE. We don't want to CSE this addresses,
9943 instead we CSE addresses from the GOT table, so skip this. */
9944 emit_insn (gen_movsi (reg, new_rtx));
9949 /* This symbol must be referenced via a load from the
9950 Global Offset Table (@GOT). */
9952 if (reload_in_progress)
9953 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9954 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9955 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9957 new_rtx = force_reg (Pmode, new_rtx);
9958 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9959 new_rtx = gen_const_mem (Pmode, new_rtx);
9960 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9963 reg = gen_reg_rtx (Pmode);
9964 emit_move_insn (reg, new_rtx);
/* Large constants that are not valid 64-bit immediates are forced
   into a register.  */
9970 if (CONST_INT_P (addr)
9971 && !x86_64_immediate_operand (addr, VOIDmode))
9975 emit_move_insn (reg, addr);
9979 new_rtx = force_reg (Pmode, addr);
9981 else if (GET_CODE (addr) == CONST)
9983 addr = XEXP (addr, 0);
9985 /* We must match stuff we generate before. Assume the only
9986 unspecs that can get here are ours. Not that we could do
9987 anything with them anyway.... */
9988 if (GET_CODE (addr) == UNSPEC
9989 || (GET_CODE (addr) == PLUS
9990 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9992 gcc_assert (GET_CODE (addr) == PLUS);
9994 if (GET_CODE (addr) == PLUS)
9996 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9998 /* Check first to see if this is a constant offset from a @GOTOFF
9999 symbol reference. */
10000 if (gotoff_operand (op0, Pmode)
10001 && CONST_INT_P (op1))
10005 if (reload_in_progress)
10006 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10007 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10009 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10010 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10011 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10015 emit_move_insn (reg, new_rtx);
/* Offsets beyond +/-16MB cannot be folded; keep them separate.  */
10021 if (INTVAL (op1) < -16*1024*1024
10022 || INTVAL (op1) >= 16*1024*1024)
10024 if (!x86_64_immediate_operand (op1, Pmode))
10025 op1 = force_reg (Pmode, op1);
10026 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves recursively and recombine.  */
10032 base = legitimize_pic_address (XEXP (addr, 0), reg);
10033 new_rtx = legitimize_pic_address (XEXP (addr, 1),
10034 base == reg ? NULL_RTX : reg);
10036 if (CONST_INT_P (new_rtx))
10037 new_rtx = plus_constant (base, INTVAL (new_rtx));
10040 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
10042 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
10043 new_rtx = XEXP (new_rtx, 1);
10045 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
10053 /* Load the thread pointer. If TO_REG is true, force it into a register. */
10056 get_thread_pointer (int to_reg)
/* The thread pointer is represented as an UNSPEC_TP wrapping zero.  */
10060 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
10064 reg = gen_reg_rtx (Pmode);
10065 insn = gen_rtx_SET (VOIDmode, reg, tp);
10066 insn = emit_insn (insn);
10071 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
10072 false if we expect this to be used for a memory address and true if
10073 we expect to load the address into a register. */
/* Lower a TLS symbol reference X according to MODEL (global-dynamic,
   local-dynamic, initial-exec or local-exec).  NOTE(review): elided
   extract -- brace lines and some branches are missing here.  */
10076 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
10078 rtx dest, base, off, pic, tp;
10083 case TLS_MODEL_GLOBAL_DYNAMIC:
10084 dest = gen_reg_rtx (Pmode);
10085 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
/* Classic 64-bit GD: emit a __tls_get_addr call whose result lands
   in %rax, wrapped as a const libcall block for CSE.  */
10087 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10089 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
10092 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
10093 insns = get_insns ();
10096 RTL_CONST_CALL_P (insns) = 1;
10097 emit_libcall_block (insns, dest, rax, x);
10099 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10100 emit_insn (gen_tls_global_dynamic_64 (dest, x));
10102 emit_insn (gen_tls_global_dynamic_32 (dest, x));
/* GNU2 (TLS descriptor) variants return an offset; add the thread
   pointer and record the equivalence for the optimizers.  */
10104 if (TARGET_GNU2_TLS)
10106 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
10108 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10112 case TLS_MODEL_LOCAL_DYNAMIC:
10113 base = gen_reg_rtx (Pmode);
10114 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10116 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10118 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
10121 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
10122 insns = get_insns ();
10125 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
10126 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
10127 RTL_CONST_CALL_P (insns) = 1;
10128 emit_libcall_block (insns, base, rax, note);
10130 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10131 emit_insn (gen_tls_local_dynamic_base_64 (base));
10133 emit_insn (gen_tls_local_dynamic_base_32 (base));
10135 if (TARGET_GNU2_TLS)
10137 rtx x = ix86_tls_module_base ();
10139 set_unique_reg_note (get_last_insn (), REG_EQUIV,
10140 gen_rtx_MINUS (Pmode, x, tp));
/* The symbol itself is module-base + @DTPOFF offset.  */
10143 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
10144 off = gen_rtx_CONST (Pmode, off);
10146 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
10148 if (TARGET_GNU2_TLS)
10150 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
10152 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10157 case TLS_MODEL_INITIAL_EXEC:
/* IE: load the offset from the GOT, choosing the unspec flavor by
   target/PIC configuration.  */
10161 type = UNSPEC_GOTNTPOFF;
10165 if (reload_in_progress)
10166 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10167 pic = pic_offset_table_rtx;
10168 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
10170 else if (!TARGET_ANY_GNU_TLS)
10172 pic = gen_reg_rtx (Pmode);
10173 emit_insn (gen_set_got (pic));
10174 type = UNSPEC_GOTTPOFF;
10179 type = UNSPEC_INDNTPOFF;
10182 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
10183 off = gen_rtx_CONST (Pmode, off);
10185 off = gen_rtx_PLUS (Pmode, pic, off);
10186 off = gen_const_mem (Pmode, off);
10187 set_mem_alias_set (off, ix86_GOT_alias_set ());
10189 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10191 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10192 off = force_reg (Pmode, off);
10193 return gen_rtx_PLUS (Pmode, base, off);
/* Non-GNU TLS: address is thread-pointer minus the loaded offset.  */
10197 base = get_thread_pointer (true);
10198 dest = gen_reg_rtx (Pmode);
10199 emit_insn (gen_subsi3 (dest, base, off));
10203 case TLS_MODEL_LOCAL_EXEC:
/* LE: offset is a link-time constant (@NTPOFF or @TPOFF).  */
10204 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
10205 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10206 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
10207 off = gen_rtx_CONST (Pmode, off);
10209 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10211 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10212 return gen_rtx_PLUS (Pmode, base, off);
10216 base = get_thread_pointer (true);
10217 dest = gen_reg_rtx (Pmode);
10218 emit_insn (gen_subsi3 (dest, base, off));
10223 gcc_unreachable ();
10229 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* Decls are memoized in DLLIMPORT_MAP, a GC-managed hash table keyed
   by the original decl pointer.  */
10232 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
10233 htab_t dllimport_map;
/* NOTE(review): elided extract -- brace lines and the early-return
   on a cache hit are missing from this view.  */
10236 get_dllimport_decl (tree decl)
10238 struct tree_map *h, in;
10241 const char *prefix;
10242 size_t namelen, prefixlen;
10247 if (!dllimport_map)
10248 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
10250 in.hash = htab_hash_pointer (decl);
10251 in.base.from = decl;
10252 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
10253 h = (struct tree_map *) *loc;
/* Cache miss: build a synthetic external VAR_DECL holding the
   __imp_ pointer for DECL.  */
10257 *loc = h = GGC_NEW (struct tree_map);
10259 h->base.from = decl;
10260 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
10261 DECL_ARTIFICIAL (to) = 1;
10262 DECL_IGNORED_P (to) = 1;
10263 DECL_EXTERNAL (to) = 1;
10264 TREE_READONLY (to) = 1;
10266 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
10267 name = targetm.strip_name_encoding (name);
/* Fastcall names and empty user prefixes omit the extra underscore.  */
10268 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
10269 ? "*__imp_" : "*__imp__";
10270 namelen = strlen (name);
10271 prefixlen = strlen (prefix);
10272 imp_name = (char *) alloca (namelen + prefixlen + 1);
10273 memcpy (imp_name, prefix, prefixlen);
10274 memcpy (imp_name + prefixlen, name, namelen + 1);
10276 name = ggc_alloc_string (imp_name, namelen + prefixlen);
10277 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10278 SET_SYMBOL_REF_DECL (rtl, to);
10279 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
/* The decl's RTL is a load through the __imp_ pointer, aliased with
   the GOT alias set.  */
10281 rtl = gen_const_mem (Pmode, rtl);
10282 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10284 SET_DECL_RTL (to, rtl);
10285 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
10290 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
10291 true if we require the result be a register. */
10294 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
/* Every dllimport symbol must carry its originating decl.  */
10299 gcc_assert (SYMBOL_REF_DECL (symbol));
10300 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10302 x = DECL_RTL (imp_decl);
10304 x = force_reg (Pmode, x);
10308 /* Try machine-dependent ways of modifying an illegitimate address
10309 to be legitimate. If we find one, return the new, valid address.
10310 This macro is used in only one place: `memory_address' in explow.c.
10312 OLDX is the address as it was before break_out_memory_refs was called.
10313 In some cases it is useful to look at this to decide what needs to be done.
10315 It is always safe for this macro to do nothing. It exists to recognize
10316 opportunities to optimize the output.
10318 For the 80386, we handle X+REG by loading X into a register R and
10319 using R+REG. R will go in a general reg and indexing will be used.
10320 However, if REG is a broken-out memory address or multiplication,
10321 nothing needs to be done because REG can certainly go in a general reg.
10323 When -fpic is used, special handling is needed for symbolic references.
10324 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): elided extract -- brace lines, the CHANGED flag
   declaration and several guards are missing from this view.  */
10327 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10328 enum machine_mode mode)
/* TLS symbols get dedicated lowering first.  */
10333 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10335 return legitimize_tls_address (x, (enum tls_model) log, false);
10336 if (GET_CODE (x) == CONST
10337 && GET_CODE (XEXP (x, 0)) == PLUS
10338 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10339 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10341 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10342 (enum tls_model) log, false);
10343 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* dllimport symbols become loads through their __imp_ pointer.  */
10346 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10348 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10349 return legitimize_dllimport_symbol (x, true);
10350 if (GET_CODE (x) == CONST
10351 && GET_CODE (XEXP (x, 0)) == PLUS
10352 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10353 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10355 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10356 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10360 if (flag_pic && SYMBOLIC_CONST (x))
10361 return legitimize_pic_address (x, 0);
10363 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10364 if (GET_CODE (x) == ASHIFT
10365 && CONST_INT_P (XEXP (x, 1))
10366 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10369 log = INTVAL (XEXP (x, 1));
10370 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10371 GEN_INT (1 << log));
10374 if (GET_CODE (x) == PLUS)
10376 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10378 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10379 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10380 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10383 log = INTVAL (XEXP (XEXP (x, 0), 1));
10384 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10385 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10386 GEN_INT (1 << log));
10389 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10390 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10391 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10394 log = INTVAL (XEXP (XEXP (x, 1), 1));
10395 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10396 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10397 GEN_INT (1 << log));
10400 /* Put multiply first if it isn't already. */
10401 if (GET_CODE (XEXP (x, 1)) == MULT)
10403 rtx tmp = XEXP (x, 0);
10404 XEXP (x, 0) = XEXP (x, 1);
10409 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10410 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10411 created by virtual register instantiation, register elimination, and
10412 similar optimizations. */
10413 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10416 x = gen_rtx_PLUS (Pmode,
10417 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10418 XEXP (XEXP (x, 1), 0)),
10419 XEXP (XEXP (x, 1), 1));
10423 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10424 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10425 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10426 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10427 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10428 && CONSTANT_P (XEXP (x, 1)))
10431 rtx other = NULL_RTX;
/* Whichever of the two trailing operands is the integer becomes
   CONSTANT; the remaining one becomes OTHER.  */
10433 if (CONST_INT_P (XEXP (x, 1)))
10435 constant = XEXP (x, 1);
10436 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10438 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10440 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10441 other = XEXP (x, 1);
10449 x = gen_rtx_PLUS (Pmode,
10450 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10451 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10452 plus_constant (other, INTVAL (constant)));
/* If anything above changed X into a valid address, stop here.  */
10456 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10459 if (GET_CODE (XEXP (x, 0)) == MULT)
10462 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10465 if (GET_CODE (XEXP (x, 1)) == MULT)
10468 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10472 && REG_P (XEXP (x, 1))
10473 && REG_P (XEXP (x, 0)))
10476 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10479 x = legitimize_pic_address (x, 0);
10482 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
/* Last resort: force the non-register half into a fresh pseudo.  */
10485 if (REG_P (XEXP (x, 0)))
10487 rtx temp = gen_reg_rtx (Pmode);
10488 rtx val = force_operand (XEXP (x, 1), temp);
10490 emit_move_insn (temp, val);
10492 XEXP (x, 1) = temp;
10496 else if (REG_P (XEXP (x, 1)))
10498 rtx temp = gen_reg_rtx (Pmode);
10499 rtx val = force_operand (XEXP (x, 0), temp);
10501 emit_move_insn (temp, val);
10503 XEXP (x, 0) = temp;
10511 /* Print an integer constant expression in assembler syntax. Addition
10512 and subtraction are the only arithmetic that may appear in these
10513 expressions. FILE is the stdio stream to write to, X is the rtx, and
10514 CODE is the operand print code from the output string. */
10517 output_pic_addr_const (FILE *file, rtx x, int code)
/* NOTE(review): this excerpt is non-contiguous (original line numbers
   skip), so the case labels and braces of the switch below are elided.
   The branches appear to handle PC, SYMBOL_REF, LABEL_REF/CODE_LABEL,
   CONST_INT, CONST, CONST_DOUBLE, PLUS, MINUS and UNSPEC -- confirm
   against the full file.  */
10521 switch (GET_CODE (x))
10524 gcc_assert (flag_pic);
10529 if (! TARGET_MACHO || TARGET_64BIT)
10530 output_addr_const (file, x);
10533 const char *name = XSTR (x, 0);
10535 /* Mark the decl as referenced so that cgraph will
10536 output the function. */
10537 if (SYMBOL_REF_DECL (x))
10538 mark_decl_referenced (SYMBOL_REF_DECL (x));
/* Darwin: calls to functions not defined in this image are presumably
   routed through an indirection stub (stub_p=true).  */
10541 if (MACHOPIC_INDIRECT
10542 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10543 name = machopic_indirection_name (x, /*stub_p=*/true);
10545 assemble_name (file, name);
/* The 'P' print code asks for an explicit @PLT suffix on non-local
   symbols (ELF-style targets only).  */
10547 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10548 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10549 fputs ("@PLT", file);
/* Internal code label: print its generated "L<n>" assembler name.  */
10556 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10557 assemble_name (asm_out_file, buf);
10561 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10565 /* This used to output parentheses around the expression,
10566 but that does not work on the 386 (either ATT or BSD assembler). */
10567 output_pic_addr_const (file, XEXP (x, 0), code);
10571 if (GET_MODE (x) == VOIDmode)
10573 /* We can use %d if the number is <32 bits and positive. */
10574 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10575 fprintf (file, "0x%lx%08lx",
10576 (unsigned long) CONST_DOUBLE_HIGH (x),
10577 (unsigned long) CONST_DOUBLE_LOW (x))
10579 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10582 /* We can't handle floating point constants;
10583 PRINT_OPERAND must handle them. */
10584 output_operand_lossage ("floating constant misused");
10588 /* Some assemblers need integer constants to appear first. */
10589 if (CONST_INT_P (XEXP (x, 0)))
10591 output_pic_addr_const (file, XEXP (x, 0), code);
10593 output_pic_addr_const (file, XEXP (x, 1), code);
/* Presumably the MINUS case: the subtrahend must be a constant
   integer -- confirm against the elided case label.  */
10597 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10598 output_pic_addr_const (file, XEXP (x, 1), code);
10600 output_pic_addr_const (file, XEXP (x, 0), code);
/* Intel dialect wraps this sub-expression in parentheses, AT&T in
   brackets (the operator printed in between is elided here).  */
10606 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10607 output_pic_addr_const (file, XEXP (x, 0), code);
10609 output_pic_addr_const (file, XEXP (x, 1), code);
10611 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the single wrapped operand, then the relocation
   suffix selected by the unspec number.  */
10615 gcc_assert (XVECLEN (x, 0) == 1);
10616 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10617 switch (XINT (x, 1))
10620 fputs ("@GOT", file);
10622 case UNSPEC_GOTOFF:
10623 fputs ("@GOTOFF", file);
10625 case UNSPEC_PLTOFF:
10626 fputs ("@PLTOFF", file);
10628 case UNSPEC_GOTPCREL:
10629 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10630 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10632 case UNSPEC_GOTTPOFF:
10633 /* FIXME: This might be @TPOFF in Sun ld too. */
10634 fputs ("@GOTTPOFF", file);
10637 fputs ("@TPOFF", file);
10639 case UNSPEC_NTPOFF:
10641 fputs ("@TPOFF", file);
10643 fputs ("@NTPOFF", file);
10645 case UNSPEC_DTPOFF:
10646 fputs ("@DTPOFF", file);
10648 case UNSPEC_GOTNTPOFF:
10650 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10651 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10653 fputs ("@GOTNTPOFF", file);
10655 case UNSPEC_INDNTPOFF:
10656 fputs ("@INDNTPOFF", file);
10659 case UNSPEC_MACHOPIC_OFFSET:
10661 machopic_output_function_base_name (file);
10665 output_operand_lossage ("invalid UNSPEC as operand");
10671 output_operand_lossage ("invalid expression as operand");
10675 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10676 We need to emit DTP-relative relocations. */
10678 static void ATTRIBUTE_UNUSED
10679 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
/* NOTE(review): the switch on SIZE is elided in this excerpt.  The
   visible code emits a 32-bit @DTPOFF value; the ", 0" below presumably
   pads the high half for the 8-byte case, and other sizes abort --
   confirm against the full file.  */
10681 fputs (ASM_LONG, file);
10682 output_addr_const (file, x);
10683 fputs ("@DTPOFF", file);
/* presumably the 8-byte case: zero high word -- TODO confirm */
10689 fputs (", 0", file);
/* unsupported SIZE */
10692 gcc_unreachable ();
10696 /* Return true if X is a representation of the PIC register. This copes
10697 with calls from ix86_find_base_term, where the register might have
10698 been replaced by a cselib value. */
10701 ix86_pic_register_p (rtx x)
/* After cselib substitution the PIC register can appear as a VALUE
   rtx; compare it against pic_offset_table_rtx in that case.  */
10703 if (GET_CODE (x) == VALUE)
10704 return (pic_offset_table_rtx
10705 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
/* Otherwise it must be the hard/pseudo PIC register itself.  */
10707 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10710 /* In the name of slightly smaller debug output, and to cater to
10711 general assembler lossage, recognize PIC+GOTOFF and turn it back
10712 into a direct symbol reference.
10714 On Darwin, this is necessary to avoid a crash, because Darwin
10715 has a different PIC label for each routine but the DWARF debugging
10716 information is not associated with any particular routine, so it's
10717 necessary to remove references to the PIC label from RTL stored by
10718 the DWARF output code. */
10721 ix86_delegitimize_address (rtx orig_x)
/* NOTE(review): interior lines (x initialization from orig_x, some
   early returns) are elided in this excerpt.  */
10724 /* reg_addend is NULL or a multiple of some register. */
10725 rtx reg_addend = NULL_RTX;
10726 /* const_addend is NULL or a const_int. */
10727 rtx const_addend = NULL_RTX;
10728 /* This is the result, or NULL. */
10729 rtx result = NULL_RTX;
/* RIP-relative GOT load: CONST(UNSPEC_GOTPCREL) inside a MEM
   delegitimizes directly to the wrapped symbol.  (A TARGET_64BIT
   guard is presumably elided just above -- confirm.)  */
10736 if (GET_CODE (x) != CONST
10737 || GET_CODE (XEXP (x, 0)) != UNSPEC
10738 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10739 || !MEM_P (orig_x))
10741 return XVECEXP (XEXP (x, 0), 0, 0);
/* 32-bit form: require (plus <something> (const ...)).  */
10744 if (GET_CODE (x) != PLUS
10745 || GET_CODE (XEXP (x, 1)) != CONST)
10748 if (ix86_pic_register_p (XEXP (x, 0)))
10749 /* %ebx + GOT/GOTOFF */
10751 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10753 /* %ebx + %reg * scale + GOT/GOTOFF */
10754 reg_addend = XEXP (x, 0);
10755 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10756 reg_addend = XEXP (reg_addend, 1);
10757 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10758 reg_addend = XEXP (reg_addend, 0);
/* The non-PIC addend must look like an index term.  */
10761 if (!REG_P (reg_addend)
10762 && GET_CODE (reg_addend) != MULT
10763 && GET_CODE (reg_addend) != ASHIFT)
/* Strip an optional constant offset inside the CONST.  */
10769 x = XEXP (XEXP (x, 1), 0);
10770 if (GET_CODE (x) == PLUS
10771 && CONST_INT_P (XEXP (x, 1)))
10773 const_addend = XEXP (x, 1);
10777 if (GET_CODE (x) == UNSPEC
10778 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10779 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10780 result = XVECEXP (x, 0, 0);
/* Darwin's local-data PIC form -- handled by darwin_local_data_pic.  */
10782 if (TARGET_MACHO && darwin_local_data_pic (x)
10783 && !MEM_P (orig_x))
10784 result = XVECEXP (x, 0, 0);
/* Re-attach the stripped constant and register addends.  */
10790 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10792 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10796 /* If X is a machine specific address (i.e. a symbol or label being
10797 referenced as a displacement from the GOT implemented using an
10798 UNSPEC), then return the base term. Otherwise return X. */
10801 ix86_find_base_term (rtx x)
/* NOTE(review): the enclosing control flow (presumably a TARGET_64BIT
   branch) is elided in this excerpt -- confirm against the full file.  */
10807 if (GET_CODE (x) != CONST)
10809 term = XEXP (x, 0);
/* Skip over an added constant offset (CONST_INT or CONST_DOUBLE).  */
10810 if (GET_CODE (term) == PLUS
10811 && (CONST_INT_P (XEXP (term, 1))
10812 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10813 term = XEXP (term, 0);
10814 if (GET_CODE (term) != UNSPEC
10815 || XINT (term, 1) != UNSPEC_GOTPCREL)
/* The base term is the symbol wrapped by the GOTPCREL unspec.  */
10818 return XVECEXP (term, 0, 0);
/* Fallback: delegitimize the whole address.  */
10821 return ix86_delegitimize_address (x);
/* Print to FILE the assembler condition suffix (e.g. "a", "ae", "p")
   for comparison CODE evaluated in flag mode MODE.  REVERSE inverts
   the condition; FP selects the fcmov-style spelling where the two
   differ.  NOTE(review): most of the switch over CODE (case labels
   and many suffix assignments) is elided in this excerpt.  */
10825 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10826 int fp, FILE *file)
10828 const char *suffix;
/* FP compares: fold away bypass/second codes, then map onto the
   equivalent integer condition.  */
10830 if (mode == CCFPmode || mode == CCFPUmode)
10832 enum rtx_code second_code, bypass_code;
10833 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10834 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10835 code = ix86_fp_compare_code_to_integer (code);
10839 code = reverse_condition (code);
10890 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10894 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10895 Those same assemblers have the same but opposite lossage on cmov. */
10896 if (mode == CCmode)
10897 suffix = fp ? "nbe" : "a";
10898 else if (mode == CCCmode)
10901 gcc_unreachable ();
10917 gcc_unreachable ();
10921 gcc_assert (mode == CCmode || mode == CCCmode);
10938 gcc_unreachable ();
10942 /* ??? As above. */
10943 gcc_assert (mode == CCmode || mode == CCCmode);
10944 suffix = fp ? "nb" : "ae";
10947 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10951 /* ??? As above. */
10952 if (mode == CCmode)
10954 else if (mode == CCCmode)
10955 suffix = fp ? "nb" : "ae";
10957 gcc_unreachable ();
/* UNORDERED / ORDERED map to the parity flag.  */
10960 suffix = fp ? "u" : "p";
10963 suffix = fp ? "nu" : "np";
10966 gcc_unreachable ();
10968 fputs (suffix, file);
10971 /* Print the name of register X to FILE based on its machine mode and number.
10972 If CODE is 'w', pretend the mode is HImode.
10973 If CODE is 'b', pretend the mode is QImode.
10974 If CODE is 'k', pretend the mode is SImode.
10975 If CODE is 'q', pretend the mode is DImode.
10976 If CODE is 'x', pretend the mode is V4SFmode.
10977 If CODE is 't', pretend the mode is V8SFmode.
10978 If CODE is 'h', pretend the reg is the 'high' byte register.
10979 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10980 If CODE is 'd', duplicate the operand for AVX instruction.
10984 print_reg (rtx x, int code, FILE *file)
/* NOTE(review): interior lines (switch labels, register-name table
   lookups) are elided in this excerpt.  */
10987 bool duplicated = code == 'd' && TARGET_AVX;
/* Internal-only registers must never reach the assembler output.  */
10989 gcc_assert (x == pc_rtx
10990 || (REGNO (x) != ARG_POINTER_REGNUM
10991 && REGNO (x) != FRAME_POINTER_REGNUM
10992 && REGNO (x) != FLAGS_REG
10993 && REGNO (x) != FPSR_REG
10994 && REGNO (x) != FPCR_REG));
10996 if (ASSEMBLER_DIALECT == ASM_ATT)
/* pc_rtx prints as "rip"; only valid in 64-bit mode.  */
11001 gcc_assert (TARGET_64BIT);
11002 fputs ("rip", file);
11006 if (code == 'w' || MMX_REG_P (x))
11008 else if (code == 'b')
11010 else if (code == 'k')
11012 else if (code == 'q')
11014 else if (code == 'y')
11016 else if (code == 'h')
11018 else if (code == 'x')
11020 else if (code == 't')
/* From here on CODE holds the operand size in bytes (or a letter
   for the special cases above).  */
11023 code = GET_MODE_SIZE (GET_MODE (x));
11025 /* Irritatingly, AMD extended registers use different naming convention
11026 from the normal registers. */
11027 if (REX_INT_REG_P (x))
11029 gcc_assert (TARGET_64BIT);
11033 error ("extended registers have no high halves");
11036 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
11039 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
11042 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
11045 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
11048 error ("unsupported operand size for extended register");
11058 if (STACK_TOP_P (x))
/* General registers get an 'e' (32-bit) or 'r' (64-bit) prefix in
   front of the legacy 16-bit name.  */
11067 if (! ANY_FP_REG_P (x))
11068 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
11073 reg = hi_reg_name[REGNO (x)];
11076 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
11078 reg = qi_reg_name[REGNO (x)];
11081 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
11083 reg = qi_high_reg_name[REGNO (x)];
11088 gcc_assert (!duplicated);
11090 fputs (hi_reg_name[REGNO (x)] + 1, file);
11095 gcc_unreachable ();
/* 'd' with AVX repeats the register as an extra source operand.  */
11101 if (ASSEMBLER_DIALECT == ASM_ATT)
11102 fprintf (file, ", %%%s", reg);
11104 fprintf (file, ", %s", reg);
11108 /* Locate some local-dynamic symbol still in use by this function
11109 so that we can print its name in some tls_local_dynamic_base
11113 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
/* for_each_rtx callback: when *px is a local-dynamic TLS symbol,
   cache its name in cfun->machine->some_ld_name.  (The dereference of
   px and the return values are elided in this excerpt -- presumably it
   returns nonzero here to stop the walk; confirm.)  */
11117 if (GET_CODE (x) == SYMBOL_REF
11118 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11120 cfun->machine->some_ld_name = XSTR (x, 0);
11127 static const char *
11128 get_some_local_dynamic_name (void)
/* Return the name of some local-dynamic TLS symbol referenced by the
   current function, caching it in cfun->machine->some_ld_name.
   Aborts if the function references none (callers only use this when
   one must exist).  */
11132 if (cfun->machine->some_ld_name)
11133 return cfun->machine->some_ld_name;
/* Scan every insn until the for_each_rtx callback finds a symbol.  */
11135 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11137 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11138 return cfun->machine->some_ld_name;
11140 gcc_unreachable ();
11143 /* Meaning of CODE:
11144 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
11145 C -- print opcode suffix for set/cmov insn.
11146 c -- like C, but print reversed condition
11147 E,e -- likewise, but for compare-and-branch fused insn.
11148 F,f -- likewise, but for floating-point.
11149 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
11151 R -- print the prefix for register names.
11152 z -- print the opcode suffix for the size of the current operand.
11153 Z -- likewise, with special suffixes for x87 instructions.
11154 * -- print a star (in certain assembler syntax)
11155 A -- print an absolute memory reference.
11156 w -- print the operand as if it's a "word" (HImode) even if it isn't.
11157 s -- print a shift double count, followed by the assemblers argument
11159 b -- print the QImode name of the register for the indicated operand.
11160 %b0 would print %al if operands[0] is reg 0.
11161 w -- likewise, print the HImode name of the register.
11162 k -- likewise, print the SImode name of the register.
11163 q -- likewise, print the DImode name of the register.
11164 x -- likewise, print the V4SFmode name of the register.
11165 t -- likewise, print the V8SFmode name of the register.
11166 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
11167 y -- print "st(0)" instead of "st" as a register.
11168 d -- print duplicated register operand for AVX instruction.
11169 D -- print condition for SSE cmp instruction.
11170 P -- if PIC, print an @PLT suffix.
11171 X -- don't print any sort of PIC '@' suffix for a symbol.
11172 & -- print some in-use local-dynamic symbol name.
11173 H -- print a memory address offset by 8; used for sse high-parts
11174 Y -- print condition for SSE5 com* instruction.
11175 + -- print a branch hint as 'cs' or 'ds' prefix
11176 ; -- print a semicolon (after prefixes due to bug in older gas).
11180 print_operand (FILE *file, rtx x, int code)
/* NOTE(review): this excerpt is heavily elided -- the case labels of
   the big switch over CODE are missing; the comments below map visible
   fragments back to the codes documented above where that is clear.  */
11187 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': emit an in-use local-dynamic TLS symbol name.  */
11192 assemble_name (file, get_some_local_dynamic_name ());
11196 switch (ASSEMBLER_DIALECT)
11203 /* Intel syntax. For absolute addresses, registers should not
11204 be surrounded by braces. */
11208 PRINT_OPERAND (file, x, 0);
11215 gcc_unreachable ();
11218 PRINT_OPERAND (file, x, 0);
11223 if (ASSEMBLER_DIALECT == ASM_ATT)
11228 if (ASSEMBLER_DIALECT == ASM_ATT)
11233 if (ASSEMBLER_DIALECT == ASM_ATT)
11238 if (ASSEMBLER_DIALECT == ASM_ATT)
11243 if (ASSEMBLER_DIALECT == ASM_ATT)
11248 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z': AT&T size suffix derived from the operand's mode.  */
11253 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11255 /* Opcodes don't get size suffixes if using Intel opcodes. */
11256 if (ASSEMBLER_DIALECT == ASM_INTEL)
11259 switch (GET_MODE_SIZE (GET_MODE (x)))
11278 output_operand_lossage
11279 ("invalid operand size for operand code '%c'", code);
11284 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11286 (0, "non-integer operand used with operand code '%c'", code);
11290 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
11291 if (ASSEMBLER_DIALECT == ASM_INTEL)
/* 'Z': x87-specific suffixes (fild/fist forms) by integer size.  */
11294 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11296 switch (GET_MODE_SIZE (GET_MODE (x)))
11299 #ifdef HAVE_AS_IX86_FILDS
11309 #ifdef HAVE_AS_IX86_FILDQ
11312 fputs ("ll", file);
11320 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11322 /* 387 opcodes don't get size suffixes
11323 if the operands are registers. */
11324 if (STACK_REG_P (x))
11327 switch (GET_MODE_SIZE (GET_MODE (x)))
11348 output_operand_lossage
11349 ("invalid operand type used with operand code '%c'", code);
11353 output_operand_lossage
11354 ("invalid operand size for operand code '%c'", code);
/* 's': shift-double count followed by the assembler argument
   separator (per the header comment above).  */
11371 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11373 PRINT_OPERAND (file, x, 0);
11374 fputs (", ", file);
11379 /* Little bit of braindamage here. The SSE compare instructions
11380 does use completely different names for the comparisons that the
11381 fp conditional moves. */
11384 switch (GET_CODE (x))
11387 fputs ("eq", file);
11390 fputs ("eq_us", file);
11393 fputs ("lt", file);
11396 fputs ("nge", file);
11399 fputs ("le", file);
11402 fputs ("ngt", file);
11405 fputs ("unord", file);
11408 fputs ("neq", file);
11411 fputs ("neq_oq", file);
11414 fputs ("ge", file);
11417 fputs ("nlt", file);
11420 fputs ("gt", file);
11423 fputs ("nle", file);
11426 fputs ("ord", file);
11429 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11435 switch (GET_CODE (x))
11439 fputs ("eq", file);
11443 fputs ("lt", file);
11447 fputs ("le", file);
11450 fputs ("unord", file);
11454 fputs ("neq", file);
11458 fputs ("nlt", file);
11462 fputs ("nle", file);
11465 fputs ("ord", file);
11468 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* 'O': Sun-assembler cmov size prefix ("w."/"l."/"q.").  */
11474 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11475 if (ASSEMBLER_DIALECT == ASM_ATT)
11477 switch (GET_MODE (x))
11479 case HImode: putc ('w', file); break;
11481 case SFmode: putc ('l', file); break;
11483 case DFmode: putc ('q', file); break;
11484 default: gcc_unreachable ();
11491 if (!COMPARISON_P (x))
11493 output_operand_lossage ("operand is neither a constant nor a "
11494 "condition code, invalid operand code "
11498 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11501 if (!COMPARISON_P (x))
11503 output_operand_lossage ("operand is neither a constant nor a "
11504 "condition code, invalid operand code "
11508 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11509 if (ASSEMBLER_DIALECT == ASM_ATT)
11512 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11515 /* Like above, but reverse condition */
11517 /* Check to see if argument to %c is really a constant
11518 and not a condition code which needs to be reversed. */
11519 if (!COMPARISON_P (x))
11521 output_operand_lossage ("operand is neither a constant nor a "
11522 "condition code, invalid operand "
11526 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11529 if (!COMPARISON_P (x))
11531 output_operand_lossage ("operand is neither a constant nor a "
11532 "condition code, invalid operand "
11536 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11537 if (ASSEMBLER_DIALECT == ASM_ATT)
11540 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'E'/'e': compare-and-branch fused conditions, always CCmode.  */
11544 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11548 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
11552 /* It doesn't actually matter what mode we use here, as we're
11553 only going to use this for printing. */
11554 x = adjust_address_nv (x, DImode, 8);
/* '+': branch-prediction prefix, only when hints are profitable.  */
11562 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11565 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11568 int pred_val = INTVAL (XEXP (x, 0));
11570 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11571 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11573 int taken = pred_val > REG_BR_PROB_BASE / 2;
11574 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11576 /* Emit hints only in the case default branch prediction
11577 heuristics would fail. */
11578 if (taken != cputaken)
11580 /* We use 3e (DS) prefix for taken branches and
11581 2e (CS) prefix for not taken branches. */
11583 fputs ("ds ; ", file);
11585 fputs ("cs ; ", file);
/* Presumably the 'Y' (SSE5 com*) condition case.  */
11593 switch (GET_CODE (x))
11596 fputs ("neq", file);
11599 fputs ("eq", file);
11603 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11607 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11611 fputs ("le", file);
11615 fputs ("lt", file);
11618 fputs ("unord", file);
11621 fputs ("ord", file);
11624 fputs ("ueq", file);
11627 fputs ("nlt", file);
11630 fputs ("nle", file);
11633 fputs ("ule", file);
11636 fputs ("ult", file);
11639 fputs ("une", file);
/* NOTE(review): this message names operand code 'D', but the switch
   above appears to belong to the 'Y' case (SSE5 com*) -- likely a
   copy/paste slip; confirm against the full file and fix upstream.  */
11642 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* ';': separator after prefixes (works around old gas bug).  */
11649 fputs (" ; ", file);
11656 output_operand_lossage ("invalid operand code '%c'", code);
/* No (or consumed) print code: dispatch on the operand's rtx kind.  */
11661 print_reg (x, code, file);
11663 else if (MEM_P (x))
11665 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11666 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11667 && GET_MODE (x) != BLKmode)
11670 switch (GET_MODE_SIZE (GET_MODE (x)))
11672 case 1: size = "BYTE"; break;
11673 case 2: size = "WORD"; break;
11674 case 4: size = "DWORD"; break;
11675 case 8: size = "QWORD"; break;
11676 case 12: size = "XWORD"; break;
11678 if (GET_MODE (x) == XFmode)
11684 gcc_unreachable ();
11687 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11690 else if (code == 'w')
11692 else if (code == 'k')
11695 fputs (size, file);
11696 fputs (" PTR ", file);
11700 /* Avoid (%rip) for call operands. */
11701 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11702 && !CONST_INT_P (x))
11703 output_addr_const (file, x);
11704 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11705 output_operand_lossage ("invalid constraints for operand");
11707 output_address (x);
/* SFmode constant: emit the raw 32-bit bit pattern.  */
11710 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11715 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11716 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11718 if (ASSEMBLER_DIALECT == ASM_ATT)
11720 fprintf (file, "0x%08lx", (long unsigned int) l);
11723 /* These float cases don't actually occur as immediate operands. */
11724 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11728 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11729 fprintf (file, "%s", dstr);
11732 else if (GET_CODE (x) == CONST_DOUBLE
11733 && GET_MODE (x) == XFmode)
11737 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11738 fprintf (file, "%s", dstr);
11743 /* We have patterns that allow zero sets of memory, for instance.
11744 In 64-bit mode, we should probably support all 8-byte vectors,
11745 since we can in fact encode that into an immediate. */
11746 if (GET_CODE (x) == CONST_VECTOR)
11748 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediates get a '$' in AT&T syntax; symbolic immediates get
   "OFFSET FLAT:" in Intel syntax.  */
11754 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11756 if (ASSEMBLER_DIALECT == ASM_ATT)
11759 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11760 || GET_CODE (x) == LABEL_REF)
11762 if (ASSEMBLER_DIALECT == ASM_ATT)
11765 fputs ("OFFSET FLAT:", file);
11768 if (CONST_INT_P (x))
11769 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11771 output_pic_addr_const (file, x, code);
11773 output_addr_const (file, x);
11777 /* Print a memory operand whose address is ADDR. */
11780 print_operand_address (FILE *file, rtx addr)
/* NOTE(review): interior lines (assignment of base/disp, the AT&T vs.
   Intel branch heads, ok-assert) are elided in this excerpt.  */
11782 struct ix86_address parts;
11783 rtx base, index, disp;
11785 int ok = ix86_decompose_address (addr, &parts);
11790 index = parts.index;
11792 scale = parts.scale;
/* Explicit segment override (%fs:/%gs:).  */
11800 if (ASSEMBLER_DIALECT == ASM_ATT)
11802 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11805 gcc_unreachable ();
11808 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11809 if (TARGET_64BIT && !base && !index)
/* Strip a constant offset to find the underlying symbol.  */
11813 if (GET_CODE (disp) == CONST
11814 && GET_CODE (XEXP (disp, 0)) == PLUS
11815 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11816 symbol = XEXP (XEXP (disp, 0), 0);
/* Only non-TLS symbols/labels may use RIP-relative form.  */
11818 if (GET_CODE (symbol) == LABEL_REF
11819 || (GET_CODE (symbol) == SYMBOL_REF
11820 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11823 if (!base && !index)
11825 /* Displacement only requires special attention. */
11827 if (CONST_INT_P (disp))
11829 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11830 fputs ("ds:", file);
11831 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11834 output_pic_addr_const (file, disp, 0);
11836 output_addr_const (file, disp);
/* Presumably the AT&T path: disp(base,index,scale).  */
11840 if (ASSEMBLER_DIALECT == ASM_ATT)
11845 output_pic_addr_const (file, disp, 0);
11846 else if (GET_CODE (disp) == LABEL_REF)
11847 output_asm_label (disp);
11849 output_addr_const (file, disp);
11854 print_reg (base, 0, file);
11858 print_reg (index, 0, file);
11860 fprintf (file, ",%d", scale);
/* Presumably the Intel path: sym[base+index*scale+offset].  */
11866 rtx offset = NULL_RTX;
11870 /* Pull out the offset of a symbol; print any symbol itself. */
11871 if (GET_CODE (disp) == CONST
11872 && GET_CODE (XEXP (disp, 0)) == PLUS
11873 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11875 offset = XEXP (XEXP (disp, 0), 1);
11876 disp = gen_rtx_CONST (VOIDmode,
11877 XEXP (XEXP (disp, 0), 0));
11881 output_pic_addr_const (file, disp, 0);
11882 else if (GET_CODE (disp) == LABEL_REF)
11883 output_asm_label (disp);
11884 else if (CONST_INT_P (disp))
11887 output_addr_const (file, disp);
11893 print_reg (base, 0, file);
11896 if (INTVAL (offset) >= 0)
11898 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11902 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11909 print_reg (index, 0, file);
11911 fprintf (file, "*%d", scale);
/* Handle address constants that output_addr_const itself cannot:
   UNSPEC wrappers carrying TLS relocation suffixes and the Darwin
   machopic function-base offset.  (The return-type line and the
   true/false return statements are elided in this excerpt -- it
   presumably returns false for non-UNSPEC and unknown unspec numbers;
   confirm.)  */
11919 output_addr_const_extra (FILE *file, rtx x)
11923 if (GET_CODE (x) != UNSPEC)
11926 op = XVECEXP (x, 0, 0);
11927 switch (XINT (x, 1))
11929 case UNSPEC_GOTTPOFF:
11930 output_addr_const (file, op);
11931 /* FIXME: This might be @TPOFF in Sun ld. */
11932 fputs ("@GOTTPOFF", file);
11935 output_addr_const (file, op);
11936 fputs ("@TPOFF", file);
11938 case UNSPEC_NTPOFF:
11939 output_addr_const (file, op);
11941 fputs ("@TPOFF", file);
11943 fputs ("@NTPOFF", file);
11945 case UNSPEC_DTPOFF:
11946 output_addr_const (file, op);
11947 fputs ("@DTPOFF", file);
11949 case UNSPEC_GOTNTPOFF:
11950 output_addr_const (file, op);
11952 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11953 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11955 fputs ("@GOTNTPOFF", file);
11957 case UNSPEC_INDNTPOFF:
11958 output_addr_const (file, op);
11959 fputs ("@INDNTPOFF", file);
11962 case UNSPEC_MACHOPIC_OFFSET:
11963 output_addr_const (file, op);
11965 machopic_output_function_base_name (file);
11976 /* Split one or more DImode RTL references into pairs of SImode
11977 references. The RTL can be REG, offsettable MEM, integer constant, or
11978 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11979 split and "num" is its length. lo_half and hi_half are output arrays
11980 that parallel "operands". */
11983 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
/* NOTE(review): the loop header and the MEM test guarding the
   adjust_address branch are elided in this excerpt.  */
11987 rtx op = operands[num];
11989 /* simplify_subreg refuse to split volatile memory addresses,
11990 but we still have to handle it. */
11993 lo_half[num] = adjust_address (op, SImode, 0);
11994 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM case: take SImode subregs at byte offsets 0 and 4;
   VOIDmode constants are treated as DImode.  */
11998 lo_half[num] = simplify_gen_subreg (SImode, op,
11999 GET_MODE (op) == VOIDmode
12000 ? DImode : GET_MODE (op), 0);
12001 hi_half[num] = simplify_gen_subreg (SImode, op,
12002 GET_MODE (op) == VOIDmode
12003 ? DImode : GET_MODE (op), 4);
12007 /* Split one or more TImode RTL references into pairs of DImode
12008 references. The RTL can be REG, offsettable MEM, integer constant, or
12009 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
12010 split and "num" is its length. lo_half and hi_half are output arrays
12011 that parallel "operands". */
12014 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
/* NOTE(review): the loop header and the MEM test guarding the
   adjust_address branch are elided in this excerpt.  */
12018 rtx op = operands[num];
12020 /* simplify_subreg refuse to split volatile memory addresses, but we
12021 still have to handle it. */
12024 lo_half[num] = adjust_address (op, DImode, 0);
12025 hi_half[num] = adjust_address (op, DImode, 8);
/* Non-MEM case: DImode subregs at byte offsets 0 and 8.  */
12029 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
12030 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
12035 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
12036 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
12037 is the expression of the binary operation. The output may either be
12038 emitted here, or returned to the caller, like all output_* functions.
12040 There is no guarantee that the operands are the same mode, as they
12041 might be within FLOAT or FLOAT_EXTEND expressions. */
12043 #ifndef SYSV386_COMPAT
12044 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
12045 wants to fix the assemblers because that causes incompatibility
12046 with gcc. No-one wants to fix gcc because that causes
12047 incompatibility with assemblers... You can use the option of
12048 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
12049 #define SYSV386_COMPAT 1
12053 output_387_binary_op (rtx insn, rtx *operands)
/* NOTE(review): this excerpt is non-contiguous -- several mnemonic
   assignments and case labels inside the switches below are elided.  */
12055 static char buf[40];
12058 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
12060 #ifdef ENABLE_CHECKING
12061 /* Even if we do not want to check the inputs, this documents input
12062 constraints. Which helps in understanding the following code. */
12063 if (STACK_REG_P (operands[0])
12064 && ((REG_P (operands[1])
12065 && REGNO (operands[0]) == REGNO (operands[1])
12066 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
12067 || (REG_P (operands[2])
12068 && REGNO (operands[0]) == REGNO (operands[2])
12069 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
12070 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
12073 gcc_assert (is_sse);
/* Select the x87/SSE mnemonic roots from the operation code; integer
   operand modes indicate fi* (integer-source) forms.  */
12076 switch (GET_CODE (operands[3]))
12079 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12080 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12088 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12089 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12097 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12098 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12106 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12107 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12115 gcc_unreachable ();
/* SSE scalar forms: three-operand template (presumably the AVX
   branch -- the guard is elided; confirm) vs. classic two-operand.  */
12122 strcpy (buf, ssep);
12123 if (GET_MODE (operands[0]) == SFmode)
12124 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
12126 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
12130 strcpy (buf, ssep + 1);
12131 if (GET_MODE (operands[0]) == SFmode)
12132 strcat (buf, "ss\t{%2, %0|%0, %2}");
12134 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: choose the operand/popping form.  */
12140 switch (GET_CODE (operands[3]))
12144 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
12146 rtx temp = operands[2];
12147 operands[2] = operands[1];
12148 operands[1] = temp;
12151 /* know operands[0] == operands[1]. */
12153 if (MEM_P (operands[2]))
12159 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12161 if (STACK_TOP_P (operands[0]))
12162 /* How is it that we are storing to a dead operand[2]?
12163 Well, presumably operands[1] is dead too. We can't
12164 store the result to st(0) as st(0) gets popped on this
12165 instruction. Instead store to operands[2] (which I
12166 think has to be st(1)). st(1) will be popped later.
12167 gcc <= 2.8.1 didn't have this check and generated
12168 assembly code that the Unixware assembler rejected. */
12169 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
12171 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
12175 if (STACK_TOP_P (operands[0]))
12176 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
12178 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
12183 if (MEM_P (operands[1]))
12189 if (MEM_P (operands[2]))
12195 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12198 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
12199 derived assemblers, confusingly reverse the direction of
12200 the operation for fsub{r} and fdiv{r} when the
12201 destination register is not st(0). The Intel assembler
12202 doesn't have this brain damage. Read !SYSV386_COMPAT to
12203 figure out what the hardware really does. */
12204 if (STACK_TOP_P (operands[0]))
12205 p = "{p\t%0, %2|rp\t%2, %0}";
12207 p = "{rp\t%2, %0|p\t%0, %2}";
12209 if (STACK_TOP_P (operands[0]))
12210 /* As above for fmul/fadd, we can't store to st(0). */
12211 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
12213 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
12218 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
12221 if (STACK_TOP_P (operands[0]))
12222 p = "{rp\t%0, %1|p\t%1, %0}";
12224 p = "{p\t%1, %0|rp\t%0, %1}";
12226 if (STACK_TOP_P (operands[0]))
12227 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
12229 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
12234 if (STACK_TOP_P (operands[0]))
12236 if (STACK_TOP_P (operands[1]))
12237 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
12239 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
12242 else if (STACK_TOP_P (operands[1]))
12245 p = "{\t%1, %0|r\t%0, %1}";
12247 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
12253 p = "{r\t%2, %0|\t%0, %2}";
12255 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
12261 gcc_unreachable ();
12268 /* Return needed mode for entity in optimize_mode_switching pass. */
12271 ix86_mode_needed (int entity, rtx insn)
/* NOTE(review): the CALL_P test and the switch over ENTITY guarding
   the mode checks below are elided in this excerpt.  */
12273 enum attr_i387_cw mode;
12275 /* The mode UNINITIALIZED is used to store control word after a
12276 function call or ASM pattern. The mode ANY specify that function
12277 has no requirements on the control word and make no changes in the
12278 bits we are interested in. */
12281 || (NONJUMP_INSN_P (insn)
12282 && (asm_noperands (PATTERN (insn)) >= 0
12283 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12284 return I387_CW_UNINITIALIZED;
12286 if (recog_memoized (insn) < 0)
12287 return I387_CW_ANY;
/* Read the required control word from the insn's i387_cw attribute.  */
12289 mode = get_attr_i387_cw (insn);
12294 if (mode == I387_CW_TRUNC)
12299 if (mode == I387_CW_FLOOR)
12304 if (mode == I387_CW_CEIL)
12309 if (mode == I387_CW_MASK_PM)
12314 gcc_unreachable ();
12317 return I387_CW_ANY;
12320 /* Output code to initialize control word copies used by trunc?f?i and
12321 rounding patterns. CURRENT_MODE is set to current control word,
12322 while NEW_MODE is set to new control word. */
/* MODE is one of the I387_CW_* values selecting the required rounding
   or exception-mask setting; the result is stored into a dedicated
   stack slot chosen via SLOT.  */
12325 emit_i387_cw_initialization (int mode)
12327 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED)
12330 enum ix86_stack_slot slot;
12332 rtx reg = gen_reg_rtx (HImode);
/* Save the current control word with fnstcw, then work on a copy.  */
12334 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12335 emit_move_insn (reg, copy_rtx (stored_mode));
/* First variant: modify the 16-bit copy with and/or.  Used for 64-bit
   targets, targets with partial-register stalls, or when optimizing
   for size.  Bits 0x0c00 are the x87 rounding-control field.  */
12337 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12338 || optimize_function_for_size_p (cfun))
12342 case I387_CW_TRUNC:
12343 /* round toward zero (truncate) */
12344 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12345 slot = SLOT_CW_TRUNC;
12348 case I387_CW_FLOOR:
12349 /* round down toward -oo */
12350 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12351 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12352 slot = SLOT_CW_FLOOR;
12356 /* round up toward +oo */
12357 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12358 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12359 slot = SLOT_CW_CEIL;
12362 case I387_CW_MASK_PM:
12363 /* mask precision exception for nearbyint() */
12364 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12365 slot = SLOT_CW_MASK_PM;
12369 gcc_unreachable ();
/* Second variant (elided `else` branch): insert the 2-bit rounding
   field directly with movsi_insv_1 instead of and/or.  */
12376 case I387_CW_TRUNC:
12377 /* round toward zero (truncate) */
12378 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12379 slot = SLOT_CW_TRUNC;
12382 case I387_CW_FLOOR:
12383 /* round down toward -oo */
12384 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12385 slot = SLOT_CW_FLOOR;
12389 /* round up toward +oo */
12390 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12391 slot = SLOT_CW_CEIL;
12394 case I387_CW_MASK_PM:
12395 /* mask precision exception for nearbyint() */
12396 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12397 slot = SLOT_CW_MASK_PM;
12401 gcc_unreachable ();
/* Store the modified control word into its per-mode stack slot.  */
12405 gcc_assert (slot < MAX_386_STACK_LOCALS);
12407 new_mode = assign_386_stack_local (HImode, slot);
12408 emit_move_insn (new_mode, reg);
12411 /* Output code for INSN to convert a float to a signed int. OPERANDS
12412 are the insn operands. The output may be [HSD]Imode and the input
12413 operand may be [SDX]Fmode. */
/* FISTTP nonzero means the SSE3 fisttp instruction is available, which
   truncates without touching the control word; otherwise the store is
   bracketed by fldcw of the truncating (%3) and saved (%2) control
   words.  Returns the (empty) template string via output_asm_insn
   side effects -- NOTE(review): final return elided in this listing.  */
12416 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12418 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12419 int dimode_p = GET_MODE (operands[0]) == DImode;
12420 int round_mode = get_attr_i387_cw (insn);
12422 /* Jump through a hoop or two for DImode, since the hardware has no
12423 non-popping instruction. We used to do this a different way, but
12424 that was somewhat fragile and broke with post-reload splitters. */
12425 if ((dimode_p || fisttp) && !stack_top_dies)
12426 output_asm_insn ("fld\t%y1", operands);
12428 gcc_assert (STACK_TOP_P (operands[1]));
12429 gcc_assert (MEM_P (operands[0]));
12430 gcc_assert (GET_MODE (operands[1]) != TFmode);
12433 output_asm_insn ("fisttp%Z0\t%0", operands);
/* Non-fisttp path (elided `else`): switch rounding mode around the
   store when a specific control word is required.  */
12436 if (round_mode != I387_CW_ANY)
12437 output_asm_insn ("fldcw\t%3", operands);
12438 if (stack_top_dies || dimode_p)
12439 output_asm_insn ("fistp%Z0\t%0", operands);
12441 output_asm_insn ("fist%Z0\t%0", operands);
12442 if (round_mode != I387_CW_ANY)
12443 output_asm_insn ("fldcw\t%2", operands);
12449 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12450 have the values zero or one, indicates the ffreep insn's operand
12451 from the OPERANDS array. */
12453 static const char *
12454 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12456 if (TARGET_USE_FFREEP)
12457 #if HAVE_AS_IX86_FFREEP
12458 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks the ffreep mnemonic: hand-encode the instruction as
   a raw word.  The '_' placeholder at index 9 of ".word\t0xc_df" is
   patched with the stack-register digit, yielding the little-endian
   encoding of ffreep %st(N) (opcode bytes DF C0+N).  */
12461 static char retval[] = ".word\t0xc_df";
12462 int regno = REGNO (operands[opno]);
12464 gcc_assert (FP_REGNO_P (regno));
12466 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not profitable: pop with fstp.  */
12471 return opno ? "fstp\t%y1" : "fstp\t%y0";
12475 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12476 should be used. UNORDERED_P is true when fucom should be used. */
/* Returns the assembler template for an x87 or SSE scalar FP compare.
   NOTE(review): several branch headers are elided in this listing.  */
12479 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12481 int stack_top_dies;
12482 rtx cmp_op0, cmp_op1;
12483 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Pick the actual compare operands; which pair is used depends on an
   elided condition (presumably eflags_p).  */
12487 cmp_op0 = operands[0];
12488 cmp_op1 = operands[1];
12492 cmp_op0 = operands[1];
12493 cmp_op1 = operands[2];
/* SSE compares.  The templates carry the AVX "v" prefix; indexing at
   [1] for non-AVX skips that leading 'v'.  */
12498 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12499 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12500 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12501 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12503 if (GET_MODE (operands[0]) == SFmode)
12505 return &ucomiss[TARGET_AVX ? 0 : 1];
12507 return &comiss[TARGET_AVX ? 0 : 1];
12510 return &ucomisd[TARGET_AVX ? 0 : 1];
12512 return &comisd[TARGET_AVX ? 0 : 1];
/* x87 path: op0 must be the stack top.  */
12515 gcc_assert (STACK_TOP_P (cmp_op0));
12517 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero uses ftst, popping via ffreep if st(0) dies.  */
12519 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12521 if (stack_top_dies)
12523 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12524 return output_387_ffreep (operands, 1);
12527 return "ftst\n\tfnstsw\t%0";
12530 if (STACK_REG_P (cmp_op1)
12532 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12533 && REGNO (cmp_op1) != FIRST_STACK_REG)
12535 /* If both the top of the 387 stack dies, and the other operand
12536 is also a stack register that dies, then this must be a
12537 `fcompp' float compare */
12541 /* There is no double popping fcomi variant. Fortunately,
12542 eflags is immune from the fstp's cc clobbering. */
12544 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12546 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12547 return output_387_ffreep (operands, 0);
12552 return "fucompp\n\tfnstsw\t%0";
12554 return "fcompp\n\tfnstsw\t%0";
/* General case: select a template from ALT by a 4-bit mask.  */
12559 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12561 static const char * const alt[16] =
12563 "fcom%Z2\t%y2\n\tfnstsw\t%0",
12564 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12565 "fucom%Z2\t%y2\n\tfnstsw\t%0",
12566 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12568 "ficom%Z2\t%y2\n\tfnstsw\t%0",
12569 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12573 "fcomi\t{%y1, %0|%0, %y1}",
12574 "fcomip\t{%y1, %0|%0, %y1}",
12575 "fucomi\t{%y1, %0|%0, %y1}",
12576 "fucomip\t{%y1, %0|%0, %y1}",
12587 mask = eflags_p << 3;
12588 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12589 mask |= unordered_p << 1;
12590 mask |= stack_top_dies;
12592 gcc_assert (mask < 16);
/* Emit one element of an absolute address vector (jump table) to FILE:
   a .long (or .quad -- the enabling condition is elided here)
   directive referencing local label VALUE.  */
12601 ix86_output_addr_vec_elt (FILE *file, int value)
12603 const char *directive = ASM_LONG;
12607 directive = ASM_QUAD;
12609 gcc_assert (!TARGET_64BIT);
12612 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a relative (PIC) address-difference vector to
   FILE: the distance from label REL (or from the GOT base / function
   base, depending on target) to local label VALUE.  */
12616 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12618 const char *directive = ASM_LONG;
12621 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12622 directive = ASM_QUAD;
12624 gcc_assert (!TARGET_64BIT);
12626 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12627 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12628 fprintf (file, "%s%s%d-%s%d\n",
12629 directive, LPREFIX, value, LPREFIX, rel);
12630 else if (HAVE_AS_GOTOFF_IN_DATA)
12631 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
/* Darwin: express the entry relative to the picbase label.  */
12633 else if (TARGET_MACHO)
12635 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12636 machopic_output_function_base_name (file);
12637 fprintf(file, "\n");
/* Fallback: entry relative to the GOT symbol.  */
12641 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12642 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12645 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* DEST is the register to clear.  Must only be called after reload,
   since it plays register-width games.  */
12649 ix86_expand_clear (rtx dest)
12653 /* We play register width games, which are only valid after reload. */
12654 gcc_assert (reload_completed);
12656 /* Avoid HImode and its attendant prefix byte. */
12657 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12658 dest = gen_rtx_REG (SImode, REGNO (dest));
12659 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12661 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12662 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
/* xor clobbers the flags, so wrap the set in a parallel with a
   FLAGS_REG clobber.  */
12664 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12665 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12671 /* X is an unchanging MEM. If it is a constant pool reference, return
12672 the constant pool rtx, else NULL. */
12675 maybe_get_pool_constant (rtx x)
/* Strip PIC/GOT decoration from the address before testing it.  */
12677 x = ix86_delegitimize_address (XEXP (x, 0));
12679 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12680 return get_pool_constant (x);
/* Expand a scalar move of mode MODE between OPERANDS[0] (dest) and
   OPERANDS[1] (src), legitimizing TLS, dllimport and PIC references
   and forcing awkward operands into registers or memory as needed.
   NOTE(review): several enclosing conditions are elided in this
   listing.  */
12686 ix86_expand_move (enum machine_mode mode, rtx operands[])
12689 enum tls_model model;
/* Direct symbol source: handle TLS and dllimport symbols.  */
12694 if (GET_CODE (op1) == SYMBOL_REF)
12696 model = SYMBOL_REF_TLS_MODEL (op1);
12699 op1 = legitimize_tls_address (op1, model, true);
12700 op1 = force_operand (op1, op0);
12704 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12705 && SYMBOL_REF_DLLIMPORT_P (op1))
12706 op1 = legitimize_dllimport_symbol (op1, false);
/* (const (plus symbol addend)): legitimize the symbol part, then
   re-add the addend.  */
12708 else if (GET_CODE (op1) == CONST
12709 && GET_CODE (XEXP (op1, 0)) == PLUS
12710 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12712 rtx addend = XEXP (XEXP (op1, 0), 1);
12713 rtx symbol = XEXP (XEXP (op1, 0), 0);
12716 model = SYMBOL_REF_TLS_MODEL (symbol);
12718 tmp = legitimize_tls_address (symbol, model, true);
12719 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12720 && SYMBOL_REF_DLLIMPORT_P (symbol))
12721 tmp = legitimize_dllimport_symbol (symbol, true);
12725 tmp = force_operand (tmp, NULL);
12726 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12727 op0, 1, OPTAB_DIRECT);
/* Symbolic operands under -fpic need PIC legitimization.  */
12733 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12735 if (TARGET_MACHO && !TARGET_64BIT)
/* Darwin 32-bit: indirect through the machopic machinery, reusing
   op0 as scratch when safe.  */
12740 rtx temp = ((reload_in_progress
12741 || ((op0 && REG_P (op0))
12743 ? op0 : gen_reg_rtx (Pmode));
12744 op1 = machopic_indirect_data_reference (op1, temp);
12745 op1 = machopic_legitimize_pic_address (op1, mode,
12746 temp == op1 ? 0 : temp);
12748 else if (MACHOPIC_INDIRECT)
12749 op1 = machopic_indirect_data_reference (op1, 0);
12757 op1 = force_reg (Pmode, op1);
12758 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12760 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12761 op1 = legitimize_pic_address (op1, reg);
/* Non-symbolic sources: force to register when the target cannot
   take the operand directly (e.g. push of an unsupported form).  */
12770 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12771 || !push_operand (op0, mode))
12773 op1 = force_reg (mode, op1);
12775 if (push_operand (op0, mode)
12776 && ! general_no_elim_operand (op1, mode))
12777 op1 = copy_to_mode_reg (mode, op1);
12779 /* Force large constants in 64bit compilation into register
12780 to get them CSEed. */
12781 if (can_create_pseudo_p ()
12782 && (mode == DImode) && TARGET_64BIT
12783 && immediate_operand (op1, mode)
12784 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12785 && !register_operand (op0, mode)
12787 op1 = copy_to_mode_reg (mode, op1);
12789 if (can_create_pseudo_p ()
12790 && FLOAT_MODE_P (mode)
12791 && GET_CODE (op1) == CONST_DOUBLE)
12793 /* If we are loading a floating point constant to a register,
12794 force the value to memory now, since we'll get better code
12795 out the back end. */
12797 op1 = validize_mem (force_const_mem (mode, op1));
12798 if (!register_operand (op0, mode))
12800 rtx temp = gen_reg_rtx (mode);
12801 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12802 emit_move_insn (op0, temp);
/* Finally emit the move itself.  */
12808 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector move of mode MODE between OPERANDS[0] and
   OPERANDS[1], forcing constants to memory and dispatching
   insufficiently aligned SSE operands to the misaligned-move
   expander.  */
12812 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12814 rtx op0 = operands[0], op1 = operands[1];
12815 unsigned int align = GET_MODE_ALIGNMENT (mode);
12817 /* Force constants other than zero into memory. We do not know how
12818 the instructions used to build constants modify the upper 64 bits
12819 of the register, once we have that information we may be able
12820 to handle some of them more efficiently. */
12821 if (can_create_pseudo_p ()
12822 && register_operand (op0, mode)
12823 && (CONSTANT_P (op1)
12824 || (GET_CODE (op1) == SUBREG
12825 && CONSTANT_P (SUBREG_REG (op1))))
12826 && standard_sse_constant_p (op1) <= 0)
12827 op1 = validize_mem (force_const_mem (mode, op1));
12829 /* We need to check memory alignment for SSE mode since attribute
12830 can make operands unaligned. */
12831 if (can_create_pseudo_p ()
12832 && SSE_REG_MODE_P (mode)
12833 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12834 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12838 /* ix86_expand_vector_move_misalign() does not like constants ... */
12839 if (CONSTANT_P (op1)
12840 || (GET_CODE (op1) == SUBREG
12841 && CONSTANT_P (SUBREG_REG (op1))))
12842 op1 = validize_mem (force_const_mem (mode, op1));
12844 /* ... nor both arguments in memory. */
12845 if (!register_operand (op0, mode)
12846 && !register_operand (op1, mode))
12847 op1 = force_reg (mode, op1);
12849 tmp[0] = op0; tmp[1] = op1;
12850 ix86_expand_vector_move_misalign (mode, tmp);
12854 /* Make operand1 a register if it isn't already. */
12855 if (can_create_pseudo_p ()
12856 && !register_operand (op0, mode)
12857 && !register_operand (op1, mode))
12859 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
/* Aligned (or post-reload) case: emit the plain move.  */
12863 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12866 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12867 straight to ix86_expand_vector_move. */
12868 /* Code generation for scalar reg-reg moves of single and double precision data:
12869 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12873 if (x86_sse_partial_reg_dependency == true)
12878 Code generation for scalar loads of double precision data:
12879 if (x86_sse_split_regs == true)
12880 movlpd mem, reg (gas syntax)
12884 Code generation for unaligned packed loads of single precision data
12885 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12886 if (x86_sse_unaligned_move_optimal)
12889 if (x86_sse_partial_reg_dependency == true)
12901 Code generation for unaligned packed loads of double precision data
12902 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12903 if (x86_sse_unaligned_move_optimal)
12906 if (x86_sse_split_regs == true)
/* OPERANDS[0]/[1] are the misaligned destination/source of mode MODE.
   NOTE(review): several branch headers (AVX test, load/store split)
   are elided in this listing.  */
12919 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* AVX path (elided condition): vmovdqu/vmovup{s,d} handle any
   alignment directly.  */
12928 switch (GET_MODE_CLASS (mode))
12930 case MODE_VECTOR_INT:
12932 switch (GET_MODE_SIZE (mode))
12935 op0 = gen_lowpart (V16QImode, op0);
12936 op1 = gen_lowpart (V16QImode, op1);
12937 emit_insn (gen_avx_movdqu (op0, op1));
12940 op0 = gen_lowpart (V32QImode, op0);
12941 op1 = gen_lowpart (V32QImode, op1);
12942 emit_insn (gen_avx_movdqu256 (op0, op1));
12945 gcc_unreachable ();
12948 case MODE_VECTOR_FLOAT:
12949 op0 = gen_lowpart (mode, op0);
12950 op1 = gen_lowpart (mode, op1);
12955 emit_insn (gen_avx_movups (op0, op1));
12958 emit_insn (gen_avx_movups256 (op0, op1));
12961 emit_insn (gen_avx_movupd (op0, op1));
12964 emit_insn (gen_avx_movupd256 (op0, op1));
12967 gcc_unreachable ();
12972 gcc_unreachable ();
/* Non-AVX SSE load path (elided MEM_P (op1) condition).  */
12980 /* If we're optimizing for size, movups is the smallest. */
12981 if (optimize_insn_for_size_p ())
12983 op0 = gen_lowpart (V4SFmode, op0);
12984 op1 = gen_lowpart (V4SFmode, op1);
12985 emit_insn (gen_sse_movups (op0, op1));
12989 /* ??? If we have typed data, then it would appear that using
12990 movdqu is the only way to get unaligned data loaded with
12992 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12994 op0 = gen_lowpart (V16QImode, op0);
12995 op1 = gen_lowpart (V16QImode, op1);
12996 emit_insn (gen_sse2_movdqu (op0, op1));
13000 if (TARGET_SSE2 && mode == V2DFmode)
13004 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13006 op0 = gen_lowpart (V2DFmode, op0);
13007 op1 = gen_lowpart (V2DFmode, op1);
13008 emit_insn (gen_sse2_movupd (op0, op1));
13012 /* When SSE registers are split into halves, we can avoid
13013 writing to the top half twice. */
13014 if (TARGET_SSE_SPLIT_REGS)
13016 emit_clobber (op0);
13021 /* ??? Not sure about the best option for the Intel chips.
13022 The following would seem to satisfy; the register is
13023 entirely cleared, breaking the dependency chain. We
13024 then store to the upper half, with a dependency depth
13025 of one. A rumor has it that Intel recommends two movsd
13026 followed by an unpacklpd, but this is unconfirmed. And
13027 given that the dependency depth of the unpacklpd would
13028 still be one, I'm not sure why this would be better. */
13029 zero = CONST0_RTX (V2DFmode);
/* Load the two DF halves separately into low/high lanes.  */
13032 m = adjust_address (op1, DFmode, 0);
13033 emit_insn (gen_sse2_loadlpd (op0, zero, m));
13034 m = adjust_address (op1, DFmode, 8);
13035 emit_insn (gen_sse2_loadhpd (op0, op0, m));
13039 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13041 op0 = gen_lowpart (V4SFmode, op0);
13042 op1 = gen_lowpart (V4SFmode, op1);
13043 emit_insn (gen_sse_movups (op0, op1));
13047 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
13048 emit_move_insn (op0, CONST0_RTX (mode));
13050 emit_clobber (op0);
13052 if (mode != V4SFmode)
13053 op0 = gen_lowpart (V4SFmode, op0);
13054 m = adjust_address (op1, V2SFmode, 0);
13055 emit_insn (gen_sse_loadlps (op0, op0, m));
13056 m = adjust_address (op1, V2SFmode, 8);
13057 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Non-AVX SSE store path.  */
13060 else if (MEM_P (op0))
13062 /* If we're optimizing for size, movups is the smallest. */
13063 if (optimize_insn_for_size_p ())
13065 op0 = gen_lowpart (V4SFmode, op0);
13066 op1 = gen_lowpart (V4SFmode, op1);
13067 emit_insn (gen_sse_movups (op0, op1));
13071 /* ??? Similar to above, only less clear because of quote
13072 typeless stores unquote. */
13073 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
13074 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13076 op0 = gen_lowpart (V16QImode, op0);
13077 op1 = gen_lowpart (V16QImode, op1);
13078 emit_insn (gen_sse2_movdqu (op0, op1));
13082 if (TARGET_SSE2 && mode == V2DFmode)
13084 m = adjust_address (op0, DFmode, 0);
13085 emit_insn (gen_sse2_storelpd (m, op1));
13086 m = adjust_address (op0, DFmode, 8);
13087 emit_insn (gen_sse2_storehpd (m, op1));
/* Split store of the two SF pairs.  */
13091 if (mode != V4SFmode)
13092 op1 = gen_lowpart (V4SFmode, op1);
13093 m = adjust_address (op0, V2SFmode, 0);
13094 emit_insn (gen_sse_storelps (m, op1));
13095 m = adjust_address (op0, V2SFmode, 8);
13096 emit_insn (gen_sse_storehps (m, op1));
13100 gcc_unreachable ();
13103 /* Expand a push in MODE. This is some mode for which we do not support
13104 proper push instructions, at least from the registers that we expect
13105 the value to live in. */
/* X is the value to push; the stack pointer is adjusted down by the
   mode size and X is stored at the new top of stack.  */
13108 ix86_expand_push (enum machine_mode mode, rtx x)
13112 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
13113 GEN_INT (-GET_MODE_SIZE (mode)),
13114 stack_pointer_rtx, 1, OPTAB_DIRECT);
13115 if (tmp != stack_pointer_rtx)
13116 emit_move_insn (stack_pointer_rtx, tmp);
13118 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
13120 /* When we push an operand onto stack, it has to be aligned at least
13121 at the function argument boundary. However since we don't have
13122 the argument type, we can't determine the actual argument
13124 emit_move_insn (tmp, x);
13127 /* Helper function of ix86_fixup_binary_operands to canonicalize
13128 operand order. Returns true if the operands should be swapped. */
13131 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
13134 rtx dst = operands[0];
13135 rtx src1 = operands[1];
13136 rtx src2 = operands[2];
13138 /* If the operation is not commutative, we can't do anything. */
13139 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
13142 /* Highest priority is that src1 should match dst. */
13143 if (rtx_equal_p (dst, src1))
13145 if (rtx_equal_p (dst, src2))
13148 /* Next highest priority is that immediate constants come second. */
13149 if (immediate_operand (src2, mode))
13151 if (immediate_operand (src1, mode))
13154 /* Lowest priority is that memory references should come second. */
13164 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
13165 destination to use for the operation. If different from the true
13166 destination in operands[0], a copy operation will be required. */
13169 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
13172 rtx dst = operands[0];
13173 rtx src1 = operands[1];
13174 rtx src2 = operands[2];
13176 /* Canonicalize operand order. */
13177 if (ix86_swap_binary_operands_p (code, mode, operands))
13181 /* It is invalid to swap operands of different modes. */
13182 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
13189 /* Both source operands cannot be in memory. */
13190 if (MEM_P (src1) && MEM_P (src2))
13192 /* Optimization: Only read from memory once. */
13193 if (rtx_equal_p (src1, src2))
13195 src2 = force_reg (mode, src2);
13199 src2 = force_reg (mode, src2);
13202 /* If the destination is memory, and we do not have matching source
13203 operands, do things in registers. */
13204 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13205 dst = gen_reg_rtx (mode);
13207 /* Source 1 cannot be a constant. */
13208 if (CONSTANT_P (src1))
13209 src1 = force_reg (mode, src1);
13211 /* Source 1 cannot be a non-matching memory. */
13212 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13213 src1 = force_reg (mode, src1);
/* Write back the (possibly replaced) sources; DST is returned.  */
13215 operands[1] = src1;
13216 operands[2] = src2;
13220 /* Similarly, but assume that the destination has already been
13221 set up properly. */
/* Asserts that no destination copy is needed, i.e. the fixup left
   operands[0] as the operation destination.  */
13224 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
13225 enum machine_mode mode, rtx operands[])
13227 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
13228 gcc_assert (dst == operands[0]);
13231 /* Attempt to expand a binary operator. Make the expansion closer to the
13232 actual machine, then just general_operand, which will allow 3 separate
13233 memory references (one output, two input) in a single insn. */
13236 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
13239 rtx src1, src2, dst, op, clob;
13241 dst = ix86_fixup_binary_operands (code, mode, operands);
13242 src1 = operands[1];
13243 src2 = operands[2];
13245 /* Emit the instruction. */
13247 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
13248 if (reload_in_progress)
13250 /* Reload doesn't know about the flags register, and doesn't know that
13251 it doesn't want to clobber it. We can only do this with PLUS. */
13252 gcc_assert (code == PLUS);
/* Normal case: attach the FLAGS_REG clobber that x86 arithmetic
   patterns require.  */
13257 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13258 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13261 /* Fix up the destination if needed. */
13262 if (dst != operands[0])
13263 emit_move_insn (operands[0], dst);
13266 /* Return TRUE or FALSE depending on whether the binary operator meets the
13267 appropriate constraints. */
13270 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
13273 rtx dst = operands[0];
13274 rtx src1 = operands[1];
13275 rtx src2 = operands[2];
13277 /* Both source operands cannot be in memory. */
13278 if (MEM_P (src1) && MEM_P (src2))
13281 /* Canonicalize operand order for commutative operators. */
13282 if (ix86_swap_binary_operands_p (code, mode, operands))
/* Swap body elided in this listing.  */
13289 /* If the destination is memory, we must have a matching source operand. */
13290 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13293 /* Source 1 cannot be a constant. */
13294 if (CONSTANT_P (src1))
13297 /* Source 1 cannot be a non-matching memory. */
13298 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13304 /* Attempt to expand a unary operator. Make the expansion closer to the
13305 actual machine, then just general_operand, which will allow 2 separate
13306 memory references (one output, one input) in a single insn. */
13309 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13312 int matching_memory;
13313 rtx src, dst, op, clob;
13318 /* If the destination is memory, and we do not have matching source
13319 operands, do things in registers. */
13320 matching_memory = 0;
13323 if (rtx_equal_p (dst, src))
13324 matching_memory = 1;
13326 dst = gen_reg_rtx (mode);
13329 /* When source operand is memory, destination must match. */
13330 if (MEM_P (src) && !matching_memory)
13331 src = force_reg (mode, src);
13333 /* Emit the instruction. */
13335 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13336 if (reload_in_progress || code == NOT)
13338 /* Reload doesn't know about the flags register, and doesn't know that
13339 it doesn't want to clobber it. */
13340 gcc_assert (code == NOT);
/* Other unary codes (e.g. NEG) need the FLAGS_REG clobber.  */
13345 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13346 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13349 /* Fix up the destination if needed. */
13350 if (dst != operands[0])
13351 emit_move_insn (operands[0], dst);
13354 #define LEA_SEARCH_THRESHOLD 12
13356 /* Search backward for non-agu definition of register number REGNO1
13357 or register number REGNO2 in INSN's basic block until
13358 1. Pass LEA_SEARCH_THRESHOLD instructions, or
13359 2. Reach BB boundary, or
13360 3. Reach agu definition.
13361 Returns the distance between the non-agu definition point and INSN.
13362 If no definition point, returns -1. */
13365 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13368 basic_block bb = BLOCK_FOR_INSN (insn);
13371 enum attr_type insn_type;
/* First scan backwards within the basic block.  */
13373 if (insn != BB_HEAD (bb))
13375 rtx prev = PREV_INSN (insn);
13376 while (prev && distance < LEA_SEARCH_THRESHOLD)
/* Walk the dataflow defs of PREV looking for a real (non-artificial)
   definition of either register.  */
13381 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13382 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13383 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13384 && (regno1 == DF_REF_REGNO (*def_rec)
13385 || regno2 == DF_REF_REGNO (*def_rec)))
13387 insn_type = get_attr_type (prev);
13388 if (insn_type != TYPE_LEA)
13392 if (prev == BB_HEAD (bb))
13394 prev = PREV_INSN (prev);
/* If the threshold was not exhausted, continue the scan through a
   single-predecessor self-loop back edge, wrapping to BB_END.  */
13398 if (distance < LEA_SEARCH_THRESHOLD)
13402 bool simple_loop = false;
13404 FOR_EACH_EDGE (e, ei, bb->preds)
13407 simple_loop = true;
13413 rtx prev = BB_END (bb);
13416 && distance < LEA_SEARCH_THRESHOLD)
13421 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13422 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13423 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13424 && (regno1 == DF_REF_REGNO (*def_rec)
13425 || regno2 == DF_REF_REGNO (*def_rec)))
13427 insn_type = get_attr_type (prev);
13428 if (insn_type != TYPE_LEA)
13432 prev = PREV_INSN (prev);
13440 /* get_attr_type may modify recog data. We want to make sure
13441 that recog data is valid for instruction INSN, on which
13442 distance_non_agu_define is called. INSN is unchanged here. */
13443 extract_insn_cached (insn);
13447 /* Return the distance between INSN and the next insn that uses
13448 register number REGNO0 in memory address. Return -1 if no such
13449 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
13452 distance_agu_use (unsigned int regno0, rtx insn)
13454 basic_block bb = BLOCK_FOR_INSN (insn);
/* First scan forwards within the basic block.  */
13459 if (insn != BB_END (bb))
13461 rtx next = NEXT_INSN (insn);
13462 while (next && distance < LEA_SEARCH_THRESHOLD)
/* A memory-address use of REGNO0 means the AGU result is consumed.  */
13468 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13469 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13470 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13471 && regno0 == DF_REF_REGNO (*use_rec))
13473 /* Return DISTANCE if OP0 is used in memory
13474 address in NEXT. */
/* A redefinition of REGNO0 ends the search.  */
13478 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13479 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13480 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13481 && regno0 == DF_REF_REGNO (*def_rec))
13483 /* Return -1 if OP0 is set in NEXT. */
13487 if (next == BB_END (bb))
13489 next = NEXT_INSN (next);
/* If the threshold was not exhausted, continue the scan through a
   single-successor self-loop edge, wrapping to BB_HEAD.  */
13493 if (distance < LEA_SEARCH_THRESHOLD)
13497 bool simple_loop = false;
13499 FOR_EACH_EDGE (e, ei, bb->succs)
13502 simple_loop = true;
13508 rtx next = BB_HEAD (bb);
13511 && distance < LEA_SEARCH_THRESHOLD)
13517 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13518 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13519 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13520 && regno0 == DF_REF_REGNO (*use_rec))
13522 /* Return DISTANCE if OP0 is used in memory
13523 address in NEXT. */
13527 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13528 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13529 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13530 && regno0 == DF_REF_REGNO (*def_rec))
13532 /* Return -1 if OP0 is set in NEXT. */
13537 next = NEXT_INSN (next);
13545 /* Define this macro to tune LEA priority vs ADD, it take effect when
13546 there is a dilemma of choicing LEA or ADD
13547 Negative value: ADD is more preferred than LEA
13549 Positive value: LEA is more preferred than ADD*/
13550 #define IX86_LEA_PRIORITY 2
13552 /* Return true if it is ok to optimize an ADD operation to LEA
13553 operation to avoid flag register consumation. For the processors
13554 like ATOM, if the destination register of LEA holds an actual
13555 address which will be used soon, LEA is better and otherwise ADD
/* OPERANDS are the add's dest and two sources; INSN is the add insn
   being considered.  */
13559 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13560 rtx insn, rtx operands[])
13562 unsigned int regno0 = true_regnum (operands[0]);
13563 unsigned int regno1 = true_regnum (operands[1]);
13564 unsigned int regno2;
/* Without AGU tuning (or when sizing), LEA is only needed when the
   destination differs from the first source.  */
13566 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13567 return regno0 != regno1;
13569 regno2 = true_regnum (operands[2]);
13571 /* If a = b + c, (a!=b && a!=c), must use lea form. */
13572 if (regno0 != regno1 && regno0 != regno2)
13576 int dist_define, dist_use;
13577 dist_define = distance_non_agu_define (regno1, regno2, insn);
13578 if (dist_define <= 0)
13581 /* If this insn has both backward non-agu dependence and forward
13582 agu dependence, the one with short distance take effect. */
13583 dist_use = distance_agu_use (regno0, insn);
/* Prefer LEA only if the AGU use is near enough (weighted by
   IX86_LEA_PRIORITY).  */
13585 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13592 /* Return true if destination reg of SET_BODY is shift count of
/* Recursive helper: SET_BODY and USE_BODY are insn pattern bodies;
   PARALLELs are searched element-wise on both sides.  */
13596 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13602 /* Retrieve destination of SET_BODY. */
13603 switch (GET_CODE (set_body))
13606 set_dest = SET_DEST (set_body);
13607 if (!set_dest || !REG_P (set_dest))
13611 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13612 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13620 /* Retrieve shift count of USE_BODY. */
13621 switch (GET_CODE (use_body))
13624 shift_rtx = XEXP (use_body, 1);
13627 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13628 if (ix86_dep_by_shift_count_body (set_body,
13629 XVECEXP (use_body, 0, i)))
/* Only shift/rotate codes have a count operand to match against.  */
13637 && (GET_CODE (shift_rtx) == ASHIFT
13638 || GET_CODE (shift_rtx) == LSHIFTRT
13639 || GET_CODE (shift_rtx) == ASHIFTRT
13640 || GET_CODE (shift_rtx) == ROTATE
13641 || GET_CODE (shift_rtx) == ROTATERT))
13643 rtx shift_count = XEXP (shift_rtx, 1);
13645 /* Return true if shift count is dest of SET_BODY. */
13646 if (REG_P (shift_count)
13647 && true_regnum (set_dest) == true_regnum (shift_count))
13654 /* Return true if destination reg of SET_INSN is shift count of
/* Thin wrapper over ix86_dep_by_shift_count_body working on whole
   insns rather than pattern bodies.  */
13658 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13660 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13661 PATTERN (use_insn));
13664 /* Return TRUE or FALSE depending on whether the unary operator meets the
13665 appropriate constraints. */
13668 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13669 enum machine_mode mode ATTRIBUTE_UNUSED,
13670 rtx operands[2] ATTRIBUTE_UNUSED)
13672 /* If one of operands is memory, source and destination must match. */
13673 if ((MEM_P (operands[0])
13674 || MEM_P (operands[1]))
13675 && ! rtx_equal_p (operands[0], operands[1]))
13680 /* Post-reload splitter for converting an SF or DFmode value in an
13681 SSE register into an unsigned SImode. */
/* OPERANDS: [0] result, [1] LARGE scratch, [2] ZERO_OR_TWO31 scratch,
   [3] INPUT value, [4] the constant 2**31.  Values >= 2**31 have 2**31
   subtracted before the signed cvtt conversion, and the sign bit is
   restored with an xor afterwards.  */
13684 ix86_split_convert_uns_si_sse (rtx operands[])
13686 enum machine_mode vecmode;
13687 rtx value, large, zero_or_two31, input, two31, x;
13689 large = operands[1];
13690 zero_or_two31 = operands[2];
13691 input = operands[3];
13692 two31 = operands[4];
13693 vecmode = GET_MODE (large);
13694 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13696 /* Load up the value into the low element. We must ensure that the other
13697 elements are valid floats -- zero is the easiest such value. */
/* MEM input: insert into a zeroed vector.  */
13700 if (vecmode == V4SFmode)
13701 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13703 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Register input (elided `else`): clear VALUE then movss/movsd the
   low element across.  */
13707 input = gen_rtx_REG (vecmode, REGNO (input));
13708 emit_move_insn (value, CONST0_RTX (vecmode));
13709 if (vecmode == V4SFmode)
13710 emit_insn (gen_sse_movss (value, value, input));
13712 emit_insn (gen_sse2_movsd (value, value, input));
13715 emit_move_insn (large, two31);
13716 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* LARGE := (2**31 <= value) as an all-ones/zero mask.  */
13718 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13719 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* ZERO_OR_TWO31 := 2**31 where the value is large, else 0.  */
13721 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13722 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13724 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13725 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the mask into just the sign bit (bit 31) per lane.  */
13727 large = gen_rtx_REG (V4SImode, REGNO (large));
13728 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13730 x = gen_rtx_REG (V4SImode, REGNO (value));
13731 if (vecmode == V4SFmode)
13732 emit_insn (gen_sse2_cvttps2dq (x, value));
13734 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* Re-apply the subtracted 2**31 by flipping the sign bit.  */
13737 emit_insn (gen_xorv4si3 (value, value, large));
13740 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13741 Expects the 64-bit DImode to be supplied in a pair of integral
13742 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13743 -mfpmath=sse, !optimize_size only. */
13746 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13748 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13749 rtx int_xmm, fp_xmm;
13750 rtx biases, exponents;
/* Move the 64-bit integer into an XMM register; which path is cheapest
   depends on the tuning flags below.  */
13753 int_xmm = gen_reg_rtx (V4SImode);
13754 if (TARGET_INTER_UNIT_MOVES)
13755 emit_insn (gen_movdi_to_sse (int_xmm, input));
13756 else if (TARGET_SSE_SPLIT_REGS)
/* The clobber tells the register allocator the whole vector dies; the
   DImode lowpart move then fills the low half.  */
13758 emit_clobber (int_xmm);
13759 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13763 x = gen_reg_rtx (V2DImode);
13764 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13765 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Exponent words 0x43300000 = 0x1.0p52, 0x45300000 = 0x1.0p84 (high
   word of the respective IEEE double).  */
13768 x = gen_rtx_CONST_VECTOR (V4SImode,
13769 gen_rtvec (4, GEN_INT (0x43300000UL),
13770 GEN_INT (0x45300000UL),
13771 const0_rtx, const0_rtx));
13772 exponents = validize_mem (force_const_mem (V4SImode, x));
13774 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13775 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13777 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13778 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13779 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13780 (0x1.0p84 + double(fp_value_hi_xmm)).
13781 Note these exponents differ by 32. */
13783 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13785 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13786 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13787 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13788 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13789 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13790 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13791 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13792 biases = validize_mem (force_const_mem (V2DFmode, biases));
13793 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13795 /* Add the upper and lower DFmode values together. */
/* SSE3 haddpd does it in one insn; otherwise unpack-high and add
   (condition line elided in this view).  */
13797 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13800 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13801 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13802 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
/* Extract the scalar DF result from element 0 into TARGET.  */
13805 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13808 /* Not used, but eases macroization of patterns. */
/* XFmode variant exists only so the pattern macros have a uniform
   expander name; it must never actually be invoked.  */
13810 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13811 rtx input ATTRIBUTE_UNUSED)
13813 gcc_unreachable ();
13816 /* Convert an unsigned SImode value into a DFmode. Only currently used
13817 for SSE, but applicable anywhere. */
/* Trick: add INT_MIN (wrapping) so the value fits signed SImode, do a
   signed int->double conversion, then add 2^31.0 back in DFmode, where
   the full unsigned range is exactly representable.  */
13820 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13822 REAL_VALUE_TYPE TWO31r;
/* -2147483647 - 1 spelled this way to avoid overflow in the literal.  */
13825 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13826 NULL, 1, OPTAB_DIRECT);
13828 fp = gen_reg_rtx (DFmode);
13829 emit_insn (gen_floatsidf2 (fp, x));
13831 real_ldexp (&TWO31r, &dconst1, 31);
13832 x = const_double_from_real_value (TWO31r, DFmode);
13834 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
/* Move into TARGET if expand_simple_binop used another register
   (guard condition elided in this view).  */
13836 emit_move_insn (target, x);
13839 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13840 32-bit mode; otherwise we have a direct convert instruction. */
/* result = (double) (signed) hi * 2^32 + (double) (unsigned) lo.  */
13843 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13845 REAL_VALUE_TYPE TWO32r;
13846 rtx fp_lo, fp_hi, x;
13848 fp_lo = gen_reg_rtx (DFmode);
13849 fp_hi = gen_reg_rtx (DFmode);
/* High word carries the sign; signed conversion is correct here.  */
13851 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13853 real_ldexp (&TWO32r, &dconst1, 32);
13854 x = const_double_from_real_value (TWO32r, DFmode);
13855 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* Low word is unsigned; reuse the unsigned-SI helper.  */
13857 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13859 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13862 emit_move_insn (target, x);
13865 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13866 For x86_32, -mfpmath=sse, !optimize_size only. */
/* Split into 16-bit halves so each converts exactly to float, then
   combine: result = (float) hi * 2^16 + (float) lo.  */
13868 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13870 REAL_VALUE_TYPE ONE16r;
13871 rtx fp_hi, fp_lo, int_hi, int_lo, x;
13873 real_ldexp (&ONE16r, &dconst1, 16);
13874 x = const_double_from_real_value (ONE16r, SFmode);
13875 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13876 NULL, 0, OPTAB_DIRECT);
13877 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13878 NULL, 0, OPTAB_DIRECT);
13879 fp_hi = gen_reg_rtx (SFmode);
13880 fp_lo = gen_reg_rtx (SFmode);
13881 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13882 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13883 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13885 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
/* Copy only when the binop did not already land in TARGET.  */
13887 if (!rtx_equal_p (target, fp_hi))
13888 emit_move_insn (target, fp_hi);
13891 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
13892 then replicate the value for all elements of the vector
/* (comment truncated — presumably "...register; otherwise place VALUE
   in the low element and zero the rest".  The integer-mode cases below
   always replicate; the FP cases honor VECT.)  */
13896 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13903 v = gen_rtvec (4, value, value, value, value);
13904 return gen_rtx_CONST_VECTOR (V4SImode, v);
13908 v = gen_rtvec (2, value, value);
13909 return gen_rtx_CONST_VECTOR (V2DImode, v);
13913 v = gen_rtvec (4, value, value, value, value);
13915 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13916 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13917 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13921 v = gen_rtvec (2, value, value);
13923 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13924 return gen_rtx_CONST_VECTOR (V2DFmode, v);
/* Unsupported mode.  */
13927 gcc_unreachable ();
13931 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13932 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13933 for an SSE register. If VECT is true, then replicate the mask for
13934 all elements of the vector register. If INVERT is true, then create
13935 a mask excluding the sign bit. */
13938 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13940 enum machine_mode vec_mode, imode;
13941 HOST_WIDE_INT hi, lo;
13946 /* Find the sign bit, sign extended to 2*HWI. */
/* 32-bit elements: sign bit is 1 << 31.  */
13952 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13953 lo = 0x80000000, hi = lo < 0;
/* 64-bit elements: sign bit is 1 << 63, split across lo/hi when a
   HOST_WIDE_INT is only 32 bits.  */
13959 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13960 if (HOST_BITS_PER_WIDE_INT >= 64)
13961 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13963 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* Wider (presumably TFmode) case: no vector mode available.  */
13968 vec_mode = VOIDmode;
13969 if (HOST_BITS_PER_WIDE_INT >= 64)
13972 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13979 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* INVERT: complement to mask out the sign bit instead.  */
13983 lo = ~lo, hi = ~hi;
13989 mask = immed_double_const (lo, hi, imode);
13991 vec = gen_rtvec (2, v, mask);
13992 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13993 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
14000 gcc_unreachable ();
14004 lo = ~lo, hi = ~hi;
14006 /* Force this value into the low part of a fp vector constant. */
14007 mask = immed_double_const (lo, hi, imode);
14008 mask = gen_lowpart (mode, mask);
/* Scalar fallback when no vector mode applies.  */
14010 if (vec_mode == VOIDmode)
14011 return force_reg (mode, mask);
14013 v = ix86_build_const_vector (mode, vect, mask);
14014 return force_reg (vec_mode, v);
14017 /* Generate code for floating point ABS or NEG. */
/* SSE path: ABS = AND with ~signbit mask, NEG = XOR with signbit mask.
   x87 path: plain unary rtx.  Both wrap the SET in a PARALLEL with a
   USE of the mask and a clobber of the flags (see below).  */
14020 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
14023 rtx mask, set, use, clob, dst, src;
14024 bool use_sse = false;
14025 bool vector_mode = VECTOR_MODE_P (mode);
14026 enum machine_mode elt_mode = mode;
14030 elt_mode = GET_MODE_INNER (mode);
14033 else if (mode == TFmode)
14035 else if (TARGET_SSE_MATH)
14036 use_sse = SSE_FLOAT_MODE_P (mode);
14038 /* NEG and ABS performed with SSE use bitwise mask operations.
14039 Create the appropriate mask now. */
/* For ABS the mask excludes the sign bit (invert = true).  */
14041 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
14050 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
14051 set = gen_rtx_SET (VOIDmode, dst, set);
/* Non-SSE (x87) path: emit the plain unary operation.  */
14056 set = gen_rtx_fmt_e (code, mode, src);
14057 set = gen_rtx_SET (VOIDmode, dst, set);
14060 use = gen_rtx_USE (VOIDmode, mask);
14061 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14062 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14063 gen_rtvec (3, set, use, clob)));
14070 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* operands[0] = dest, operands[1] = magnitude source (op0),
   operands[2] = sign source (op1) — presumably; confirm against the
   elided assignment lines.  Constant op0 uses the *_const patterns
   with one mask; variable op0 uses *_var with mask and inverse mask.  */
14073 ix86_expand_copysign (rtx operands[])
14075 enum machine_mode mode;
14076 rtx dest, op0, op1, mask, nmask;
14078 dest = operands[0];
14082 mode = GET_MODE (dest);
14084 if (GET_CODE (op0) == CONST_DOUBLE)
14086 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* Only |op0| matters; the sign comes entirely from op1.  */
14088 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
14089 op0 = simplify_unary_operation (ABS, mode, op0, mode);
14091 if (mode == SFmode || mode == DFmode)
14093 enum machine_mode vmode;
14095 vmode = mode == SFmode ? V4SFmode : V2DFmode;
14097 if (op0 == CONST0_RTX (mode))
14098 op0 = CONST0_RTX (vmode);
/* Widen the scalar constant into a low-element vector constant.  */
14103 if (mode == SFmode)
14104 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
14105 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
14107 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
14109 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
14112 else if (op0 != CONST0_RTX (mode))
14113 op0 = force_reg (mode, op0);
14115 mask = ix86_build_signbit_mask (mode, 0, 0);
14117 if (mode == SFmode)
14118 copysign_insn = gen_copysignsf3_const;
14119 else if (mode == DFmode)
14120 copysign_insn = gen_copysigndf3_const;
14122 copysign_insn = gen_copysigntf3_const;
14124 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable-magnitude path: needs both the sign mask and its
   complement (nmask), plus a scratch inside the pattern.  */
14128 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
14130 nmask = ix86_build_signbit_mask (mode, 0, 1);
14131 mask = ix86_build_signbit_mask (mode, 0, 0);
14133 if (mode == SFmode)
14134 copysign_insn = gen_copysignsf3_var;
14135 else if (mode == DFmode)
14136 copysign_insn = gen_copysigndf3_var;
14138 copysign_insn = gen_copysigntf3_var;
14140 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
14144 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
14145 be a constant, and so has already been expanded into a vector constant. */
/* dest = (op1 & signbit_mask) | op0, performed in the vector mode of
   the mask.  NOTE(review): op0/op1 assignments are elided here.  */
14148 ix86_split_copysign_const (rtx operands[])
14150 enum machine_mode mode, vmode;
14151 rtx dest, op0, op1, mask, x;
14153 dest = operands[0];
14156 mask = operands[3];
14158 mode = GET_MODE (dest);
14159 vmode = GET_MODE (mask);
/* Keep only the sign bit of the sign source (dest aliases op1 here —
   elided lines presumably arrange that; confirm in full source).  */
14161 dest = simplify_gen_subreg (vmode, dest, mode, 0);
14162 x = gen_rtx_AND (vmode, dest, mask);
14163 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* OR in the (nonzero) constant magnitude.  */
14165 if (op0 != CONST0_RTX (vmode))
14167 x = gen_rtx_IOR (vmode, dest, op0);
14168 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14172 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
14173 so we have to do two masks. */
/* dest = (op0 & nmask) | (op1 & mask), with register-allocation
   alternatives distinguished by which operands share hard registers
   (the REGNO comparisons below).  */
14176 ix86_split_copysign_var (rtx operands[])
14178 enum machine_mode mode, vmode;
14179 rtx dest, scratch, op0, op1, mask, nmask, x;
14181 dest = operands[0];
14182 scratch = operands[1];
14185 nmask = operands[4];
14186 mask = operands[5];
14188 mode = GET_MODE (dest);
14189 vmode = GET_MODE (mask);
14191 if (rtx_equal_p (op0, op1))
14193 /* Shouldn't happen often (it's useless, obviously), but when it does
14194 we'd generate incorrect code if we continue below. */
14195 emit_move_insn (dest, op0);
14199 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
14201 gcc_assert (REGNO (op1) == REGNO (scratch));
14203 x = gen_rtx_AND (vmode, scratch, mask);
14204 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest holds the mask here, so use ANDN-style (~dest) & op0.  */
14207 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14208 x = gen_rtx_NOT (vmode, dest);
14209 x = gen_rtx_AND (vmode, x, op0);
14210 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14214 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
14216 x = gen_rtx_AND (vmode, scratch, mask);
14218 else /* alternative 2,4 */
14220 gcc_assert (REGNO (mask) == REGNO (scratch));
14221 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
14222 x = gen_rtx_AND (vmode, scratch, op1);
14224 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14226 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
14228 dest = simplify_gen_subreg (vmode, op0, mode, 0);
14229 x = gen_rtx_AND (vmode, dest, nmask);
14231 else /* alternative 3,4 */
14233 gcc_assert (REGNO (nmask) == REGNO (dest));
14235 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14236 x = gen_rtx_AND (vmode, dest, op0);
14238 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine the two masked halves.  */
14241 x = gen_rtx_IOR (vmode, dest, scratch);
14242 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14245 /* Return TRUE or FALSE depending on whether the first SET in INSN
14246 has source and destination with matching CC modes, and that the
14247 CC mode is at least as constrained as REQ_MODE. */
14250 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
14253 enum machine_mode set_mode;
14255 set = PATTERN (insn);
14256 if (GET_CODE (set) == PARALLEL)
14257 set = XVECEXP (set, 0, 0);
14258 gcc_assert (GET_CODE (set) == SET);
14259 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
14261 set_mode = GET_MODE (SET_DEST (set));
/* Per-mode compatibility checks; the case labels and bodies are
   partially elided in this view.  CCmode is only acceptable for
   compares against zero per the condition below.  */
14265 if (req_mode != CCNOmode
14266 && (req_mode != CCmode
14267 || XEXP (SET_SRC (set), 1) != const0_rtx))
14271 if (req_mode == CCGCmode)
14275 if (req_mode == CCGOCmode || req_mode == CCNOmode)
14279 if (req_mode == CCZmode)
14290 gcc_unreachable ();
14293 return (GET_MODE (SET_SRC (set)) == set_mode);
14296 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emits FLAGS = COMPARE (op0, op1) in the CC mode selected for CODE,
   and returns the comparison rtx the flags consumer should use.  */
14299 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14301 enum machine_mode cmpmode;
14304 cmpmode = SELECT_CC_MODE (code, op0, op1);
14305 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14307 /* This is very simple, but making the interface the same as in the
14308 FP case makes the rest of the code easier. */
14309 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14310 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14312 /* Return the test that should be put into the flags user, i.e.
14313 the bcc, scc, or cmov instruction. */
14314 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
14317 /* Figure out whether to use ordered or unordered fp comparisons.
14318 Return the appropriate mode to use. */
14321 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14323 /* ??? In order to make all comparisons reversible, we do all comparisons
14324 non-trapping when compiling for IEEE. Once gcc is able to distinguish
14325 all forms trapping and nontrapping comparisons, we can make inequality
14326 comparisons trapping again, since it results in better code when using
14327 FCOM based compares. */
/* CCFPUmode = unordered (non-trapping on NaN); CCFPmode = ordered.  */
14328 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the minimal CC mode that can represent comparison CODE of
   OP0 against OP1 — fewer live flag bits gives later passes more
   freedom to combine flag-setting instructions.  */
14332 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14334 enum machine_mode mode = GET_MODE (op0);
14336 if (SCALAR_FLOAT_MODE_P (mode))
14338 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14339 return ix86_fp_compare_mode (code);
14344 /* Only zero flag is needed. */
14345 case EQ: /* ZF=0 */
14346 case NE: /* ZF!=0 */
14348 /* Codes needing carry flag. */
14349 case GEU: /* CF=0 */
14350 case LTU: /* CF=1 */
14351 /* Detect overflow checks. They need just the carry flag. */
/* Pattern (a + b) <u a — the add's carry-out is the whole answer.  */
14352 if (GET_CODE (op0) == PLUS
14353 && rtx_equal_p (op1, XEXP (op0, 0)))
14357 case GTU: /* CF=0 & ZF=0 */
14358 case LEU: /* CF=1 | ZF=1 */
14359 /* Detect overflow checks. They need just the carry flag. */
14360 if (GET_CODE (op0) == MINUS
14361 && rtx_equal_p (op1, XEXP (op0, 0)))
14365 /* Codes possibly doable only with sign flag when
14366 comparing against zero. */
14367 case GE: /* SF=OF or SF=0 */
14368 case LT: /* SF<>OF or SF=1 */
14369 if (op1 == const0_rtx)
14372 /* For other cases Carry flag is not required. */
14374 /* Codes doable only with sign flag when comparing
14375 against zero, but we miss jump instruction for it
14376 so we need to use relational tests against overflow
14377 that thus needs to be zero. */
14378 case GT: /* ZF=0 & SF=OF */
14379 case LE: /* ZF=1 | SF<>OF */
14380 if (op1 == const0_rtx)
14384 /* strcmp pattern do (use flags) and combine may ask us for proper
/* (comment truncated in this view.)  */
14389 gcc_unreachable ();
14393 /* Return the fixed registers used for condition codes. */
/* Body elided in this excerpt; presumably stores FLAGS_REG (and the
   FP status reg) through P1/P2 — confirm against the full source.  */
14396 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14403 /* If two condition code modes are compatible, return a condition code
14404 mode which is compatible with both. Otherwise, return
/* (comment truncated — presumably "...VOIDmode".)  */
14407 static enum machine_mode
14408 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Identical-mode and non-CC early exits are elided in this view.  */
14413 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC differ only in whether OF/CF are usable; their
   common refinement is handled here.  */
14416 if ((m1 == CCGCmode && m2 == CCGOCmode)
14417 || (m1 == CCGOCmode && m2 == CCGCmode))
14423 gcc_unreachable ();
14453 /* These are only compatible with themselves, which we already
/* (comment truncated — presumably "...checked above".)  */
14459 /* Split comparison code CODE into comparisons we can do using branch
14460 instructions. BYPASS_CODE is comparison code for branch that will
14461 branch around FIRST_CODE and SECOND_CODE. If some of branches
14462 is not required, set value to UNKNOWN.
14463 We never require more than two branches. */
14466 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
14467 enum rtx_code *first_code,
14468 enum rtx_code *second_code)
14470 *first_code = code;
14471 *bypass_code = UNKNOWN;
14472 *second_code = UNKNOWN;
14474 /* The fcomi comparison sets flags as follows:
/* (flag table elided in this view.)  */
/* These codes map directly onto one flag test; no extra branch.  */
14484 case GT: /* GTU - CF=0 & ZF=0 */
14485 case GE: /* GEU - CF=0 */
14486 case ORDERED: /* PF=0 */
14487 case UNORDERED: /* PF=1 */
14488 case UNEQ: /* EQ - ZF=1 */
14489 case UNLT: /* LTU - CF=1 */
14490 case UNLE: /* LEU - CF=1 | ZF=1 */
14491 case LTGT: /* EQ - ZF=0 */
/* Ordered codes that a NaN would falsify need a bypass branch on
   UNORDERED around the main test.  */
14493 case LT: /* LTU - CF=1 - fails on unordered */
14494 *first_code = UNLT;
14495 *bypass_code = UNORDERED;
14497 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
14498 *first_code = UNLE;
14499 *bypass_code = UNORDERED;
14501 case EQ: /* EQ - ZF=1 - fails on unordered */
14502 *first_code = UNEQ;
14503 *bypass_code = UNORDERED;
/* These instead need a second branch taken additionally on UNORDERED.  */
14505 case NE: /* NE - ZF=0 - fails on unordered */
14506 *first_code = LTGT;
14507 *second_code = UNORDERED;
14509 case UNGE: /* GEU - CF=0 - fails on unordered */
14511 *second_code = UNORDERED;
14513 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
14515 *second_code = UNORDERED;
14518 gcc_unreachable ();
/* Without IEEE conformance NaNs need not be honored: drop the extra
   branches.  */
14520 if (!TARGET_IEEE_FP)
14522 *second_code = UNKNOWN;
14523 *bypass_code = UNKNOWN;
14527 /* Return cost of comparison done fcom + arithmetics operations on AX.
14528 All following functions do use number of instructions as a cost metrics.
14529 In future this should be tweaked to compute bytes for optimize_size and
14530 take into account performance of various instructions on various CPUs. */
14532 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
/* Non-IEEE return value and the per-code cost switch are elided in
   this view.  */
14534 if (!TARGET_IEEE_FP)
14536 /* The cost of code output by ix86_expand_fp_compare. */
14560 gcc_unreachable ();
14564 /* Return cost of comparison done using fcomi operation.
14565 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14567 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
14569 enum rtx_code bypass_code, first_code, second_code;
14570 /* Return arbitrarily high cost when instruction is not supported - this
14571 prevents gcc from using it. */
/* (The TARGET_CMOVE guard itself is elided in this view.)  */
/* Base cost 2 (fcomi + jcc), plus 1 per extra branch required.  */
14574 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14575 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
14578 /* Return cost of comparison done using sahf operation.
14579 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14581 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14583 enum rtx_code bypass_code, first_code, second_code;
14584 /* Return arbitrarily high cost when instruction is not preferred - this
14585 avoids gcc from using it. */
14586 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
/* Base cost 3 (fcom + fnstsw/sahf + jcc), plus 1 per extra branch.  */
14588 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14589 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
14592 /* Compute cost of the comparison done using any method.
14593 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Minimum of the arithmetics, sahf and fcomi strategies.  */
14595 ix86_fp_comparison_cost (enum rtx_code code)
14597 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
14600 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14601 sahf_cost = ix86_fp_comparison_sahf_cost (code);
14603 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14604 if (min > sahf_cost)
14606 if (min > fcomi_cost)
14611 /* Return true if we should use an FCOMI instruction for this
/* (comment truncated — presumably "...fp comparison".)  fcomi wins if
   it is the cheapest strategy for CODE or its swapped form.  */
14615 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14617 enum rtx_code swapped_code = swap_condition (code);
14619 return ((ix86_fp_comparison_cost (code)
14620 == ix86_fp_comparison_fcomi_cost (code))
14621 || (ix86_fp_comparison_cost (swapped_code)
14622 == ix86_fp_comparison_fcomi_cost (swapped_code)));
14625 /* Swap, force into registers, or otherwise massage the two operands
14626 to a fp comparison. The operands are updated in place; the new
14627 comparison code is returned. */
14629 static enum rtx_code
14630 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14632 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14633 rtx op0 = *pop0, op1 = *pop1;
14634 enum machine_mode op_mode = GET_MODE (op0);
14635 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14637 /* All of the unordered compare instructions only work on registers.
14638 The same is true of the fcomi compare instructions. The XFmode
14639 compare instructions require registers except when comparing
14640 against zero or when converting operand 1 from fixed point to
/* (comment truncated — presumably "...floating point".)  */
14644 && (fpcmp_mode == CCFPUmode
14645 || (op_mode == XFmode
14646 && ! (standard_80387_constant_p (op0) == 1
14647 || standard_80387_constant_p (op1) == 1)
14648 && GET_CODE (op1) != FLOAT)
14649 || ix86_use_fcomi_compare (code)))
14651 op0 = force_reg (op_mode, op0);
14652 op1 = force_reg (op_mode, op1);
14656 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14657 things around if they appear profitable, otherwise force op0
14658 into a register. */
/* standard_80387_constant_p == 0 means "not a free fld constant".  */
14660 if (standard_80387_constant_p (op0) == 0
14662 && ! (standard_80387_constant_p (op1) == 0
/* Swap so the memory/expensive operand becomes op1; CODE must be
   swapped to match.  */
14666 tmp = op0, op0 = op1, op1 = tmp;
14667 code = swap_condition (code);
14671 op0 = force_reg (op_mode, op0);
14673 if (CONSTANT_P (op1))
14675 int tmp = standard_80387_constant_p (op1);
/* Not loadable with fld1/fldz etc.: spill to the constant pool.  */
14677 op1 = validize_mem (force_const_mem (op_mode, op1));
14681 op1 = force_reg (op_mode, op1);
14684 op1 = force_reg (op_mode, op1);
14688 /* Try to rearrange the comparison to make it cheaper. */
14689 if (ix86_fp_comparison_cost (code)
14690 > ix86_fp_comparison_cost (swap_condition (code))
14691 && (REG_P (op1) || can_create_pseudo_p ()))
14694 tmp = op0, op0 = op1, op1 = tmp;
14695 code = swap_condition (code);
14697 op0 = force_reg (op_mode, op0);
14705 /* Convert comparison codes we use to represent FP comparison to integer
14706 code that will result in proper branch. Return UNKNOWN if no such code
/* (comment truncated — presumably "...is available".  The mapping
   switch itself is elided in this excerpt.)  */
14710 ix86_fp_compare_code_to_integer (enum rtx_code code)
14739 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Returns the comparison rtx for the flags user.  May emit extra tests
   through *SECOND_TEST / *BYPASS_TEST when IEEE NaN handling needs more
   than one branch (see ix86_fp_comparison_codes).  */
14742 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14743 rtx *second_test, rtx *bypass_test)
14745 enum machine_mode fpcmp_mode, intcmp_mode;
14747 int cost = ix86_fp_comparison_cost (code);
14748 enum rtx_code bypass_code, first_code, second_code;
14750 fpcmp_mode = ix86_fp_compare_mode (code);
14751 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14754 *second_test = NULL_RTX;
14756 *bypass_test = NULL_RTX;
14758 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14760 /* Do fcomi/sahf based test when profitable. */
14761 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14762 && (bypass_code == UNKNOWN || bypass_test)
14763 && (second_code == UNKNOWN || second_test))
14765 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14766 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: fnstsw into a scratch, then sahf; needs an HImode
   scratch the PARALLEL clobbers.  */
14772 gcc_assert (TARGET_SAHF);
14775 scratch = gen_reg_rtx (HImode);
14776 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14778 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14781 /* The FP codes work out to act like unsigned. */
14782 intcmp_mode = fpcmp_mode;
14784 if (bypass_code != UNKNOWN)
14785 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14786 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14788 if (second_code != UNKNOWN)
14789 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14790 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14795 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14796 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14797 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14799 scratch = gen_reg_rtx (HImode);
14800 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14802 /* In the unordered case, we have to check C2 for NaN's, which
14803 doesn't happen to work out to anything nice combination-wise.
14804 So do some bit twiddling on the value we've got in AH to come
14805 up with an appropriate set of condition codes. */
/* FPU status word bits tested below: 0x01 = C0 (carry analogue),
   0x04 = C2 (unordered), 0x40 = C3 (zero analogue), so 0x45 tests
   C0|C2|C3 together.  Per-CODE case labels are elided in this view.  */
14807 intcmp_mode = CCNOmode;
14812 if (code == GT || !TARGET_IEEE_FP)
14814 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14819 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14820 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14821 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14822 intcmp_mode = CCmode;
14828 if (code == LT && TARGET_IEEE_FP)
14830 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14831 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14832 intcmp_mode = CCmode;
14837 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14843 if (code == GE || !TARGET_IEEE_FP)
14845 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14850 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14851 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14858 if (code == LE && TARGET_IEEE_FP)
14860 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14861 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14862 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14863 intcmp_mode = CCmode;
14868 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14874 if (code == EQ && TARGET_IEEE_FP)
14876 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14877 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14878 intcmp_mode = CCmode;
14883 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14890 if (code == NE && TARGET_IEEE_FP)
14892 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14893 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14899 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
/* ORDERED / UNORDERED: only the C2 (0x04) bit matters.  */
14905 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14909 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14914 gcc_unreachable ();
14918 /* Return the test that should be put into the flags user, i.e.
14919 the bcc, scc, or cmov instruction. */
14920 return gen_rtx_fmt_ee (code, VOIDmode,
14921 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the global ix86_compare_op0/op1 pair with
   code CODE, dispatching to the CC-mode, FP or integer expander, and
   return the comparison rtx for the flags user.  */
14926 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14929 op0 = ix86_compare_op0;
14930 op1 = ix86_compare_op1;
14933 *second_test = NULL_RTX;
14935 *bypass_test = NULL_RTX;
/* Already a CC-mode compare: just wrap it in the requested code.  */
14937 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14938 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14940 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14942 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14943 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14944 second_test, bypass_test);
14947 ret = ix86_expand_int_compare (code, op0, op1);
14952 /* Return true if the CODE will result in nontrivial jump sequence. */
/* "Nontrivial" = the FP comparison needs a bypass branch or a second
   branch beyond the primary one.  */
14954 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14956 enum rtx_code bypass_code, first_code, second_code;
/* (TARGET_CMOVE shortcut elided in this view.)  */
14959 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14960 return bypass_code != UNKNOWN || second_code != UNKNOWN;
14964 ix86_expand_branch (enum rtx_code code, rtx label)
14968 switch (GET_MODE (ix86_compare_op0))
14974 tmp = ix86_expand_compare (code, NULL, NULL);
14975 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14976 gen_rtx_LABEL_REF (VOIDmode, label),
14978 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14987 enum rtx_code bypass_code, first_code, second_code;
14989 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14990 &ix86_compare_op1);
14992 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14994 /* Check whether we will use the natural sequence with one jump. If
14995 so, we can expand jump early. Otherwise delay expansion by
14996 creating compound insn to not confuse optimizers. */
14997 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14999 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
15000 gen_rtx_LABEL_REF (VOIDmode, label),
15001 pc_rtx, NULL_RTX, NULL_RTX);
15005 tmp = gen_rtx_fmt_ee (code, VOIDmode,
15006 ix86_compare_op0, ix86_compare_op1);
15007 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15008 gen_rtx_LABEL_REF (VOIDmode, label),
15010 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
15012 use_fcomi = ix86_use_fcomi_compare (code);
15013 vec = rtvec_alloc (3 + !use_fcomi);
15014 RTVEC_ELT (vec, 0) = tmp;
15016 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
15018 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
15021 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
15023 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
15032 /* Expand DImode branch into multiple compare+branch. */
15034 rtx lo[2], hi[2], label2;
15035 enum rtx_code code1, code2, code3;
15036 enum machine_mode submode;
15038 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
15040 tmp = ix86_compare_op0;
15041 ix86_compare_op0 = ix86_compare_op1;
15042 ix86_compare_op1 = tmp;
15043 code = swap_condition (code);
15045 if (GET_MODE (ix86_compare_op0) == DImode)
15047 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
15048 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
15053 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
15054 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
15058 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
15059 avoid two branches. This costs one extra insn, so disable when
15060 optimizing for size. */
15062 if ((code == EQ || code == NE)
15063 && (!optimize_insn_for_size_p ()
15064 || hi[1] == const0_rtx || lo[1] == const0_rtx))
15069 if (hi[1] != const0_rtx)
15070 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
15071 NULL_RTX, 0, OPTAB_WIDEN);
15074 if (lo[1] != const0_rtx)
15075 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
15076 NULL_RTX, 0, OPTAB_WIDEN);
15078 tmp = expand_binop (submode, ior_optab, xor1, xor0,
15079 NULL_RTX, 0, OPTAB_WIDEN);
15081 ix86_compare_op0 = tmp;
15082 ix86_compare_op1 = const0_rtx;
15083 ix86_expand_branch (code, label);
15087 /* Otherwise, if we are doing less-than or greater-or-equal-than,
15088 op1 is a constant and the low word is zero, then we can just
15089 examine the high word. Similarly for low word -1 and
15090 less-or-equal-than or greater-than. */
15092 if (CONST_INT_P (hi[1]))
15095 case LT: case LTU: case GE: case GEU:
15096 if (lo[1] == const0_rtx)
15098 ix86_compare_op0 = hi[0];
15099 ix86_compare_op1 = hi[1];
15100 ix86_expand_branch (code, label);
15104 case LE: case LEU: case GT: case GTU:
15105 if (lo[1] == constm1_rtx)
15107 ix86_compare_op0 = hi[0];
15108 ix86_compare_op1 = hi[1];
15109 ix86_expand_branch (code, label);
15117 /* Otherwise, we need two or three jumps. */
15119 label2 = gen_label_rtx ();
15122 code2 = swap_condition (code);
15123 code3 = unsigned_condition (code);
15127 case LT: case GT: case LTU: case GTU:
15130 case LE: code1 = LT; code2 = GT; break;
15131 case GE: code1 = GT; code2 = LT; break;
15132 case LEU: code1 = LTU; code2 = GTU; break;
15133 case GEU: code1 = GTU; code2 = LTU; break;
15135 case EQ: code1 = UNKNOWN; code2 = NE; break;
15136 case NE: code2 = UNKNOWN; break;
15139 gcc_unreachable ();
15144 * if (hi(a) < hi(b)) goto true;
15145 * if (hi(a) > hi(b)) goto false;
15146 * if (lo(a) < lo(b)) goto true;
15150 ix86_compare_op0 = hi[0];
15151 ix86_compare_op1 = hi[1];
15153 if (code1 != UNKNOWN)
15154 ix86_expand_branch (code1, label);
15155 if (code2 != UNKNOWN)
15156 ix86_expand_branch (code2, label2);
15158 ix86_compare_op0 = lo[0];
15159 ix86_compare_op1 = lo[1];
15160 ix86_expand_branch (code3, label);
15162 if (code2 != UNKNOWN)
15163 emit_label (label2);
15168 /* If we have already emitted a compare insn, go straight to simple.
15169 ix86_expand_compare won't emit anything if ix86_compare_emitted
15171 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
15176 /* Split branch based on floating point condition. */
/* NOTE(review): this stored excerpt elides interior lines (gaps in the
   embedded numbering); code below is verbatim, only comments added.

   Emit the expansion of a floating-point conditional branch.  CODE
   compares OP1 against OP2; control transfers to TARGET1 when the
   condition holds, otherwise to TARGET2 (one of them is pc_rtx for
   fall-through).  TMP is a scratch rtx or NULL passed down to the
   compare expander.  PUSHED, when non-NULL, is an operand previously
   spilled to the stack that must be freed after the compare.  A single
   FP condition may need up to three jumps: an optional BYPASS jump
   (for unordered operands), the main jump, and an optional SECOND
   jump.  */
15178 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
15179 rtx target1, rtx target2, rtx tmp, rtx pushed)
15181 rtx second, bypass;
15182 rtx label = NULL_RTX;
/* -1 means "no REG_BR_PROB note to attach" for the respective jump.  */
15184 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so TARGET2 is the fall-through (pc_rtx); reversing must
   keep unordered semantics, hence the _maybe_unordered variant.  */
15187 if (target2 != pc_rtx)
15190 code = reverse_condition_maybe_unordered (code);
15195 condition = ix86_expand_fp_compare (code, op1, op2,
15196 tmp, &second, &bypass);
15198 /* Remove pushed operand from stack. */
15200 ix86_free_from_memory (GET_MODE (pushed));
15202 if (split_branch_probability >= 0)
15204 /* Distribute the probabilities across the jumps.
15205 Assume the BYPASS and SECOND to be always test
15207 probability = split_branch_probability;
15209 /* Value of 1 is low enough to make no need for probability
15210 to be updated. Later we may run some experiments and see
15211 if unordered values are more frequent in practice. */
15213 bypass_probability = 1;
15215 second_probability = 1;
/* Bypass jump: taken on unordered input, skips the main jump via LABEL.  */
15217 if (bypass != NULL_RTX)
15219 label = gen_label_rtx ();
15220 i = emit_jump_insn (gen_rtx_SET
15222 gen_rtx_IF_THEN_ELSE (VOIDmode,
15224 gen_rtx_LABEL_REF (VOIDmode,
15227 if (bypass_probability >= 0)
15228 add_reg_note (i, REG_BR_PROB, GEN_INT (bypass_probability));
/* Main conditional jump.  */
15230 i = emit_jump_insn (gen_rtx_SET
15232 gen_rtx_IF_THEN_ELSE (VOIDmode,
15233 condition, target1, target2)));
15234 if (probability >= 0)
15235 add_reg_note (i, REG_BR_PROB, GEN_INT (probability));
/* Optional second jump for conditions needing two flag tests.  */
15236 if (second != NULL_RTX)
15238 i = emit_jump_insn (gen_rtx_SET
15240 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
15242 if (second_probability >= 0)
15243 add_reg_note (i, REG_BR_PROB, GEN_INT (second_probability));
/* Land here after a taken bypass jump.  */
15245 if (label != NULL_RTX)
15246 emit_label (label);
/* Expand a setcc: store the result of comparison CODE (using the
   globals ix86_compare_op0/op1) as 0/1 into the QImode register DEST.
   When the FP condition needs two flag tests (SECOND_TEST/BYPASS_TEST),
   combine the two setcc results with AND/IOR.
   NOTE(review): excerpt elides interior lines; comments only added.  */
15250 ix86_expand_setcc (enum rtx_code code, rtx dest)
15252 rtx ret, tmp, tmpreg, equiv;
15253 rtx second_test, bypass_test;
15255 gcc_assert (GET_MODE (dest) == QImode);
15257 ret = ix86_expand_compare (code, &second_test, &bypass_test);
15258 PUT_MODE (ret, QImode);
15263 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
15264 if (bypass_test || second_test)
15266 rtx test = second_test;
15268 rtx tmp2 = gen_reg_rtx (QImode);
/* At most one of the two extra tests may be present here.  */
15271 gcc_assert (!second_test);
15272 test = bypass_test;
/* The bypass test is combined with AND, so reverse it first.  */
15274 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
15276 PUT_MODE (test, QImode);
15277 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
15280 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
15282 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
15285 /* Attach a REG_EQUAL note describing the comparison result. */
15286 if (ix86_compare_op0 && ix86_compare_op1)
15288 equiv = simplify_gen_relational (code, QImode,
15289 GET_MODE (ix86_compare_op0),
15290 ix86_compare_op0, ix86_compare_op1);
15291 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
15295 /* Expand comparison setting or clearing carry flag. Return true when
15296 successful and set pop for the operation. */
/* NOTE(review): excerpt elides interior lines; comments only added.
   On success *POP receives an LTU/GEU rtx testing the carry flag,
   letting callers use sbb/adc-style sequences.  Fails (returns false,
   presumably) for comparisons that cannot be reduced to a carry
   test.  */
15298 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
15300 enum machine_mode mode =
15301 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15303 /* Do not handle DImode compares that go through special path. */
15304 if (mode == (TARGET_64BIT ? TImode : DImode))
15307 if (SCALAR_FLOAT_MODE_P (mode))
15309 rtx second_test = NULL, bypass_test = NULL;
15310 rtx compare_op, compare_seq;
15312 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15314 /* Shortcut: following common codes never translate
15315 into carry flag compares. */
15316 if (code == EQ || code == NE || code == UNEQ || code == LTGT
15317 || code == ORDERED || code == UNORDERED)
15320 /* These comparisons require zero flag; swap operands so they won't. */
15321 if ((code == GT || code == UNLE || code == LE || code == UNGT)
15322 && !TARGET_IEEE_FP)
15327 code = swap_condition (code);
15330 /* Try to expand the comparison and verify that we end up with
15331 carry flag based comparison. This fails to be true only when
15332 we decide to expand comparison using arithmetic that is not
15333 too common scenario. */
15335 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
15336 &second_test, &bypass_test);
15337 compare_seq = get_insns ();
/* A multi-test FP condition cannot be a single carry test.  */
15340 if (second_test || bypass_test)
15343 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15344 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15345 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15347 code = GET_CODE (compare_op);
/* Only LTU/GEU are pure carry-flag tests.  */
15349 if (code != LTU && code != GEU)
15352 emit_insn (compare_seq);
15357 if (!INTEGRAL_MODE_P (mode))
15366 /* Convert a==0 into (unsigned)a<1. */
15369 if (op1 != const0_rtx)
15372 code = (code == EQ ? LTU : GEU);
15375 /* Convert a>b into b<a or a>=b-1. */
15378 if (CONST_INT_P (op1))
15380 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15381 /* Bail out on overflow. We still can swap operands but that
15382 would force loading of the constant into register. */
15383 if (op1 == const0_rtx
15384 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15386 code = (code == GTU ? GEU : LTU);
15393 code = (code == GTU ? LTU : GEU);
15397 /* Convert a>=0 into (unsigned)a<0x80000000. */
15400 if (mode == DImode || op1 != const0_rtx)
/* NOTE(review): for 32-bit modes "1 << 31" overflows signed int, which
   is technically undefined behavior in C; confirm against upstream
   whether a HOST_WIDE_INT cast is warranted here.  */
15402 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15403 code = (code == LT ? GEU : LTU);
15407 if (mode == DImode || op1 != constm1_rtx)
15409 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15410 code = (code == LE ? GEU : LTU);
15416 /* Swapping operands may cause constant to appear as first operand. */
15417 if (!nonimmediate_operand (op0, VOIDmode))
15419 if (!can_create_pseudo_p ())
15421 op0 = force_reg (mode, op0);
15423 ix86_compare_op0 = op0;
15424 ix86_compare_op1 = op1;
15425 *pop = ix86_expand_compare (code, NULL, NULL);
15426 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move:
   operands[0] = (operands[1]) ? operands[2] : operands[3].
   Returns 1 on success ("DONE") and 0 on failure ("FAIL"), in which
   case the caller falls back to other expansion strategies.  Tries,
   in order: carry-flag/sbb tricks for constant arms, lea-based
   arithmetic for small constant differences, setcc+and+add sequences,
   masking with a recursive call for one constant arm, and finally a
   real cmov.
   NOTE(review): this stored excerpt elides many interior lines (gaps
   in the embedded numbering); the code is kept verbatim and only
   comments are added.  */
15431 ix86_expand_int_movcc (rtx operands[])
15433 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15434 rtx compare_seq, compare_op;
15435 rtx second_test, bypass_test;
15436 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): stray extra ';' below — harmless empty statement.  */
15437 bool sign_bit_compare_p = false;;
15440 ix86_compare_op0 = XEXP (operands[1], 0);
15441 ix86_compare_op1 = XEXP (operands[1], 1);
15442 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15443 compare_seq = get_insns ();
15446 compare_code = GET_CODE (compare_op);
/* x < 0 / x >= 0 (and the -1 variants) test only the sign bit and can
   be done with shifts/sbb instead of a compare.  */
15448 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15449 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15450 sign_bit_compare_p = true;
15452 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15453 HImode insns, we'd be swallowed in word prefix ops. */
15455 if ((mode != HImode || TARGET_FAST_PREFIX)
15456 && (mode != (TARGET_64BIT ? TImode : DImode))
15457 && CONST_INT_P (operands[2])
15458 && CONST_INT_P (operands[3]))
15460 rtx out = operands[0];
15461 HOST_WIDE_INT ct = INTVAL (operands[2]);
15462 HOST_WIDE_INT cf = INTVAL (operands[3]);
15463 HOST_WIDE_INT diff;
15466 /* Sign bit compares are better done using shifts than we do by using
15468 if (sign_bit_compare_p
15469 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15470 ix86_compare_op1, &compare_op))
15472 /* Detect overlap between destination and compare sources. */
15475 if (!sign_bit_compare_p)
15477 bool fpcmp = false;
15479 compare_code = GET_CODE (compare_op);
15481 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15482 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15485 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15488 /* To simplify rest of code, restrict to the GEU case. */
15489 if (compare_code == LTU)
15491 HOST_WIDE_INT tmp = ct;
15494 compare_code = reverse_condition (compare_code);
15495 code = reverse_condition (code);
15500 PUT_CODE (compare_op,
15501 reverse_condition_maybe_unordered
15502 (GET_CODE (compare_op)));
15504 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15508 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15509 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15510 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb reg,reg pattern).  */
15512 if (mode == DImode)
15513 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15515 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15519 if (code == GT || code == GE)
15520 code = reverse_condition (code);
15523 HOST_WIDE_INT tmp = ct;
15528 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15529 ix86_compare_op1, VOIDmode, 0, -1);
/* From here the 0/-1 mask in TMP is turned into ct/cf via add/or/and
   arithmetic depending on the constants' relationship.  */
15542 tmp = expand_simple_binop (mode, PLUS,
15544 copy_rtx (tmp), 1, OPTAB_DIRECT);
15555 tmp = expand_simple_binop (mode, IOR,
15557 copy_rtx (tmp), 1, OPTAB_DIRECT);
15559 else if (diff == -1 && ct)
15569 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15571 tmp = expand_simple_binop (mode, PLUS,
15572 copy_rtx (tmp), GEN_INT (cf),
15573 copy_rtx (tmp), 1, OPTAB_DIRECT);
15581 * andl cf - ct, dest
15591 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15594 tmp = expand_simple_binop (mode, AND,
15596 gen_int_mode (cf - ct, mode),
15597 copy_rtx (tmp), 1, OPTAB_DIRECT);
15599 tmp = expand_simple_binop (mode, PLUS,
15600 copy_rtx (tmp), GEN_INT (ct),
15601 copy_rtx (tmp), 1, OPTAB_DIRECT);
15604 if (!rtx_equal_p (tmp, out))
15605 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15607 return 1; /* DONE */
15612 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15615 tmp = ct, ct = cf, cf = tmp;
15618 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15620 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15622 /* We may be reversing unordered compare to normal compare, that
15623 is not valid in general (we may convert non-trapping condition
15624 to trapping one), however on i386 we currently emit all
15625 comparisons unordered. */
15626 compare_code = reverse_condition_maybe_unordered (compare_code);
15627 code = reverse_condition_maybe_unordered (code);
15631 compare_code = reverse_condition (compare_code);
15632 code = reverse_condition (code);
15636 compare_code = UNKNOWN;
15637 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15638 && CONST_INT_P (ix86_compare_op1))
15640 if (ix86_compare_op1 == const0_rtx
15641 && (code == LT || code == GE))
15642 compare_code = code;
15643 else if (ix86_compare_op1 == constm1_rtx)
15647 else if (code == GT)
15652 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15653 if (compare_code != UNKNOWN
15654 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15655 && (cf == -1 || ct == -1))
15657 /* If lea code below could be used, only optimize
15658 if it results in a 2 insn sequence. */
15660 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15661 || diff == 3 || diff == 5 || diff == 9)
15662 || (compare_code == LT && ct == -1)
15663 || (compare_code == GE && cf == -1))
15666 * notl op1 (if necessary)
15674 code = reverse_condition (code);
15677 out = emit_store_flag (out, code, ix86_compare_op0,
15678 ix86_compare_op1, VOIDmode, 0, -1);
15680 out = expand_simple_binop (mode, IOR,
15682 out, 1, OPTAB_DIRECT);
15683 if (out != operands[0])
15684 emit_move_insn (operands[0], out);
15686 return 1; /* DONE */
/* diff of 1,2,4,8 (and 3,5,9 via base+index) fits an lea addressing
   mode, so setcc + lea computes cf + setcc*diff in two insns.  */
15691 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15692 || diff == 3 || diff == 5 || diff == 9)
15693 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15695 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15701 * lea cf(dest*(ct-cf)),dest
15705 * This also catches the degenerate setcc-only case.
15711 out = emit_store_flag (out, code, ix86_compare_op0,
15712 ix86_compare_op1, VOIDmode, 0, 1);
15715 /* On x86_64 the lea instruction operates on Pmode, so we need
15716 to get arithmetics done in proper mode to match. */
15718 tmp = copy_rtx (out);
15722 out1 = copy_rtx (out);
15723 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15727 tmp = gen_rtx_PLUS (mode, tmp, out1);
15733 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15736 if (!rtx_equal_p (tmp, out))
15739 out = force_operand (tmp, copy_rtx (out));
15741 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15743 if (!rtx_equal_p (out, operands[0]))
15744 emit_move_insn (operands[0], copy_rtx (out));
15746 return 1; /* DONE */
15750 * General case: Jumpful:
15751 * xorl dest,dest cmpl op1, op2
15752 * cmpl op1, op2 movl ct, dest
15753 * setcc dest jcc 1f
15754 * decl dest movl cf, dest
15755 * andl (cf-ct),dest 1:
15758 * Size 20. Size 14.
15760 * This is reasonably steep, but branch mispredict costs are
15761 * high on modern cpus, so consider failing only if optimizing
15765 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15766 && BRANCH_COST (optimize_insn_for_speed_p (),
15771 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15776 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15778 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15780 /* We may be reversing unordered compare to normal compare,
15781 that is not valid in general (we may convert non-trapping
15782 condition to trapping one), however on i386 we currently
15783 emit all comparisons unordered. */
15784 code = reverse_condition_maybe_unordered (code);
15788 code = reverse_condition (code);
15789 if (compare_code != UNKNOWN)
15790 compare_code = reverse_condition (compare_code);
15794 if (compare_code != UNKNOWN)
15796 /* notl op1 (if needed)
15801 For x < 0 (resp. x <= -1) there will be no notl,
15802 so if possible swap the constants to get rid of the
15804 True/false will be -1/0 while code below (store flag
15805 followed by decrement) is 0/-1, so the constants need
15806 to be exchanged once more. */
15808 if (compare_code == GE || !cf)
15810 code = reverse_condition (code);
15815 HOST_WIDE_INT tmp = cf;
15820 out = emit_store_flag (out, code, ix86_compare_op0,
15821 ix86_compare_op1, VOIDmode, 0, -1);
15825 out = emit_store_flag (out, code, ix86_compare_op0,
15826 ix86_compare_op1, VOIDmode, 0, 1);
15828 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15829 copy_rtx (out), 1, OPTAB_DIRECT);
15832 out = expand_simple_binop (mode, AND, copy_rtx (out),
15833 gen_int_mode (cf - ct, mode),
15834 copy_rtx (out), 1, OPTAB_DIRECT);
15836 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15837 copy_rtx (out), 1, OPTAB_DIRECT);
15838 if (!rtx_equal_p (out, operands[0]))
15839 emit_move_insn (operands[0], copy_rtx (out));
15841 return 1; /* DONE */
15845 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15847 /* Try a few things more with specific constants and a variable. */
15850 rtx var, orig_out, out, tmp;
15852 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15853 return 0; /* FAIL */
15855 /* If one of the two operands is an interesting constant, load a
15856 constant with the above and mask it in with a logical operation. */
15858 if (CONST_INT_P (operands[2]))
15861 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15862 operands[3] = constm1_rtx, op = and_optab;
15863 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15864 operands[3] = const0_rtx, op = ior_optab;
15866 return 0; /* FAIL */
15868 else if (CONST_INT_P (operands[3]))
15871 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15872 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): the second conjunct tests operands[3], which is already
   known to be -1 here and therefore != const0_rtx — the test looks
   vacuous; the symmetric branch above tests the OTHER operand.
   Possibly a typo for operands[2]; confirm against upstream before
   changing.  */
15873 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
15874 operands[2] = const0_rtx, op = ior_optab;
15876 return 0; /* FAIL */
15879 return 0; /* FAIL */
15881 orig_out = operands[0];
15882 tmp = gen_reg_rtx (mode);
15885 /* Recurse to get the constant loaded. */
15886 if (ix86_expand_int_movcc (operands) == 0)
15887 return 0; /* FAIL */
15889 /* Mask in the interesting variable. */
15890 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15892 if (!rtx_equal_p (out, orig_out))
15893 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15895 return 1; /* DONE */
15899 * For comparison with above,
/* Final strategy: real cmov.  Force operands into registers as the
   cmov patterns require, handling destination/source overlap when
   extra bypass/second tests are needed.  */
15909 if (! nonimmediate_operand (operands[2], mode))
15910 operands[2] = force_reg (mode, operands[2]);
15911 if (! nonimmediate_operand (operands[3], mode))
15912 operands[3] = force_reg (mode, operands[3]);
15914 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15916 rtx tmp = gen_reg_rtx (mode);
15917 emit_move_insn (tmp, operands[3]);
15920 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15922 rtx tmp = gen_reg_rtx (mode);
15923 emit_move_insn (tmp, operands[2]);
15927 if (! register_operand (operands[2], VOIDmode)
15929 || ! register_operand (operands[3], VOIDmode)))
15930 operands[2] = force_reg (mode, operands[2]);
15933 && ! register_operand (operands[3], VOIDmode))
15934 operands[3] = force_reg (mode, operands[3]);
15936 emit_insn (compare_seq);
15937 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15938 gen_rtx_IF_THEN_ELSE (mode,
15939 compare_op, operands[2],
/* Extra cmovs for the bypass/second FP flag tests, if any.  */
15942 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15943 gen_rtx_IF_THEN_ELSE (mode,
15945 copy_rtx (operands[3]),
15946 copy_rtx (operands[0]))));
15948 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15949 gen_rtx_IF_THEN_ELSE (mode,
15951 copy_rtx (operands[2]),
15952 copy_rtx (operands[0]))));
15954 return 1; /* DONE */
15957 /* Swap, force into registers, or otherwise massage the two operands
15958 to an sse comparison with a mask result. Thus we differ a bit from
15959 ix86_prepare_fp_compare_args which expects to produce a flags result.
15961 The DEST operand exists to help determine whether to commute commutative
15962 operators. The POP0/POP1 operands are updated in place. The new
15963 comparison code is returned, or UNKNOWN if not implementable. */
/* NOTE(review): excerpt elides interior lines (switch arms, returns);
   comments only added.  */
15965 static enum rtx_code
15966 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15967 rtx *pop0, rtx *pop1)
15975 /* We have no LTGT as an operator. We could implement it with
15976 NE & ORDERED, but this requires an extra temporary. It's
15977 not clear that it's worth it. */
15984 /* These are supported directly. */
15991 /* For commutative operators, try to canonicalize the destination
15992 operand to be first in the comparison - this helps reload to
15993 avoid extra moves. */
15994 if (!dest || !rtx_equal_p (dest, *pop1))
16002 /* These are not supported directly. Swap the comparison operands
16003 to transform into something that is supported. */
16007 code = swap_condition (code);
16011 gcc_unreachable ();
16017 /* Detect conditional moves that exactly match min/max operational
16018 semantics. Note that this is IEEE safe, as long as we don't
16019 interchange the operands.
16021 Returns FALSE if this conditional move doesn't match a MIN/MAX,
16022 and TRUE if the operation is successful and instructions are emitted. */
/* NOTE(review): excerpt elides interior lines; comments only added.  */
16025 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
16026 rtx cmp_op1, rtx if_true, rtx if_false)
16028 enum machine_mode mode;
16034 else if (code == UNGE)
16037 if_true = if_false;
/* Match (cmp_op0 ? cmp_op1) against (if_true ? if_false) in either
   orientation to decide min vs. max.  */
16043 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
16045 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
16050 mode = GET_MODE (dest);
16052 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
16053 but MODE may be a vector mode and thus not appropriate. */
16054 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Strict IEEE semantics: wrap in an UNSPEC so the operand order
   (which determines NaN/-0.0 behavior of minps/maxps) is preserved.  */
16056 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
16059 if_true = force_reg (mode, if_true);
16060 v = gen_rtvec (2, if_true, if_false);
16061 tmp = gen_rtx_UNSPEC (mode, v, u);
16065 code = is_min ? SMIN : SMAX;
16066 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
16069 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
16073 /* Expand an sse vector comparison. Return the register with the result. */
/* Emits DEST = (CMP_OP0 CODE CMP_OP1) as an all-ones/all-zeros mask.
   OP_TRUE/OP_FALSE are only inspected for overlap with DEST; a fresh
   register is used when they overlap, and that register is returned
   (presumably — the return statement is elided in this excerpt).  */
16076 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
16077 rtx op_true, rtx op_false)
16079 enum machine_mode mode = GET_MODE (dest);
16082 cmp_op0 = force_reg (mode, cmp_op0);
16083 if (!nonimmediate_operand (cmp_op1, mode))
16084 cmp_op1 = force_reg (mode, cmp_op1);
/* Avoid clobbering an input still needed by the following select.  */
16087 || reg_overlap_mentioned_p (dest, op_true)
16088 || reg_overlap_mentioned_p (dest, op_false)
16089 dest = gen_reg_rtx (mode);
16091 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
16092 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16097 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
16098 operations. This is used for both scalar and vector conditional moves. */
/* CMP is an all-ones/all-zeros mask (e.g. from ix86_expand_sse_cmp).
   The general form is DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE),
   with shortcuts when one arm is zero and a single pcmov on SSE5.
   NOTE(review): excerpt elides interior lines; comments only added.  */
16101 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
16103 enum machine_mode mode = GET_MODE (dest);
/* false arm zero: DEST = CMP & OP_TRUE suffices.  */
16106 if (op_false == CONST0_RTX (mode))
16108 op_true = force_reg (mode, op_true);
16109 x = gen_rtx_AND (mode, cmp, op_true);
16110 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* true arm zero: DEST = ~CMP & OP_FALSE (andnps form).  */
16112 else if (op_true == CONST0_RTX (mode))
16114 op_false = force_reg (mode, op_false);
16115 x = gen_rtx_NOT (mode, cmp);
16116 x = gen_rtx_AND (mode, x, op_false);
16117 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a real three-operand conditional move (pcmov).  */
16119 else if (TARGET_SSE5)
16121 rtx pcmov = gen_rtx_SET (mode, dest,
16122 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* General case: and/andn/or triple.  */
16129 op_true = force_reg (mode, op_true);
16130 op_false = force_reg (mode, op_false);
16132 t2 = gen_reg_rtx (mode);
16134 t3 = gen_reg_rtx (mode);
16138 x = gen_rtx_AND (mode, op_true, cmp);
16139 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
16141 x = gen_rtx_NOT (mode, cmp);
16142 x = gen_rtx_AND (mode, x, op_false);
16143 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
16145 x = gen_rtx_IOR (mode, t3, t2);
16146 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16150 /* Expand a floating-point conditional move. Return true if successful. */
/* operands[0] = (operands[1]) ? operands[2] : operands[3] for FP modes.
   Uses SSE mask/blend logic when doing SSE math, otherwise fcmov,
   falling back to an integer setcc when fcmov cannot encode the
   condition.  NOTE(review): excerpt elides interior lines.  */
16153 ix86_expand_fp_movcc (rtx operands[])
16155 enum machine_mode mode = GET_MODE (operands[0]);
16156 enum rtx_code code = GET_CODE (operands[1]);
16157 rtx tmp, compare_op, second_test, bypass_test;
16159 ix86_compare_op0 = XEXP (operands[1], 0);
16160 ix86_compare_op1 = XEXP (operands[1], 1);
16161 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16163 enum machine_mode cmode;
16165 /* Since we've no cmove for sse registers, don't force bad register
16166 allocation just to gain access to it. Deny movcc when the
16167 comparison mode doesn't match the move mode. */
16168 cmode = GET_MODE (ix86_compare_op0);
16169 if (cmode == VOIDmode)
16170 cmode = GET_MODE (ix86_compare_op1);
16174 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16176 &ix86_compare_op1);
16177 if (code == UNKNOWN)
/* Prefer a direct minps/maxps when the cmov matches min/max shape.  */
16180 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
16181 ix86_compare_op1, operands[2],
16185 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
16186 ix86_compare_op1, operands[2], operands[3]);
16187 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
16191 /* The floating point conditional move instructions don't directly
16192 support conditions resulting from a signed integer comparison. */
16194 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
16196 /* The floating point conditional move instructions don't directly
16197 support signed integer comparisons. */
16199 if (!fcmov_comparison_operator (compare_op, VOIDmode))
16201 gcc_assert (!second_test && !bypass_test);
/* Reduce to a QImode 0/1 flag, then re-compare that against zero
   with an fcmov-encodable (unsigned) condition.  */
16202 tmp = gen_reg_rtx (QImode);
16203 ix86_expand_setcc (code, tmp);
16205 ix86_compare_op0 = tmp;
16206 ix86_compare_op1 = const0_rtx;
16207 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy arms that overlap the destination before the extra cmovs.  */
16209 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
16211 tmp = gen_reg_rtx (mode);
16212 emit_move_insn (tmp, operands[3]);
16215 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
16217 tmp = gen_reg_rtx (mode);
16218 emit_move_insn (tmp, operands[2]);
16222 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16223 gen_rtx_IF_THEN_ELSE (mode, compare_op,
16224 operands[2], operands[3])));
16226 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16227 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
16228 operands[3], operands[0])));
16230 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16231 gen_rtx_IF_THEN_ELSE (mode, second_test,
16232 operands[2], operands[0])));
16237 /* Expand a floating-point vector conditional move; a vcond operation
16238 rather than a movcc operation. */
/* operands[0] = (operands[4] CODE operands[5]) ? operands[1] : operands[2],
   where CODE comes from operands[3].  Tries min/max first, then a
   compare-and-blend.  NOTE(review): excerpt elides interior lines.  */
16241 ix86_expand_fp_vcond (rtx operands[])
16243 enum rtx_code code = GET_CODE (operands[3]);
16246 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16247 &operands[4], &operands[5]);
16248 if (code == UNKNOWN)
16251 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
16252 operands[5], operands[1], operands[2]))
16255 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
16256 operands[1], operands[2]);
16257 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
16261 /* Expand a signed/unsigned integral vector conditional move. */
/* operands[0] = (operands[4] <cond operands[3]> operands[5])
   ? operands[1] : operands[2].  Hardware only provides EQ/GT (signed),
   so other conditions are canonicalized by reversing/swapping; NEGATE
   tracks a pending logical inversion applied by swapping the select
   arms at the end.  Unsigned compares are rewritten as signed ones.
   NOTE(review): excerpt elides interior lines; comments only added.  */
16264 ix86_expand_int_vcond (rtx operands[])
16266 enum machine_mode mode = GET_MODE (operands[0]);
16267 enum rtx_code code = GET_CODE (operands[3]);
16268 bool negate = false;
16271 cop0 = operands[4];
16272 cop1 = operands[5];
16274 /* SSE5 supports all of the comparisons on all vector int types. */
16277 /* Canonicalize the comparison to EQ, GT, GTU. */
16288 code = reverse_condition (code);
16294 code = reverse_condition (code);
16300 code = swap_condition (code);
16301 x = cop0, cop0 = cop1, cop1 = x;
16305 gcc_unreachable ();
16308 /* Only SSE4.1/SSE4.2 supports V2DImode. */
16309 if (mode == V2DImode)
16314 /* SSE4.1 supports EQ. */
16315 if (!TARGET_SSE4_1)
16321 /* SSE4.2 supports GT/GTU. */
16322 if (!TARGET_SSE4_2)
16327 gcc_unreachable ();
16331 /* Unsigned parallel compare is not supported by the hardware. Play some
16332 tricks to turn this into a signed comparison against 0. */
16335 cop0 = force_reg (mode, cop0);
16344 /* Perform a parallel modulo subtraction. */
16345 t1 = gen_reg_rtx (mode);
16346 emit_insn ((mode == V4SImode
16348 : gen_subv2di3) (t1, cop0, cop1));
16350 /* Extract the original sign bit of op0. */
16351 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16353 t2 = gen_reg_rtx (mode);
16354 emit_insn ((mode == V4SImode
16356 : gen_andv2di3) (t2, cop0, mask));
16358 /* XOR it back into the result of the subtraction. This results
16359 in the sign bit set iff we saw unsigned underflow. */
16360 x = gen_reg_rtx (mode);
16361 emit_insn ((mode == V4SImode
16363 : gen_xorv2di3) (x, t1, t2));
16371 /* Perform a parallel unsigned saturating subtraction. */
16372 x = gen_reg_rtx (mode);
16373 emit_insn (gen_rtx_SET (VOIDmode, x,
16374 gen_rtx_US_MINUS (mode, cop0, cop1)));
16381 gcc_unreachable ();
16385 cop1 = CONST0_RTX (mode);
/* NEGATE is folded in here by swapping the true/false arms.  */
16389 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16390 operands[1+negate], operands[2-negate]);
16392 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16393 operands[2-negate]);
16397 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16398 true if we should do zero extension, else sign extension. HIGH_P is
16399 true if we want the N/2 high elements, else the low elements. */
/* Pre-SSE4.1 path: widen by interleaving with either a zero vector
   (zero extension) or a per-element sign mask computed via GT compare
   against zero (sign extension).
   NOTE(review): excerpt elides interior lines; comments only added.  */
16402 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16404 enum machine_mode imode = GET_MODE (operands[1]);
16405 rtx (*unpack)(rtx, rtx, rtx);
16412 unpack = gen_vec_interleave_highv16qi;
16414 unpack = gen_vec_interleave_lowv16qi;
16418 unpack = gen_vec_interleave_highv8hi;
16420 unpack = gen_vec_interleave_lowv8hi;
16424 unpack = gen_vec_interleave_highv4si;
16426 unpack = gen_vec_interleave_lowv4si;
16429 gcc_unreachable ();
16432 dest = gen_lowpart (imode, operands[0]);
/* Zero extend interleaves with 0; sign extend interleaves with a mask
   of the source's sign bits (0 > x gives all-ones for negative x).  */
16435 se = force_reg (imode, CONST0_RTX (imode));
16437 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16438 operands[1], pc_rtx, pc_rtx);
16440 emit_insn (unpack (dest, operands[1], se));
16443 /* This function performs the same task as ix86_expand_sse_unpack,
16444 but with SSE4.1 instructions. */
/* Uses pmovzx/pmovsx, which only read the low half of the source; for
   HIGH_P the upper 8 bytes are first shifted down.
   NOTE(review): excerpt elides interior lines; comments only added.  */
16447 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16449 enum machine_mode imode = GET_MODE (operands[1]);
16450 rtx (*unpack)(rtx, rtx);
16457 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16459 unpack = gen_sse4_1_extendv8qiv8hi2;
16463 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16465 unpack = gen_sse4_1_extendv4hiv4si2;
16469 unpack = gen_sse4_1_zero_extendv2siv2di2;
16471 unpack = gen_sse4_1_extendv2siv2di2;
16474 gcc_unreachable ();
16477 dest = operands[0];
16480 /* Shift higher 8 bytes to lower 8 bytes. */
16481 src = gen_reg_rtx (imode);
16482 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16483 gen_lowpart (TImode, operands[1]),
16489 emit_insn (unpack (dest, src));
16492 /* This function performs the same task as ix86_expand_sse_unpack,
16493 but with sse5 instructions. */
/* Builds a 16-byte pperm control vector selecting source bytes
   (PPERM_SRC | PPERM_SRC2 | byte-index) interleaved with either zero
   bytes (unsigned) or sign-replicated bytes (PPERM_SIGN) per element,
   then emits the matching sse5_pperm_{zero,sign}_* insn.  H selects
   the high (8) or low (0) half of the source.
   NOTE(review): excerpt elides interior lines; comments only added.  */
16496 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16498 enum machine_mode imode = GET_MODE (operands[1]);
16499 int pperm_bytes[16];
16501 int h = (high_p) ? 8 : 0;
16504 rtvec v = rtvec_alloc (16);
16507 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: each byte followed by one extension byte.  */
16512 vs = rtvec_alloc (8);
16513 h2 = (high_p) ? 8 : 0;
16514 for (i = 0; i < 8; i++)
16516 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16517 pperm_bytes[2*i+1] = ((unsigned_p)
16519 : PPERM_SIGN | PPERM_SRC2 | i | h);
16522 for (i = 0; i < 16; i++)
16523 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16525 for (i = 0; i < 8; i++)
16526 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16528 p = gen_rtx_PARALLEL (VOIDmode, vs);
16529 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16531 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16533 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two data bytes followed by two extension bytes.  */
16537 vs = rtvec_alloc (4);
16538 h2 = (high_p) ? 4 : 0;
16539 for (i = 0; i < 4; i++)
16541 sign_extend = ((unsigned_p)
16543 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16544 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16545 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16546 pperm_bytes[4*i+2] = sign_extend;
16547 pperm_bytes[4*i+3] = sign_extend;
16550 for (i = 0; i < 16; i++)
16551 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16553 for (i = 0; i < 4; i++)
16554 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16556 p = gen_rtx_PARALLEL (VOIDmode, vs);
16557 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16559 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16561 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four data bytes followed by four extension bytes.  */
16565 vs = rtvec_alloc (2);
16566 h2 = (high_p) ? 2 : 0;
16567 for (i = 0; i < 2; i++)
16569 sign_extend = ((unsigned_p)
16571 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16572 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16573 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16574 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16575 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16576 pperm_bytes[8*i+4] = sign_extend;
16577 pperm_bytes[8*i+5] = sign_extend;
16578 pperm_bytes[8*i+6] = sign_extend;
16579 pperm_bytes[8*i+7] = sign_extend;
16582 for (i = 0; i < 16; i++)
16583 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16585 for (i = 0; i < 2; i++)
16586 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16588 p = gen_rtx_PARALLEL (VOIDmode, vs);
16589 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16591 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16593 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16597 gcc_unreachable ();
16603 /* Pack OPERANDS[1] and OPERANDS[2] into OPERANDS[0], which has the
16604    next narrower integer vector mode, using an SSE5 PPERM byte select.
   The selector takes the low byte(s) of each element: OPERANDS[1]
   supplies the low half of the result, OPERANDS[2] the high half.

   NOTE(review): extraction dropped structural lines (return type, braces,
   the switch on IMODE and its case labels); only surviving lines are
   annotated.  */
16606 ix86_expand_sse5_pack (rtx operands[3])
16608 enum machine_mode imode = GET_MODE (operands[0]);
16609 int pperm_bytes[16];
16611 rtvec v = rtvec_alloc (16);
16613 rtx op0 = operands[0];
16614 rtx op1 = operands[1];
16615 rtx op2 = operands[2];
/* --- pack words to bytes: keep the low byte (offset i*2) of each word.  */
16620 for (i = 0; i < 8; i++)
16622 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16623 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16626 for (i = 0; i < 16; i++)
16627 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16629 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16630 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* --- pack dwords to words: keep the two low bytes of each dword.  */
16634 for (i = 0; i < 4; i++)
16636 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16637 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16638 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16639 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16642 for (i = 0; i < 16; i++)
16643 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16645 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16646 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* --- pack qwords to dwords: keep the four low bytes of each qword.  */
16650 for (i = 0; i < 2; i++)
16652 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16653 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16654 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16655 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16656 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16657 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16658 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16659 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16662 for (i = 0; i < 16; i++)
16663 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16665 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16666 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
/* Any other IMODE is a caller bug.  */
16670 gcc_unreachable ();
16676 /* Expand conditional increment or decrement using adc/sbb instructions.
16677    The default case using setcc followed by the conditional move can be
16678    done by generic code.  Returns 1 (DONE) when the sequence was emitted,
   0 when the caller should fall back to the generic expansion.

   NOTE(review): extraction dropped lines — the return type, braces, the
   early "return 0" paths, the "code != LTU" guard before the val/reverse
   block, and the mode case labels of both switches.  Only the surviving
   lines are annotated.  */
16680 ix86_expand_int_addcc (rtx operands[])
16682 enum rtx_code code = GET_CODE (operands[1]);
16684 rtx val = const0_rtx;
16685 bool fpcmp = false;
16686 enum machine_mode mode = GET_MODE (operands[0]);
16688 ix86_compare_op0 = XEXP (operands[1], 0);
16689 ix86_compare_op1 = XEXP (operands[1], 1);
/* Only +/-1 adjustments can be done with a bare adc/sbb.  */
16690 if (operands[3] != const1_rtx
16691 && operands[3] != constm1_rtx)
/* Rewrite the comparison so the condition lands in the carry flag.  */
16693 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16694 ix86_compare_op1, &compare_op))
16696 code = GET_CODE (compare_op);
16698 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16699 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16702 code = ix86_fp_compare_code_to_integer (code);
/* FP comparisons must be reversed with the unordered-aware variant.  */
16709 PUT_CODE (compare_op,
16710 reverse_condition_maybe_unordered
16711 (GET_CODE (compare_op)));
16713 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16715 PUT_MODE (compare_op, mode);
16717 /* Construct either adc or sbb insn.  */
16718 if ((code == LTU) == (operands[3] == constm1_rtx))
16720 switch (GET_MODE (operands[0]))
16723 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16726 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16729 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16732 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16735 gcc_unreachable ();
16740 switch (GET_MODE (operands[0]))
16743 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16746 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16749 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16752 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16755 gcc_unreachable ();
16758 return 1; /* DONE */
16762 /* Split operand OPERAND into word-sized PARTS.  Similar to split_di, but
16763    works for floating point parameters and non-offsettable memories.
16764    For pushes, it returns just stack offsets; the values will be saved
16765    in the right order.  Maximally four parts are filled; the return value
   is the number of parts (size).

   NOTE(review): extraction dropped lines — the function's return type,
   braces, the !TARGET_64BIT/TARGET_64BIT if/else around the size
   computations, mode case labels, and several else branches.  Only the
   surviving lines are annotated.  */
16768 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* 32-bit: XFmode occupies 12 bytes = 3 SImode parts; otherwise size/4.  */
16773 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
/* 64-bit: round up to whole DImode words.  */
16775 size = (GET_MODE_SIZE (mode) + 4) / 8;
16777 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16778 gcc_assert (size >= 2 && size <= 4);
16780 /* Optimize constant pool reference to immediates.  This is used by fp
16781    moves, that force all constants to memory to allow combining.  */
16782 if (MEM_P (operand) && MEM_READONLY_P (operand))
16784 rtx tmp = maybe_get_pool_constant (operand);
16789 if (MEM_P (operand) && !offsettable_memref_p (operand))
16791 /* The only non-offsettable memories we handle are pushes.  */
16792 int ok = push_operand (operand, VOIDmode);
/* For a push, every part is the same auto-modified stack reference.  */
16796 operand = copy_rtx (operand);
16797 PUT_MODE (operand, Pmode);
16798 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16802 if (GET_CODE (operand) == CONST_VECTOR)
16804 enum machine_mode imode = int_mode_for_mode (mode);
16805 /* Caution: if we looked through a constant pool memory above,
16806    the operand may actually have a different mode now.  That's
16807    ok, since we want to pun this all the way back to an integer.  */
16808 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16809 gcc_assert (operand != NULL);
/* --- 32-bit target: split into SImode parts.  */
16815 if (mode == DImode)
16816 split_di (&operand, 1, &parts[0], &parts[1]);
16821 if (REG_P (operand))
/* Hard register numbering is only meaningful after reload.  */
16823 gcc_assert (reload_completed);
16824 for (i = 0; i < size; i++)
16825 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16827 else if (offsettable_memref_p (operand))
16829 operand = adjust_address (operand, SImode, 0);
16830 parts[0] = operand;
16831 for (i = 1; i < size; i++)
16832 parts[i] = adjust_address (operand, SImode, 4 * i);
16834 else if (GET_CODE (operand) == CONST_DOUBLE)
16839 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* Convert the FP constant to its target word images, widest mode first.  */
16843 real_to_target (l, &r, mode);
16844 parts[3] = gen_int_mode (l[3], SImode);
16845 parts[2] = gen_int_mode (l[2], SImode);
16848 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16849 parts[2] = gen_int_mode (l[2], SImode);
16852 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16855 gcc_unreachable ();
16857 parts[1] = gen_int_mode (l[1], SImode);
16858 parts[0] = gen_int_mode (l[0], SImode);
16861 gcc_unreachable ();
/* --- 64-bit target: split into DImode (plus possibly SImode) parts.  */
16866 if (mode == TImode)
16867 split_ti (&operand, 1, &parts[0], &parts[1]);
16868 if (mode == XFmode || mode == TFmode)
/* XFmode's upper part is only 4 bytes on 64-bit, hence SImode.  */
16870 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16871 if (REG_P (operand))
16873 gcc_assert (reload_completed);
16874 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16875 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16877 else if (offsettable_memref_p (operand))
16879 operand = adjust_address (operand, DImode, 0);
16880 parts[0] = operand;
16881 parts[1] = adjust_address (operand, upper_mode, 8);
16883 else if (GET_CODE (operand) == CONST_DOUBLE)
16888 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16889 real_to_target (l, &r, mode);
16891 /* Do not use shift by 32 to avoid warning on 32bit systems.  */
16892 if (HOST_BITS_PER_WIDE_INT >= 64)
/* Combine the two 32-bit images into one HOST_WIDE_INT constant.  */
16895 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16896 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16899 parts[0] = immed_double_const (l[0], l[1], DImode);
16901 if (upper_mode == SImode)
16902 parts[1] = gen_int_mode (l[2], SImode);
16903 else if (HOST_BITS_PER_WIDE_INT >= 64)
16906 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16907 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16910 parts[1] = immed_double_const (l[2], l[3], DImode);
16913 gcc_unreachable ();
16920 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16921    Splits the move into word-sized parts.  Based on the visible code,
16922    operands 2..5 receive the destination parts and operands 6..9 the
16923    source parts, in the order they must be emitted.

   NOTE(review): extraction dropped lines — return type, braces, several
   declarations (part[][], nparts, push, i/j, tmp, base), "return 0/1"
   statements, case labels, and else branches.  Only surviving lines are
   annotated.  */
16926 ix86_split_long_move (rtx operands[])
16931 int collisions = 0;
16932 enum machine_mode mode = GET_MODE (operands[0]);
16933 bool collisionparts[4];
16935 /* The DFmode expanders may ask us to move double.
16936    For 64bit target this is single move.  By hiding the fact
16937    here we simplify i386.md splitters.  */
16938 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16940 /* Optimize constant pool reference to immediates.  This is used by
16941    fp moves, that force all constants to memory to allow combining.  */
16943 if (MEM_P (operands[1])
16944 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16945 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16946 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16947 if (push_operand (operands[0], VOIDmode))
16949 operands[0] = copy_rtx (operands[0]);
16950 PUT_MODE (operands[0], Pmode);
/* 8-byte value on 64-bit: emit as one DImode move and we are done.  */
16953 operands[0] = gen_lowpart (DImode, operands[0]);
16954 operands[1] = gen_lowpart (DImode, operands[1]);
16955 emit_move_insn (operands[0], operands[1]);
16959 /* The only non-offsettable memory we handle is push.  */
16960 if (push_operand (operands[0], VOIDmode))
16963 gcc_assert (!MEM_P (operands[0])
16964 || offsettable_memref_p (operands[0]));
16966 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16967 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16969 /* When emitting push, take care for source operands on the stack.  */
16970 if (push && MEM_P (operands[1])
16971 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16972 for (i = 0; i < nparts - 1; i++)
16973 part[1][i] = change_address (part[1][i],
16974 GET_MODE (part[1][i]),
16975 XEXP (part[1][i + 1], 0));
16977 /* We need to do copy in the right order in case an address register
16978    of the source overlaps the destination.  */
16979 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
/* Record which destination parts collide with the source address.  */
16983 for (i = 0; i < nparts; i++)
16986 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16987 if (collisionparts[i])
16991 /* Collision in the middle part can be handled by reordering.  */
16992 if (collisions == 1 && nparts == 3 && collisionparts [1])
16994 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16995 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16997 else if (collisions == 1
16999 && (collisionparts [1] || collisionparts [2]))
17001 if (collisionparts [1])
17003 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17004 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17008 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
17009 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
17013 /* If there are more collisions, we can't handle it by reordering.
17014    Do an lea to the last part and use only one colliding move.  */
17015 else if (collisions > 1)
17021 base = part[0][nparts - 1];
17023 /* Handle the case when the last part isn't valid for lea.
17024    Happens in 64-bit mode storing the 12-byte XFmode.  */
17025 if (GET_MODE (base) != Pmode)
17026 base = gen_rtx_REG (Pmode, REGNO (base));
/* Load the source address into BASE, then rewrite all source parts to
   be BASE-relative so only this one move collides.  */
17028 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
17029 part[1][0] = replace_equiv_address (part[1][0], base);
17030 for (i = 1; i < nparts; i++)
17032 tmp = plus_constant (base, UNITS_PER_WORD * i);
17033 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* --- push path: XFmode on 32-bit needs 4 extra bytes of stack.  */
17044 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
17045 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
17046 emit_move_insn (part[0][2], part[1][2]);
17048 else if (nparts == 4)
17050 emit_move_insn (part[0][3], part[1][3]);
17051 emit_move_insn (part[0][2], part[1][2]);
17056 /* In 64bit mode we don't have 32bit push available.  In case this is
17057    register, it is OK - we will just use larger counterpart.  We also
17058    retype memory - these come from an attempt to avoid REX prefix on
17059    moving of second half of TFmode value.  */
17060 if (GET_MODE (part[1][1]) == SImode)
17062 switch (GET_CODE (part[1][1]))
17065 part[1][1] = adjust_address (part[1][1], DImode, 0);
17069 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
17073 gcc_unreachable ();
17076 if (GET_MODE (part[1][0]) == SImode)
17077 part[1][0] = part[1][1];
17080 emit_move_insn (part[0][1], part[1][1]);
17081 emit_move_insn (part[0][0], part[1][0]);
17085 /* Choose correct order to not overwrite the source before it is copied.  */
17086 if ((REG_P (part[0][0])
17087 && REG_P (part[1][1])
17088 && (REGNO (part[0][0]) == REGNO (part[1][1])
17090 && REGNO (part[0][0]) == REGNO (part[1][2]))
17092 && REGNO (part[0][0]) == REGNO (part[1][3]))))
17094 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Copy highest part first when the low destination overlaps a source.  */
17096 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
17098 operands[2 + i] = part[0][j];
17099 operands[6 + i] = part[1][j];
17104 for (i = 0; i < nparts; i++)
17106 operands[2 + i] = part[0][i];
17107 operands[6 + i] = part[1][i];
17111 /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
17112 if (optimize_insn_for_size_p ())
17114 for (j = 0; j < nparts - 1; j++)
17115 if (CONST_INT_P (operands[6 + j])
17116 && operands[6 + j] != const0_rtx
17117 && REG_P (operands[2 + j]))
17118 for (i = j; i < nparts - 1; i++)
17119 if (CONST_INT_P (operands[7 + i])
17120 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
/* Reuse the register just loaded with the same constant.  */
17121 operands[7 + i] = operands[2 + j];
17124 for (i = 0; i < nparts; i++)
17125 emit_move_insn (operands[2 + i], operands[6 + i]);
17130 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
17131 left shift by a constant, either using a single shift or
17132 a sequence of add instructions. */
17135 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
17139 emit_insn ((mode == DImode
17141 : gen_adddi3) (operand, operand, operand));
17143 else if (!optimize_insn_for_size_p ()
17144 && count * ix86_cost->add <= ix86_cost->shift_const)
17147 for (i=0; i<count; i++)
17149 emit_insn ((mode == DImode
17151 : gen_adddi3) (operand, operand, operand));
17155 emit_insn ((mode == DImode
17157 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into single-word operations.  OPERANDS[0] = destination,
   OPERANDS[1] = source, OPERANDS[2] = shift count; SCRATCH is an
   optional scratch register for the cmove-based variable-count path.

   NOTE(review): extraction dropped lines — return type, braces,
   declarations (count, x, s/d/flags), GEN_INT (64) continuation lines,
   and several else branches.  Only surviving lines are annotated.  */
17161 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
17163 rtx low[2], high[2];
17165 const int single_width = mode == DImode ? 32 : 64;
/* --- constant shift count.  */
17167 if (CONST_INT_P (operands[2]))
17169 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17170 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17172 if (count >= single_width)
/* Shifting by a full word or more: low word becomes zero.  */
17174 emit_move_insn (high[0], low[1]);
17175 emit_move_insn (low[0], const0_rtx);
17177 if (count > single_width)
17178 ix86_expand_ashl_const (high[0], count - single_width, mode);
17182 if (!rtx_equal_p (operands[0], operands[1]))
17183 emit_move_insn (operands[0], operands[1]);
/* shld propagates bits from the low word into the high word.  */
17184 emit_insn ((mode == DImode
17186 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
17187 ix86_expand_ashl_const (low[0], count, mode);
/* --- variable shift count.  */
17192 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17194 if (operands[1] == const1_rtx)
17196 /* Assuming we've chosen a QImode capable registers, then 1 << N
17197    can be done with two 32/64-bit shifts, no branches, no cmoves.  */
17198 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
17200 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
17202 ix86_expand_clear (low[0]);
17203 ix86_expand_clear (high[0]);
/* Test the "word select" bit of the count (bit 5 resp. 6).  */
17204 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
17206 d = gen_lowpart (QImode, low[0]);
17207 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17208 s = gen_rtx_EQ (QImode, flags, const0_rtx);
17209 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17211 d = gen_lowpart (QImode, high[0]);
17212 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17213 s = gen_rtx_NE (QImode, flags, const0_rtx);
17214 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17217 /* Otherwise, we can get the same results by manually performing
17218    a bit extract operation on bit 5/6, and then performing the two
17219    shifts.  The two methods of getting 0/1 into low/high are exactly
17220    the same size.  Avoiding the shift in the bit extract case helps
17221    pentium4 a bit; no one else seems to care much either way.  */
17226 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
17227 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
17229 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
17230 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) / 6 (TImode) of the count into high[0].  */
17232 emit_insn ((mode == DImode
17234 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
17235 emit_insn ((mode == DImode
17237 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
17238 emit_move_insn (low[0], high[0]);
17239 emit_insn ((mode == DImode
17241 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
/* Now shift the 0/1 words by the (masked) count.  */
17244 emit_insn ((mode == DImode
17246 : gen_ashldi3) (low[0], low[0], operands[2]));
17247 emit_insn ((mode == DImode
17249 : gen_ashldi3) (high[0], high[0], operands[2]));
/* --- general variable-count case: -1 source shortcut, then shld.  */
17253 if (operands[1] == constm1_rtx)
17255 /* For -1 << N, we can avoid the shld instruction, because we
17256    know that we're shifting 0...31/63 ones into a -1.  */
17257 emit_move_insn (low[0], constm1_rtx)
17258 if (optimize_insn_for_size_p ())
17259 emit_move_insn (high[0], low[0]);
17261 emit_move_insn (high[0], constm1_rtx);
17265 if (!rtx_equal_p (operands[0], operands[1]))
17266 emit_move_insn (operands[0], operands[1]);
17268 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17269 emit_insn ((mode == DImode
17271 : gen_x86_64_shld) (high[0], low[0], operands[2]));
17274 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
/* Fix up the high word when count >= single_width: with cmove use the
   adj_1 pattern and a cleared scratch, otherwise the branching adj_2.  */
17276 if (TARGET_CMOVE && scratch)
17278 ix86_expand_clear (scratch);
17279 emit_insn ((mode == DImode
17280 ? gen_x86_shift_adj_1
17281 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
17285 emit_insn ((mode == DImode
17286 ? gen_x86_shift_adj_2
17287 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into single-word
   operations.  Same operand conventions as ix86_split_ashl.

   NOTE(review): extraction dropped lines — return type, braces,
   declarations (count) and else branches.  Only surviving lines are
   annotated.  */
17291 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
17293 rtx low[2], high[2];
17295 const int single_width = mode == DImode ? 32 : 64;
/* --- constant shift count.  */
17297 if (CONST_INT_P (operands[2]))
17299 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17300 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17302 if (count == single_width * 2 - 1)
/* Shift by all bits: both words become the sign bit broadcast.  */
17304 emit_move_insn (high[0], high[1]);
17305 emit_insn ((mode == DImode
17307 : gen_ashrdi3) (high[0], high[0],
17308 GEN_INT (single_width - 1)));
17309 emit_move_insn (low[0], high[0]);
17312 else if (count >= single_width)
/* Count >= one word: low word takes the shifted high word, the high
   word becomes the sign broadcast.  */
17314 emit_move_insn (low[0], high[1]);
17315 emit_move_insn (high[0], low[0]);
17316 emit_insn ((mode == DImode
17318 : gen_ashrdi3) (high[0], high[0],
17319 GEN_INT (single_width - 1)));
17320 if (count > single_width)
17321 emit_insn ((mode == DImode
17323 : gen_ashrdi3) (low[0], low[0],
17324 GEN_INT (count - single_width)));
/* Count < one word: shrd into the low word, arithmetic shift of high.  */
17328 if (!rtx_equal_p (operands[0], operands[1]))
17329 emit_move_insn (operands[0], operands[1]);
17330 emit_insn ((mode == DImode
17332 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17333 emit_insn ((mode == DImode
17335 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* --- variable shift count.  */
17340 if (!rtx_equal_p (operands[0], operands[1]))
17341 emit_move_insn (operands[0], operands[1]);
17343 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17345 emit_insn ((mode == DImode
17347 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17348 emit_insn ((mode == DImode
17350 : gen_ashrdi3) (high[0], high[0], operands[2]));
/* Fix up when count >= single_width: SCRATCH holds the sign broadcast
   for the cmove-based adj_1 pattern, else use the branching adj_3.  */
17352 if (TARGET_CMOVE && scratch)
17354 emit_move_insn (scratch, high[0]);
17355 emit_insn ((mode == DImode
17357 : gen_ashrdi3) (scratch, scratch,
17358 GEN_INT (single_width - 1)));
17359 emit_insn ((mode == DImode
17360 ? gen_x86_shift_adj_1
17361 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17365 emit_insn ((mode == DImode
17366 ? gen_x86_shift_adj_3
17367 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into single-word operations.
   Same operand conventions as ix86_split_ashl.

   NOTE(review): extraction dropped lines — return type, braces,
   declarations (count) and else branches.  Only surviving lines are
   annotated.  */
17372 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17374 rtx low[2], high[2];
17376 const int single_width = mode == DImode ? 32 : 64;
/* --- constant shift count.  */
17378 if (CONST_INT_P (operands[2]))
17380 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17381 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17383 if (count >= single_width)
/* Count >= one word: low takes the shifted high word, high is zeroed.  */
17385 emit_move_insn (low[0], high[1]);
17386 ix86_expand_clear (high[0]);
17388 if (count > single_width)
17389 emit_insn ((mode == DImode
17391 : gen_lshrdi3) (low[0], low[0],
17392 GEN_INT (count - single_width)));
/* Count < one word: shrd into the low word, logical shift of high.  */
17396 if (!rtx_equal_p (operands[0], operands[1]))
17397 emit_move_insn (operands[0], operands[1]);
17398 emit_insn ((mode == DImode
17400 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17401 emit_insn ((mode == DImode
17403 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* --- variable shift count.  */
17408 if (!rtx_equal_p (operands[0], operands[1]))
17409 emit_move_insn (operands[0], operands[1]);
17411 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17413 emit_insn ((mode == DImode
17415 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17416 emit_insn ((mode == DImode
17418 : gen_lshrdi3) (high[0], high[0], operands[2]));
17420 /* Heh.  By reversing the arguments, we can reuse this pattern.  */
17421 if (TARGET_CMOVE && scratch)
17423 ix86_expand_clear (scratch);
17424 emit_insn ((mode == DImode
17425 ? gen_x86_shift_adj_1
17426 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17430 emit_insn ((mode == DImode
17431 ? gen_x86_shift_adj_2
17432 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17436 /* Predict just emitted jump instruction to be taken with probability PROB. */
17438 predict_jump (int prob)
17440 rtx insn = get_last_insn ();
17441 gcc_assert (JUMP_P (insn));
17442 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17445 /* Helper function for the string operations below. Dest VARIABLE whether
17446 it is aligned to VALUE bytes. If true, jump to the label. */
17448 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17450 rtx label = gen_label_rtx ();
17451 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17452 if (GET_MODE (variable) == DImode)
17453 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17455 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17456 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17459 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17461 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17465 /* Adjust COUNTER by the VALUE. */
17467 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17469 if (GET_MODE (countreg) == DImode)
17470 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17472 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17475 /* Zero extend possibly SImode EXP to Pmode register. */
17477 ix86_zero_extend_to_Pmode (rtx exp)
17480 if (GET_MODE (exp) == VOIDmode)
17481 return force_reg (Pmode, exp);
17482 if (GET_MODE (exp) == Pmode)
17483 return copy_to_mode_reg (Pmode, exp);
17484 r = gen_reg_rtx (Pmode);
17485 emit_insn (gen_zero_extendsidi2 (r, exp));
17489 /* Divide COUNTREG by SCALE. */
17491 scale_counter (rtx countreg, int scale)
17494 rtx piece_size_mask;
17498 if (CONST_INT_P (countreg))
17499 return GEN_INT (INTVAL (countreg) / scale);
17500 gcc_assert (REG_P (countreg));
17502 piece_size_mask = GEN_INT (scale - 1);
17503 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17504 GEN_INT (exact_log2 (scale)),
17505 NULL, 1, OPTAB_DIRECT);
17509 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17510 DImode for constant loop counts. */
17512 static enum machine_mode
17513 counter_mode (rtx count_exp)
17515 if (GET_MODE (count_exp) != VOIDmode)
17516 return GET_MODE (count_exp);
17517 if (!CONST_INT_P (count_exp))
17519 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17524 /* When SRCPTR is non-NULL, output simple loop to move memory
17525    pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
17526    overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
17527    equivalent loop to set memory by VALUE (supposed to be in MODE).
17529    The size is rounded down to whole number of chunk size moved at once.
17530    SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.

   NOTE(review): extraction dropped lines — return type, braces,
   declarations (x_addr/y_addr, i, size, tmpreg[4]), the adjust_address
   calls for the i == 0 iterations, and several else branches.  Only
   surviving lines are annotated.  */
17534 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17535 rtx destptr, rtx srcptr, rtx value,
17536 rtx count, enum machine_mode mode, int unroll,
17539 rtx out_label, top_label, iter, tmp;
17540 enum machine_mode iter_mode = counter_mode (count);
17541 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17542 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17548 top_label = gen_label_rtx ();
17549 out_label = gen_label_rtx ();
17550 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the unrolled chunk size.  */
17552 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17553 NULL, 1, OPTAB_DIRECT);
17554 /* Those two should combine.  */
17555 if (piece_size == const1_rtx)
17557 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17559 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17561 emit_move_insn (iter, const0_rtx);
17563 emit_label (top_label);
/* Re-derive the chunk addresses from the running ITER offset.  */
17565 tmp = convert_modes (Pmode, iter_mode, iter, true);
17566 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17567 destmem = change_address (destmem, mode, x_addr);
17571 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17572 srcmem = change_address (srcmem, mode, y_addr);
17574 /* When unrolling for chips that reorder memory reads and writes,
17575    we can save registers by using single temporary.
17576    Also using 4 temporaries is overkill in 32bit mode.  */
/* NOTE(review): the "&& 0" disables this branch unconditionally —
   apparently deliberate dead code kept for reference.  */
17577 if (!TARGET_64BIT && 0)
17579 for (i = 0; i < unroll; i++)
17584 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17586 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17588 emit_move_insn (destmem, srcmem);
/* Live path: load all chunks into temporaries first, then store them,
   so reads are not reordered after the stores.  */
17594 gcc_assert (unroll <= 4);
17595 for (i = 0; i < unroll; i++)
17597 tmpreg[i] = gen_reg_rtx (mode);
17601 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17603 emit_move_insn (tmpreg[i], srcmem);
17605 for (i = 0; i < unroll; i++)
17610 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17612 emit_move_insn (destmem, tmpreg[i]);
/* Memset variant: store VALUE into each chunk.  */
17617 for (i = 0; i < unroll; i++)
17621 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17622 emit_move_insn (destmem, value);
17625 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17626 true, OPTAB_LIB_WIDEN);
17628 emit_move_insn (iter, tmp);
/* Loop back while ITER < SIZE; hint the branch from EXPECTED_SIZE.  */
17630 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
17632 if (expected_size != -1)
17634 expected_size /= GET_MODE_SIZE (mode) * unroll;
17635 if (expected_size == 0)
17637 else if (expected_size > REG_BR_PROB_BASE)
17638 predict_jump (REG_BR_PROB_BASE - 1);
17640 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17643 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the caller's pointers past the copied region.  */
17644 iter = ix86_zero_extend_to_Pmode (iter);
17645 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17646 true, OPTAB_LIB_WIDEN);
17647 if (tmp != destptr)
17648 emit_move_insn (destptr, tmp);
17651 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17652 true, OPTAB_LIB_WIDEN);
17654 emit_move_insn (srcptr, tmp);
17656 emit_label (out_label);
17659 /* Output "rep; mov" instruction.
17660    Arguments have same meaning as for previous function.

   NOTE(review): extraction dropped lines — return type, braces,
   declarations (countreg, destexp, srcexp) and an early-return path for
   the QImode/constant-count optimization.  Only surviving lines are
   annotated.  */
17662 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17663 rtx destptr, rtx srcptr,
17665 enum machine_mode mode)
17671 /* If the size is known, it is shorter to use rep movs.  */
/* NOTE(review): presumably this bumps MODE up to SImode when COUNT is a
   multiple of 4 — the body of this condition is among the dropped
   lines; confirm against the full source.  */
17672 if (mode == QImode && CONST_INT_P (count)
17673 && !(INTVAL (count) & 3))
/* Normalize the MEMs to BLKmode references off the pointer registers.  */
17676 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17677 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17678 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17679 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17680 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Build the final-pointer expressions: ptr + countreg << log2(size).  */
17681 if (mode != QImode)
17683 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17684 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17685 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17686 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17687 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17688 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17692 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17693 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
/* Propagate exact size info to the MEMs when the count is constant,
   otherwise clear any stale size annotation.  */
17695 if (CONST_INT_P (count))
17697 count = GEN_INT (INTVAL (count)
17698 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17699 destmem = shallow_copy_rtx (destmem);
17700 srcmem = shallow_copy_rtx (srcmem);
17701 set_mem_size (destmem, count);
17702 set_mem_size (srcmem, count);
17706 if (MEM_SIZE (destmem))
17707 set_mem_size (destmem, NULL_RTX);
17708 if (MEM_SIZE (srcmem))
17709 set_mem_size (srcmem, NULL_RTX);
17711 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17715 /* Output "rep; stos" instruction.
17716    Arguments have same meaning as for previous function.

   NOTE(review): extraction dropped lines — return type, braces and
   declarations (countreg, destexp); the parameter list also appears to
   continue past line 17719 (an ORIG_VALUE parameter is referenced
   below).  Only surviving lines are annotated.  */
17718 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17719 rtx count, enum machine_mode mode,
/* Normalize the MEM to a BLKmode reference off the pointer register.  */
17725 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17726 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17727 value = force_reg (mode, gen_lowpart (mode, value));
17728 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Final-pointer expression: destptr + countreg << log2(size).  */
17729 if (mode != QImode)
17731 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17732 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17733 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17736 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Only a zero fill with known count lets us claim an exact MEM size.  */
17737 if (orig_value == const0_rtx && CONST_INT_P (count))
17739 count = GEN_INT (INTVAL (count)
17740 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17741 destmem = shallow_copy_rtx (destmem);
17742 set_mem_size (destmem, count);
17744 else if (MEM_SIZE (destmem))
17745 set_mem_size (destmem, NULL_RTX);
17746 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single MODE-sized string move from SRCMEM+OFFSET to
   DESTMEM+OFFSET via the strmov pattern, which also advances DESTPTR and
   SRCPTR by the mode size.
   NOTE(review): the `static void` line and braces are missing from this
   numbered listing.  */
17750 emit_strmov (rtx destmem, rtx srcmem,
17751 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
/* Rebuild both MEMs as MODE accesses at the constant OFFSET so aliasing
   info stays exact for this move.  */
17753 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17754 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17755 emit_insn (gen_strmov (destptr, dest, srcptr, src));
/* Epilogue for the inline memcpy expansion: copy the remaining
   count & (max_size - 1) tail bytes.  Three strategies, chosen below:
   (1) constant COUNT — an unrolled jump-free sequence of emit_strmov;
   (2) variable COUNT, MAX_SIZE > 4 — fall back to a byte loop;
   (3) variable COUNT, small MAX_SIZE — a tree of alignment tests with
       one move per power-of-two size.
   NOTE(review): numbered listing with braces/`else`/declarations
   (offset, src, dest, tmp) stripped; verify against original source.  */
17758 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
17760 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17761 rtx destptr, rtx srcptr, rtx count, int max_size)
17764 if (CONST_INT_P (count))
17766 HOST_WIDE_INT countval = INTVAL (count);
/* Constant count: emit straight-line moves for each set bit of the
   remainder, largest chunk first.  The 16-byte case presumably only
   arises on 64-bit targets — TODO confirm (the branch ends in
   gcc_unreachable for the other configuration).  */
17769 if ((countval & 0x10) && max_size > 16)
17773 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17774 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17777 gcc_unreachable ();
17780 if ((countval & 0x08) && max_size > 8)
17783 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17786 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17787 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17791 if ((countval & 0x04) && max_size > 4)
17793 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17796 if ((countval & 0x02) && max_size > 2)
17798 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17801 if ((countval & 0x01) && max_size > 1)
17803 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large variable tail: mask COUNT down and loop a byte at a time.  */
17810 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17811 count, 1, OPTAB_DIRECT);
17812 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17813 count, QImode, 1, 4);
17817 /* When there are stringops, we can cheaply increase dest and src pointers.
17818 Otherwise we save code size by maintaining offset (zero is readily
17819 available from preceding rep operation) and using x86 addressing modes.
/* Variant A: single-instruction stringops advance the pointers for us.  */
17821 if (TARGET_SINGLE_STRINGOP)
17825 rtx label = ix86_expand_aligntest (count, 4, true);
17826 src = change_address (srcmem, SImode, srcptr);
17827 dest = change_address (destmem, SImode, destptr);
17828 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17829 emit_label (label);
17830 LABEL_NUSES (label) = 1;
17834 rtx label = ix86_expand_aligntest (count, 2, true);
17835 src = change_address (srcmem, HImode, srcptr);
17836 dest = change_address (destmem, HImode, destptr);
17837 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17838 emit_label (label);
17839 LABEL_NUSES (label) = 1;
17843 rtx label = ix86_expand_aligntest (count, 1, true);
17844 src = change_address (srcmem, QImode, srcptr);
17845 dest = change_address (destmem, QImode, destptr);
17846 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17847 emit_label (label);
17848 LABEL_NUSES (label) = 1;
/* Variant B: keep a running OFFSET register and use base+index
   addressing instead of bumping the pointers.  */
17853 rtx offset = force_reg (Pmode, const0_rtx);
17858 rtx label = ix86_expand_aligntest (count, 4, true);
17859 src = change_address (srcmem, SImode, srcptr);
17860 dest = change_address (destmem, SImode, destptr);
17861 emit_move_insn (dest, src);
17862 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17863 true, OPTAB_LIB_WIDEN);
17865 emit_move_insn (offset, tmp);
17866 emit_label (label);
17867 LABEL_NUSES (label) = 1;
17871 rtx label = ix86_expand_aligntest (count, 2, true);
17872 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17873 src = change_address (srcmem, HImode, tmp);
17874 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17875 dest = change_address (destmem, HImode, tmp);
17876 emit_move_insn (dest, src);
17877 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17878 true, OPTAB_LIB_WIDEN);
17880 emit_move_insn (offset, tmp);
17881 emit_label (label);
17882 LABEL_NUSES (label) = 1;
/* Final single byte — no need to update OFFSET afterwards.  */
17886 rtx label = ix86_expand_aligntest (count, 1, true);
17887 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17888 src = change_address (srcmem, QImode, tmp);
17889 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17890 dest = change_address (destmem, QImode, tmp);
17891 emit_move_insn (dest, src);
17892 emit_label (label);
17893 LABEL_NUSES (label) = 1;
/* Loop-based memset epilogue: mask COUNT down to the tail size and store
   it one byte at a time via the generic set/move loop expander.
   NOTE(review): numbered listing; return-type line, braces and the loop
   expander's trailing arguments are missing here.  */
17898 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
17900 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17901 rtx count, int max_size)
17904 expand_simple_binop (counter_mode (count), AND, count,
17905 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
/* NULL src pointer/MEM selects the "set" flavour of the shared loop.  */
17906 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17907 gen_lowpart (QImode, value), count, QImode,
/* Epilogue for the inline memset expansion: store the remaining
   count & (max_size - 1) tail bytes of VALUE.  Mirrors
   expand_movmem_epilogue: unrolled stores for constant COUNT, a byte
   loop for large variable tails, otherwise an alignment-test tree.
   VALUE is expected to arrive already promoted (replicated byte) for the
   wide stores — presumably via promote_duplicated_reg_to_size; TODO
   confirm at the caller.
   NOTE(review): numbered listing with braces/`else`/declarations
   (offset, dest) stripped; verify against original source.  */
17911 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
17913 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17917 if (CONST_INT_P (count))
17919 HOST_WIDE_INT countval = INTVAL (count);
/* Constant count: straight-line strset stores, largest chunk first.
   gen_strset advances DESTPTR, hence the back-to-back identical calls.  */
17922 if ((countval & 0x10) && max_size > 16)
17926 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17927 emit_insn (gen_strset (destptr, dest, value));
17928 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17929 emit_insn (gen_strset (destptr, dest, value));
17932 gcc_unreachable ();
17935 if ((countval & 0x08) && max_size > 8)
17939 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17940 emit_insn (gen_strset (destptr, dest, value));
17944 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17945 emit_insn (gen_strset (destptr, dest, value));
17946 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17947 emit_insn (gen_strset (destptr, dest, value));
17951 if ((countval & 0x04) && max_size > 4)
17953 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17954 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17957 if ((countval & 0x02) && max_size > 2)
17959 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17960 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17963 if ((countval & 0x01) && max_size > 1)
17965 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17966 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Large variable tail: defer to the byte-at-a-time loop epilogue.  */
17973 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Alignment-test tree: one conditional store per power-of-two size.  */
17978 rtx label = ix86_expand_aligntest (count, 16, true);
17981 dest = change_address (destmem, DImode, destptr);
17982 emit_insn (gen_strset (destptr, dest, value));
17983 emit_insn (gen_strset (destptr, dest, value));
17987 dest = change_address (destmem, SImode, destptr);
17988 emit_insn (gen_strset (destptr, dest, value));
17989 emit_insn (gen_strset (destptr, dest, value));
17990 emit_insn (gen_strset (destptr, dest, value));
17991 emit_insn (gen_strset (destptr, dest, value));
17993 emit_label (label);
17994 LABEL_NUSES (label) = 1;
17998 rtx label = ix86_expand_aligntest (count, 8, true);
18001 dest = change_address (destmem, DImode, destptr);
18002 emit_insn (gen_strset (destptr, dest, value));
18006 dest = change_address (destmem, SImode, destptr);
18007 emit_insn (gen_strset (destptr, dest, value));
18008 emit_insn (gen_strset (destptr, dest, value));
18010 emit_label (label);
18011 LABEL_NUSES (label) = 1;
18015 rtx label = ix86_expand_aligntest (count, 4, true);
18016 dest = change_address (destmem, SImode, destptr);
18017 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18018 emit_label (label);
18019 LABEL_NUSES (label) = 1;
18023 rtx label = ix86_expand_aligntest (count, 2, true);
18024 dest = change_address (destmem, HImode, destptr);
18025 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18026 emit_label (label);
18027 LABEL_NUSES (label) = 1;
18031 rtx label = ix86_expand_aligntest (count, 1, true);
18032 dest = change_address (destmem, QImode, destptr);
18033 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18034 emit_label (label);
18035 LABEL_NUSES (label) = 1;
/* Variable-count memcpy prologue: copy 1/2/4-byte pieces until DESTPTR
   reaches DESIRED_ALIGNMENT, testing the pointer's low bits at runtime
   and decrementing COUNT as we go.  Source alignment is not improved —
   only the destination is aligned.
   NOTE(review): numbered listing; return-type line and braces stripped.  */
18039 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
18040 DESIRED_ALIGNMENT. */
18042 expand_movmem_prologue (rtx destmem, rtx srcmem,
18043 rtx destptr, rtx srcptr, rtx count,
18044 int align, int desired_alignment)
/* Each step: skip the move when the destination is already aligned to
   the next power of two (aligntest jumps over the move).  */
18046 if (align <= 1 && desired_alignment > 1)
18048 rtx label = ix86_expand_aligntest (destptr, 1, false);
18049 srcmem = change_address (srcmem, QImode, srcptr);
18050 destmem = change_address (destmem, QImode, destptr);
18051 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18052 ix86_adjust_counter (count, 1);
18053 emit_label (label);
18054 LABEL_NUSES (label) = 1;
18056 if (align <= 2 && desired_alignment > 2)
18058 rtx label = ix86_expand_aligntest (destptr, 2, false);
18059 srcmem = change_address (srcmem, HImode, srcptr);
18060 destmem = change_address (destmem, HImode, destptr);
18061 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18062 ix86_adjust_counter (count, 2);
18063 emit_label (label);
18064 LABEL_NUSES (label) = 1;
18066 if (align <= 4 && desired_alignment > 4)
18068 rtx label = ix86_expand_aligntest (destptr, 4, false);
18069 srcmem = change_address (srcmem, SImode, srcptr);
18070 destmem = change_address (destmem, SImode, destptr);
18071 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18072 ix86_adjust_counter (count, 4);
18073 emit_label (label);
18074 LABEL_NUSES (label) = 1;
/* Alignments beyond 8 would need more steps than are emitted here.  */
18076 gcc_assert (desired_alignment <= 8);
18079 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
18080 ALIGN_BYTES is how many bytes need to be copied. */
18082 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
18083 int desired_align, int align_bytes)
18086 rtx src_size, dst_size;
18088 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
18089 if (src_align_bytes >= 0)
18090 src_align_bytes = desired_align - src_align_bytes;
18091 src_size = MEM_SIZE (src);
18092 dst_size = MEM_SIZE (dst);
18093 if (align_bytes & 1)
18095 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18096 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
18098 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18100 if (align_bytes & 2)
18102 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18103 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
18104 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18105 set_mem_align (dst, 2 * BITS_PER_UNIT);
18106 if (src_align_bytes >= 0
18107 && (src_align_bytes & 1) == (align_bytes & 1)
18108 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
18109 set_mem_align (src, 2 * BITS_PER_UNIT);
18111 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18113 if (align_bytes & 4)
18115 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18116 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
18117 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18118 set_mem_align (dst, 4 * BITS_PER_UNIT);
18119 if (src_align_bytes >= 0)
18121 unsigned int src_align = 0;
18122 if ((src_align_bytes & 3) == (align_bytes & 3))
18124 else if ((src_align_bytes & 1) == (align_bytes & 1))
18126 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18127 set_mem_align (src, src_align * BITS_PER_UNIT);
18130 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18132 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18133 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
18134 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18135 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18136 if (src_align_bytes >= 0)
18138 unsigned int src_align = 0;
18139 if ((src_align_bytes & 7) == (align_bytes & 7))
18141 else if ((src_align_bytes & 3) == (align_bytes & 3))
18143 else if ((src_align_bytes & 1) == (align_bytes & 1))
18145 if (src_align > (unsigned int) desired_align)
18146 src_align = desired_align;
18147 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18148 set_mem_align (src, src_align * BITS_PER_UNIT);
18151 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18153 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
/* Variable-count memset prologue: store 1/2/4-byte pieces of VALUE until
   DESTPTR reaches DESIRED_ALIGNMENT, testing the pointer's low bits at
   runtime and decrementing COUNT.  Memset twin of expand_movmem_prologue.
   NOTE(review): numbered listing; return-type line and braces stripped.  */
18158 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
18159 DESIRED_ALIGNMENT. */
18161 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
18162 int align, int desired_alignment)
18164 if (align <= 1 && desired_alignment > 1)
18166 rtx label = ix86_expand_aligntest (destptr, 1, false);
18167 destmem = change_address (destmem, QImode, destptr);
18168 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
18169 ix86_adjust_counter (count, 1);
18170 emit_label (label);
18171 LABEL_NUSES (label) = 1;
18173 if (align <= 2 && desired_alignment > 2)
18175 rtx label = ix86_expand_aligntest (destptr, 2, false);
18176 destmem = change_address (destmem, HImode, destptr);
18177 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
18178 ix86_adjust_counter (count, 2);
18179 emit_label (label);
18180 LABEL_NUSES (label) = 1;
18182 if (align <= 4 && desired_alignment > 4)
18184 rtx label = ix86_expand_aligntest (destptr, 4, false);
18185 destmem = change_address (destmem, SImode, destptr);
18186 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
18187 ix86_adjust_counter (count, 4);
18188 emit_label (label);
18189 LABEL_NUSES (label) = 1;
/* Only alignments up to 8 are handled by the three steps above.  */
18191 gcc_assert (desired_alignment <= 8);
/* Constant-count memset prologue: store exactly ALIGN_BYTES bytes of
   VALUE so that DST (address register DESTREG) becomes DESIRED_ALIGN
   aligned, keeping MEM align/size information accurate.  Memset twin of
   expand_constant_movmem_prologue; returns the adjusted DST.
   NOTE(review): numbered listing; return type, braces, the `off`
   bookkeeping and the trailing `return dst;` are stripped here.  */
18194 /* Set enough from DST to align DST known to by aligned by ALIGN to
18195 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
18197 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
18198 int desired_align, int align_bytes)
18201 rtx dst_size = MEM_SIZE (dst);
/* The bits of ALIGN_BYTES tell exactly which 1/2/4-byte stores to emit;
   gen_strset advances DESTREG after each one.  */
18202 if (align_bytes & 1)
18204 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18206 emit_insn (gen_strset (destreg, dst,
18207 gen_lowpart (QImode, value)));
18209 if (align_bytes & 2)
18211 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18212 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18213 set_mem_align (dst, 2 * BITS_PER_UNIT);
18215 emit_insn (gen_strset (destreg, dst,
18216 gen_lowpart (HImode, value)));
18218 if (align_bytes & 4)
18220 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18221 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18222 set_mem_align (dst, 4 * BITS_PER_UNIT);
18224 emit_insn (gen_strset (destreg, dst,
18225 gen_lowpart (SImode, value)));
/* Rewrite DST as BLKmode for the main loop, recording the alignment now
   guaranteed and the ALIGN_BYTES already consumed from its size.  */
18227 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18228 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18229 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18231 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18235 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
18236 static enum stringop_alg
18237 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
18238 int *dynamic_check)
18240 const struct stringop_algs * algs;
18241 bool optimize_for_speed;
18242 /* Algorithms using the rep prefix want at least edi and ecx;
18243 additionally, memset wants eax and memcpy wants esi. Don't
18244 consider such algorithms if the user has appropriated those
18245 registers for their own purposes. */
18246 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
18248 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
18250 #define ALG_USABLE_P(alg) (rep_prefix_usable \
18251 || (alg != rep_prefix_1_byte \
18252 && alg != rep_prefix_4_byte \
18253 && alg != rep_prefix_8_byte))
18254 const struct processor_costs *cost;
18256 /* Even if the string operation call is cold, we still might spend a lot
18257 of time processing large blocks. */
18258 if (optimize_function_for_size_p (cfun)
18259 || (optimize_insn_for_size_p ()
18260 && expected_size != -1 && expected_size < 256))
18261 optimize_for_speed = false;
18263 optimize_for_speed = true;
18265 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
18267 *dynamic_check = -1;
18269 algs = &cost->memset[TARGET_64BIT != 0];
18271 algs = &cost->memcpy[TARGET_64BIT != 0];
18272 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
18273 return stringop_alg;
18274 /* rep; movq or rep; movl is the smallest variant. */
18275 else if (!optimize_for_speed)
18277 if (!count || (count & 3))
18278 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
18280 return rep_prefix_usable ? rep_prefix_4_byte : loop;
18282 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
18284 else if (expected_size != -1 && expected_size < 4)
18285 return loop_1_byte;
18286 else if (expected_size != -1)
18289 enum stringop_alg alg = libcall;
18290 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
18292 /* We get here if the algorithms that were not libcall-based
18293 were rep-prefix based and we are unable to use rep prefixes
18294 based on global register usage. Break out of the loop and
18295 use the heuristic below. */
18296 if (algs->size[i].max == 0)
18298 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18300 enum stringop_alg candidate = algs->size[i].alg;
18302 if (candidate != libcall && ALG_USABLE_P (candidate))
18304 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18305 last non-libcall inline algorithm. */
18306 if (TARGET_INLINE_ALL_STRINGOPS)
18308 /* When the current size is best to be copied by a libcall,
18309 but we are still forced to inline, run the heuristic below
18310 that will pick code for medium sized blocks. */
18311 if (alg != libcall)
18315 else if (ALG_USABLE_P (candidate))
18319 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18321 /* When asked to inline the call anyway, try to pick meaningful choice.
18322 We look for maximal size of block that is faster to copy by hand and
18323 take blocks of at most of that size guessing that average size will
18324 be roughly half of the block.
18326 If this turns out to be bad, we might simply specify the preferred
18327 choice in ix86_costs. */
18328 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18329 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18332 enum stringop_alg alg;
18334 bool any_alg_usable_p = true;
18336 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
18338 enum stringop_alg candidate = algs->size[i].alg;
18339 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18341 if (candidate != libcall && candidate
18342 && ALG_USABLE_P (candidate))
18343 max = algs->size[i].max;
18345 /* If there aren't any usable algorithms, then recursing on
18346 smaller sizes isn't going to find anything. Just return the
18347 simple byte-at-a-time copy loop. */
18348 if (!any_alg_usable_p)
18350 /* Pick something reasonable. */
18351 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18352 *dynamic_check = 128;
18353 return loop_1_byte;
18357 alg = decide_alg (count, max / 2, memset, dynamic_check);
18358 gcc_assert (*dynamic_check == -1);
18359 gcc_assert (alg != libcall);
18360 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18361 *dynamic_check = max;
18364 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18365 #undef ALG_USABLE_P
/* Map the chosen string-op algorithm (plus current ALIGN and
   EXPECTED_SIZE) to the destination alignment the prologue should
   establish; never returns less than ALIGN, and gives up on extra
   aligning for very small expected sizes.
   NOTE(review): numbered listing; the switch statement, several case
   labels/bodies, `break`s and the EXPECTED_SIZE parameter line are
   stripped — verify against original source.  */
18368 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18369 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
18371 decide_alignment (int align,
18372 enum stringop_alg alg,
18375 int desired_align = 0;
18379 gcc_unreachable ();
18381 case unrolled_loop:
18382 desired_align = GET_MODE_SIZE (Pmode);
18384 case rep_prefix_8_byte:
18387 case rep_prefix_4_byte:
18388 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18389 copying whole cacheline at once. */
18390 if (TARGET_PENTIUMPRO)
18395 case rep_prefix_1_byte:
18396 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18397 copying whole cacheline at once. */
18398 if (TARGET_PENTIUMPRO)
/* Never lower the alignment we already have.  */
18412 if (desired_align < align)
18413 desired_align = align;
/* Tiny expected blocks: an alignment prologue would not pay off.  */
18414 if (expected_size != -1 && expected_size < 4)
18415 desired_align = align;
18416 return desired_align;
/* NOTE(review): the entire body of this helper is missing from the
   listing — only the comment and the name line survive.  Presumably it
   doubles a power of two until it exceeds VAL; restore from the original
   source before building.  */
18419 /* Return the smallest power of 2 greater than VAL. */
18421 smallest_pow2_greater_than (int val)
/* Top-level inline memcpy expander (movmem pattern).  Returns nonzero on
   success — the `int` return-type line and the `return 1;`/failure paths
   are among the lines stripped from this numbered listing, along with
   braces, `else`s, `break`s, case labels and several declarations
   (label, destreg, srcreg, dynamic_check, tmp).  Too gutted to restyle;
   annotated only.  Structure per the surviving step comments:
   step 0 choose algorithm/alignment, step 1 prologue guard (+ optional
   runtime libcall check), step 2 alignment prologue, step 3 main loop,
   step 4 epilogue.  */
18429 /* Expand string move (memcpy) operation. Use i386 string operations when
18430 profitable. expand_setmem contains similar code. The code depends upon
18431 architecture, block size and alignment, but always has the same
18434 1) Prologue guard: Conditional that jumps up to epilogues for small
18435 blocks that can be handled by epilogue alone. This is faster but
18436 also needed for correctness, since prologue assume the block is larger
18437 than the desired alignment.
18439 Optional dynamic check for size and libcall for large
18440 blocks is emitted here too, with -minline-stringops-dynamically.
18442 2) Prologue: copy first few bytes in order to get destination aligned
18443 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18444 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18445 We emit either a jump tree on power of two sized blocks, or a byte loop.
18447 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18448 with specified algorithm.
18450 4) Epilogue: code copying tail of the block that is too small to be
18451 handled by main body (or up to size guarded by prologue guard). */
18454 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18455 rtx expected_align_exp, rtx expected_size_exp)
18461 rtx jump_around_label = NULL;
18462 HOST_WIDE_INT align = 1;
18463 unsigned HOST_WIDE_INT count = 0;
18464 HOST_WIDE_INT expected_size = -1;
18465 int size_needed = 0, epilogue_size_needed;
18466 int desired_align = 0, align_bytes = 0;
18467 enum stringop_alg alg;
18469 bool need_zero_guard = false;
/* Derive the best known alignment from the explicit operand, the
   expected-alignment hint, or the destination MEM itself.  */
18471 if (CONST_INT_P (align_exp))
18472 align = INTVAL (align_exp);
18473 /* i386 can do misaligned access on reasonably increased cost. */
18474 if (CONST_INT_P (expected_align_exp)
18475 && INTVAL (expected_align_exp) > align)
18476 align = INTVAL (expected_align_exp);
18477 /* ALIGN is the minimum of destination and source alignment, but we care here
18478 just about destination alignment. */
18479 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18480 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18482 if (CONST_INT_P (count_exp))
18483 count = expected_size = INTVAL (count_exp);
18484 if (CONST_INT_P (expected_size_exp) && count == 0)
18485 expected_size = INTVAL (expected_size_exp);
18487 /* Make sure we don't need to care about overflow later on. */
18488 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18491 /* Step 0: Decide on preferred algorithm, desired alignment and
18492 size of chunks to be copied by main loop. */
18494 alg = decide_alg (count, expected_size, false, &dynamic_check);
18495 desired_align = decide_alignment (align, alg, expected_size);
18497 if (!TARGET_ALIGN_STRINGOPS)
18498 align = desired_align;
18500 if (alg == libcall)
18502 gcc_assert (alg != no_stringop);
18504 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18505 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18506 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Per-algorithm chunk size; loop algorithms must also guard against a
   zero iteration count (need_zero_guard).  */
18511 gcc_unreachable ();
18513 need_zero_guard = true;
18514 size_needed = GET_MODE_SIZE (Pmode);
18516 case unrolled_loop:
18517 need_zero_guard = true;
18518 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18520 case rep_prefix_8_byte:
18523 case rep_prefix_4_byte:
18526 case rep_prefix_1_byte:
18530 need_zero_guard = true;
18535 epilogue_size_needed = size_needed;
18537 /* Step 1: Prologue guard. */
18539 /* Alignment code needs count to be in register. */
18540 if (CONST_INT_P (count_exp) && desired_align > align)
18542 if (INTVAL (count_exp) > desired_align
18543 && INTVAL (count_exp) > size_needed)
18546 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18547 if (align_bytes <= 0)
18550 align_bytes = desired_align - align_bytes;
18552 if (align_bytes == 0)
18553 count_exp = force_reg (counter_mode (count_exp), count_exp);
18555 gcc_assert (desired_align >= 1 && align >= 1);
18557 /* Ensure that alignment prologue won't copy past end of block. */
18558 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18560 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18561 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
18562 Make sure it is power of 2. */
18563 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18567 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18569 /* If main algorithm works on QImode, no epilogue is needed.
18570 For small sizes just don't align anything. */
18571 if (size_needed == 1)
18572 desired_align = align;
/* Runtime guard: jump to the epilogue when the block is smaller than
   one full chunk.  */
18579 label = gen_label_rtx ();
18580 emit_cmp_and_jump_insns (count_exp,
18581 GEN_INT (epilogue_size_needed),
18582 LTU, 0, counter_mode (count_exp), 1, label);
18583 if (expected_size == -1 || expected_size < epilogue_size_needed)
18584 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18586 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18590 /* Emit code to decide on runtime whether library call or inline should be
18592 if (dynamic_check != -1)
18594 if (CONST_INT_P (count_exp))
18596 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18598 emit_block_move_via_libcall (dst, src, count_exp, false);
18599 count_exp = const0_rtx;
/* Variable count: call the library for big blocks, jump around it for
   small (predicted-hot) ones.  */
18605 rtx hot_label = gen_label_rtx ();
18606 jump_around_label = gen_label_rtx ();
18607 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18608 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18609 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18610 emit_block_move_via_libcall (dst, src, count_exp, false);
18611 emit_jump (jump_around_label);
18612 emit_label (hot_label);
18616 /* Step 2: Alignment prologue. */
18618 if (desired_align > align)
18620 if (align_bytes == 0)
18622 /* Except for the first move in epilogue, we no longer know
18623 constant offset in aliasing info. It don't seems to worth
18624 the pain to maintain it for the first move, so throw away
18626 src = change_address (src, BLKmode, srcreg);
18627 dst = change_address (dst, BLKmode, destreg);
18628 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18633 /* If we know how many bytes need to be stored before dst is
18634 sufficiently aligned, maintain aliasing info accurately. */
18635 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18636 desired_align, align_bytes);
18637 count_exp = plus_constant (count_exp, -align_bytes);
18638 count -= align_bytes;
18640 if (need_zero_guard
18641 && (count < (unsigned HOST_WIDE_INT) size_needed
18642 || (align_bytes == 0
18643 && count < ((unsigned HOST_WIDE_INT) size_needed
18644 + desired_align - align))))
18646 /* It is possible that we copied enough so the main loop will not
18648 gcc_assert (size_needed > 1);
18649 if (label == NULL_RTX)
18650 label = gen_label_rtx ();
18651 emit_cmp_and_jump_insns (count_exp,
18652 GEN_INT (size_needed),
18653 LTU, 0, counter_mode (count_exp), 1, label);
18654 if (expected_size == -1
18655 || expected_size < (desired_align - align) / 2 + size_needed)
18656 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18658 predict_jump (REG_BR_PROB_BASE * 60 / 100);
/* A byte-granular main loop needs no epilogue; bind the guard label
   here in that case.  */
18661 if (label && size_needed == 1)
18663 emit_label (label);
18664 LABEL_NUSES (label) = 1;
18666 epilogue_size_needed = 1;
18668 else if (label == NULL_RTX)
18669 epilogue_size_needed = size_needed;
18671 /* Step 3: Main loop. */
18677 gcc_unreachable ();
18679 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18680 count_exp, QImode, 1, expected_size);
18683 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18684 count_exp, Pmode, 1, expected_size);
18686 case unrolled_loop:
18687 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18688 registers for 4 temporaries anyway. */
18689 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18690 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18693 case rep_prefix_8_byte:
18694 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18697 case rep_prefix_4_byte:
18698 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18701 case rep_prefix_1_byte:
18702 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18706 /* Adjust properly the offset of src and dest memory for aliasing. */
18707 if (CONST_INT_P (count_exp))
18709 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18710 (count / size_needed) * size_needed);
18711 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18712 (count / size_needed) * size_needed);
18716 src = change_address (src, BLKmode, srcreg);
18717 dst = change_address (dst, BLKmode, destreg);
18720 /* Step 4: Epilogue to copy the remaining bytes. */
18724 /* When the main loop is done, COUNT_EXP might hold original count,
18725 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18726 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18727 bytes. Compensate if needed. */
18729 if (size_needed < epilogue_size_needed)
18732 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18733 GEN_INT (size_needed - 1), count_exp, 1,
18735 if (tmp != count_exp)
18736 emit_move_insn (count_exp, tmp);
18738 emit_label (label);
18739 LABEL_NUSES (label) = 1;
18742 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18743 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18744 epilogue_size_needed);
18745 if (jump_around_label)
18746 emit_label (jump_around_label);
/* Replicate the low byte of VAL across an SImode or DImode register
   (0xXY -> 0xXYXYXYXY...), for use as the wide store value in memset
   expansion.  Constants are folded at compile time; otherwise either a
   multiply by 0x0101... or an insv/shift-or sequence is emitted,
   whichever the cost tables favour.
   NOTE(review): numbered listing; return type, braces, `else` arms and
   several `return`/`v |= v << 8`-style lines are stripped — e.g. the
   SImode early-return before the final 32-bit shift is missing.  */
18750 /* Helper function for memcpy. For QImode value 0xXY produce
18751 0xXYXYXYXY of wide specified by MODE. This is essentially
18752 a * 0x10101010, but we can do slightly better than
18753 synth_mult by unwinding the sequence by hand on CPUs with
18756 promote_duplicated_reg (enum machine_mode mode, rtx val)
18758 enum machine_mode valmode = GET_MODE (val);
18760 int nops = mode == DImode ? 3 : 2;
18762 gcc_assert (mode == SImode || mode == DImode);
18763 if (val == const0_rtx)
18764 return copy_to_mode_reg (mode, const0_rtx);
/* Compile-time constant: replicate the byte arithmetically.  */
18765 if (CONST_INT_P (val))
18767 HOST_WIDE_INT v = INTVAL (val) & 255;
18771 if (mode == DImode)
18772 v |= (v << 16) << 16;
18773 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18776 if (valmode == VOIDmode)
18778 if (valmode != QImode)
18779 val = gen_lowpart (QImode, val);
18780 if (mode == QImode)
/* Cost check: is multiplying by 0x0101... cheaper than the shift/or
   (or insv) unrolled sequence below?  */
18782 if (!TARGET_PARTIAL_REG_STALL)
18784 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18785 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18786 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18787 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18789 rtx reg = convert_modes (mode, QImode, val, true);
18790 tmp = promote_duplicated_reg (mode, const1_rtx);
18791 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
/* Hand-unrolled replication: duplicate byte->word via insv when partial
   register stalls are not a problem, then widen by shift-and-or.  */
18796 rtx reg = convert_modes (mode, QImode, val, true);
18798 if (!TARGET_PARTIAL_REG_STALL)
18799 if (mode == SImode)
18800 emit_insn (gen_movsi_insv_1 (reg, reg));
18802 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18805 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18806 NULL, 1, OPTAB_DIRECT);
18808 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18810 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18811 NULL, 1, OPTAB_DIRECT);
18812 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18813 if (mode == SImode)
18815 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18816 NULL, 1, OPTAB_DIRECT);
18817 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* Pick the widest replication of VAL that the memset expansion will
   need: DImode if any 8-byte stores will happen (main loop chunks of
   SIZE_NEEDED or prologue aligning past 4), else SImode, else HImode,
   else VAL unchanged.
   NOTE(review): numbered listing; return type, braces, the
   `rtx promoted_val;` declaration and the TARGET_64BIT half of the first
   condition are stripped.  */
18822 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18823 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18824 alignment from ALIGN to DESIRED_ALIGN. */
18826 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18831 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18832 promoted_val = promote_duplicated_reg (DImode, val);
18833 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18834 promoted_val = promote_duplicated_reg (SImode, val);
18835 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18836 promoted_val = promote_duplicated_reg (HImode, val);
18838 promoted_val = val;
18840 return promoted_val;
18843 /* Expand string clear operation (bzero). Use i386 string operations when
18844 profitable. See expand_movmem comment for explanation of individual
18845 steps performed. */
/* NOTE(review): numbered fragmentary extract -- interior lines elided
   (return type, braces, several switch cases and returns); code text
   kept verbatim, comments only added.  */
18847 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18848 rtx expected_align_exp, rtx expected_size_exp)
18853 rtx jump_around_label = NULL;
18854 HOST_WIDE_INT align = 1;
18855 unsigned HOST_WIDE_INT count = 0;
18856 HOST_WIDE_INT expected_size = -1;
18857 int size_needed = 0, epilogue_size_needed;
18858 int desired_align = 0, align_bytes = 0;
18859 enum stringop_alg alg;
18860 rtx promoted_val = NULL;
18861 bool force_loopy_epilogue = false;
18863 bool need_zero_guard = false;
/* Collect compile-time knowledge: alignment, count and expected size,
   preferring larger caller-provided expected alignment.  */
18865 if (CONST_INT_P (align_exp))
18866 align = INTVAL (align_exp);
18867 /* i386 can do misaligned access on reasonably increased cost. */
18868 if (CONST_INT_P (expected_align_exp)
18869 && INTVAL (expected_align_exp) > align)
18870 align = INTVAL (expected_align_exp);
18871 if (CONST_INT_P (count_exp))
18872 count = expected_size = INTVAL (count_exp);
18873 if (CONST_INT_P (expected_size_exp) && count == 0)
18874 expected_size = INTVAL (expected_size_exp);
18876 /* Make sure we don't need to care about overflow later on. */
18877 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18880 /* Step 0: Decide on preferred algorithm, desired alignment and
18881 size of chunks to be copied by main loop. */
18883 alg = decide_alg (count, expected_size, true, &dynamic_check);
18884 desired_align = decide_alignment (align, alg, expected_size);
18886 if (!TARGET_ALIGN_STRINGOPS)
18887 align = desired_align;
18889 if (alg == libcall)
18891 gcc_assert (alg != no_stringop);
18893 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18894 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* SIZE_NEEDED = bytes stored per main-loop iteration for each
   algorithm; loops also need a guard against a zero-trip count.  */
18899 gcc_unreachable ();
18901 need_zero_guard = true;
18902 size_needed = GET_MODE_SIZE (Pmode);
18904 case unrolled_loop:
18905 need_zero_guard = true;
18906 size_needed = GET_MODE_SIZE (Pmode) * 4;
18908 case rep_prefix_8_byte:
18911 case rep_prefix_4_byte:
18914 case rep_prefix_1_byte:
18918 need_zero_guard = true;
18922 epilogue_size_needed = size_needed;
18924 /* Step 1: Prologue guard. */
18926 /* Alignment code needs count to be in register. */
18927 if (CONST_INT_P (count_exp) && desired_align > align)
18929 if (INTVAL (count_exp) > desired_align
18930 && INTVAL (count_exp) > size_needed)
18933 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18934 if (align_bytes <= 0)
18937 align_bytes = desired_align - align_bytes;
18939 if (align_bytes == 0)
18941 enum machine_mode mode = SImode;
18942 if (TARGET_64BIT && (count & ~0xffffffff))
18944 count_exp = force_reg (mode, count_exp);
18947 /* Do the cheap promotion to allow better CSE across the
18948 main loop and epilogue (ie one load of the big constant in the
18949 front of all code. */
18950 if (CONST_INT_P (val_exp))
18951 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18952 desired_align, align);
18953 /* Ensure that alignment prologue won't copy past end of block. */
18954 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18956 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18957 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18958 Make sure it is power of 2. */
18959 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18961 /* To improve performance of small blocks, we jump around the VAL
18962 promoting mode. This mean that if the promoted VAL is not constant,
18963 we might not use it in the epilogue and have to use byte
18965 if (epilogue_size_needed > 2 && !promoted_val)
18966 force_loopy_epilogue = true;
/* Small known count: skip alignment for QImode algorithms.  */
18969 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18971 /* If main algorithm works on QImode, no epilogue is needed.
18972 For small sizes just don't align anything. */
18973 if (size_needed == 1)
18974 desired_align = align;
/* Unknown count: branch to the epilogue for blocks smaller than
   one main-loop chunk, with a branch-probability hint.  */
18981 label = gen_label_rtx ();
18982 emit_cmp_and_jump_insns (count_exp,
18983 GEN_INT (epilogue_size_needed),
18984 LTU, 0, counter_mode (count_exp), 1, label);
18985 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18986 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18988 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* decide_alg requested a runtime size check: large blocks go to the
   library call, small ones fall through to the inline expansion.  */
18991 if (dynamic_check != -1)
18993 rtx hot_label = gen_label_rtx ();
18994 jump_around_label = gen_label_rtx ();
18995 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18996 LEU, 0, counter_mode (count_exp), 1, hot_label);
18997 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18998 set_storage_via_libcall (dst, count_exp, val_exp, false);
18999 emit_jump (jump_around_label);
19000 emit_label (hot_label);
19003 /* Step 2: Alignment prologue. */
19005 /* Do the expensive promotion once we branched off the small blocks. */
19007 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19008 desired_align, align);
19009 gcc_assert (desired_align >= 1 && align >= 1);
19011 if (desired_align > align)
19013 if (align_bytes == 0)
19015 /* Except for the first move in epilogue, we no longer know
19016 constant offset in aliasing info. It don't seems to worth
19017 the pain to maintain it for the first move, so throw away
19019 dst = change_address (dst, BLKmode, destreg);
19020 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
19025 /* If we know how many bytes need to be stored before dst is
19026 sufficiently aligned, maintain aliasing info accurately. */
19027 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
19028 desired_align, align_bytes);
19029 count_exp = plus_constant (count_exp, -align_bytes);
19030 count -= align_bytes;
/* The prologue may have stored enough bytes that the main loop would
   now run zero times; re-check the remaining count if so.  */
19032 if (need_zero_guard
19033 && (count < (unsigned HOST_WIDE_INT) size_needed
19034 || (align_bytes == 0
19035 && count < ((unsigned HOST_WIDE_INT) size_needed
19036 + desired_align - align))))
19038 /* It is possible that we copied enough so the main loop will not
19040 gcc_assert (size_needed > 1);
19041 if (label == NULL_RTX)
19042 label = gen_label_rtx ();
19043 emit_cmp_and_jump_insns (count_exp,
19044 GEN_INT (size_needed),
19045 LTU, 0, counter_mode (count_exp), 1, label);
19046 if (expected_size == -1
19047 || expected_size < (desired_align - align) / 2 + size_needed)
19048 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19050 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19053 if (label && size_needed == 1)
19055 emit_label (label);
19056 LABEL_NUSES (label) = 1;
19058 promoted_val = val_exp;
19059 epilogue_size_needed = 1;
19061 else if (label == NULL_RTX)
19062 epilogue_size_needed = size_needed;
19064 /* Step 3: Main loop. */
19070 gcc_unreachable ();
19072 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19073 count_exp, QImode, 1, expected_size);
19076 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19077 count_exp, Pmode, 1, expected_size);
19079 case unrolled_loop:
19080 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19081 count_exp, Pmode, 4, expected_size);
19083 case rep_prefix_8_byte:
19084 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19087 case rep_prefix_4_byte:
19088 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19091 case rep_prefix_1_byte:
19092 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19096 /* Adjust properly the offset of src and dest memory for aliasing. */
19097 if (CONST_INT_P (count_exp))
19098 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19099 (count / size_needed) * size_needed);
19101 dst = change_address (dst, BLKmode, destreg);
19103 /* Step 4: Epilogue to copy the remaining bytes. */
19107 /* When the main loop is done, COUNT_EXP might hold original count,
19108 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
19109 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
19110 bytes. Compensate if needed. */
19112 if (size_needed < epilogue_size_needed)
19115 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19116 GEN_INT (size_needed - 1), count_exp, 1,
19118 if (tmp != count_exp)
19119 emit_move_insn (count_exp, tmp);
19121 emit_label (label);
19122 LABEL_NUSES (label) = 1;
19125 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19127 if (force_loopy_epilogue)
19128 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
19129 epilogue_size_needed)
19131 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
19132 epilogue_size_needed);
19134 if (jump_around_label)
19135 emit_label (jump_around_label);
19139 /* Expand the appropriate insns for doing strlen if not just doing
19142 out = result, initialized with the start address
19143 align_rtx = alignment of the address.
19144 scratch = scratch register, initialized with the startaddress when
19145 not aligned, otherwise undefined
19147 This is just the body. It needs the initializations mentioned above and
19148 some address computing at the end. These things are done in i386.md. */
/* NOTE(review): numbered fragmentary extract -- interior lines elided;
   code text kept verbatim, comments only added.  */
19151 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
19155 rtx align_2_label = NULL_RTX;
19156 rtx align_3_label = NULL_RTX;
19157 rtx align_4_label = gen_label_rtx ();
19158 rtx end_0_label = gen_label_rtx ();
19160 rtx tmpreg = gen_reg_rtx (SImode);
19161 rtx scratch = gen_reg_rtx (SImode);
19165 if (CONST_INT_P (align_rtx))
19166 align = INTVAL (align_rtx);
19168 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
19170 /* Is there a known alignment and is it less than 4? */
19173 rtx scratch1 = gen_reg_rtx (Pmode);
19174 emit_move_insn (scratch1, out);
19175 /* Is there a known alignment and is it not 2? */
19178 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
19179 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
19181 /* Leave just the 3 lower bits. */
19182 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
19183 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (address & 3): 0 -> already aligned, 2 -> one pair of
   bytes to check, 1 or 3 -> up to three bytes to check.  */
19185 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19186 Pmode, 1, align_4_label);
19187 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
19188 Pmode, 1, align_2_label);
19189 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
19190 Pmode, 1, align_3_label);
19194 /* Since the alignment is 2, we have to check 2 or 0 bytes;
19195 check if is aligned to 4 - byte. */
19197 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
19198 NULL_RTX, 0, OPTAB_WIDEN);
19200 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19201 Pmode, 1, align_4_label);
19204 mem = change_address (src, QImode, out);
19206 /* Now compare the bytes. */
19208 /* Compare the first n unaligned byte on a byte per byte basis. */
19209 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
19210 QImode, 1, end_0_label);
19212 /* Increment the address. */
19213 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19215 /* Not needed with an alignment of 2 */
19218 emit_label (align_2_label);
19220 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19223 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19225 emit_label (align_3_label);
19228 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19231 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19234 /* Generate loop to check 4 bytes at a time. It is not a good idea to
19235 align this loop. It gives only huge programs, but does not help to
19237 emit_label (align_4_label);
19239 mem = change_address (src, SImode, out);
19240 emit_move_insn (scratch, mem);
19241 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
19243 /* This formula yields a nonzero result iff one of the bytes is zero.
19244 This saves three branches inside loop and many cycles. */
19246 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
19247 emit_insn (gen_one_cmplsi2 (scratch, scratch));
19248 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
19249 emit_insn (gen_andsi3 (tmpreg, tmpreg,
19250 gen_int_mode (0x80808080, SImode)));
19251 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found somewhere in the last 4 bytes; locate it
   branchlessly (cmov path) or with one branch (non-cmov path).  */
19256 rtx reg = gen_reg_rtx (SImode);
19257 rtx reg2 = gen_reg_rtx (Pmode);
19258 emit_move_insn (reg, tmpreg);
19259 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
19261 /* If zero is not in the first two bytes, move two bytes forward. */
19262 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19263 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19264 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19265 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
19266 gen_rtx_IF_THEN_ELSE (SImode, tmp,
19269 /* Emit lea manually to avoid clobbering of flags. */
19270 emit_insn (gen_rtx_SET (SImode, reg2,
19271 gen_rtx_PLUS (Pmode, out, const2_rtx)));
19273 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19274 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19275 emit_insn (gen_rtx_SET (VOIDmode, out,
19276 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
19283 rtx end_2_label = gen_label_rtx ();
19284 /* Is zero in the first two bytes? */
19286 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19287 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19288 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19289 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19290 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
19292 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19293 JUMP_LABEL (tmp) = end_2_label;
19295 /* Not in the first two. Move two bytes forward. */
19296 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19297 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
19299 emit_label (end_2_label);
19303 /* Avoid branch in fixing the byte. */
19304 tmpreg = gen_lowpart (QImode, tmpreg);
19305 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19306 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
19307 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
19309 emit_label (end_0_label);
19312 /* Expand strlen. */
/* NOTE(review): numbered fragmentary extract -- interior lines elided
   (return type, braces, early return, final return); code text kept
   verbatim, comments only added.
   OUT receives the length; SRC is the string; EOSCHAR is the terminator
   (only const0_rtx is handled inline); ALIGN is the known alignment.  */
19315 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19317 rtx addr, scratch1, scratch2, scratch3, scratch4;
19319 /* The generic case of strlen expander is long. Avoid it's
19320 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
19322 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19323 && !TARGET_INLINE_ALL_STRINGOPS
19324 && !optimize_insn_for_size_p ()
19325 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19328 addr = force_reg (Pmode, XEXP (src, 0));
19329 scratch1 = gen_reg_rtx (Pmode);
19331 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19332 && !optimize_insn_for_size_p ())
19334 /* Well it seems that some optimizer does not combine a call like
19335 foo(strlen(bar), strlen(bar));
19336 when the move and the subtraction is done here. It does calculate
19337 the length just once when these instructions are done inside of
19338 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
19339 often used and I use one fewer register for the lifetime of
19340 output_strlen_unroll() this is better. */
19342 emit_move_insn (out, addr);
19344 ix86_expand_strlensi_unroll_1 (out, src, align);
19346 /* strlensi_unroll_1 returns the address of the zero at the end of
19347 the string, like memchr(), so compute the length by subtracting
19348 the start address. */
19349 emit_insn ((*ix86_gen_sub3) (out, out, addr));
/* Fallback path: repne scasb via the strlenqi_1 pattern.  */
19355 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19356 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19359 scratch2 = gen_reg_rtx (Pmode);
19360 scratch3 = gen_reg_rtx (Pmode);
19361 scratch4 = force_reg (Pmode, constm1_rtx);
19363 emit_move_insn (scratch3, addr);
19364 eoschar = force_reg (QImode, eoschar);
19366 src = replace_equiv_address_nv (src, scratch3);
19368 /* If .md starts supporting :P, this can be done in .md. */
19369 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19370 scratch4), UNSPEC_SCAS);
19371 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* scas leaves -(len + 2) in the counter; NOT then add -1 recovers len.  */
19372 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19373 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19378 /* For given symbol (function) construct code to compute address of it's PLT
19379 entry in large x86-64 PIC model. */
/* NOTE(review): numbered fragmentary extract -- braces and the final
   "return tmp;" line are elided; code text kept verbatim.
   Emits: tmp = PLTOFF(symbol); tmp += PIC register; yielding the PLT
   entry address for the CM_LARGE_PIC code model.  */
19381 construct_plt_address (rtx symbol)
19383 rtx tmp = gen_reg_rtx (Pmode);
19384 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19386 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19387 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19389 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19390 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx))
/* NOTE(review): numbered fragmentary extract -- interior lines elided
   (return type, braces, the callarg2 parameter in the signature, several
   conditions); code text kept verbatim, comments only added.
   Emits a call (or sibcall) to FNADDR with return value RETVAL, argument
   bytes CALLARG1, optional stack-pop amount POP.  */
19395 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19397 rtx pop, int sibcall)
19399 rtx use = NULL, call;
19401 if (pop == const0_rtx)
19403 gcc_assert (!TARGET_64BIT || !pop);
19405 if (TARGET_MACHO && !TARGET_64BIT)
19408 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19409 fnaddr = machopic_indirect_call_target (fnaddr);
19414 /* Static functions and indirect calls don't need the pic register. */
19415 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19416 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19417 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19418 use_reg (&use, pic_offset_table_rtx);
/* x86-64 varargs ABI: AL carries the number of SSE registers used.  */
19421 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19423 rtx al = gen_rtx_REG (QImode, AX_REG);
19424 emit_move_insn (al, callarg2);
19425 use_reg (&use, al);
19428 if (ix86_cmodel == CM_LARGE_PIC
19430 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19431 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19432 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19433 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
19435 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19436 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls cannot use an arbitrary register: force the target
   address into R11, a call-clobbered register not used for argument
   passing.  */
19438 if (sibcall && TARGET_64BIT
19439 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
19442 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19443 fnaddr = gen_rtx_REG (Pmode, R11_REG);
19444 emit_move_insn (fnaddr, addr);
19445 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19448 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19450 call = gen_rtx_SET (VOIDmode, retval, call);
/* Fold a caller stack pop into the call pattern as a PARALLEL.  */
19453 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19454 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19455 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19458 && ix86_cfun_abi () == MS_ABI
19459 && (!callarg2 || INTVAL (callarg2) != -2))
19461 /* We need to represent that SI and DI registers are clobbered
19463 static int clobbered_registers[] = {
19464 XMM6_REG, XMM7_REG, XMM8_REG,
19465 XMM9_REG, XMM10_REG, XMM11_REG,
19466 XMM12_REG, XMM13_REG, XMM14_REG,
19467 XMM15_REG, SI_REG, DI_REG
19470 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19471 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19472 UNSPEC_MS_TO_SYSV_CALL);
19476 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19477 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19480 (SSE_REGNO_P (clobbered_registers[i])
19482 clobbered_registers[i]));
19484 call = gen_rtx_PARALLEL (VOIDmode,
19485 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19489 call = emit_call_insn (call);
19491 CALL_INSN_FUNCTION_USAGE (call) = use;
19495 /* Clear stack slot assignments remembered from previous functions.
19496 This is called from INIT_EXPANDERS once before RTL is emitted for each
19499 static struct machine_function *
19500 ix86_init_machine_status (void)
/* NOTE(review): fragmentary extract -- braces and "return f;" elided.
   Allocates a zeroed per-function machine_function in GC memory and
   sets the few fields whose default is not zero.  */
19502 struct machine_function *f;
19504 f = GGC_CNEW (struct machine_function);
19505 f->use_fast_prologue_epilogue_nregs = -1;
19506 f->tls_descriptor_call_expanded_p = 0;
19507 f->call_abi = ix86_abi;
19512 /* Return a MEM corresponding to a stack slot with mode MODE.
19513 Allocate a new slot if necessary.
19515 The RTL for a function can have several slots available: N is
19516 which slot to use. */
/* NOTE(review): fragmentary extract -- return type, braces and the
   trailing "return s->rtl;" are elided; code text kept verbatim.
   Slots are cached per (mode, n) on the ix86_stack_locals list so
   repeated requests reuse the same stack location.  */
19519 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19521 struct stack_local_entry *s;
19523 gcc_assert (n < MAX_386_STACK_LOCALS);
19525 /* Virtual slot is valid only before vregs are instantiated. */
19526 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
19528 for (s = ix86_stack_locals; s; s = s->next)
19529 if (s->mode == mode && s->n == n)
19530 return copy_rtx (s->rtl);
19532 s = (struct stack_local_entry *)
19533 ggc_alloc (sizeof (struct stack_local_entry));
19536 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19538 s->next = ix86_stack_locals;
19539 ix86_stack_locals = s;
19543 /* Construct the SYMBOL_REF for the tls_get_addr function. */
19545 static GTY(()) rtx ix86_tls_symbol;
/* NOTE(review): fragmentary extract -- return type, braces and the
   selection between GNU/SUN TLS names are partially elided.
   Lazily creates and caches the SYMBOL_REF in the GC root above.  */
19547 ix86_tls_get_addr (void)
19550 if (!ix86_tls_symbol)
19552 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19553 (TARGET_ANY_GNU_TLS
19555 ? "___tls_get_addr"
19556 : "__tls_get_addr");
19559 return ix86_tls_symbol;
19562 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19564 static GTY(()) rtx ix86_tls_module_base_symbol;
/* NOTE(review): fragmentary extract -- return type and braces elided.
   Lazily creates the cached SYMBOL_REF and marks it with the
   global-dynamic TLS model flag so later code treats it as TLS.  */
19566 ix86_tls_module_base (void)
19569 if (!ix86_tls_module_base_symbol)
19571 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19572 "_TLS_MODULE_BASE_");
19573 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19574 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19577 return ix86_tls_module_base_symbol;
19580 /* Calculate the length of the memory address in the instruction
19581 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* NOTE(review): numbered fragmentary extract -- interior lines elided
   (return type, braces, the len accumulation and return statements);
   code text kept verbatim, comments only added.  */
19584 memory_address_length (rtx addr)
19586 struct ix86_address parts;
19587 rtx base, index, disp;
/* Autoincrement forms encode no extra address bytes.  */
19591 if (GET_CODE (addr) == PRE_DEC
19592 || GET_CODE (addr) == POST_INC
19593 || GET_CODE (addr) == PRE_MODIFY
19594 || GET_CODE (addr) == POST_MODIFY)
19597 ok = ix86_decompose_address (addr, &parts);
19600 if (parts.base && GET_CODE (parts.base) == SUBREG)
19601 parts.base = SUBREG_REG (parts.base);
19602 if (parts.index && GET_CODE (parts.index) == SUBREG)
19603 parts.index = SUBREG_REG (parts.index);
19606 index = parts.index;
/* ModRM/SIB encoding quirks accounted for below:  */
19611 - esp as the base always wants an index,
19612 - ebp as the base always wants a displacement,
19613 - r12 as the base always wants an index,
19614 - r13 as the base always wants a displacement. */
19616 /* Register Indirect. */
19617 if (base && !index && !disp)
19619 /* esp (for its index) and ebp (for its displacement) need
19620 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
19623 && (addr == arg_pointer_rtx
19624 || addr == frame_pointer_rtx
19625 || REGNO (addr) == SP_REG
19626 || REGNO (addr) == BP_REG
19627 || REGNO (addr) == R12_REG
19628 || REGNO (addr) == R13_REG))
19632 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
19633 is not disp32, but disp32(%rip), so for disp32
19634 SIB byte is needed, unless print_operand_address
19635 optimizes it into disp32(%rip) or (%rip) is implied
19637 else if (disp && !base && !index)
19644 if (GET_CODE (disp) == CONST)
19645 symbol = XEXP (disp, 0);
19646 if (GET_CODE (symbol) == PLUS
19647 && CONST_INT_P (XEXP (symbol, 1)))
19648 symbol = XEXP (symbol, 0);
19650 if (GET_CODE (symbol) != LABEL_REF
19651 && (GET_CODE (symbol) != SYMBOL_REF
19652 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
19653 && (GET_CODE (symbol) != UNSPEC
19654 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
19655 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
19662 /* Find the length of the displacement constant. */
/* satisfies_constraint_K is the signed 8-bit immediate check, i.e.
   disp8 vs disp32.  */
19665 if (base && satisfies_constraint_K (disp))
19670 /* ebp always wants a displacement. Similarly r13. */
19671 else if (REG_P (base)
19672 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
19675 /* An index requires the two-byte modrm form.... */
19677 /* ...like esp (or r12), which always wants an index. */
19678 || base == arg_pointer_rtx
19679 || base == frame_pointer_rtx
19681 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
19698 /* Compute default value for "length_immediate" attribute. When SHORTFORM
19699 is set, expect that insn have 8bit immediate alternative. */
/* NOTE(review): numbered fragmentary extract -- interior lines elided
   (return type, braces, most of the mode switch and return values);
   code text kept verbatim, comments only added.  */
19701 ix86_attr_length_immediate_default (rtx insn, int shortform)
19705 extract_insn_cached (insn);
19706 for (i = recog_data.n_operands - 1; i >= 0; --i)
19707 if (CONSTANT_P (recog_data.operand[i]))
19709 enum attr_mode mode = get_attr_mode (insn);
/* With SHORTFORM, an immediate that fits in a signed byte after
   truncation to the operand mode uses the 1-byte encoding.  */
19712 if (shortform && CONST_INT_P (recog_data.operand[i]))
19714 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
19721 ival = trunc_int_for_mode (ival, HImode);
19724 ival = trunc_int_for_mode (ival, SImode);
19729 if (IN_RANGE (ival, -128, 127))
19746 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
19751 fatal_insn ("unknown insn mode", insn);
19756 /* Compute default value for "length_address" attribute. */
/* NOTE(review): numbered fragmentary extract -- interior lines elided
   (return type, braces, "return 0;" fallthroughs); code text kept
   verbatim, comments only added.  */
19758 ix86_attr_length_address_default (rtx insn)
/* LEA encodes its "memory" operand in the SET_SRC, not in a MEM, so
   handle it specially before scanning operands.  */
19762 if (get_attr_type (insn) == TYPE_LEA)
19764 rtx set = PATTERN (insn), addr;
19766 if (GET_CODE (set) == PARALLEL)
19767 set = XVECEXP (set, 0, 0);
19769 gcc_assert (GET_CODE (set) == SET);
19771 addr = SET_SRC (set);
19772 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
19774 if (GET_CODE (addr) == ZERO_EXTEND)
19775 addr = XEXP (addr, 0);
19776 if (GET_CODE (addr) == SUBREG)
19777 addr = SUBREG_REG (addr);
19780 return memory_address_length (addr);
19783 extract_insn_cached (insn);
19784 for (i = recog_data.n_operands - 1; i >= 0; --i)
19785 if (MEM_P (recog_data.operand[i]))
19787 constrain_operands_cached (reload_completed);
19788 if (which_alternative != -1)
19790 const char *constraints = recog_data.constraints[i];
19791 int alt = which_alternative;
/* Walk to the constraint string of the matched alternative.  */
19793 while (*constraints == '=' || *constraints == '+')
19796 while (*constraints++ != ',')
19798 /* Skip ignored operands. */
19799 if (*constraints == 'X')
19802 return memory_address_length (XEXP (recog_data.operand[i], 0));
19807 /* Compute default value for "length_vex" attribute. It includes
19808 2 or 3 byte VEX prefix and 1 opcode byte. */
/* NOTE(review): numbered fragmentary extract -- return type, braces and
   the "return 3 + 1;" / "return 2 + 1;" lines are elided; code text
   kept verbatim, comments only added.  */
19811 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19816 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
19817 byte VEX prefix. */
19818 if (!has_0f_opcode || has_vex_w)
19821 /* We can always use 2 byte VEX prefix in 32bit. */
19825 extract_insn_cached (insn);
19827 for (i = recog_data.n_operands - 1; i >= 0; --i)
19828 if (REG_P (recog_data.operand[i]))
19830 /* REX.W bit uses 3 byte VEX prefix. */
19831 if (GET_MODE (recog_data.operand[i]) == DImode
19832 && GENERAL_REG_P (recog_data.operand[i]))
19837 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19838 if (MEM_P (recog_data.operand[i])
19839 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19846 /* Return the maximum number of instructions a cpu can issue. */
/* NOTE(review): numbered fragmentary extract -- return type, braces,
   switch head and the "return N;" lines for each group are elided;
   code text kept verbatim.  Groups processors by issue width for the
   scheduler (TARGET_SCHED_ISSUE_RATE).  */
19849 ix86_issue_rate (void)
19853 case PROCESSOR_PENTIUM:
19854 case PROCESSOR_ATOM:
19858 case PROCESSOR_PENTIUMPRO:
19859 case PROCESSOR_PENTIUM4:
19860 case PROCESSOR_ATHLON:
19862 case PROCESSOR_AMDFAM10:
19863 case PROCESSOR_NOCONA:
19864 case PROCESSOR_GENERIC32:
19865 case PROCESSOR_GENERIC64:
19868 case PROCESSOR_CORE2:
19876 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19877 by DEP_INSN and nothing set by DEP_INSN. */
19880 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19884 /* Simplify the test for uninteresting insns. */
19885 if (insn_type != TYPE_SETCC
19886 && insn_type != TYPE_ICMOV
19887 && insn_type != TYPE_FCMOV
19888 && insn_type != TYPE_IBR)
19891 if ((set = single_set (dep_insn)) != 0)
19893 set = SET_DEST (set);
19896 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19897 && XVECLEN (PATTERN (dep_insn), 0) == 2
19898 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19899 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19901 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19902 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19907 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19910 /* This test is true if the dependent insn reads the flags but
19911 not any other potentially set register. */
19912 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19915 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19921 /* Return true iff USE_INSN has a memory address with operands set by
/* NOTE(review): fragmentary extract -- return type, braces, loop-body
   braces and the final "return false;" are elided; code text kept
   verbatim.  Detects an Address Generation Interlock: the first MEM
   operand of USE_INSN whose address is modified by SET_INSN.  */
19925 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19928 extract_insn_cached (use_insn);
19929 for (i = recog_data.n_operands - 1; i >= 0; --i)
19930 if (MEM_P (recog_data.operand[i]))
19932 rtx addr = XEXP (recog_data.operand[i], 0);
19933 return modified_in_p (addr, set_insn) != 0;
/* NOTE(review): numbered fragmentary extract -- return type, braces,
   switch head and several "return cost;" lines are elided; code text
   kept verbatim, comments only added.
   TARGET_SCHED_ADJUST_COST hook: tweak the scheduler's latency COST of
   the dependence LINK between DEP_INSN (producer) and INSN (consumer)
   per-processor.  */
19939 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19941 enum attr_type insn_type, dep_insn_type;
19942 enum attr_memory memory;
19944 int dep_insn_code_number;
19946 /* Anti and output dependencies have zero cost on all CPUs. */
19947 if (REG_NOTE_KIND (link) != 0)
19950 dep_insn_code_number = recog_memoized (dep_insn);
19952 /* If we can't recognize the insns, we can't really do anything. */
19953 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19956 insn_type = get_attr_type (insn);
19957 dep_insn_type = get_attr_type (dep_insn);
19961 case PROCESSOR_PENTIUM:
19962 /* Address Generation Interlock adds a cycle of latency. */
19963 if (insn_type == TYPE_LEA)
19965 rtx addr = PATTERN (insn);
19967 if (GET_CODE (addr) == PARALLEL)
19968 addr = XVECEXP (addr, 0, 0);
19970 gcc_assert (GET_CODE (addr) == SET);
19972 addr = SET_SRC (addr);
19973 if (modified_in_p (addr, dep_insn))
19976 else if (ix86_agi_dependent (dep_insn, insn))
19979 /* ??? Compares pair with jump/setcc. */
19980 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19983 /* Floating point stores require value to be ready one cycle earlier. */
19984 if (insn_type == TYPE_FMOV
19985 && get_attr_memory (insn) == MEMORY_STORE
19986 && !ix86_agi_dependent (dep_insn, insn))
19990 case PROCESSOR_PENTIUMPRO:
19991 memory = get_attr_memory (insn);
19993 /* INT->FP conversion is expensive. */
19994 if (get_attr_fp_int_src (dep_insn))
19997 /* There is one cycle extra latency between an FP op and a store. */
19998 if (insn_type == TYPE_FMOV
19999 && (set = single_set (dep_insn)) != NULL_RTX
20000 && (set2 = single_set (insn)) != NULL_RTX
20001 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
20002 && MEM_P (SET_DEST (set2)))
20005 /* Show ability of reorder buffer to hide latency of load by executing
20006 in parallel with previous instruction in case
20007 previous instruction is not needed to compute the address. */
20008 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20009 && !ix86_agi_dependent (dep_insn, insn))
20011 /* Claim moves to take one cycle, as core can issue one load
20012 at time and the next load can start cycle later. */
20013 if (dep_insn_type == TYPE_IMOV
20014 || dep_insn_type == TYPE_FMOV)
/* K6-class handling (case label elided in this extract).  */
20022 memory = get_attr_memory (insn);
20024 /* The esp dependency is resolved before the instruction is really
20026 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
20027 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
20030 /* INT->FP conversion is expensive. */
20031 if (get_attr_fp_int_src (dep_insn))
20034 /* Show ability of reorder buffer to hide latency of load by executing
20035 in parallel with previous instruction in case
20036 previous instruction is not needed to compute the address. */
20037 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20038 && !ix86_agi_dependent (dep_insn, insn))
20040 /* Claim moves to take one cycle, as core can issue one load
20041 at time and the next load can start cycle later. */
20042 if (dep_insn_type == TYPE_IMOV
20043 || dep_insn_type == TYPE_FMOV)
20052 case PROCESSOR_ATHLON:
20054 case PROCESSOR_AMDFAM10:
20055 case PROCESSOR_ATOM:
20056 case PROCESSOR_GENERIC32:
20057 case PROCESSOR_GENERIC64:
20058 memory = get_attr_memory (insn);
20060 /* Show ability of reorder buffer to hide latency of load by executing
20061 in parallel with previous instruction in case
20062 previous instruction is not needed to compute the address. */
20063 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20064 && !ix86_agi_dependent (dep_insn, insn))
20066 enum attr_unit unit = get_attr_unit (insn);
20069 /* Because of the difference between the length of integer and
20070 floating unit pipeline preparation stages, the memory operands
20071 for floating point are cheaper.
20073 ??? For Athlon it the difference is most probably 2. */
20074 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
20077 loadcost = TARGET_ATHLON ? 2 : 0;
20079 if (cost >= loadcost)
20092 /* How many alternative schedules to try. This should be as wide as the
20093 scheduling freedom in the DFA, but no wider. Making this value too
20094 large results extra work for the scheduler. */
/* NOTE(review): numbered excerpt -- the return type, the body braces and
   the per-processor return values (original lines 20095-20113) are elided
   here; only the Pentium / PentiumPro case labels of the dispatch switch
   survive.  Presumably each case returns that processor's lookahead
   depth -- confirm against the full source.  */
20097 ia32_multipass_dfa_lookahead (void)
20101 case PROCESSOR_PENTIUM:
20104 case PROCESSOR_PENTIUMPRO:
20114 /* Compute the alignment given to a constant that is being placed in memory.
20115 EXP is the constant and ALIGN is the alignment that the object would
20117 The value of this function is used instead of that alignment to align
/* NOTE(review): numbered excerpt; original lines 20116, 20118-20120 (end
   of the comment, return type, opening brace) and the `return` lines
   inside the branches below (20125, 20127, 20129-20130, 20134-20136) are
   elided -- the branch bodies presumably return the widened alignment
   (64 / 128 bits) or the incoming ALIGN; confirm against the full
   source.  */
20121 ix86_constant_alignment (tree exp, int align)
/* Numeric constants: DFmode (double) asks for at least 64-bit alignment,
   128-bit modes for 128-bit alignment (elided returns).  */
20123 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
20124 || TREE_CODE (exp) == INTEGER_CST)
20126 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
20128 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants (length >= 31) are raised to word alignment when
   not optimizing for size -- presumably to benefit block string ops;
   confirm.  */
20131 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
20132 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
20133 return BITS_PER_WORD;
20138 /* Compute the alignment for a static variable.
20139 TYPE is the data type, and ALIGN is the alignment that
20140 the object would ordinarily have. The value of this function is used
20141 instead of that alignment to align the object. */
/* NOTE(review): numbered excerpt; the return type, braces and the
   `return` statement of every branch are elided (visible as gaps in the
   embedded line numbers).  Each `if (... && align < K)` presumably
   returns K; confirm against the full source.  */
20144 ix86_data_alignment (tree type, int align)
/* Ceiling for the alignment boost: one word when optimizing for size,
   otherwise 256 bits capped by what the object file can express.  */
20146 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* Aggregates whose constant size reaches max_align bits (or whose size
   overflows the low word, TREE_INT_CST_HIGH != 0) get max_align.  */
20148 if (AGGREGATE_TYPE_P (type)
20149 && TYPE_SIZE (type)
20150 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20151 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
20152 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
20153 && align < max_align)
20156 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
20157 to 16byte boundary. */
20160 if (AGGREGATE_TYPE_P (type)
20161 && TYPE_SIZE (type)
20162 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20163 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
20164 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-kind boosts: arrays look at the element mode...  */
20168 if (TREE_CODE (type) == ARRAY_TYPE)
20170 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20172 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
/* ...complex types at the composite mode (DCmode/XCmode/TCmode)...  */
20175 else if (TREE_CODE (type) == COMPLEX_TYPE)
20178 if (TYPE_MODE (type) == DCmode && align < 64)
20180 if ((TYPE_MODE (type) == XCmode
20181 || TYPE_MODE (type) == TCmode) && align < 128)
/* ...records/unions at the mode of their first field...  */
20184 else if ((TREE_CODE (type) == RECORD_TYPE
20185 || TREE_CODE (type) == UNION_TYPE
20186 || TREE_CODE (type) == QUAL_UNION_TYPE)
20187 && TYPE_FIELDS (type))
20189 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20191 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
/* ...and scalars/vectors at the type's own mode.  */
20194 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20195 || TREE_CODE (type) == INTEGER_TYPE)
20197 if (TYPE_MODE (type) == DFmode && align < 64)
20199 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20206 /* Compute the alignment for a local variable or a stack slot. EXP is
20207 the data type or decl itself, MODE is the widest mode available and
20208 ALIGN is the alignment that the object would ordinarily have. The
20209 value of this macro is used instead of that alignment to align the
/* NOTE(review): numbered excerpt; return type, braces, the `decl`/`type`
   local setup (lines 20215-20228 partly elided) and the per-branch
   `return` statements are elided.  The structure mirrors
   ix86_data_alignment above but with a 16-byte (128-bit) aggregate
   threshold; confirm elided returns against the full source.  */
20213 ix86_local_alignment (tree exp, enum machine_mode mode,
20214 unsigned int align)
/* When EXP is a decl, classify by its type rather than the decl node.  */
20218 if (exp && DECL_P (exp))
20220 type = TREE_TYPE (exp);
20229 /* Don't do dynamic stack realignment for long long objects with
20230 -mpreferred-stack-boundary=2. */
20233 && ix86_preferred_stack_boundary < 64
20234 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
20235 && (!type || !TYPE_USER_ALIGN (type))
20236 && (!decl || !DECL_USER_ALIGN (decl)))
20239 /* If TYPE is NULL, we are allocating a stack slot for caller-save
20240 register in MODE. We will return the largest alignment of XF
20244 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
20245 align = GET_MODE_ALIGNMENT (DFmode);
20249 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
20250 to 16byte boundary. */
/* NOTE(review): threshold 16 here is in bytes per the comment above,
   while ix86_data_alignment compares against 128 -- presumably one of
   the two sites interprets TYPE_SIZE differently or this is the known
   bits-vs-bytes inconsistency; verify against the full source before
   relying on it.  */
20253 if (AGGREGATE_TYPE_P (type)
20254 && TYPE_SIZE (type)
20255 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20256 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
20257 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20260 if (TREE_CODE (type) == ARRAY_TYPE)
20262 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20264 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20267 else if (TREE_CODE (type) == COMPLEX_TYPE)
20269 if (TYPE_MODE (type) == DCmode && align < 64)
20271 if ((TYPE_MODE (type) == XCmode
20272 || TYPE_MODE (type) == TCmode) && align < 128)
20275 else if ((TREE_CODE (type) == RECORD_TYPE
20276 || TREE_CODE (type) == UNION_TYPE
20277 || TREE_CODE (type) == QUAL_UNION_TYPE)
20278 && TYPE_FIELDS (type))
20280 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20282 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20285 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20286 || TREE_CODE (type) == INTEGER_TYPE)
20289 if (TYPE_MODE (type) == DFmode && align < 64)
20291 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20297 /* Emit RTL insns to initialize the variable parts of a trampoline.
20298 FNADDR is an RTX for the address of the function's pure code.
20299 CXT is an RTX for the static chain value for the function. */
/* NOTE(review): numbered excerpt; return type, braces, the `offset`
   local declaration/updates, and the TARGET_64BIT if/else structure are
   elided (gaps in the embedded numbering).  The two halves below are the
   32-bit and 64-bit trampoline encodings.  */
20301 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
/* --- 32-bit trampoline: mov $cxt, %ecx (opcode 0xb9) followed by
   jmp rel32 (opcode 0xe9) to FNADDR.  */
20305 /* Compute offset from the end of the jmp to the target function. */
20306 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
20307 plus_constant (tramp, 10),
20308 NULL_RTX, 1, OPTAB_DIRECT);
20309 emit_move_insn (gen_rtx_MEM (QImode, tramp),
20310 gen_int_mode (0xb9, QImode));
20311 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
20312 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
20313 gen_int_mode (0xe9, QImode));
20314 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
/* --- 64-bit trampoline: load FNADDR into r11 (zero-extending movl
   when the address fits in 32 bits, otherwise movabs).  HImode stores
   are little-endian, so 0xbb41 emits bytes 41 bb = REX.B mov r11d.  */
20319 /* Try to load address using shorter movl instead of movabs.
20320 We may want to support movq for kernel mode, but kernel does not use
20321 trampolines at the moment. */
20322 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
20324 fnaddr = copy_to_mode_reg (DImode, fnaddr);
20325 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20326 gen_int_mode (0xbb41, HImode));
20327 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
20328 gen_lowpart (SImode, fnaddr));
/* Full-width form: 49 bb = movabs $fnaddr, %r11 (imm64 store elided
   at line 20336).  */
20333 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20334 gen_int_mode (0xbb49, HImode));
20335 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20339 /* Load static chain using movabs to r10. */
20340 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20341 gen_int_mode (0xba49, HImode));
20342 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20345 /* Jump to the r11 */
/* 49 ff e3 = jmp *%r11.  */
20346 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20347 gen_int_mode (0xff49, HImode));
20348 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
20349 gen_int_mode (0xe3, QImode));
/* Sanity-check that the emitted bytes fit in the trampoline slot.  */
20351 gcc_assert (offset <= TRAMPOLINE_SIZE);
/* On targets needing it, mark the trampoline's stack page executable.  */
20354 #ifdef ENABLE_EXECUTE_STACK
20355 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
20356 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
20360 /* Codes for all the SSE/MMX builtins. */
20363 IX86_BUILTIN_ADDPS,
20364 IX86_BUILTIN_ADDSS,
20365 IX86_BUILTIN_DIVPS,
20366 IX86_BUILTIN_DIVSS,
20367 IX86_BUILTIN_MULPS,
20368 IX86_BUILTIN_MULSS,
20369 IX86_BUILTIN_SUBPS,
20370 IX86_BUILTIN_SUBSS,
20372 IX86_BUILTIN_CMPEQPS,
20373 IX86_BUILTIN_CMPLTPS,
20374 IX86_BUILTIN_CMPLEPS,
20375 IX86_BUILTIN_CMPGTPS,
20376 IX86_BUILTIN_CMPGEPS,
20377 IX86_BUILTIN_CMPNEQPS,
20378 IX86_BUILTIN_CMPNLTPS,
20379 IX86_BUILTIN_CMPNLEPS,
20380 IX86_BUILTIN_CMPNGTPS,
20381 IX86_BUILTIN_CMPNGEPS,
20382 IX86_BUILTIN_CMPORDPS,
20383 IX86_BUILTIN_CMPUNORDPS,
20384 IX86_BUILTIN_CMPEQSS,
20385 IX86_BUILTIN_CMPLTSS,
20386 IX86_BUILTIN_CMPLESS,
20387 IX86_BUILTIN_CMPNEQSS,
20388 IX86_BUILTIN_CMPNLTSS,
20389 IX86_BUILTIN_CMPNLESS,
20390 IX86_BUILTIN_CMPNGTSS,
20391 IX86_BUILTIN_CMPNGESS,
20392 IX86_BUILTIN_CMPORDSS,
20393 IX86_BUILTIN_CMPUNORDSS,
20395 IX86_BUILTIN_COMIEQSS,
20396 IX86_BUILTIN_COMILTSS,
20397 IX86_BUILTIN_COMILESS,
20398 IX86_BUILTIN_COMIGTSS,
20399 IX86_BUILTIN_COMIGESS,
20400 IX86_BUILTIN_COMINEQSS,
20401 IX86_BUILTIN_UCOMIEQSS,
20402 IX86_BUILTIN_UCOMILTSS,
20403 IX86_BUILTIN_UCOMILESS,
20404 IX86_BUILTIN_UCOMIGTSS,
20405 IX86_BUILTIN_UCOMIGESS,
20406 IX86_BUILTIN_UCOMINEQSS,
20408 IX86_BUILTIN_CVTPI2PS,
20409 IX86_BUILTIN_CVTPS2PI,
20410 IX86_BUILTIN_CVTSI2SS,
20411 IX86_BUILTIN_CVTSI642SS,
20412 IX86_BUILTIN_CVTSS2SI,
20413 IX86_BUILTIN_CVTSS2SI64,
20414 IX86_BUILTIN_CVTTPS2PI,
20415 IX86_BUILTIN_CVTTSS2SI,
20416 IX86_BUILTIN_CVTTSS2SI64,
20418 IX86_BUILTIN_MAXPS,
20419 IX86_BUILTIN_MAXSS,
20420 IX86_BUILTIN_MINPS,
20421 IX86_BUILTIN_MINSS,
20423 IX86_BUILTIN_LOADUPS,
20424 IX86_BUILTIN_STOREUPS,
20425 IX86_BUILTIN_MOVSS,
20427 IX86_BUILTIN_MOVHLPS,
20428 IX86_BUILTIN_MOVLHPS,
20429 IX86_BUILTIN_LOADHPS,
20430 IX86_BUILTIN_LOADLPS,
20431 IX86_BUILTIN_STOREHPS,
20432 IX86_BUILTIN_STORELPS,
20434 IX86_BUILTIN_MASKMOVQ,
20435 IX86_BUILTIN_MOVMSKPS,
20436 IX86_BUILTIN_PMOVMSKB,
20438 IX86_BUILTIN_MOVNTPS,
20439 IX86_BUILTIN_MOVNTQ,
20441 IX86_BUILTIN_LOADDQU,
20442 IX86_BUILTIN_STOREDQU,
20444 IX86_BUILTIN_PACKSSWB,
20445 IX86_BUILTIN_PACKSSDW,
20446 IX86_BUILTIN_PACKUSWB,
20448 IX86_BUILTIN_PADDB,
20449 IX86_BUILTIN_PADDW,
20450 IX86_BUILTIN_PADDD,
20451 IX86_BUILTIN_PADDQ,
20452 IX86_BUILTIN_PADDSB,
20453 IX86_BUILTIN_PADDSW,
20454 IX86_BUILTIN_PADDUSB,
20455 IX86_BUILTIN_PADDUSW,
20456 IX86_BUILTIN_PSUBB,
20457 IX86_BUILTIN_PSUBW,
20458 IX86_BUILTIN_PSUBD,
20459 IX86_BUILTIN_PSUBQ,
20460 IX86_BUILTIN_PSUBSB,
20461 IX86_BUILTIN_PSUBSW,
20462 IX86_BUILTIN_PSUBUSB,
20463 IX86_BUILTIN_PSUBUSW,
20466 IX86_BUILTIN_PANDN,
20470 IX86_BUILTIN_PAVGB,
20471 IX86_BUILTIN_PAVGW,
20473 IX86_BUILTIN_PCMPEQB,
20474 IX86_BUILTIN_PCMPEQW,
20475 IX86_BUILTIN_PCMPEQD,
20476 IX86_BUILTIN_PCMPGTB,
20477 IX86_BUILTIN_PCMPGTW,
20478 IX86_BUILTIN_PCMPGTD,
20480 IX86_BUILTIN_PMADDWD,
20482 IX86_BUILTIN_PMAXSW,
20483 IX86_BUILTIN_PMAXUB,
20484 IX86_BUILTIN_PMINSW,
20485 IX86_BUILTIN_PMINUB,
20487 IX86_BUILTIN_PMULHUW,
20488 IX86_BUILTIN_PMULHW,
20489 IX86_BUILTIN_PMULLW,
20491 IX86_BUILTIN_PSADBW,
20492 IX86_BUILTIN_PSHUFW,
20494 IX86_BUILTIN_PSLLW,
20495 IX86_BUILTIN_PSLLD,
20496 IX86_BUILTIN_PSLLQ,
20497 IX86_BUILTIN_PSRAW,
20498 IX86_BUILTIN_PSRAD,
20499 IX86_BUILTIN_PSRLW,
20500 IX86_BUILTIN_PSRLD,
20501 IX86_BUILTIN_PSRLQ,
20502 IX86_BUILTIN_PSLLWI,
20503 IX86_BUILTIN_PSLLDI,
20504 IX86_BUILTIN_PSLLQI,
20505 IX86_BUILTIN_PSRAWI,
20506 IX86_BUILTIN_PSRADI,
20507 IX86_BUILTIN_PSRLWI,
20508 IX86_BUILTIN_PSRLDI,
20509 IX86_BUILTIN_PSRLQI,
20511 IX86_BUILTIN_PUNPCKHBW,
20512 IX86_BUILTIN_PUNPCKHWD,
20513 IX86_BUILTIN_PUNPCKHDQ,
20514 IX86_BUILTIN_PUNPCKLBW,
20515 IX86_BUILTIN_PUNPCKLWD,
20516 IX86_BUILTIN_PUNPCKLDQ,
20518 IX86_BUILTIN_SHUFPS,
20520 IX86_BUILTIN_RCPPS,
20521 IX86_BUILTIN_RCPSS,
20522 IX86_BUILTIN_RSQRTPS,
20523 IX86_BUILTIN_RSQRTPS_NR,
20524 IX86_BUILTIN_RSQRTSS,
20525 IX86_BUILTIN_RSQRTF,
20526 IX86_BUILTIN_SQRTPS,
20527 IX86_BUILTIN_SQRTPS_NR,
20528 IX86_BUILTIN_SQRTSS,
20530 IX86_BUILTIN_UNPCKHPS,
20531 IX86_BUILTIN_UNPCKLPS,
20533 IX86_BUILTIN_ANDPS,
20534 IX86_BUILTIN_ANDNPS,
20536 IX86_BUILTIN_XORPS,
20539 IX86_BUILTIN_LDMXCSR,
20540 IX86_BUILTIN_STMXCSR,
20541 IX86_BUILTIN_SFENCE,
20543 /* 3DNow! Original */
20544 IX86_BUILTIN_FEMMS,
20545 IX86_BUILTIN_PAVGUSB,
20546 IX86_BUILTIN_PF2ID,
20547 IX86_BUILTIN_PFACC,
20548 IX86_BUILTIN_PFADD,
20549 IX86_BUILTIN_PFCMPEQ,
20550 IX86_BUILTIN_PFCMPGE,
20551 IX86_BUILTIN_PFCMPGT,
20552 IX86_BUILTIN_PFMAX,
20553 IX86_BUILTIN_PFMIN,
20554 IX86_BUILTIN_PFMUL,
20555 IX86_BUILTIN_PFRCP,
20556 IX86_BUILTIN_PFRCPIT1,
20557 IX86_BUILTIN_PFRCPIT2,
20558 IX86_BUILTIN_PFRSQIT1,
20559 IX86_BUILTIN_PFRSQRT,
20560 IX86_BUILTIN_PFSUB,
20561 IX86_BUILTIN_PFSUBR,
20562 IX86_BUILTIN_PI2FD,
20563 IX86_BUILTIN_PMULHRW,
20565 /* 3DNow! Athlon Extensions */
20566 IX86_BUILTIN_PF2IW,
20567 IX86_BUILTIN_PFNACC,
20568 IX86_BUILTIN_PFPNACC,
20569 IX86_BUILTIN_PI2FW,
20570 IX86_BUILTIN_PSWAPDSI,
20571 IX86_BUILTIN_PSWAPDSF,
20574 IX86_BUILTIN_ADDPD,
20575 IX86_BUILTIN_ADDSD,
20576 IX86_BUILTIN_DIVPD,
20577 IX86_BUILTIN_DIVSD,
20578 IX86_BUILTIN_MULPD,
20579 IX86_BUILTIN_MULSD,
20580 IX86_BUILTIN_SUBPD,
20581 IX86_BUILTIN_SUBSD,
20583 IX86_BUILTIN_CMPEQPD,
20584 IX86_BUILTIN_CMPLTPD,
20585 IX86_BUILTIN_CMPLEPD,
20586 IX86_BUILTIN_CMPGTPD,
20587 IX86_BUILTIN_CMPGEPD,
20588 IX86_BUILTIN_CMPNEQPD,
20589 IX86_BUILTIN_CMPNLTPD,
20590 IX86_BUILTIN_CMPNLEPD,
20591 IX86_BUILTIN_CMPNGTPD,
20592 IX86_BUILTIN_CMPNGEPD,
20593 IX86_BUILTIN_CMPORDPD,
20594 IX86_BUILTIN_CMPUNORDPD,
20595 IX86_BUILTIN_CMPEQSD,
20596 IX86_BUILTIN_CMPLTSD,
20597 IX86_BUILTIN_CMPLESD,
20598 IX86_BUILTIN_CMPNEQSD,
20599 IX86_BUILTIN_CMPNLTSD,
20600 IX86_BUILTIN_CMPNLESD,
20601 IX86_BUILTIN_CMPORDSD,
20602 IX86_BUILTIN_CMPUNORDSD,
20604 IX86_BUILTIN_COMIEQSD,
20605 IX86_BUILTIN_COMILTSD,
20606 IX86_BUILTIN_COMILESD,
20607 IX86_BUILTIN_COMIGTSD,
20608 IX86_BUILTIN_COMIGESD,
20609 IX86_BUILTIN_COMINEQSD,
20610 IX86_BUILTIN_UCOMIEQSD,
20611 IX86_BUILTIN_UCOMILTSD,
20612 IX86_BUILTIN_UCOMILESD,
20613 IX86_BUILTIN_UCOMIGTSD,
20614 IX86_BUILTIN_UCOMIGESD,
20615 IX86_BUILTIN_UCOMINEQSD,
20617 IX86_BUILTIN_MAXPD,
20618 IX86_BUILTIN_MAXSD,
20619 IX86_BUILTIN_MINPD,
20620 IX86_BUILTIN_MINSD,
20622 IX86_BUILTIN_ANDPD,
20623 IX86_BUILTIN_ANDNPD,
20625 IX86_BUILTIN_XORPD,
20627 IX86_BUILTIN_SQRTPD,
20628 IX86_BUILTIN_SQRTSD,
20630 IX86_BUILTIN_UNPCKHPD,
20631 IX86_BUILTIN_UNPCKLPD,
20633 IX86_BUILTIN_SHUFPD,
20635 IX86_BUILTIN_LOADUPD,
20636 IX86_BUILTIN_STOREUPD,
20637 IX86_BUILTIN_MOVSD,
20639 IX86_BUILTIN_LOADHPD,
20640 IX86_BUILTIN_LOADLPD,
20642 IX86_BUILTIN_CVTDQ2PD,
20643 IX86_BUILTIN_CVTDQ2PS,
20645 IX86_BUILTIN_CVTPD2DQ,
20646 IX86_BUILTIN_CVTPD2PI,
20647 IX86_BUILTIN_CVTPD2PS,
20648 IX86_BUILTIN_CVTTPD2DQ,
20649 IX86_BUILTIN_CVTTPD2PI,
20651 IX86_BUILTIN_CVTPI2PD,
20652 IX86_BUILTIN_CVTSI2SD,
20653 IX86_BUILTIN_CVTSI642SD,
20655 IX86_BUILTIN_CVTSD2SI,
20656 IX86_BUILTIN_CVTSD2SI64,
20657 IX86_BUILTIN_CVTSD2SS,
20658 IX86_BUILTIN_CVTSS2SD,
20659 IX86_BUILTIN_CVTTSD2SI,
20660 IX86_BUILTIN_CVTTSD2SI64,
20662 IX86_BUILTIN_CVTPS2DQ,
20663 IX86_BUILTIN_CVTPS2PD,
20664 IX86_BUILTIN_CVTTPS2DQ,
20666 IX86_BUILTIN_MOVNTI,
20667 IX86_BUILTIN_MOVNTPD,
20668 IX86_BUILTIN_MOVNTDQ,
20670 IX86_BUILTIN_MOVQ128,
20673 IX86_BUILTIN_MASKMOVDQU,
20674 IX86_BUILTIN_MOVMSKPD,
20675 IX86_BUILTIN_PMOVMSKB128,
20677 IX86_BUILTIN_PACKSSWB128,
20678 IX86_BUILTIN_PACKSSDW128,
20679 IX86_BUILTIN_PACKUSWB128,
20681 IX86_BUILTIN_PADDB128,
20682 IX86_BUILTIN_PADDW128,
20683 IX86_BUILTIN_PADDD128,
20684 IX86_BUILTIN_PADDQ128,
20685 IX86_BUILTIN_PADDSB128,
20686 IX86_BUILTIN_PADDSW128,
20687 IX86_BUILTIN_PADDUSB128,
20688 IX86_BUILTIN_PADDUSW128,
20689 IX86_BUILTIN_PSUBB128,
20690 IX86_BUILTIN_PSUBW128,
20691 IX86_BUILTIN_PSUBD128,
20692 IX86_BUILTIN_PSUBQ128,
20693 IX86_BUILTIN_PSUBSB128,
20694 IX86_BUILTIN_PSUBSW128,
20695 IX86_BUILTIN_PSUBUSB128,
20696 IX86_BUILTIN_PSUBUSW128,
20698 IX86_BUILTIN_PAND128,
20699 IX86_BUILTIN_PANDN128,
20700 IX86_BUILTIN_POR128,
20701 IX86_BUILTIN_PXOR128,
20703 IX86_BUILTIN_PAVGB128,
20704 IX86_BUILTIN_PAVGW128,
20706 IX86_BUILTIN_PCMPEQB128,
20707 IX86_BUILTIN_PCMPEQW128,
20708 IX86_BUILTIN_PCMPEQD128,
20709 IX86_BUILTIN_PCMPGTB128,
20710 IX86_BUILTIN_PCMPGTW128,
20711 IX86_BUILTIN_PCMPGTD128,
20713 IX86_BUILTIN_PMADDWD128,
20715 IX86_BUILTIN_PMAXSW128,
20716 IX86_BUILTIN_PMAXUB128,
20717 IX86_BUILTIN_PMINSW128,
20718 IX86_BUILTIN_PMINUB128,
20720 IX86_BUILTIN_PMULUDQ,
20721 IX86_BUILTIN_PMULUDQ128,
20722 IX86_BUILTIN_PMULHUW128,
20723 IX86_BUILTIN_PMULHW128,
20724 IX86_BUILTIN_PMULLW128,
20726 IX86_BUILTIN_PSADBW128,
20727 IX86_BUILTIN_PSHUFHW,
20728 IX86_BUILTIN_PSHUFLW,
20729 IX86_BUILTIN_PSHUFD,
20731 IX86_BUILTIN_PSLLDQI128,
20732 IX86_BUILTIN_PSLLWI128,
20733 IX86_BUILTIN_PSLLDI128,
20734 IX86_BUILTIN_PSLLQI128,
20735 IX86_BUILTIN_PSRAWI128,
20736 IX86_BUILTIN_PSRADI128,
20737 IX86_BUILTIN_PSRLDQI128,
20738 IX86_BUILTIN_PSRLWI128,
20739 IX86_BUILTIN_PSRLDI128,
20740 IX86_BUILTIN_PSRLQI128,
20742 IX86_BUILTIN_PSLLDQ128,
20743 IX86_BUILTIN_PSLLW128,
20744 IX86_BUILTIN_PSLLD128,
20745 IX86_BUILTIN_PSLLQ128,
20746 IX86_BUILTIN_PSRAW128,
20747 IX86_BUILTIN_PSRAD128,
20748 IX86_BUILTIN_PSRLW128,
20749 IX86_BUILTIN_PSRLD128,
20750 IX86_BUILTIN_PSRLQ128,
20752 IX86_BUILTIN_PUNPCKHBW128,
20753 IX86_BUILTIN_PUNPCKHWD128,
20754 IX86_BUILTIN_PUNPCKHDQ128,
20755 IX86_BUILTIN_PUNPCKHQDQ128,
20756 IX86_BUILTIN_PUNPCKLBW128,
20757 IX86_BUILTIN_PUNPCKLWD128,
20758 IX86_BUILTIN_PUNPCKLDQ128,
20759 IX86_BUILTIN_PUNPCKLQDQ128,
20761 IX86_BUILTIN_CLFLUSH,
20762 IX86_BUILTIN_MFENCE,
20763 IX86_BUILTIN_LFENCE,
20766 IX86_BUILTIN_ADDSUBPS,
20767 IX86_BUILTIN_HADDPS,
20768 IX86_BUILTIN_HSUBPS,
20769 IX86_BUILTIN_MOVSHDUP,
20770 IX86_BUILTIN_MOVSLDUP,
20771 IX86_BUILTIN_ADDSUBPD,
20772 IX86_BUILTIN_HADDPD,
20773 IX86_BUILTIN_HSUBPD,
20774 IX86_BUILTIN_LDDQU,
20776 IX86_BUILTIN_MONITOR,
20777 IX86_BUILTIN_MWAIT,
20780 IX86_BUILTIN_PHADDW,
20781 IX86_BUILTIN_PHADDD,
20782 IX86_BUILTIN_PHADDSW,
20783 IX86_BUILTIN_PHSUBW,
20784 IX86_BUILTIN_PHSUBD,
20785 IX86_BUILTIN_PHSUBSW,
20786 IX86_BUILTIN_PMADDUBSW,
20787 IX86_BUILTIN_PMULHRSW,
20788 IX86_BUILTIN_PSHUFB,
20789 IX86_BUILTIN_PSIGNB,
20790 IX86_BUILTIN_PSIGNW,
20791 IX86_BUILTIN_PSIGND,
20792 IX86_BUILTIN_PALIGNR,
20793 IX86_BUILTIN_PABSB,
20794 IX86_BUILTIN_PABSW,
20795 IX86_BUILTIN_PABSD,
20797 IX86_BUILTIN_PHADDW128,
20798 IX86_BUILTIN_PHADDD128,
20799 IX86_BUILTIN_PHADDSW128,
20800 IX86_BUILTIN_PHSUBW128,
20801 IX86_BUILTIN_PHSUBD128,
20802 IX86_BUILTIN_PHSUBSW128,
20803 IX86_BUILTIN_PMADDUBSW128,
20804 IX86_BUILTIN_PMULHRSW128,
20805 IX86_BUILTIN_PSHUFB128,
20806 IX86_BUILTIN_PSIGNB128,
20807 IX86_BUILTIN_PSIGNW128,
20808 IX86_BUILTIN_PSIGND128,
20809 IX86_BUILTIN_PALIGNR128,
20810 IX86_BUILTIN_PABSB128,
20811 IX86_BUILTIN_PABSW128,
20812 IX86_BUILTIN_PABSD128,
20814 /* AMDFAM10 - SSE4A New Instructions. */
20815 IX86_BUILTIN_MOVNTSD,
20816 IX86_BUILTIN_MOVNTSS,
20817 IX86_BUILTIN_EXTRQI,
20818 IX86_BUILTIN_EXTRQ,
20819 IX86_BUILTIN_INSERTQI,
20820 IX86_BUILTIN_INSERTQ,
20823 IX86_BUILTIN_BLENDPD,
20824 IX86_BUILTIN_BLENDPS,
20825 IX86_BUILTIN_BLENDVPD,
20826 IX86_BUILTIN_BLENDVPS,
20827 IX86_BUILTIN_PBLENDVB128,
20828 IX86_BUILTIN_PBLENDW128,
20833 IX86_BUILTIN_INSERTPS128,
20835 IX86_BUILTIN_MOVNTDQA,
20836 IX86_BUILTIN_MPSADBW128,
20837 IX86_BUILTIN_PACKUSDW128,
20838 IX86_BUILTIN_PCMPEQQ,
20839 IX86_BUILTIN_PHMINPOSUW128,
20841 IX86_BUILTIN_PMAXSB128,
20842 IX86_BUILTIN_PMAXSD128,
20843 IX86_BUILTIN_PMAXUD128,
20844 IX86_BUILTIN_PMAXUW128,
20846 IX86_BUILTIN_PMINSB128,
20847 IX86_BUILTIN_PMINSD128,
20848 IX86_BUILTIN_PMINUD128,
20849 IX86_BUILTIN_PMINUW128,
20851 IX86_BUILTIN_PMOVSXBW128,
20852 IX86_BUILTIN_PMOVSXBD128,
20853 IX86_BUILTIN_PMOVSXBQ128,
20854 IX86_BUILTIN_PMOVSXWD128,
20855 IX86_BUILTIN_PMOVSXWQ128,
20856 IX86_BUILTIN_PMOVSXDQ128,
20858 IX86_BUILTIN_PMOVZXBW128,
20859 IX86_BUILTIN_PMOVZXBD128,
20860 IX86_BUILTIN_PMOVZXBQ128,
20861 IX86_BUILTIN_PMOVZXWD128,
20862 IX86_BUILTIN_PMOVZXWQ128,
20863 IX86_BUILTIN_PMOVZXDQ128,
20865 IX86_BUILTIN_PMULDQ128,
20866 IX86_BUILTIN_PMULLD128,
20868 IX86_BUILTIN_ROUNDPD,
20869 IX86_BUILTIN_ROUNDPS,
20870 IX86_BUILTIN_ROUNDSD,
20871 IX86_BUILTIN_ROUNDSS,
20873 IX86_BUILTIN_PTESTZ,
20874 IX86_BUILTIN_PTESTC,
20875 IX86_BUILTIN_PTESTNZC,
20877 IX86_BUILTIN_VEC_INIT_V2SI,
20878 IX86_BUILTIN_VEC_INIT_V4HI,
20879 IX86_BUILTIN_VEC_INIT_V8QI,
20880 IX86_BUILTIN_VEC_EXT_V2DF,
20881 IX86_BUILTIN_VEC_EXT_V2DI,
20882 IX86_BUILTIN_VEC_EXT_V4SF,
20883 IX86_BUILTIN_VEC_EXT_V4SI,
20884 IX86_BUILTIN_VEC_EXT_V8HI,
20885 IX86_BUILTIN_VEC_EXT_V2SI,
20886 IX86_BUILTIN_VEC_EXT_V4HI,
20887 IX86_BUILTIN_VEC_EXT_V16QI,
20888 IX86_BUILTIN_VEC_SET_V2DI,
20889 IX86_BUILTIN_VEC_SET_V4SF,
20890 IX86_BUILTIN_VEC_SET_V4SI,
20891 IX86_BUILTIN_VEC_SET_V8HI,
20892 IX86_BUILTIN_VEC_SET_V4HI,
20893 IX86_BUILTIN_VEC_SET_V16QI,
20895 IX86_BUILTIN_VEC_PACK_SFIX,
20898 IX86_BUILTIN_CRC32QI,
20899 IX86_BUILTIN_CRC32HI,
20900 IX86_BUILTIN_CRC32SI,
20901 IX86_BUILTIN_CRC32DI,
20903 IX86_BUILTIN_PCMPESTRI128,
20904 IX86_BUILTIN_PCMPESTRM128,
20905 IX86_BUILTIN_PCMPESTRA128,
20906 IX86_BUILTIN_PCMPESTRC128,
20907 IX86_BUILTIN_PCMPESTRO128,
20908 IX86_BUILTIN_PCMPESTRS128,
20909 IX86_BUILTIN_PCMPESTRZ128,
20910 IX86_BUILTIN_PCMPISTRI128,
20911 IX86_BUILTIN_PCMPISTRM128,
20912 IX86_BUILTIN_PCMPISTRA128,
20913 IX86_BUILTIN_PCMPISTRC128,
20914 IX86_BUILTIN_PCMPISTRO128,
20915 IX86_BUILTIN_PCMPISTRS128,
20916 IX86_BUILTIN_PCMPISTRZ128,
20918 IX86_BUILTIN_PCMPGTQ,
20920 /* AES instructions */
20921 IX86_BUILTIN_AESENC128,
20922 IX86_BUILTIN_AESENCLAST128,
20923 IX86_BUILTIN_AESDEC128,
20924 IX86_BUILTIN_AESDECLAST128,
20925 IX86_BUILTIN_AESIMC128,
20926 IX86_BUILTIN_AESKEYGENASSIST128,
20928 /* PCLMUL instruction */
20929 IX86_BUILTIN_PCLMULQDQ128,
20932 IX86_BUILTIN_ADDPD256,
20933 IX86_BUILTIN_ADDPS256,
20934 IX86_BUILTIN_ADDSUBPD256,
20935 IX86_BUILTIN_ADDSUBPS256,
20936 IX86_BUILTIN_ANDPD256,
20937 IX86_BUILTIN_ANDPS256,
20938 IX86_BUILTIN_ANDNPD256,
20939 IX86_BUILTIN_ANDNPS256,
20940 IX86_BUILTIN_BLENDPD256,
20941 IX86_BUILTIN_BLENDPS256,
20942 IX86_BUILTIN_BLENDVPD256,
20943 IX86_BUILTIN_BLENDVPS256,
20944 IX86_BUILTIN_DIVPD256,
20945 IX86_BUILTIN_DIVPS256,
20946 IX86_BUILTIN_DPPS256,
20947 IX86_BUILTIN_HADDPD256,
20948 IX86_BUILTIN_HADDPS256,
20949 IX86_BUILTIN_HSUBPD256,
20950 IX86_BUILTIN_HSUBPS256,
20951 IX86_BUILTIN_MAXPD256,
20952 IX86_BUILTIN_MAXPS256,
20953 IX86_BUILTIN_MINPD256,
20954 IX86_BUILTIN_MINPS256,
20955 IX86_BUILTIN_MULPD256,
20956 IX86_BUILTIN_MULPS256,
20957 IX86_BUILTIN_ORPD256,
20958 IX86_BUILTIN_ORPS256,
20959 IX86_BUILTIN_SHUFPD256,
20960 IX86_BUILTIN_SHUFPS256,
20961 IX86_BUILTIN_SUBPD256,
20962 IX86_BUILTIN_SUBPS256,
20963 IX86_BUILTIN_XORPD256,
20964 IX86_BUILTIN_XORPS256,
20965 IX86_BUILTIN_CMPSD,
20966 IX86_BUILTIN_CMPSS,
20967 IX86_BUILTIN_CMPPD,
20968 IX86_BUILTIN_CMPPS,
20969 IX86_BUILTIN_CMPPD256,
20970 IX86_BUILTIN_CMPPS256,
20971 IX86_BUILTIN_CVTDQ2PD256,
20972 IX86_BUILTIN_CVTDQ2PS256,
20973 IX86_BUILTIN_CVTPD2PS256,
20974 IX86_BUILTIN_CVTPS2DQ256,
20975 IX86_BUILTIN_CVTPS2PD256,
20976 IX86_BUILTIN_CVTTPD2DQ256,
20977 IX86_BUILTIN_CVTPD2DQ256,
20978 IX86_BUILTIN_CVTTPS2DQ256,
20979 IX86_BUILTIN_EXTRACTF128PD256,
20980 IX86_BUILTIN_EXTRACTF128PS256,
20981 IX86_BUILTIN_EXTRACTF128SI256,
20982 IX86_BUILTIN_VZEROALL,
20983 IX86_BUILTIN_VZEROUPPER,
20984 IX86_BUILTIN_VZEROUPPER_REX64,
20985 IX86_BUILTIN_VPERMILVARPD,
20986 IX86_BUILTIN_VPERMILVARPS,
20987 IX86_BUILTIN_VPERMILVARPD256,
20988 IX86_BUILTIN_VPERMILVARPS256,
20989 IX86_BUILTIN_VPERMILPD,
20990 IX86_BUILTIN_VPERMILPS,
20991 IX86_BUILTIN_VPERMILPD256,
20992 IX86_BUILTIN_VPERMILPS256,
20993 IX86_BUILTIN_VPERM2F128PD256,
20994 IX86_BUILTIN_VPERM2F128PS256,
20995 IX86_BUILTIN_VPERM2F128SI256,
20996 IX86_BUILTIN_VBROADCASTSS,
20997 IX86_BUILTIN_VBROADCASTSD256,
20998 IX86_BUILTIN_VBROADCASTSS256,
20999 IX86_BUILTIN_VBROADCASTPD256,
21000 IX86_BUILTIN_VBROADCASTPS256,
21001 IX86_BUILTIN_VINSERTF128PD256,
21002 IX86_BUILTIN_VINSERTF128PS256,
21003 IX86_BUILTIN_VINSERTF128SI256,
21004 IX86_BUILTIN_LOADUPD256,
21005 IX86_BUILTIN_LOADUPS256,
21006 IX86_BUILTIN_STOREUPD256,
21007 IX86_BUILTIN_STOREUPS256,
21008 IX86_BUILTIN_LDDQU256,
21009 IX86_BUILTIN_MOVNTDQ256,
21010 IX86_BUILTIN_MOVNTPD256,
21011 IX86_BUILTIN_MOVNTPS256,
21012 IX86_BUILTIN_LOADDQU256,
21013 IX86_BUILTIN_STOREDQU256,
21014 IX86_BUILTIN_MASKLOADPD,
21015 IX86_BUILTIN_MASKLOADPS,
21016 IX86_BUILTIN_MASKSTOREPD,
21017 IX86_BUILTIN_MASKSTOREPS,
21018 IX86_BUILTIN_MASKLOADPD256,
21019 IX86_BUILTIN_MASKLOADPS256,
21020 IX86_BUILTIN_MASKSTOREPD256,
21021 IX86_BUILTIN_MASKSTOREPS256,
21022 IX86_BUILTIN_MOVSHDUP256,
21023 IX86_BUILTIN_MOVSLDUP256,
21024 IX86_BUILTIN_MOVDDUP256,
21026 IX86_BUILTIN_SQRTPD256,
21027 IX86_BUILTIN_SQRTPS256,
21028 IX86_BUILTIN_SQRTPS_NR256,
21029 IX86_BUILTIN_RSQRTPS256,
21030 IX86_BUILTIN_RSQRTPS_NR256,
21032 IX86_BUILTIN_RCPPS256,
21034 IX86_BUILTIN_ROUNDPD256,
21035 IX86_BUILTIN_ROUNDPS256,
21037 IX86_BUILTIN_UNPCKHPD256,
21038 IX86_BUILTIN_UNPCKLPD256,
21039 IX86_BUILTIN_UNPCKHPS256,
21040 IX86_BUILTIN_UNPCKLPS256,
21042 IX86_BUILTIN_SI256_SI,
21043 IX86_BUILTIN_PS256_PS,
21044 IX86_BUILTIN_PD256_PD,
21045 IX86_BUILTIN_SI_SI256,
21046 IX86_BUILTIN_PS_PS256,
21047 IX86_BUILTIN_PD_PD256,
21049 IX86_BUILTIN_VTESTZPD,
21050 IX86_BUILTIN_VTESTCPD,
21051 IX86_BUILTIN_VTESTNZCPD,
21052 IX86_BUILTIN_VTESTZPS,
21053 IX86_BUILTIN_VTESTCPS,
21054 IX86_BUILTIN_VTESTNZCPS,
21055 IX86_BUILTIN_VTESTZPD256,
21056 IX86_BUILTIN_VTESTCPD256,
21057 IX86_BUILTIN_VTESTNZCPD256,
21058 IX86_BUILTIN_VTESTZPS256,
21059 IX86_BUILTIN_VTESTCPS256,
21060 IX86_BUILTIN_VTESTNZCPS256,
21061 IX86_BUILTIN_PTESTZ256,
21062 IX86_BUILTIN_PTESTC256,
21063 IX86_BUILTIN_PTESTNZC256,
21065 IX86_BUILTIN_MOVMSKPD256,
21066 IX86_BUILTIN_MOVMSKPS256,
21068 /* TFmode support builtins. */
21070 IX86_BUILTIN_HUGE_VALQ,
21071 IX86_BUILTIN_FABSQ,
21072 IX86_BUILTIN_COPYSIGNQ,
21074 /* SSE5 instructions */
21075 IX86_BUILTIN_FMADDSS,
21076 IX86_BUILTIN_FMADDSD,
21077 IX86_BUILTIN_FMADDPS,
21078 IX86_BUILTIN_FMADDPD,
21079 IX86_BUILTIN_FMSUBSS,
21080 IX86_BUILTIN_FMSUBSD,
21081 IX86_BUILTIN_FMSUBPS,
21082 IX86_BUILTIN_FMSUBPD,
21083 IX86_BUILTIN_FNMADDSS,
21084 IX86_BUILTIN_FNMADDSD,
21085 IX86_BUILTIN_FNMADDPS,
21086 IX86_BUILTIN_FNMADDPD,
21087 IX86_BUILTIN_FNMSUBSS,
21088 IX86_BUILTIN_FNMSUBSD,
21089 IX86_BUILTIN_FNMSUBPS,
21090 IX86_BUILTIN_FNMSUBPD,
21091 IX86_BUILTIN_PCMOV,
21092 IX86_BUILTIN_PCMOV_V2DI,
21093 IX86_BUILTIN_PCMOV_V4SI,
21094 IX86_BUILTIN_PCMOV_V8HI,
21095 IX86_BUILTIN_PCMOV_V16QI,
21096 IX86_BUILTIN_PCMOV_V4SF,
21097 IX86_BUILTIN_PCMOV_V2DF,
21098 IX86_BUILTIN_PPERM,
21099 IX86_BUILTIN_PERMPS,
21100 IX86_BUILTIN_PERMPD,
21101 IX86_BUILTIN_PMACSSWW,
21102 IX86_BUILTIN_PMACSWW,
21103 IX86_BUILTIN_PMACSSWD,
21104 IX86_BUILTIN_PMACSWD,
21105 IX86_BUILTIN_PMACSSDD,
21106 IX86_BUILTIN_PMACSDD,
21107 IX86_BUILTIN_PMACSSDQL,
21108 IX86_BUILTIN_PMACSSDQH,
21109 IX86_BUILTIN_PMACSDQL,
21110 IX86_BUILTIN_PMACSDQH,
21111 IX86_BUILTIN_PMADCSSWD,
21112 IX86_BUILTIN_PMADCSWD,
21113 IX86_BUILTIN_PHADDBW,
21114 IX86_BUILTIN_PHADDBD,
21115 IX86_BUILTIN_PHADDBQ,
21116 IX86_BUILTIN_PHADDWD,
21117 IX86_BUILTIN_PHADDWQ,
21118 IX86_BUILTIN_PHADDDQ,
21119 IX86_BUILTIN_PHADDUBW,
21120 IX86_BUILTIN_PHADDUBD,
21121 IX86_BUILTIN_PHADDUBQ,
21122 IX86_BUILTIN_PHADDUWD,
21123 IX86_BUILTIN_PHADDUWQ,
21124 IX86_BUILTIN_PHADDUDQ,
21125 IX86_BUILTIN_PHSUBBW,
21126 IX86_BUILTIN_PHSUBWD,
21127 IX86_BUILTIN_PHSUBDQ,
21128 IX86_BUILTIN_PROTB,
21129 IX86_BUILTIN_PROTW,
21130 IX86_BUILTIN_PROTD,
21131 IX86_BUILTIN_PROTQ,
21132 IX86_BUILTIN_PROTB_IMM,
21133 IX86_BUILTIN_PROTW_IMM,
21134 IX86_BUILTIN_PROTD_IMM,
21135 IX86_BUILTIN_PROTQ_IMM,
21136 IX86_BUILTIN_PSHLB,
21137 IX86_BUILTIN_PSHLW,
21138 IX86_BUILTIN_PSHLD,
21139 IX86_BUILTIN_PSHLQ,
21140 IX86_BUILTIN_PSHAB,
21141 IX86_BUILTIN_PSHAW,
21142 IX86_BUILTIN_PSHAD,
21143 IX86_BUILTIN_PSHAQ,
21144 IX86_BUILTIN_FRCZSS,
21145 IX86_BUILTIN_FRCZSD,
21146 IX86_BUILTIN_FRCZPS,
21147 IX86_BUILTIN_FRCZPD,
21148 IX86_BUILTIN_CVTPH2PS,
21149 IX86_BUILTIN_CVTPS2PH,
21151 IX86_BUILTIN_COMEQSS,
21152 IX86_BUILTIN_COMNESS,
21153 IX86_BUILTIN_COMLTSS,
21154 IX86_BUILTIN_COMLESS,
21155 IX86_BUILTIN_COMGTSS,
21156 IX86_BUILTIN_COMGESS,
21157 IX86_BUILTIN_COMUEQSS,
21158 IX86_BUILTIN_COMUNESS,
21159 IX86_BUILTIN_COMULTSS,
21160 IX86_BUILTIN_COMULESS,
21161 IX86_BUILTIN_COMUGTSS,
21162 IX86_BUILTIN_COMUGESS,
21163 IX86_BUILTIN_COMORDSS,
21164 IX86_BUILTIN_COMUNORDSS,
21165 IX86_BUILTIN_COMFALSESS,
21166 IX86_BUILTIN_COMTRUESS,
21168 IX86_BUILTIN_COMEQSD,
21169 IX86_BUILTIN_COMNESD,
21170 IX86_BUILTIN_COMLTSD,
21171 IX86_BUILTIN_COMLESD,
21172 IX86_BUILTIN_COMGTSD,
21173 IX86_BUILTIN_COMGESD,
21174 IX86_BUILTIN_COMUEQSD,
21175 IX86_BUILTIN_COMUNESD,
21176 IX86_BUILTIN_COMULTSD,
21177 IX86_BUILTIN_COMULESD,
21178 IX86_BUILTIN_COMUGTSD,
21179 IX86_BUILTIN_COMUGESD,
21180 IX86_BUILTIN_COMORDSD,
21181 IX86_BUILTIN_COMUNORDSD,
21182 IX86_BUILTIN_COMFALSESD,
21183 IX86_BUILTIN_COMTRUESD,
21185 IX86_BUILTIN_COMEQPS,
21186 IX86_BUILTIN_COMNEPS,
21187 IX86_BUILTIN_COMLTPS,
21188 IX86_BUILTIN_COMLEPS,
21189 IX86_BUILTIN_COMGTPS,
21190 IX86_BUILTIN_COMGEPS,
21191 IX86_BUILTIN_COMUEQPS,
21192 IX86_BUILTIN_COMUNEPS,
21193 IX86_BUILTIN_COMULTPS,
21194 IX86_BUILTIN_COMULEPS,
21195 IX86_BUILTIN_COMUGTPS,
21196 IX86_BUILTIN_COMUGEPS,
21197 IX86_BUILTIN_COMORDPS,
21198 IX86_BUILTIN_COMUNORDPS,
21199 IX86_BUILTIN_COMFALSEPS,
21200 IX86_BUILTIN_COMTRUEPS,
21202 IX86_BUILTIN_COMEQPD,
21203 IX86_BUILTIN_COMNEPD,
21204 IX86_BUILTIN_COMLTPD,
21205 IX86_BUILTIN_COMLEPD,
21206 IX86_BUILTIN_COMGTPD,
21207 IX86_BUILTIN_COMGEPD,
21208 IX86_BUILTIN_COMUEQPD,
21209 IX86_BUILTIN_COMUNEPD,
21210 IX86_BUILTIN_COMULTPD,
21211 IX86_BUILTIN_COMULEPD,
21212 IX86_BUILTIN_COMUGTPD,
21213 IX86_BUILTIN_COMUGEPD,
21214 IX86_BUILTIN_COMORDPD,
21215 IX86_BUILTIN_COMUNORDPD,
21216 IX86_BUILTIN_COMFALSEPD,
21217 IX86_BUILTIN_COMTRUEPD,
21219 IX86_BUILTIN_PCOMEQUB,
21220 IX86_BUILTIN_PCOMNEUB,
21221 IX86_BUILTIN_PCOMLTUB,
21222 IX86_BUILTIN_PCOMLEUB,
21223 IX86_BUILTIN_PCOMGTUB,
21224 IX86_BUILTIN_PCOMGEUB,
21225 IX86_BUILTIN_PCOMFALSEUB,
21226 IX86_BUILTIN_PCOMTRUEUB,
21227 IX86_BUILTIN_PCOMEQUW,
21228 IX86_BUILTIN_PCOMNEUW,
21229 IX86_BUILTIN_PCOMLTUW,
21230 IX86_BUILTIN_PCOMLEUW,
21231 IX86_BUILTIN_PCOMGTUW,
21232 IX86_BUILTIN_PCOMGEUW,
21233 IX86_BUILTIN_PCOMFALSEUW,
21234 IX86_BUILTIN_PCOMTRUEUW,
21235 IX86_BUILTIN_PCOMEQUD,
21236 IX86_BUILTIN_PCOMNEUD,
21237 IX86_BUILTIN_PCOMLTUD,
21238 IX86_BUILTIN_PCOMLEUD,
21239 IX86_BUILTIN_PCOMGTUD,
21240 IX86_BUILTIN_PCOMGEUD,
21241 IX86_BUILTIN_PCOMFALSEUD,
21242 IX86_BUILTIN_PCOMTRUEUD,
21243 IX86_BUILTIN_PCOMEQUQ,
21244 IX86_BUILTIN_PCOMNEUQ,
21245 IX86_BUILTIN_PCOMLTUQ,
21246 IX86_BUILTIN_PCOMLEUQ,
21247 IX86_BUILTIN_PCOMGTUQ,
21248 IX86_BUILTIN_PCOMGEUQ,
21249 IX86_BUILTIN_PCOMFALSEUQ,
21250 IX86_BUILTIN_PCOMTRUEUQ,
21252 IX86_BUILTIN_PCOMEQB,
21253 IX86_BUILTIN_PCOMNEB,
21254 IX86_BUILTIN_PCOMLTB,
21255 IX86_BUILTIN_PCOMLEB,
21256 IX86_BUILTIN_PCOMGTB,
21257 IX86_BUILTIN_PCOMGEB,
21258 IX86_BUILTIN_PCOMFALSEB,
21259 IX86_BUILTIN_PCOMTRUEB,
21260 IX86_BUILTIN_PCOMEQW,
21261 IX86_BUILTIN_PCOMNEW,
21262 IX86_BUILTIN_PCOMLTW,
21263 IX86_BUILTIN_PCOMLEW,
21264 IX86_BUILTIN_PCOMGTW,
21265 IX86_BUILTIN_PCOMGEW,
21266 IX86_BUILTIN_PCOMFALSEW,
21267 IX86_BUILTIN_PCOMTRUEW,
21268 IX86_BUILTIN_PCOMEQD,
21269 IX86_BUILTIN_PCOMNED,
21270 IX86_BUILTIN_PCOMLTD,
21271 IX86_BUILTIN_PCOMLED,
21272 IX86_BUILTIN_PCOMGTD,
21273 IX86_BUILTIN_PCOMGED,
21274 IX86_BUILTIN_PCOMFALSED,
21275 IX86_BUILTIN_PCOMTRUED,
21276 IX86_BUILTIN_PCOMEQQ,
21277 IX86_BUILTIN_PCOMNEQ,
21278 IX86_BUILTIN_PCOMLTQ,
21279 IX86_BUILTIN_PCOMLEQ,
21280 IX86_BUILTIN_PCOMGTQ,
21281 IX86_BUILTIN_PCOMGEQ,
21282 IX86_BUILTIN_PCOMFALSEQ,
21283 IX86_BUILTIN_PCOMTRUEQ,
21288 /* Table for the ix86 builtin decls. */
21289 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
21291 /* Table of all of the builtin functions that are possible with different ISA's
21292 but are waiting to be built until a function is declared to use that
/* NOTE(review): numbered excerpt; the tail of the comment above (line
   21293) and the struct's closing `};` (circa lines 21299-21300) are
   elided.  Record of a deferred builtin: enough information to call
   add_builtin_function later, once its ISA is enabled.  */
21294 struct GTY(()) builtin_isa {
21295 tree type; /* builtin type to use in the declaration */
21296 const char *name; /* function name */
21297 int isa; /* isa_flags this builtin is defined for */
21298 bool const_p; /* true if the declaration is constant */
/* Deferred-builtin table, parallel to ix86_builtins, indexed by
   enum ix86_builtins.  */
21301 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
21304 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
21305 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
21306 * function decl in the ix86_builtins array. Returns the function decl or
21307 * NULL_TREE, if the builtin was not added.
21309 * If the front end has a special hook for builtin functions, delay adding
21310 * builtin functions that aren't in the current ISA until the ISA is changed
21311 * with function specific optimization. Doing so, can save about 300K for the
21312 * default compiler. When the builtin is expanded, check at that time whether
21315 * If the front end doesn't have a special hook, record all builtins, even if
21316 * it isn't an instruction set in the current ISA in case the user uses
21317 * function specific options for a different ISA, so that we don't get scope
21318 * errors if a builtin is added in the middle of a function scope. */
/* NOTE(review): extraction dropped this function's return-type line,
   braces, the "else" keyword and a few trailing lines; only comments
   were added here -- see the doc comment above for the contract.  */
21321 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
21323 tree decl = NULL_TREE;
/* 64-bit-only builtins are skipped entirely on 32-bit targets.  */
21325 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
21327 ix86_builtins_isa[(int) code].isa = mask;
/* Declare immediately if the required ISA is already enabled, or if
   the front end declares builtins at external scope anyway.  */
21329 if ((mask & ix86_isa_flags) != 0
21330 || (lang_hooks.builtin_function
21331 == lang_hooks.builtin_function_ext_scope))
21334 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
21336 ix86_builtins[(int) code] = decl;
21337 ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Otherwise record name/type so ix86_add_new_builtins can create the
   decl later, when its ISA is switched on.  */
21341 ix86_builtins[(int) code] = NULL_TREE;
21342 ix86_builtins_isa[(int) code].const_p = false;
21343 ix86_builtins_isa[(int) code].type = type;
21344 ix86_builtins_isa[(int) code].name = name;
21351 /* Like def_builtin, but also marks the function decl "const". */
/* NOTE(review): def_builtin can return NULL_TREE when it defers the
   declaration; this excerpt appears to be missing the null-check lines
   around TREE_READONLY -- verify against the full source.  */
21354 def_builtin_const (int mask, const char *name, tree type,
21355 enum ix86_builtins code)
21357 tree decl = def_builtin (mask, name, type, code);
21359 TREE_READONLY (decl) = 1;
/* Remember const-ness so a deferred decl gets it applied too (see
   ix86_add_new_builtins).  */
21361 ix86_builtins_isa[(int) code].const_p = true;
21366 /* Add any new builtin functions for a given ISA that may not have been
21367 declared. This saves a bit of space compared to adding all of the
21368 declarations to the tree, even if we didn't use them. */
/* NOTE(review): the return-type line, the local declarations (i, decl)
   and the braces are missing from this excerpt; comments only.  */
21371 ix86_add_new_builtins (int isa)
21376 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
/* A non-NULL type marks a builtin recorded by def_builtin but not yet
   declared; declare it now that ISA enables it.  */
21378 if ((ix86_builtins_isa[i].isa & isa) != 0
21379 && ix86_builtins_isa[i].type != NULL_TREE)
21381 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
21382 ix86_builtins_isa[i].type,
21383 i, BUILT_IN_MD, NULL,
/* Clearing type below ensures each builtin is declared only once.  */
21386 ix86_builtins[i] = decl;
21387 ix86_builtins_isa[i].type = NULL_TREE;
21388 if (ix86_builtins_isa[i].const_p)
21389 TREE_READONLY (decl) = 1;
21394 /* Bits for builtin_description.flag. */
21396 /* Set when we don't support the comparison natively, and should
21397 swap_comparison in order to support it. */
21398 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row per builtin in the bdesc_* tables below: the ISA option mask
   the builtin requires, the insn pattern it expands to, its
   user-visible name, its ix86_builtins code, and an rtx comparison
   code (UNKNOWN when the entry is not a comparison).
   NOTE(review): the trailing "flag" member and the closing "};" of this
   struct are outside the visible excerpt.  */
21400 struct builtin_description
21402 const unsigned int mask;
21403 const enum insn_code icode;
21404 const char *const name;
21405 const enum ix86_builtins code;
21406 const enum rtx_code comparison;
/* comiss/ucomiss-style scalar FP comparison builtins (SSE and SSE2);
   the rtx-code column selects which condition is tested.
   NOTE(review): the array's opening "{" and closing "};" lines are
   missing from this excerpt.  */
21410 static const struct builtin_description bdesc_comi[] =
/* SSE: comiss (ordered) variants.  */
21412 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21413 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21414 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21416 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21417 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
/* SSE: ucomiss (unordered) variants.  */
21418 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21419 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21420 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21421 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21422 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21423 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
/* SSE2: comisd variants.  */
21424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
/* SSE2: ucomisd variants.  */
21430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 pcmpestri/pcmpestrm (explicit-length string compare)
   builtins; for the flag-extracting variants the last column carries a
   CC mode instead of 0.  NOTE(review): the array's opening "{" and
   closing "};" lines are missing from this excerpt.  */
21438 static const struct builtin_description bdesc_pcmpestr[] =
21441 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21442 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21443 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21444 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21445 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21446 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21447 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 pcmpistri/pcmpistrm (implicit-length string compare)
   builtins; same layout as bdesc_pcmpestr above.  NOTE(review): the
   array's opening "{" and closing "};" lines are missing from this
   excerpt.  */
21450 static const struct builtin_description bdesc_pcmpistr[] =
21453 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21454 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21455 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21456 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21457 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21458 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21459 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21462 /* Special builtin types */
/* Function signatures for the "special" builtins (loads/stores and the
   like, see bdesc_special_args below).  Naming scheme is
   RETTYPE_FTYPE_ARG1_ARG2...; PC prefixes denote pointer-to-const.
   NOTE(review): the enum's braces are missing from this excerpt.  */
21463 enum ix86_special_builtin_type
21465 SPECIAL_FTYPE_UNKNOWN,
21467 V32QI_FTYPE_PCCHAR,
21468 V16QI_FTYPE_PCCHAR,
21470 V8SF_FTYPE_PCFLOAT,
21472 V4DF_FTYPE_PCDOUBLE,
21473 V4SF_FTYPE_PCFLOAT,
21474 V2DF_FTYPE_PCDOUBLE,
21475 V8SF_FTYPE_PCV8SF_V8SF,
21476 V4DF_FTYPE_PCV4DF_V4DF,
21477 V4SF_FTYPE_V4SF_PCV2SF,
21478 V4SF_FTYPE_PCV4SF_V4SF,
21479 V2DF_FTYPE_V2DF_PCDOUBLE,
21480 V2DF_FTYPE_PCV2DF_V2DF,
21482 VOID_FTYPE_PV2SF_V4SF,
21483 VOID_FTYPE_PV4DI_V4DI,
21484 VOID_FTYPE_PV2DI_V2DI,
21485 VOID_FTYPE_PCHAR_V32QI,
21486 VOID_FTYPE_PCHAR_V16QI,
21487 VOID_FTYPE_PFLOAT_V8SF,
21488 VOID_FTYPE_PFLOAT_V4SF,
21489 VOID_FTYPE_PDOUBLE_V4DF,
21490 VOID_FTYPE_PDOUBLE_V2DF,
21492 VOID_FTYPE_PINT_INT,
21493 VOID_FTYPE_PV8SF_V8SF_V8SF,
21494 VOID_FTYPE_PV4DF_V4DF_V4DF,
21495 VOID_FTYPE_PV4SF_V4SF_V4SF,
21496 VOID_FTYPE_PV2DF_V2DF_V2DF
21499 /* Builtin types */
/* Function signatures for the regular builtins in bdesc_args.  Naming
   scheme is RETTYPE_FTYPE_ARG1_ARG2...; suffixes such as _COUNT,
   _SWAP, _VEC_MERGE and _PTEST select special expansion handling.
   NOTE(review): the enum's braces and a number of enumerator lines are
   missing from this excerpt (the embedded original line numbers jump).  */
21500 enum ix86_builtin_type
21503 FLOAT128_FTYPE_FLOAT128,
21505 FLOAT128_FTYPE_FLOAT128_FLOAT128,
21506 INT_FTYPE_V8SF_V8SF_PTEST,
21507 INT_FTYPE_V4DI_V4DI_PTEST,
21508 INT_FTYPE_V4DF_V4DF_PTEST,
21509 INT_FTYPE_V4SF_V4SF_PTEST,
21510 INT_FTYPE_V2DI_V2DI_PTEST,
21511 INT_FTYPE_V2DF_V2DF_PTEST,
21543 V4SF_FTYPE_V4SF_VEC_MERGE,
21552 V2DF_FTYPE_V2DF_VEC_MERGE,
/* Two-operand (binary) signatures.  */
21563 V16QI_FTYPE_V16QI_V16QI,
21564 V16QI_FTYPE_V8HI_V8HI,
21565 V8QI_FTYPE_V8QI_V8QI,
21566 V8QI_FTYPE_V4HI_V4HI,
21567 V8HI_FTYPE_V8HI_V8HI,
21568 V8HI_FTYPE_V8HI_V8HI_COUNT,
21569 V8HI_FTYPE_V16QI_V16QI,
21570 V8HI_FTYPE_V4SI_V4SI,
21571 V8HI_FTYPE_V8HI_SI_COUNT,
21572 V8SF_FTYPE_V8SF_V8SF,
21573 V8SF_FTYPE_V8SF_V8SI,
21574 V4SI_FTYPE_V4SI_V4SI,
21575 V4SI_FTYPE_V4SI_V4SI_COUNT,
21576 V4SI_FTYPE_V8HI_V8HI,
21577 V4SI_FTYPE_V4SF_V4SF,
21578 V4SI_FTYPE_V2DF_V2DF,
21579 V4SI_FTYPE_V4SI_SI_COUNT,
21580 V4HI_FTYPE_V4HI_V4HI,
21581 V4HI_FTYPE_V4HI_V4HI_COUNT,
21582 V4HI_FTYPE_V8QI_V8QI,
21583 V4HI_FTYPE_V2SI_V2SI,
21584 V4HI_FTYPE_V4HI_SI_COUNT,
21585 V4DF_FTYPE_V4DF_V4DF,
21586 V4DF_FTYPE_V4DF_V4DI,
21587 V4SF_FTYPE_V4SF_V4SF,
21588 V4SF_FTYPE_V4SF_V4SF_SWAP,
21589 V4SF_FTYPE_V4SF_V4SI,
21590 V4SF_FTYPE_V4SF_V2SI,
21591 V4SF_FTYPE_V4SF_V2DF,
21592 V4SF_FTYPE_V4SF_DI,
21593 V4SF_FTYPE_V4SF_SI,
21594 V2DI_FTYPE_V2DI_V2DI,
21595 V2DI_FTYPE_V2DI_V2DI_COUNT,
21596 V2DI_FTYPE_V16QI_V16QI,
21597 V2DI_FTYPE_V4SI_V4SI,
21598 V2DI_FTYPE_V2DI_V16QI,
21599 V2DI_FTYPE_V2DF_V2DF,
21600 V2DI_FTYPE_V2DI_SI_COUNT,
21601 V2SI_FTYPE_V2SI_V2SI,
21602 V2SI_FTYPE_V2SI_V2SI_COUNT,
21603 V2SI_FTYPE_V4HI_V4HI,
21604 V2SI_FTYPE_V2SF_V2SF,
21605 V2SI_FTYPE_V2SI_SI_COUNT,
21606 V2DF_FTYPE_V2DF_V2DF,
21607 V2DF_FTYPE_V2DF_V2DF_SWAP,
21608 V2DF_FTYPE_V2DF_V4SF,
21609 V2DF_FTYPE_V2DF_V2DI,
21610 V2DF_FTYPE_V2DF_DI,
21611 V2DF_FTYPE_V2DF_SI,
21612 V2SF_FTYPE_V2SF_V2SF,
21613 V1DI_FTYPE_V1DI_V1DI,
21614 V1DI_FTYPE_V1DI_V1DI_COUNT,
21615 V1DI_FTYPE_V8QI_V8QI,
21616 V1DI_FTYPE_V2SI_V2SI,
21617 V1DI_FTYPE_V1DI_SI_COUNT,
21618 UINT64_FTYPE_UINT64_UINT64,
21619 UINT_FTYPE_UINT_UINT,
21620 UINT_FTYPE_UINT_USHORT,
21621 UINT_FTYPE_UINT_UCHAR,
/* Vector-and-immediate signatures.  */
21622 V8HI_FTYPE_V8HI_INT,
21623 V4SI_FTYPE_V4SI_INT,
21624 V4HI_FTYPE_V4HI_INT,
21625 V8SF_FTYPE_V8SF_INT,
21626 V4SI_FTYPE_V8SI_INT,
21627 V4SF_FTYPE_V8SF_INT,
21628 V2DF_FTYPE_V4DF_INT,
21629 V4DF_FTYPE_V4DF_INT,
21630 V4SF_FTYPE_V4SF_INT,
21631 V2DI_FTYPE_V2DI_INT,
21632 V2DI2TI_FTYPE_V2DI_INT,
21633 V2DF_FTYPE_V2DF_INT,
/* Three-operand signatures.  */
21634 V16QI_FTYPE_V16QI_V16QI_V16QI,
21635 V8SF_FTYPE_V8SF_V8SF_V8SF,
21636 V4DF_FTYPE_V4DF_V4DF_V4DF,
21637 V4SF_FTYPE_V4SF_V4SF_V4SF,
21638 V2DF_FTYPE_V2DF_V2DF_V2DF,
21639 V16QI_FTYPE_V16QI_V16QI_INT,
21640 V8SI_FTYPE_V8SI_V8SI_INT,
21641 V8SI_FTYPE_V8SI_V4SI_INT,
21642 V8HI_FTYPE_V8HI_V8HI_INT,
21643 V8SF_FTYPE_V8SF_V8SF_INT,
21644 V8SF_FTYPE_V8SF_V4SF_INT,
21645 V4SI_FTYPE_V4SI_V4SI_INT,
21646 V4DF_FTYPE_V4DF_V4DF_INT,
21647 V4DF_FTYPE_V4DF_V2DF_INT,
21648 V4SF_FTYPE_V4SF_V4SF_INT,
21649 V2DI_FTYPE_V2DI_V2DI_INT,
21650 V2DI2TI_FTYPE_V2DI_V2DI_INT,
21651 V1DI2DI_FTYPE_V1DI_V1DI_INT,
21652 V2DF_FTYPE_V2DF_V2DF_INT,
21653 V2DI_FTYPE_V2DI_UINT_UINT,
21654 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21657 /* Special builtins with variable number of arguments. */
/* Builtins with memory operands or other side effects; expanded via
   the special_args path rather than the ordinary arg path.
   NOTE(review): the array's opening "{" and closing "};" and the
   original per-ISA section comments were dropped by extraction; the
   ISA group comments below were re-added based on each entry's mask.  */
21658 static const struct builtin_description bdesc_special_args[] =
/* MMX */
21661 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* 3DNow! */
21664 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE */
21667 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21668 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21669 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21671 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21672 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21673 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21674 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21676 /* SSE or 3DNow!A */
21677 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21678 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
/* SSE2 */
21681 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21682 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21683 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21684 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21685 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21687 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21689 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21691 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21692 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
/* SSE3 */
21695 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
/* SSE4.1 */
21698 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
/* SSE4A */
21701 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21702 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
/* AVX */
21705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21707 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21737 /* Builtins with variable number of arguments. */
21738 static const struct builtin_description bdesc_args[] =
21741 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21742 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21743 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21744 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21745 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21746 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21748 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21749 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21750 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21751 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21752 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21753 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21754 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21755 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21757 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21758 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21760 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21761 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21762 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21763 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21765 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21766 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21767 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21768 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21769 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21770 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21772 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21773 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21774 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21775 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21776 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21777 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21779 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21780 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21781 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21783 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21785 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21786 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21787 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21788 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21789 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21790 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21792 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21793 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21794 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21795 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21796 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21797 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21799 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21800 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21801 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21802 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21805 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21806 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21807 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21808 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21810 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21811 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21812 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21813 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21814 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21815 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21816 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21817 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21818 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21819 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21820 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21821 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21822 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21823 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21824 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21827 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21828 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21829 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21830 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21831 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21832 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21835 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21836 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21837 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21838 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21839 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21840 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21841 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21842 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21843 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21844 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21845 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21846 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21848 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21850 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21851 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21852 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21853 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21854 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21855 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21856 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21857 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21859 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21860 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21861 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21862 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21863 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21864 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21865 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21866 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21867 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21868 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21869 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21870 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21871 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21872 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21873 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21874 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21875 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21876 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21877 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21878 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21879 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21880 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21882 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21883 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21884 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21885 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21887 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21888 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21889 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21890 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21892 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21893 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21894 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21895 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21896 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21898 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21899 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21900 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
21902 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21904 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21905 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21906 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21908 /* MMX builtins available when either SSE or 3DNow!A is enabled.  */
21909 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21910 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21911 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21913 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21914 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21915 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21916 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21918 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21919 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21921 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21924 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21926 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21927 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21928 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21929 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21930 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21932 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21933 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21934 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21935 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21936 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21938 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21940 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21941 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21942 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21943 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21945 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21946 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21949 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21950 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21951 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21952 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21954 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21956 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21958 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21959 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21960 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21961 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21962 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21963 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21964 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21965 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21966 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21967 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21968 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21971 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21976 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21977 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21980 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21981 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21982 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21984 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21985 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21986 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21987 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21989 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21993 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21995 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21996 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21997 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21998 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21999 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22000 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22001 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22002 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22006 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22007 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22013 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22014 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
22016 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22018 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22019 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22031 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22032 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22033 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22042 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22045 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
22046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
22047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
22049 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22050 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
22052 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
22053 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
22055 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
22057 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
22058 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
22059 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
22060 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
22062 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
22063 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22064 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22065 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
22066 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22067 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22068 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
22070 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
22071 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22072 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22073 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
22074 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22075 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22076 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
22078 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22079 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22080 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22081 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
22084 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
22085 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
22087 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
22089 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
22090 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
22092 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
22095 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
22096 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
22099 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
22100 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22102 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22103 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22104 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22105 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22106 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22107 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22110 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
22111 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
22112 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
22113 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
22114 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
22115 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
22117 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22118 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22119 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22120 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22121 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22122 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22123 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22124 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22125 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22126 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22127 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22128 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22129 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
22130 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
22131 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22132 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22133 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22134 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22135 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22136 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22137 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22138 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22139 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22140 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22143 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
22144 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
22147 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22148 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22149 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
22150 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
22151 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22152 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22153 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22154 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
22155 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
22156 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
22158 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
22159 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
22160 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
22161 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
22162 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
22163 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
22164 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
22165 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
22166 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
22167 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
22168 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
22169 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
22170 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
22172 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
22173 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22174 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22175 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22176 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22177 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22178 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22179 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22180 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22181 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22182 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
22183 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22185 /* SSE4.1 and SSE5 */
22186 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22187 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22188 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22189 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22191 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22192 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22193 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22196 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22197 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
22198 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
22199 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
22200 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
22203 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
22204 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
22205 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
22206 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22209 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
22210 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
22212 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22213 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22214 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22215 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22218 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
22221 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22222 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22223 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22224 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22225 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22226 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22227 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22228 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22229 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22230 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22231 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22232 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22233 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22234 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22235 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22236 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22237 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22238 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22239 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22240 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22241 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22242 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22243 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22244 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22245 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22246 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22248 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
22249 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
22250 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
22251 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
22253 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22254 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22255 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
22256 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
22257 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22258 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22259 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22260 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22261 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22262 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22263 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22264 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22265 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22266 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
22267 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
22268 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
22269 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
22270 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
22271 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
22272 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22273 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
22274 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22275 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22276 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22277 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22278 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22279 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
22280 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22281 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22282 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22283 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22284 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
22285 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
22286 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
22288 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22289 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22290 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22292 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22293 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22294 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22295 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22296 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22298 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22300 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22301 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22303 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22304 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22305 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22306 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22308 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
22309 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
22310 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
22311 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
22312 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
22313 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
22315 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22316 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22317 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22318 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22319 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22320 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22321 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22322 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22323 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22324 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22325 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22326 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22327 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22328 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22329 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22331 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
22332 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
22336 enum multi_arg_type {
22346 MULTI_ARG_3_PERMPS,
22347 MULTI_ARG_3_PERMPD,
22354 MULTI_ARG_2_DI_IMM,
22355 MULTI_ARG_2_SI_IMM,
22356 MULTI_ARG_2_HI_IMM,
22357 MULTI_ARG_2_QI_IMM,
22358 MULTI_ARG_2_SF_CMP,
22359 MULTI_ARG_2_DF_CMP,
22360 MULTI_ARG_2_DI_CMP,
22361 MULTI_ARG_2_SI_CMP,
22362 MULTI_ARG_2_HI_CMP,
22363 MULTI_ARG_2_QI_CMP,
22386 static const struct builtin_description bdesc_multi_arg[] =
22388 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22389 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22390 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22391 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22392 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22393 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22394 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22395 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22396 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22397 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22398 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22399 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22400 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22401 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22402 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22403 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22404 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
22405 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
22406 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
22407 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
22408 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
22409 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
22410 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
22411 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
22412 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, UNKNOWN, (int)MULTI_ARG_3_PERMPS },
22413 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, UNKNOWN, (int)MULTI_ARG_3_PERMPD },
22414 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22415 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22416 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22417 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22418 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22419 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22420 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22421 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22422 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22423 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22424 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22425 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22426 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22427 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
22428 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
22429 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
22430 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
22431 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
22432 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
22433 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
22434 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22435 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
22436 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
22437 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
22438 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22439 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
22440 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
22441 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
22442 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
22443 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
22444 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
22445 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
22446 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)MULTI_ARG_1_PH2PS },
22447 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int)MULTI_ARG_1_PS2PH },
22448 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22449 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22450 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22451 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22452 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22453 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22454 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22455 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22456 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22457 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22458 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22459 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22460 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22461 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22462 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22464 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22465 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22466 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22467 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22468 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22469 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22470 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22471 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22472 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22473 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22474 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22475 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22476 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22477 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22478 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22479 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22481 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22482 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22483 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22484 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22485 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22486 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22487 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22488 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22489 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22490 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22491 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22492 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22493 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22494 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22495 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22496 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22498 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22499 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22500 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22501 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22502 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22503 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22504 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22505 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22506 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22507 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22508 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22509 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22510 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22511 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22512 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22513 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22515 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22516 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22517 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22518 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22519 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22520 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22521 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22522 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22523 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22524 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22525 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22526 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22527 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22528 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22529 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22530 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22532 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22533 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22534 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22535 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22536 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22537 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22538 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22540 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22541 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22542 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22543 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22544 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22545 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22546 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22548 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22549 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22550 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22551 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22552 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22553 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22554 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22556 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22557 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22558 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22559 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22560 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22561 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22562 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22564 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22565 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22566 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22567 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22568 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22569 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22570 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22572 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22573 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22574 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22575 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22576 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22577 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22578 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22580 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22581 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22582 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22583 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22584 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22585 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22586 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22588 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22589 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22590 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22591 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22592 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22593 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22594 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22596 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22597 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22598 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22599 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22600 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22601 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22602 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22603 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22605 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22606 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22607 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22608 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22609 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22610 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22611 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22612 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22614 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22615 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22616 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22617 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22618 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22619 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22620 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22621 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22624 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
22625    in the current target ISA to allow the user to compile particular modules
22626    with different target specific options that differ from the command line
       options.  */
22629 ix86_init_mmx_sse_builtins (void)
22631 const struct builtin_description * d;
22634 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22635 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22636 tree V1DI_type_node
22637 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22638 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22639 tree V2DI_type_node
22640 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22641 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22642 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22643 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22644 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22645 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22646 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22648 tree pchar_type_node = build_pointer_type (char_type_node);
22649 tree pcchar_type_node
22650 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22651 tree pfloat_type_node = build_pointer_type (float_type_node);
22652 tree pcfloat_type_node
22653 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22654 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22655 tree pcv2sf_type_node
22656 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22657 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22658 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22661 tree int_ftype_v4sf_v4sf
22662 = build_function_type_list (integer_type_node,
22663 V4SF_type_node, V4SF_type_node, NULL_TREE);
22664 tree v4si_ftype_v4sf_v4sf
22665 = build_function_type_list (V4SI_type_node,
22666 V4SF_type_node, V4SF_type_node, NULL_TREE);
22667 /* MMX/SSE/integer conversions. */
22668 tree int_ftype_v4sf
22669 = build_function_type_list (integer_type_node,
22670 V4SF_type_node, NULL_TREE);
22671 tree int64_ftype_v4sf
22672 = build_function_type_list (long_long_integer_type_node,
22673 V4SF_type_node, NULL_TREE);
22674 tree int_ftype_v8qi
22675 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22676 tree v4sf_ftype_v4sf_int
22677 = build_function_type_list (V4SF_type_node,
22678 V4SF_type_node, integer_type_node, NULL_TREE);
22679 tree v4sf_ftype_v4sf_int64
22680 = build_function_type_list (V4SF_type_node,
22681 V4SF_type_node, long_long_integer_type_node,
22683 tree v4sf_ftype_v4sf_v2si
22684 = build_function_type_list (V4SF_type_node,
22685 V4SF_type_node, V2SI_type_node, NULL_TREE);
22687 /* Miscellaneous. */
22688 tree v8qi_ftype_v4hi_v4hi
22689 = build_function_type_list (V8QI_type_node,
22690 V4HI_type_node, V4HI_type_node, NULL_TREE);
22691 tree v4hi_ftype_v2si_v2si
22692 = build_function_type_list (V4HI_type_node,
22693 V2SI_type_node, V2SI_type_node, NULL_TREE);
22694 tree v4sf_ftype_v4sf_v4sf_int
22695 = build_function_type_list (V4SF_type_node,
22696 V4SF_type_node, V4SF_type_node,
22697 integer_type_node, NULL_TREE);
22698 tree v2si_ftype_v4hi_v4hi
22699 = build_function_type_list (V2SI_type_node,
22700 V4HI_type_node, V4HI_type_node, NULL_TREE);
22701 tree v4hi_ftype_v4hi_int
22702 = build_function_type_list (V4HI_type_node,
22703 V4HI_type_node, integer_type_node, NULL_TREE);
22704 tree v2si_ftype_v2si_int
22705 = build_function_type_list (V2SI_type_node,
22706 V2SI_type_node, integer_type_node, NULL_TREE);
22707 tree v1di_ftype_v1di_int
22708 = build_function_type_list (V1DI_type_node,
22709 V1DI_type_node, integer_type_node, NULL_TREE);
22711 tree void_ftype_void
22712 = build_function_type (void_type_node, void_list_node);
22713 tree void_ftype_unsigned
22714 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22715 tree void_ftype_unsigned_unsigned
22716 = build_function_type_list (void_type_node, unsigned_type_node,
22717 unsigned_type_node, NULL_TREE);
22718 tree void_ftype_pcvoid_unsigned_unsigned
22719 = build_function_type_list (void_type_node, const_ptr_type_node,
22720 unsigned_type_node, unsigned_type_node,
22722 tree unsigned_ftype_void
22723 = build_function_type (unsigned_type_node, void_list_node);
22724 tree v2si_ftype_v4sf
22725 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22726 /* Loads/stores. */
22727 tree void_ftype_v8qi_v8qi_pchar
22728 = build_function_type_list (void_type_node,
22729 V8QI_type_node, V8QI_type_node,
22730 pchar_type_node, NULL_TREE);
22731 tree v4sf_ftype_pcfloat
22732 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22733 tree v4sf_ftype_v4sf_pcv2sf
22734 = build_function_type_list (V4SF_type_node,
22735 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22736 tree void_ftype_pv2sf_v4sf
22737 = build_function_type_list (void_type_node,
22738 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22739 tree void_ftype_pfloat_v4sf
22740 = build_function_type_list (void_type_node,
22741 pfloat_type_node, V4SF_type_node, NULL_TREE);
22742 tree void_ftype_pdi_di
22743 = build_function_type_list (void_type_node,
22744 pdi_type_node, long_long_unsigned_type_node,
22746 tree void_ftype_pv2di_v2di
22747 = build_function_type_list (void_type_node,
22748 pv2di_type_node, V2DI_type_node, NULL_TREE);
22749 /* Normal vector unops. */
22750 tree v4sf_ftype_v4sf
22751 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22752 tree v16qi_ftype_v16qi
22753 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22754 tree v8hi_ftype_v8hi
22755 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22756 tree v4si_ftype_v4si
22757 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22758 tree v8qi_ftype_v8qi
22759 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22760 tree v4hi_ftype_v4hi
22761 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22763 /* Normal vector binops. */
22764 tree v4sf_ftype_v4sf_v4sf
22765 = build_function_type_list (V4SF_type_node,
22766 V4SF_type_node, V4SF_type_node, NULL_TREE);
22767 tree v8qi_ftype_v8qi_v8qi
22768 = build_function_type_list (V8QI_type_node,
22769 V8QI_type_node, V8QI_type_node, NULL_TREE);
22770 tree v4hi_ftype_v4hi_v4hi
22771 = build_function_type_list (V4HI_type_node,
22772 V4HI_type_node, V4HI_type_node, NULL_TREE);
22773 tree v2si_ftype_v2si_v2si
22774 = build_function_type_list (V2SI_type_node,
22775 V2SI_type_node, V2SI_type_node, NULL_TREE);
22776 tree v1di_ftype_v1di_v1di
22777 = build_function_type_list (V1DI_type_node,
22778 V1DI_type_node, V1DI_type_node, NULL_TREE);
22779 tree v1di_ftype_v1di_v1di_int
22780 = build_function_type_list (V1DI_type_node,
22781 V1DI_type_node, V1DI_type_node,
22782 integer_type_node, NULL_TREE);
22783 tree v2si_ftype_v2sf
22784 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22785 tree v2sf_ftype_v2si
22786 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22787 tree v2si_ftype_v2si
22788 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22789 tree v2sf_ftype_v2sf
22790 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22791 tree v2sf_ftype_v2sf_v2sf
22792 = build_function_type_list (V2SF_type_node,
22793 V2SF_type_node, V2SF_type_node, NULL_TREE);
22794 tree v2si_ftype_v2sf_v2sf
22795 = build_function_type_list (V2SI_type_node,
22796 V2SF_type_node, V2SF_type_node, NULL_TREE);
22797 tree pint_type_node = build_pointer_type (integer_type_node);
22798 tree pdouble_type_node = build_pointer_type (double_type_node);
22799 tree pcdouble_type_node = build_pointer_type (
22800 build_type_variant (double_type_node, 1, 0));
22801 tree int_ftype_v2df_v2df
22802 = build_function_type_list (integer_type_node,
22803 V2DF_type_node, V2DF_type_node, NULL_TREE);
22805 tree void_ftype_pcvoid
22806 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22807 tree v4sf_ftype_v4si
22808 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22809 tree v4si_ftype_v4sf
22810 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22811 tree v2df_ftype_v4si
22812 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22813 tree v4si_ftype_v2df
22814 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22815 tree v4si_ftype_v2df_v2df
22816 = build_function_type_list (V4SI_type_node,
22817 V2DF_type_node, V2DF_type_node, NULL_TREE);
22818 tree v2si_ftype_v2df
22819 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22820 tree v4sf_ftype_v2df
22821 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22822 tree v2df_ftype_v2si
22823 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22824 tree v2df_ftype_v4sf
22825 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22826 tree int_ftype_v2df
22827 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22828 tree int64_ftype_v2df
22829 = build_function_type_list (long_long_integer_type_node,
22830 V2DF_type_node, NULL_TREE);
22831 tree v2df_ftype_v2df_int
22832 = build_function_type_list (V2DF_type_node,
22833 V2DF_type_node, integer_type_node, NULL_TREE);
22834 tree v2df_ftype_v2df_int64
22835 = build_function_type_list (V2DF_type_node,
22836 V2DF_type_node, long_long_integer_type_node,
22838 tree v4sf_ftype_v4sf_v2df
22839 = build_function_type_list (V4SF_type_node,
22840 V4SF_type_node, V2DF_type_node, NULL_TREE);
22841 tree v2df_ftype_v2df_v4sf
22842 = build_function_type_list (V2DF_type_node,
22843 V2DF_type_node, V4SF_type_node, NULL_TREE);
22844 tree v2df_ftype_v2df_v2df_int
22845 = build_function_type_list (V2DF_type_node,
22846 V2DF_type_node, V2DF_type_node,
22849 tree v2df_ftype_v2df_pcdouble
22850 = build_function_type_list (V2DF_type_node,
22851 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22852 tree void_ftype_pdouble_v2df
22853 = build_function_type_list (void_type_node,
22854 pdouble_type_node, V2DF_type_node, NULL_TREE);
22855 tree void_ftype_pint_int
22856 = build_function_type_list (void_type_node,
22857 pint_type_node, integer_type_node, NULL_TREE);
22858 tree void_ftype_v16qi_v16qi_pchar
22859 = build_function_type_list (void_type_node,
22860 V16QI_type_node, V16QI_type_node,
22861 pchar_type_node, NULL_TREE);
22862 tree v2df_ftype_pcdouble
22863 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22864 tree v2df_ftype_v2df_v2df
22865 = build_function_type_list (V2DF_type_node,
22866 V2DF_type_node, V2DF_type_node, NULL_TREE);
22867 tree v16qi_ftype_v16qi_v16qi
22868 = build_function_type_list (V16QI_type_node,
22869 V16QI_type_node, V16QI_type_node, NULL_TREE);
22870 tree v8hi_ftype_v8hi_v8hi
22871 = build_function_type_list (V8HI_type_node,
22872 V8HI_type_node, V8HI_type_node, NULL_TREE);
22873 tree v4si_ftype_v4si_v4si
22874 = build_function_type_list (V4SI_type_node,
22875 V4SI_type_node, V4SI_type_node, NULL_TREE);
22876 tree v2di_ftype_v2di_v2di
22877 = build_function_type_list (V2DI_type_node,
22878 V2DI_type_node, V2DI_type_node, NULL_TREE);
22879 tree v2di_ftype_v2df_v2df
22880 = build_function_type_list (V2DI_type_node,
22881 V2DF_type_node, V2DF_type_node, NULL_TREE);
22882 tree v2df_ftype_v2df
22883 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22884 tree v2di_ftype_v2di_int
22885 = build_function_type_list (V2DI_type_node,
22886 V2DI_type_node, integer_type_node, NULL_TREE);
22887 tree v2di_ftype_v2di_v2di_int
22888 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22889 V2DI_type_node, integer_type_node, NULL_TREE);
22890 tree v4si_ftype_v4si_int
22891 = build_function_type_list (V4SI_type_node,
22892 V4SI_type_node, integer_type_node, NULL_TREE);
22893 tree v8hi_ftype_v8hi_int
22894 = build_function_type_list (V8HI_type_node,
22895 V8HI_type_node, integer_type_node, NULL_TREE);
22896 tree v4si_ftype_v8hi_v8hi
22897 = build_function_type_list (V4SI_type_node,
22898 V8HI_type_node, V8HI_type_node, NULL_TREE);
22899 tree v1di_ftype_v8qi_v8qi
22900 = build_function_type_list (V1DI_type_node,
22901 V8QI_type_node, V8QI_type_node, NULL_TREE);
22902 tree v1di_ftype_v2si_v2si
22903 = build_function_type_list (V1DI_type_node,
22904 V2SI_type_node, V2SI_type_node, NULL_TREE);
22905 tree v2di_ftype_v16qi_v16qi
22906 = build_function_type_list (V2DI_type_node,
22907 V16QI_type_node, V16QI_type_node, NULL_TREE);
22908 tree v2di_ftype_v4si_v4si
22909 = build_function_type_list (V2DI_type_node,
22910 V4SI_type_node, V4SI_type_node, NULL_TREE);
22911 tree int_ftype_v16qi
22912 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22913 tree v16qi_ftype_pcchar
22914 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22915 tree void_ftype_pchar_v16qi
22916 = build_function_type_list (void_type_node,
22917 pchar_type_node, V16QI_type_node, NULL_TREE);
22919 tree v2di_ftype_v2di_unsigned_unsigned
22920 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22921 unsigned_type_node, unsigned_type_node,
22923 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22924 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22925 unsigned_type_node, unsigned_type_node,
22927 tree v2di_ftype_v2di_v16qi
22928 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22930 tree v2df_ftype_v2df_v2df_v2df
22931 = build_function_type_list (V2DF_type_node,
22932 V2DF_type_node, V2DF_type_node,
22933 V2DF_type_node, NULL_TREE);
22934 tree v4sf_ftype_v4sf_v4sf_v4sf
22935 = build_function_type_list (V4SF_type_node,
22936 V4SF_type_node, V4SF_type_node,
22937 V4SF_type_node, NULL_TREE);
22938 tree v8hi_ftype_v16qi
22939 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22941 tree v4si_ftype_v16qi
22942 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22944 tree v2di_ftype_v16qi
22945 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22947 tree v4si_ftype_v8hi
22948 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22950 tree v2di_ftype_v8hi
22951 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22953 tree v2di_ftype_v4si
22954 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22956 tree v2di_ftype_pv2di
22957 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22959 tree v16qi_ftype_v16qi_v16qi_int
22960 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22961 V16QI_type_node, integer_type_node,
22963 tree v16qi_ftype_v16qi_v16qi_v16qi
22964 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22965 V16QI_type_node, V16QI_type_node,
22967 tree v8hi_ftype_v8hi_v8hi_int
22968 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22969 V8HI_type_node, integer_type_node,
22971 tree v4si_ftype_v4si_v4si_int
22972 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22973 V4SI_type_node, integer_type_node,
22975 tree int_ftype_v2di_v2di
22976 = build_function_type_list (integer_type_node,
22977 V2DI_type_node, V2DI_type_node,
22979 tree int_ftype_v16qi_int_v16qi_int_int
22980 = build_function_type_list (integer_type_node,
22987 tree v16qi_ftype_v16qi_int_v16qi_int_int
22988 = build_function_type_list (V16QI_type_node,
22995 tree int_ftype_v16qi_v16qi_int
22996 = build_function_type_list (integer_type_node,
23002 /* SSE5 instructions */
23003 tree v2di_ftype_v2di_v2di_v2di
23004 = build_function_type_list (V2DI_type_node,
23010 tree v4si_ftype_v4si_v4si_v4si
23011 = build_function_type_list (V4SI_type_node,
23017 tree v4si_ftype_v4si_v4si_v2di
23018 = build_function_type_list (V4SI_type_node,
23024 tree v8hi_ftype_v8hi_v8hi_v8hi
23025 = build_function_type_list (V8HI_type_node,
23031 tree v8hi_ftype_v8hi_v8hi_v4si
23032 = build_function_type_list (V8HI_type_node,
23038 tree v2df_ftype_v2df_v2df_v16qi
23039 = build_function_type_list (V2DF_type_node,
23045 tree v4sf_ftype_v4sf_v4sf_v16qi
23046 = build_function_type_list (V4SF_type_node,
23052 tree v2di_ftype_v2di_si
23053 = build_function_type_list (V2DI_type_node,
23058 tree v4si_ftype_v4si_si
23059 = build_function_type_list (V4SI_type_node,
23064 tree v8hi_ftype_v8hi_si
23065 = build_function_type_list (V8HI_type_node,
23070 tree v16qi_ftype_v16qi_si
23071 = build_function_type_list (V16QI_type_node,
23075 tree v4sf_ftype_v4hi
23076 = build_function_type_list (V4SF_type_node,
23080 tree v4hi_ftype_v4sf
23081 = build_function_type_list (V4HI_type_node,
23085 tree v2di_ftype_v2di
23086 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
23088 tree v16qi_ftype_v8hi_v8hi
23089 = build_function_type_list (V16QI_type_node,
23090 V8HI_type_node, V8HI_type_node,
23092 tree v8hi_ftype_v4si_v4si
23093 = build_function_type_list (V8HI_type_node,
23094 V4SI_type_node, V4SI_type_node,
23096 tree v8hi_ftype_v16qi_v16qi
23097 = build_function_type_list (V8HI_type_node,
23098 V16QI_type_node, V16QI_type_node,
23100 tree v4hi_ftype_v8qi_v8qi
23101 = build_function_type_list (V4HI_type_node,
23102 V8QI_type_node, V8QI_type_node,
23104 tree unsigned_ftype_unsigned_uchar
23105 = build_function_type_list (unsigned_type_node,
23106 unsigned_type_node,
23107 unsigned_char_type_node,
23109 tree unsigned_ftype_unsigned_ushort
23110 = build_function_type_list (unsigned_type_node,
23111 unsigned_type_node,
23112 short_unsigned_type_node,
23114 tree unsigned_ftype_unsigned_unsigned
23115 = build_function_type_list (unsigned_type_node,
23116 unsigned_type_node,
23117 unsigned_type_node,
23119 tree uint64_ftype_uint64_uint64
23120 = build_function_type_list (long_long_unsigned_type_node,
23121 long_long_unsigned_type_node,
23122 long_long_unsigned_type_node,
23124 tree float_ftype_float
23125 = build_function_type_list (float_type_node,
23130 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
23132 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
23134 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
23136 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
23138 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
23140 tree v8sf_ftype_v8sf
23141 = build_function_type_list (V8SF_type_node,
23144 tree v8si_ftype_v8sf
23145 = build_function_type_list (V8SI_type_node,
23148 tree v8sf_ftype_v8si
23149 = build_function_type_list (V8SF_type_node,
23152 tree v4si_ftype_v4df
23153 = build_function_type_list (V4SI_type_node,
23156 tree v4df_ftype_v4df
23157 = build_function_type_list (V4DF_type_node,
23160 tree v4df_ftype_v4si
23161 = build_function_type_list (V4DF_type_node,
23164 tree v4df_ftype_v4sf
23165 = build_function_type_list (V4DF_type_node,
23168 tree v4sf_ftype_v4df
23169 = build_function_type_list (V4SF_type_node,
23172 tree v8sf_ftype_v8sf_v8sf
23173 = build_function_type_list (V8SF_type_node,
23174 V8SF_type_node, V8SF_type_node,
23176 tree v4df_ftype_v4df_v4df
23177 = build_function_type_list (V4DF_type_node,
23178 V4DF_type_node, V4DF_type_node,
23180 tree v8sf_ftype_v8sf_int
23181 = build_function_type_list (V8SF_type_node,
23182 V8SF_type_node, integer_type_node,
23184 tree v4si_ftype_v8si_int
23185 = build_function_type_list (V4SI_type_node,
23186 V8SI_type_node, integer_type_node,
23188 tree v4df_ftype_v4df_int
23189 = build_function_type_list (V4DF_type_node,
23190 V4DF_type_node, integer_type_node,
23192 tree v4sf_ftype_v8sf_int
23193 = build_function_type_list (V4SF_type_node,
23194 V8SF_type_node, integer_type_node,
23196 tree v2df_ftype_v4df_int
23197 = build_function_type_list (V2DF_type_node,
23198 V4DF_type_node, integer_type_node,
23200 tree v8sf_ftype_v8sf_v8sf_int
23201 = build_function_type_list (V8SF_type_node,
23202 V8SF_type_node, V8SF_type_node,
23205 tree v8sf_ftype_v8sf_v8sf_v8sf
23206 = build_function_type_list (V8SF_type_node,
23207 V8SF_type_node, V8SF_type_node,
23210 tree v4df_ftype_v4df_v4df_v4df
23211 = build_function_type_list (V4DF_type_node,
23212 V4DF_type_node, V4DF_type_node,
23215 tree v8si_ftype_v8si_v8si_int
23216 = build_function_type_list (V8SI_type_node,
23217 V8SI_type_node, V8SI_type_node,
23220 tree v4df_ftype_v4df_v4df_int
23221 = build_function_type_list (V4DF_type_node,
23222 V4DF_type_node, V4DF_type_node,
23225 tree v8sf_ftype_pcfloat
23226 = build_function_type_list (V8SF_type_node,
23229 tree v4df_ftype_pcdouble
23230 = build_function_type_list (V4DF_type_node,
23231 pcdouble_type_node,
23233 tree pcv4sf_type_node
23234 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
23235 tree pcv2df_type_node
23236 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
23237 tree v8sf_ftype_pcv4sf
23238 = build_function_type_list (V8SF_type_node,
23241 tree v4df_ftype_pcv2df
23242 = build_function_type_list (V4DF_type_node,
23245 tree v32qi_ftype_pcchar
23246 = build_function_type_list (V32QI_type_node,
23249 tree void_ftype_pchar_v32qi
23250 = build_function_type_list (void_type_node,
23251 pchar_type_node, V32QI_type_node,
23253 tree v8si_ftype_v8si_v4si_int
23254 = build_function_type_list (V8SI_type_node,
23255 V8SI_type_node, V4SI_type_node,
23258 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
23259 tree void_ftype_pv4di_v4di
23260 = build_function_type_list (void_type_node,
23261 pv4di_type_node, V4DI_type_node,
23263 tree v8sf_ftype_v8sf_v4sf_int
23264 = build_function_type_list (V8SF_type_node,
23265 V8SF_type_node, V4SF_type_node,
23268 tree v4df_ftype_v4df_v2df_int
23269 = build_function_type_list (V4DF_type_node,
23270 V4DF_type_node, V2DF_type_node,
23273 tree void_ftype_pfloat_v8sf
23274 = build_function_type_list (void_type_node,
23275 pfloat_type_node, V8SF_type_node,
23277 tree void_ftype_pdouble_v4df
23278 = build_function_type_list (void_type_node,
23279 pdouble_type_node, V4DF_type_node,
23281 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
23282 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
23283 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
23284 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
23285 tree pcv8sf_type_node
23286 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
23287 tree pcv4df_type_node
23288 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
23289 tree v8sf_ftype_pcv8sf_v8sf
23290 = build_function_type_list (V8SF_type_node,
23291 pcv8sf_type_node, V8SF_type_node,
23293 tree v4df_ftype_pcv4df_v4df
23294 = build_function_type_list (V4DF_type_node,
23295 pcv4df_type_node, V4DF_type_node,
23297 tree v4sf_ftype_pcv4sf_v4sf
23298 = build_function_type_list (V4SF_type_node,
23299 pcv4sf_type_node, V4SF_type_node,
23301 tree v2df_ftype_pcv2df_v2df
23302 = build_function_type_list (V2DF_type_node,
23303 pcv2df_type_node, V2DF_type_node,
23305 tree void_ftype_pv8sf_v8sf_v8sf
23306 = build_function_type_list (void_type_node,
23307 pv8sf_type_node, V8SF_type_node,
23310 tree void_ftype_pv4df_v4df_v4df
23311 = build_function_type_list (void_type_node,
23312 pv4df_type_node, V4DF_type_node,
23315 tree void_ftype_pv4sf_v4sf_v4sf
23316 = build_function_type_list (void_type_node,
23317 pv4sf_type_node, V4SF_type_node,
23320 tree void_ftype_pv2df_v2df_v2df
23321 = build_function_type_list (void_type_node,
23322 pv2df_type_node, V2DF_type_node,
23325 tree v4df_ftype_v2df
23326 = build_function_type_list (V4DF_type_node,
23329 tree v8sf_ftype_v4sf
23330 = build_function_type_list (V8SF_type_node,
23333 tree v8si_ftype_v4si
23334 = build_function_type_list (V8SI_type_node,
23337 tree v2df_ftype_v4df
23338 = build_function_type_list (V2DF_type_node,
23341 tree v4sf_ftype_v8sf
23342 = build_function_type_list (V4SF_type_node,
23345 tree v4si_ftype_v8si
23346 = build_function_type_list (V4SI_type_node,
23349 tree int_ftype_v4df
23350 = build_function_type_list (integer_type_node,
23353 tree int_ftype_v8sf
23354 = build_function_type_list (integer_type_node,
23357 tree int_ftype_v8sf_v8sf
23358 = build_function_type_list (integer_type_node,
23359 V8SF_type_node, V8SF_type_node,
23361 tree int_ftype_v4di_v4di
23362 = build_function_type_list (integer_type_node,
23363 V4DI_type_node, V4DI_type_node,
23365 tree int_ftype_v4df_v4df
23366 = build_function_type_list (integer_type_node,
23367 V4DF_type_node, V4DF_type_node,
23369 tree v8sf_ftype_v8sf_v8si
23370 = build_function_type_list (V8SF_type_node,
23371 V8SF_type_node, V8SI_type_node,
23373 tree v4df_ftype_v4df_v4di
23374 = build_function_type_list (V4DF_type_node,
23375 V4DF_type_node, V4DI_type_node,
23377 tree v4sf_ftype_v4sf_v4si
23378 = build_function_type_list (V4SF_type_node,
23379 V4SF_type_node, V4SI_type_node, NULL_TREE);
23380 tree v2df_ftype_v2df_v2di
23381 = build_function_type_list (V2DF_type_node,
23382 V2DF_type_node, V2DI_type_node, NULL_TREE);
23386 /* Add all special builtins with variable number of operands. */
23387 for (i = 0, d = bdesc_special_args;
23388 i < ARRAY_SIZE (bdesc_special_args);
23396 switch ((enum ix86_special_builtin_type) d->flag)
23398 case VOID_FTYPE_VOID:
23399 type = void_ftype_void;
23401 case V32QI_FTYPE_PCCHAR:
23402 type = v32qi_ftype_pcchar;
23404 case V16QI_FTYPE_PCCHAR:
23405 type = v16qi_ftype_pcchar;
23407 case V8SF_FTYPE_PCV4SF:
23408 type = v8sf_ftype_pcv4sf;
23410 case V8SF_FTYPE_PCFLOAT:
23411 type = v8sf_ftype_pcfloat;
23413 case V4DF_FTYPE_PCV2DF:
23414 type = v4df_ftype_pcv2df;
23416 case V4DF_FTYPE_PCDOUBLE:
23417 type = v4df_ftype_pcdouble;
23419 case V4SF_FTYPE_PCFLOAT:
23420 type = v4sf_ftype_pcfloat;
23422 case V2DI_FTYPE_PV2DI:
23423 type = v2di_ftype_pv2di;
23425 case V2DF_FTYPE_PCDOUBLE:
23426 type = v2df_ftype_pcdouble;
23428 case V8SF_FTYPE_PCV8SF_V8SF:
23429 type = v8sf_ftype_pcv8sf_v8sf;
23431 case V4DF_FTYPE_PCV4DF_V4DF:
23432 type = v4df_ftype_pcv4df_v4df;
23434 case V4SF_FTYPE_V4SF_PCV2SF:
23435 type = v4sf_ftype_v4sf_pcv2sf;
23437 case V4SF_FTYPE_PCV4SF_V4SF:
23438 type = v4sf_ftype_pcv4sf_v4sf;
23440 case V2DF_FTYPE_V2DF_PCDOUBLE:
23441 type = v2df_ftype_v2df_pcdouble;
23443 case V2DF_FTYPE_PCV2DF_V2DF:
23444 type = v2df_ftype_pcv2df_v2df;
23446 case VOID_FTYPE_PV2SF_V4SF:
23447 type = void_ftype_pv2sf_v4sf;
23449 case VOID_FTYPE_PV4DI_V4DI:
23450 type = void_ftype_pv4di_v4di;
23452 case VOID_FTYPE_PV2DI_V2DI:
23453 type = void_ftype_pv2di_v2di;
23455 case VOID_FTYPE_PCHAR_V32QI:
23456 type = void_ftype_pchar_v32qi;
23458 case VOID_FTYPE_PCHAR_V16QI:
23459 type = void_ftype_pchar_v16qi;
23461 case VOID_FTYPE_PFLOAT_V8SF:
23462 type = void_ftype_pfloat_v8sf;
23464 case VOID_FTYPE_PFLOAT_V4SF:
23465 type = void_ftype_pfloat_v4sf;
23467 case VOID_FTYPE_PDOUBLE_V4DF:
23468 type = void_ftype_pdouble_v4df;
23470 case VOID_FTYPE_PDOUBLE_V2DF:
23471 type = void_ftype_pdouble_v2df;
23473 case VOID_FTYPE_PDI_DI:
23474 type = void_ftype_pdi_di;
23476 case VOID_FTYPE_PINT_INT:
23477 type = void_ftype_pint_int;
23479 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23480 type = void_ftype_pv8sf_v8sf_v8sf;
23482 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23483 type = void_ftype_pv4df_v4df_v4df;
23485 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23486 type = void_ftype_pv4sf_v4sf_v4sf;
23488 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23489 type = void_ftype_pv2df_v2df_v2df;
23492 gcc_unreachable ();
23495 def_builtin (d->mask, d->name, type, d->code);
23498 /* Add all builtins with variable number of operands. */
23499 for (i = 0, d = bdesc_args;
23500 i < ARRAY_SIZE (bdesc_args);
23508 switch ((enum ix86_builtin_type) d->flag)
23510 case FLOAT_FTYPE_FLOAT:
23511 type = float_ftype_float;
23513 case INT_FTYPE_V8SF_V8SF_PTEST:
23514 type = int_ftype_v8sf_v8sf;
23516 case INT_FTYPE_V4DI_V4DI_PTEST:
23517 type = int_ftype_v4di_v4di;
23519 case INT_FTYPE_V4DF_V4DF_PTEST:
23520 type = int_ftype_v4df_v4df;
23522 case INT_FTYPE_V4SF_V4SF_PTEST:
23523 type = int_ftype_v4sf_v4sf;
23525 case INT_FTYPE_V2DI_V2DI_PTEST:
23526 type = int_ftype_v2di_v2di;
23528 case INT_FTYPE_V2DF_V2DF_PTEST:
23529 type = int_ftype_v2df_v2df;
23531 case INT64_FTYPE_V4SF:
23532 type = int64_ftype_v4sf;
23534 case INT64_FTYPE_V2DF:
23535 type = int64_ftype_v2df;
23537 case INT_FTYPE_V16QI:
23538 type = int_ftype_v16qi;
23540 case INT_FTYPE_V8QI:
23541 type = int_ftype_v8qi;
23543 case INT_FTYPE_V8SF:
23544 type = int_ftype_v8sf;
23546 case INT_FTYPE_V4DF:
23547 type = int_ftype_v4df;
23549 case INT_FTYPE_V4SF:
23550 type = int_ftype_v4sf;
23552 case INT_FTYPE_V2DF:
23553 type = int_ftype_v2df;
23555 case V16QI_FTYPE_V16QI:
23556 type = v16qi_ftype_v16qi;
23558 case V8SI_FTYPE_V8SF:
23559 type = v8si_ftype_v8sf;
23561 case V8SI_FTYPE_V4SI:
23562 type = v8si_ftype_v4si;
23564 case V8HI_FTYPE_V8HI:
23565 type = v8hi_ftype_v8hi;
23567 case V8HI_FTYPE_V16QI:
23568 type = v8hi_ftype_v16qi;
23570 case V8QI_FTYPE_V8QI:
23571 type = v8qi_ftype_v8qi;
23573 case V8SF_FTYPE_V8SF:
23574 type = v8sf_ftype_v8sf;
23576 case V8SF_FTYPE_V8SI:
23577 type = v8sf_ftype_v8si;
23579 case V8SF_FTYPE_V4SF:
23580 type = v8sf_ftype_v4sf;
23582 case V4SI_FTYPE_V4DF:
23583 type = v4si_ftype_v4df;
23585 case V4SI_FTYPE_V4SI:
23586 type = v4si_ftype_v4si;
23588 case V4SI_FTYPE_V16QI:
23589 type = v4si_ftype_v16qi;
23591 case V4SI_FTYPE_V8SI:
23592 type = v4si_ftype_v8si;
23594 case V4SI_FTYPE_V8HI:
23595 type = v4si_ftype_v8hi;
23597 case V4SI_FTYPE_V4SF:
23598 type = v4si_ftype_v4sf;
23600 case V4SI_FTYPE_V2DF:
23601 type = v4si_ftype_v2df;
23603 case V4HI_FTYPE_V4HI:
23604 type = v4hi_ftype_v4hi;
23606 case V4DF_FTYPE_V4DF:
23607 type = v4df_ftype_v4df;
23609 case V4DF_FTYPE_V4SI:
23610 type = v4df_ftype_v4si;
23612 case V4DF_FTYPE_V4SF:
23613 type = v4df_ftype_v4sf;
23615 case V4DF_FTYPE_V2DF:
23616 type = v4df_ftype_v2df;
23618 case V4SF_FTYPE_V4SF:
23619 case V4SF_FTYPE_V4SF_VEC_MERGE:
23620 type = v4sf_ftype_v4sf;
23622 case V4SF_FTYPE_V8SF:
23623 type = v4sf_ftype_v8sf;
23625 case V4SF_FTYPE_V4SI:
23626 type = v4sf_ftype_v4si;
23628 case V4SF_FTYPE_V4DF:
23629 type = v4sf_ftype_v4df;
23631 case V4SF_FTYPE_V2DF:
23632 type = v4sf_ftype_v2df;
23634 case V2DI_FTYPE_V2DI:
23635 type = v2di_ftype_v2di;
23637 case V2DI_FTYPE_V16QI:
23638 type = v2di_ftype_v16qi;
23640 case V2DI_FTYPE_V8HI:
23641 type = v2di_ftype_v8hi;
23643 case V2DI_FTYPE_V4SI:
23644 type = v2di_ftype_v4si;
23646 case V2SI_FTYPE_V2SI:
23647 type = v2si_ftype_v2si;
23649 case V2SI_FTYPE_V4SF:
23650 type = v2si_ftype_v4sf;
23652 case V2SI_FTYPE_V2DF:
23653 type = v2si_ftype_v2df;
23655 case V2SI_FTYPE_V2SF:
23656 type = v2si_ftype_v2sf;
23658 case V2DF_FTYPE_V4DF:
23659 type = v2df_ftype_v4df;
23661 case V2DF_FTYPE_V4SF:
23662 type = v2df_ftype_v4sf;
23664 case V2DF_FTYPE_V2DF:
23665 case V2DF_FTYPE_V2DF_VEC_MERGE:
23666 type = v2df_ftype_v2df;
23668 case V2DF_FTYPE_V2SI:
23669 type = v2df_ftype_v2si;
23671 case V2DF_FTYPE_V4SI:
23672 type = v2df_ftype_v4si;
23674 case V2SF_FTYPE_V2SF:
23675 type = v2sf_ftype_v2sf;
23677 case V2SF_FTYPE_V2SI:
23678 type = v2sf_ftype_v2si;
23680 case V16QI_FTYPE_V16QI_V16QI:
23681 type = v16qi_ftype_v16qi_v16qi;
23683 case V16QI_FTYPE_V8HI_V8HI:
23684 type = v16qi_ftype_v8hi_v8hi;
23686 case V8QI_FTYPE_V8QI_V8QI:
23687 type = v8qi_ftype_v8qi_v8qi;
23689 case V8QI_FTYPE_V4HI_V4HI:
23690 type = v8qi_ftype_v4hi_v4hi;
23692 case V8HI_FTYPE_V8HI_V8HI:
23693 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23694 type = v8hi_ftype_v8hi_v8hi;
23696 case V8HI_FTYPE_V16QI_V16QI:
23697 type = v8hi_ftype_v16qi_v16qi;
23699 case V8HI_FTYPE_V4SI_V4SI:
23700 type = v8hi_ftype_v4si_v4si;
23702 case V8HI_FTYPE_V8HI_SI_COUNT:
23703 type = v8hi_ftype_v8hi_int;
23705 case V8SF_FTYPE_V8SF_V8SF:
23706 type = v8sf_ftype_v8sf_v8sf;
23708 case V8SF_FTYPE_V8SF_V8SI:
23709 type = v8sf_ftype_v8sf_v8si;
23711 case V4SI_FTYPE_V4SI_V4SI:
23712 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23713 type = v4si_ftype_v4si_v4si;
23715 case V4SI_FTYPE_V8HI_V8HI:
23716 type = v4si_ftype_v8hi_v8hi;
23718 case V4SI_FTYPE_V4SF_V4SF:
23719 type = v4si_ftype_v4sf_v4sf;
23721 case V4SI_FTYPE_V2DF_V2DF:
23722 type = v4si_ftype_v2df_v2df;
23724 case V4SI_FTYPE_V4SI_SI_COUNT:
23725 type = v4si_ftype_v4si_int;
23727 case V4HI_FTYPE_V4HI_V4HI:
23728 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23729 type = v4hi_ftype_v4hi_v4hi;
23731 case V4HI_FTYPE_V8QI_V8QI:
23732 type = v4hi_ftype_v8qi_v8qi;
23734 case V4HI_FTYPE_V2SI_V2SI:
23735 type = v4hi_ftype_v2si_v2si;
23737 case V4HI_FTYPE_V4HI_SI_COUNT:
23738 type = v4hi_ftype_v4hi_int;
23740 case V4DF_FTYPE_V4DF_V4DF:
23741 type = v4df_ftype_v4df_v4df;
23743 case V4DF_FTYPE_V4DF_V4DI:
23744 type = v4df_ftype_v4df_v4di;
23746 case V4SF_FTYPE_V4SF_V4SF:
23747 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23748 type = v4sf_ftype_v4sf_v4sf;
23750 case V4SF_FTYPE_V4SF_V4SI:
23751 type = v4sf_ftype_v4sf_v4si;
23753 case V4SF_FTYPE_V4SF_V2SI:
23754 type = v4sf_ftype_v4sf_v2si;
23756 case V4SF_FTYPE_V4SF_V2DF:
23757 type = v4sf_ftype_v4sf_v2df;
23759 case V4SF_FTYPE_V4SF_DI:
23760 type = v4sf_ftype_v4sf_int64;
23762 case V4SF_FTYPE_V4SF_SI:
23763 type = v4sf_ftype_v4sf_int;
23765 case V2DI_FTYPE_V2DI_V2DI:
23766 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23767 type = v2di_ftype_v2di_v2di;
23769 case V2DI_FTYPE_V16QI_V16QI:
23770 type = v2di_ftype_v16qi_v16qi;
23772 case V2DI_FTYPE_V4SI_V4SI:
23773 type = v2di_ftype_v4si_v4si;
23775 case V2DI_FTYPE_V2DI_V16QI:
23776 type = v2di_ftype_v2di_v16qi;
23778 case V2DI_FTYPE_V2DF_V2DF:
23779 type = v2di_ftype_v2df_v2df;
23781 case V2DI_FTYPE_V2DI_SI_COUNT:
23782 type = v2di_ftype_v2di_int;
23784 case V2SI_FTYPE_V2SI_V2SI:
23785 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23786 type = v2si_ftype_v2si_v2si;
23788 case V2SI_FTYPE_V4HI_V4HI:
23789 type = v2si_ftype_v4hi_v4hi;
23791 case V2SI_FTYPE_V2SF_V2SF:
23792 type = v2si_ftype_v2sf_v2sf;
23794 case V2SI_FTYPE_V2SI_SI_COUNT:
23795 type = v2si_ftype_v2si_int;
23797 case V2DF_FTYPE_V2DF_V2DF:
23798 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23799 type = v2df_ftype_v2df_v2df;
23801 case V2DF_FTYPE_V2DF_V4SF:
23802 type = v2df_ftype_v2df_v4sf;
23804 case V2DF_FTYPE_V2DF_V2DI:
23805 type = v2df_ftype_v2df_v2di;
23807 case V2DF_FTYPE_V2DF_DI:
23808 type = v2df_ftype_v2df_int64;
23810 case V2DF_FTYPE_V2DF_SI:
23811 type = v2df_ftype_v2df_int;
23813 case V2SF_FTYPE_V2SF_V2SF:
23814 type = v2sf_ftype_v2sf_v2sf;
23816 case V1DI_FTYPE_V1DI_V1DI:
23817 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23818 type = v1di_ftype_v1di_v1di;
23820 case V1DI_FTYPE_V8QI_V8QI:
23821 type = v1di_ftype_v8qi_v8qi;
23823 case V1DI_FTYPE_V2SI_V2SI:
23824 type = v1di_ftype_v2si_v2si;
23826 case V1DI_FTYPE_V1DI_SI_COUNT:
23827 type = v1di_ftype_v1di_int;
23829 case UINT64_FTYPE_UINT64_UINT64:
23830 type = uint64_ftype_uint64_uint64;
23832 case UINT_FTYPE_UINT_UINT:
23833 type = unsigned_ftype_unsigned_unsigned;
23835 case UINT_FTYPE_UINT_USHORT:
23836 type = unsigned_ftype_unsigned_ushort;
23838 case UINT_FTYPE_UINT_UCHAR:
23839 type = unsigned_ftype_unsigned_uchar;
23841 case V8HI_FTYPE_V8HI_INT:
23842 type = v8hi_ftype_v8hi_int;
23844 case V8SF_FTYPE_V8SF_INT:
23845 type = v8sf_ftype_v8sf_int;
23847 case V4SI_FTYPE_V4SI_INT:
23848 type = v4si_ftype_v4si_int;
23850 case V4SI_FTYPE_V8SI_INT:
23851 type = v4si_ftype_v8si_int;
23853 case V4HI_FTYPE_V4HI_INT:
23854 type = v4hi_ftype_v4hi_int;
23856 case V4DF_FTYPE_V4DF_INT:
23857 type = v4df_ftype_v4df_int;
23859 case V4SF_FTYPE_V4SF_INT:
23860 type = v4sf_ftype_v4sf_int;
23862 case V4SF_FTYPE_V8SF_INT:
23863 type = v4sf_ftype_v8sf_int;
23865 case V2DI_FTYPE_V2DI_INT:
23866 case V2DI2TI_FTYPE_V2DI_INT:
23867 type = v2di_ftype_v2di_int;
23869 case V2DF_FTYPE_V2DF_INT:
23870 type = v2df_ftype_v2df_int;
23872 case V2DF_FTYPE_V4DF_INT:
23873 type = v2df_ftype_v4df_int;
23875 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23876 type = v16qi_ftype_v16qi_v16qi_v16qi;
23878 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23879 type = v8sf_ftype_v8sf_v8sf_v8sf;
23881 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23882 type = v4df_ftype_v4df_v4df_v4df;
23884 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23885 type = v4sf_ftype_v4sf_v4sf_v4sf;
23887 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23888 type = v2df_ftype_v2df_v2df_v2df;
23890 case V16QI_FTYPE_V16QI_V16QI_INT:
23891 type = v16qi_ftype_v16qi_v16qi_int;
23893 case V8SI_FTYPE_V8SI_V8SI_INT:
23894 type = v8si_ftype_v8si_v8si_int;
23896 case V8SI_FTYPE_V8SI_V4SI_INT:
23897 type = v8si_ftype_v8si_v4si_int;
23899 case V8HI_FTYPE_V8HI_V8HI_INT:
23900 type = v8hi_ftype_v8hi_v8hi_int;
23902 case V8SF_FTYPE_V8SF_V8SF_INT:
23903 type = v8sf_ftype_v8sf_v8sf_int;
23905 case V8SF_FTYPE_V8SF_V4SF_INT:
23906 type = v8sf_ftype_v8sf_v4sf_int;
23908 case V4SI_FTYPE_V4SI_V4SI_INT:
23909 type = v4si_ftype_v4si_v4si_int;
23911 case V4DF_FTYPE_V4DF_V4DF_INT:
23912 type = v4df_ftype_v4df_v4df_int;
23914 case V4DF_FTYPE_V4DF_V2DF_INT:
23915 type = v4df_ftype_v4df_v2df_int;
23917 case V4SF_FTYPE_V4SF_V4SF_INT:
23918 type = v4sf_ftype_v4sf_v4sf_int;
23920 case V2DI_FTYPE_V2DI_V2DI_INT:
23921 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23922 type = v2di_ftype_v2di_v2di_int;
23924 case V2DF_FTYPE_V2DF_V2DF_INT:
23925 type = v2df_ftype_v2df_v2df_int;
23927 case V2DI_FTYPE_V2DI_UINT_UINT:
23928 type = v2di_ftype_v2di_unsigned_unsigned;
23930 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23931 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23933 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23934 type = v1di_ftype_v1di_v1di_int;
23937 gcc_unreachable ();
23940 def_builtin_const (d->mask, d->name, type, d->code);
23943 /* pcmpestr[im] insns. */
23944 for (i = 0, d = bdesc_pcmpestr;
23945 i < ARRAY_SIZE (bdesc_pcmpestr);
23948 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23949 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23951 ftype = int_ftype_v16qi_int_v16qi_int_int;
23952 def_builtin_const (d->mask, d->name, ftype, d->code);
23955 /* pcmpistr[im] insns. */
23956 for (i = 0, d = bdesc_pcmpistr;
23957 i < ARRAY_SIZE (bdesc_pcmpistr);
23960 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23961 ftype = v16qi_ftype_v16qi_v16qi_int;
23963 ftype = int_ftype_v16qi_v16qi_int;
23964 def_builtin_const (d->mask, d->name, ftype, d->code);
23967 /* comi/ucomi insns. */
23968 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23969 if (d->mask == OPTION_MASK_ISA_SSE2)
23970 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23972 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23975 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23976 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23978 /* SSE or 3DNow!A */
23979 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23982 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23984 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23985 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23988 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23989 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23992 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23993 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23994 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23995 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23996 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23997 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
24000 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
24003 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
24004 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
24006 /* Access to the vec_init patterns. */
24007 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
24008 integer_type_node, NULL_TREE);
24009 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
24011 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
24012 short_integer_type_node,
24013 short_integer_type_node,
24014 short_integer_type_node, NULL_TREE);
24015 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
24017 ftype = build_function_type_list (V8QI_type_node, char_type_node,
24018 char_type_node, char_type_node,
24019 char_type_node, char_type_node,
24020 char_type_node, char_type_node,
24021 char_type_node, NULL_TREE);
24022 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
24024 /* Access to the vec_extract patterns. */
24025 ftype = build_function_type_list (double_type_node, V2DF_type_node,
24026 integer_type_node, NULL_TREE);
24027 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
24029 ftype = build_function_type_list (long_long_integer_type_node,
24030 V2DI_type_node, integer_type_node,
24032 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
24034 ftype = build_function_type_list (float_type_node, V4SF_type_node,
24035 integer_type_node, NULL_TREE);
24036 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
24038 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
24039 integer_type_node, NULL_TREE);
24040 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
24042 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
24043 integer_type_node, NULL_TREE);
24044 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
24046 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
24047 integer_type_node, NULL_TREE);
24048 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
24050 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
24051 integer_type_node, NULL_TREE);
24052 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
24054 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
24055 integer_type_node, NULL_TREE);
24056 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
24058 /* Access to the vec_set patterns. */
24059 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
24061 integer_type_node, NULL_TREE);
24062 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
24064 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
24066 integer_type_node, NULL_TREE);
24067 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
24069 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
24071 integer_type_node, NULL_TREE);
24072 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
24074 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
24076 integer_type_node, NULL_TREE);
24077 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
24079 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
24081 integer_type_node, NULL_TREE);
24082 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
24084 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
24086 integer_type_node, NULL_TREE);
24087 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
24089 /* Add SSE5 multi-arg argument instructions */
24090 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24092 tree mtype = NULL_TREE;
24097 switch ((enum multi_arg_type)d->flag)
24099 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
24100 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
24101 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
24102 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
24103 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
24104 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
24105 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
24106 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
24107 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
24108 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
24109 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
24110 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
24111 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
24112 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
24113 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
24114 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
24115 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
24116 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
24117 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
24118 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
24119 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
24120 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
24121 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
24122 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
24123 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
24124 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
24125 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
24126 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
24127 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
24128 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
24129 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
24130 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
24131 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
24132 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
24133 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
24134 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
24135 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
24136 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
24137 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
24138 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
24139 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
24140 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
24141 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
24142 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
24143 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
24144 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
24145 case MULTI_ARG_UNKNOWN:
24147 gcc_unreachable ();
24151 def_builtin_const (d->mask, d->name, mtype, d->code);
24155 /* Internal method for ix86_init_builtins.  */
/* Registers both ABI flavors of the va_list builtins, so that code can use
   __builtin_ms_va_* and __builtin_sysv_va_* regardless of the default ABI.
   Each builtin is tagged with the matching "ms_abi" / "sysv_abi" attribute
   list built below.
   NOTE(review): this is an extracted listing; some original lines (e.g. the
   "static void" line, the opening brace, and parts of the type-building
   statements) were dropped and are not visible here.  */
24158 ix86_init_builtins_va_builtins_abi (void)
24160 tree ms_va_ref, sysv_va_ref;
24161 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
24162 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
24163 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
24164 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists that force the named calling convention on each builtin.  */
24168 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
24169 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* ms va_list is passed by reference; the sysv one via a pointer to its
   element type (the assignment target line for sysv_va_ref is not visible
   in this extract).  */
24170 ms_va_ref = build_reference_type (ms_va_list_type_node);
24172 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Function types: va_end takes (va_list ref); va_start is varargs;
   va_copy takes (dest ref, src).  */
24175 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24176 fnvoid_va_start_ms =
24177 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24178 fnvoid_va_end_sysv =
24179 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
24180 fnvoid_va_start_sysv =
24181 build_varargs_function_type_list (void_type_node, sysv_va_ref,
24183 fnvoid_va_copy_ms =
24184 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
24186 fnvoid_va_copy_sysv =
24187 build_function_type_list (void_type_node, sysv_va_ref,
24188 sysv_va_ref, NULL_TREE);
/* Register the six builtins, mapping each onto the generic
   BUILT_IN_VA_{START,END,COPY} codes with the ABI attribute attached.  */
24190 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
24191 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
24192 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
24193 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
24194 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
24195 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
24196 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
24197 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24198 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
24199 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24200 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
24201 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level target hook body: registers the x86-specific scalar float
   types (__float80, __float128) and their TFmode builtins, then delegates
   to the MMX/SSE and va_list builtin initializers.
   NOTE(review): extracted listing; the return-type line, braces, and some
   argument lines (e.g. the registered type names, NULL_TREE terminators)
   were dropped and are not visible here.  */
24205 ix86_init_builtins (void)
24207 tree float128_type_node = make_node (REAL_TYPE);
/* When long double already is the 80-bit XFmode type, just re-register it
   under the builtin name (name argument not visible in this extract).  */
24210 /* The __float80 type. */
24211 if (TYPE_MODE (long_double_type_node) == XFmode)
24212 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
/* Otherwise build a distinct 80-bit REAL_TYPE and register that.  */
24216 /* The __float80 type. */
24217 tree float80_type_node = make_node (REAL_TYPE);
24219 TYPE_PRECISION (float80_type_node) = 80;
24220 layout_type (float80_type_node);
24221 (*lang_hooks.types.register_builtin_type) (float80_type_node,
24225 /* The __float128 type. */
24226 TYPE_PRECISION (float128_type_node) = 128;
24227 layout_type (float128_type_node);
24228 (*lang_hooks.types.register_builtin_type) (float128_type_node,
/* Builtins operating on the __float128 (TFmode) type.  __builtin_infq and
   __builtin_huge_valq share the zero-argument float128 function type.  */
24231 /* TFmode support builtins. */
24232 ftype = build_function_type (float128_type_node, void_list_node);
24233 decl = add_builtin_function ("__builtin_infq", ftype,
24234 IX86_BUILTIN_INFQ, BUILT_IN_MD,
24236 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
24238 decl = add_builtin_function ("__builtin_huge_valq", ftype,
24239 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
24241 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
/* fabsq/copysignq get library fallback names (__fabstf2, __copysigntf3)
   so they can be expanded as ordinary calls when SSE2 is unavailable.  */
24243 /* We will expand them to normal call if SSE2 isn't available since
24244 they are used by libgcc. */
24245 ftype = build_function_type_list (float128_type_node,
24246 float128_type_node,
24248 decl = add_builtin_function ("__builtin_fabsq", ftype,
24249 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
24250 "__fabstf2", NULL_TREE);
24251 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
24252 TREE_READONLY (decl) = 1;
24254 ftype = build_function_type_list (float128_type_node,
24255 float128_type_node,
24256 float128_type_node,
24258 decl = add_builtin_function ("__builtin_copysignq", ftype,
24259 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
24260 "__copysigntf3", NULL_TREE);
24261 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
24262 TREE_READONLY (decl) = 1;
24264 ix86_init_mmx_sse_builtins ();
/* Guard condition for the following call is not visible in this extract;
   presumably it is TARGET_64BIT — verify against the full source.  */
24266 ix86_init_builtins_va_builtins_abi ();
24269 /* Errors in the source file can cause expand_expr to return const0_rtx
24270 where we expect a vector. To avoid crashing, use one of the vector
24271 clear instructions. */
/* Maps a scalar const0_rtx onto the all-zeros constant of the requested
   vector MODE; any other operand passes through untouched.
   NOTE(review): the "static rtx" line, braces, and the trailing
   "return x;" were dropped by extraction and are not visible here.  */
24273 safe_vector_operand (rtx x, enum machine_mode mode)
24275 if (x == const0_rtx)
24276 x = CONST0_RTX (mode);
24280 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expands a two-operand builtin: materializes both call arguments, makes
   sure each satisfies the insn's operand predicate (copying to a register
   of the right mode when it does not), then emits ICODE's pattern into
   TARGET.  NOTE(review): extracted listing — the return-type line, braces,
   and the trailing failure check / emit / return lines are not visible.  */
24283 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
24286 tree arg0 = CALL_EXPR_ARG (exp, 0);
24287 tree arg1 = CALL_EXPR_ARG (exp, 1);
24288 rtx op0 = expand_normal (arg0);
24289 rtx op1 = expand_normal (arg1);
24290 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24291 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24292 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Replace scalar const0_rtx by a vector zero where a vector is expected.  */
24294 if (VECTOR_MODE_P (mode0))
24295 op0 = safe_vector_operand (op0, mode0);
24296 if (VECTOR_MODE_P (mode1))
24297 op1 = safe_vector_operand (op1, mode1);
/* Use a fresh pseudo when the caller's TARGET is absent or unsuitable.  */
24299 if (optimize || !target
24300 || GET_MODE (target) != tmode
24301 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24302 target = gen_reg_rtx (tmode);
/* An SImode second operand feeding a TImode insn operand is widened by
   loading it into a V4SI register and taking the TImode lowpart.  */
24304 if (GET_MODE (op1) == SImode && mode1 == TImode)
24306 rtx x = gen_reg_rtx (V4SImode);
24307 emit_insn (gen_sse2_loadd (x, op1));
24308 op1 = gen_lowpart (TImode, x);
24311 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
24312 op0 = copy_to_mode_reg (mode0, op0);
24313 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
24314 op1 = copy_to_mode_reg (mode1, op1);
24316 pat = GEN_FCN (icode) (target, op0, op1);
24325 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expands the SSE5 multi-argument builtins.  M_TYPE selects how many
   operands the insn takes and whether the last argument is an immediate
   (..._IMM), the insn is a comparison taking a comparison rtx (..._CMP),
   or a test form (..._TF).  SUB_CODE carries the rtx comparison code for
   the comparison/test forms.
   NOTE(review): extracted listing — the return-type line, braces, the
   "nargs = N;" assignments inside each switch group, "break;" statements,
   and the final emit/return lines were dropped and are not visible.  */
24328 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
24329 enum multi_arg_type m_type,
24330 enum rtx_code sub_code)
24335 bool comparison_p = false;
24337 bool last_arg_constant = false;
24338 int num_memory = 0;
24341 enum machine_mode mode;
24344 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Three-operand forms.  */
24348 case MULTI_ARG_3_SF:
24349 case MULTI_ARG_3_DF:
24350 case MULTI_ARG_3_DI:
24351 case MULTI_ARG_3_SI:
24352 case MULTI_ARG_3_SI_DI:
24353 case MULTI_ARG_3_HI:
24354 case MULTI_ARG_3_HI_SI:
24355 case MULTI_ARG_3_QI:
24356 case MULTI_ARG_3_PERMPS:
24357 case MULTI_ARG_3_PERMPD:
/* Plain two-operand forms.  */
24361 case MULTI_ARG_2_SF:
24362 case MULTI_ARG_2_DF:
24363 case MULTI_ARG_2_DI:
24364 case MULTI_ARG_2_SI:
24365 case MULTI_ARG_2_HI:
24366 case MULTI_ARG_2_QI:
/* Two-operand forms whose second operand must be an immediate.  */
24370 case MULTI_ARG_2_DI_IMM:
24371 case MULTI_ARG_2_SI_IMM:
24372 case MULTI_ARG_2_HI_IMM:
24373 case MULTI_ARG_2_QI_IMM:
24375 last_arg_constant = true;
/* One-operand (unary and widening-conversion) forms.  */
24378 case MULTI_ARG_1_SF:
24379 case MULTI_ARG_1_DF:
24380 case MULTI_ARG_1_DI:
24381 case MULTI_ARG_1_SI:
24382 case MULTI_ARG_1_HI:
24383 case MULTI_ARG_1_QI:
24384 case MULTI_ARG_1_SI_DI:
24385 case MULTI_ARG_1_HI_DI:
24386 case MULTI_ARG_1_HI_SI:
24387 case MULTI_ARG_1_QI_DI:
24388 case MULTI_ARG_1_QI_SI:
24389 case MULTI_ARG_1_QI_HI:
24390 case MULTI_ARG_1_PH2PS:
24391 case MULTI_ARG_1_PS2PH:
/* Comparison forms: the emitted pattern embeds a comparison rtx.  */
24395 case MULTI_ARG_2_SF_CMP:
24396 case MULTI_ARG_2_DF_CMP:
24397 case MULTI_ARG_2_DI_CMP:
24398 case MULTI_ARG_2_SI_CMP:
24399 case MULTI_ARG_2_HI_CMP:
24400 case MULTI_ARG_2_QI_CMP:
24402 comparison_p = true;
/* Test forms (sub_code passed as an integer operand below).  */
24405 case MULTI_ARG_2_SF_TF:
24406 case MULTI_ARG_2_DF_TF:
24407 case MULTI_ARG_2_DI_TF:
24408 case MULTI_ARG_2_SI_TF:
24409 case MULTI_ARG_2_HI_TF:
24410 case MULTI_ARG_2_QI_TF:
24415 case MULTI_ARG_UNKNOWN:
24417 gcc_unreachable ();
24420 if (optimize || !target
24421 || GET_MODE (target) != tmode
24422 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24423 target = gen_reg_rtx (tmode);
24425 gcc_assert (nargs <= 4);
/* Expand each argument; comparison forms shift operand numbering by one
   because operand 1 of the insn is the comparison rtx itself.  */
24427 for (i = 0; i < nargs; i++)
24429 tree arg = CALL_EXPR_ARG (exp, i);
24430 rtx op = expand_normal (arg);
24431 int adjust = (comparison_p) ? 1 : 0;
24432 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24434 if (last_arg_constant && i == nargs-1)
24436 if (!CONST_INT_P (op))
24438 error ("last argument must be an immediate");
24439 return gen_reg_rtx (tmode);
24444 if (VECTOR_MODE_P (mode))
24445 op = safe_vector_operand (op, mode);
24447 /* If we aren't optimizing, only allow one memory operand to be
24449 if (memory_operand (op, mode))
24452 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24455 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24457 op = force_reg (mode, op);
24461 args[i].mode = mode;
/* Emit the pattern with the operand count selected above (the switch on
   nargs is not fully visible in this extract).  */
24467 pat = GEN_FCN (icode) (target, args[0].op);
24472 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24473 GEN_INT ((int)sub_code));
24474 else if (! comparison_p)
24475 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24478 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24482 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24487 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24491 gcc_unreachable ();
24501 /* Subroutine of ix86_expand_builtin to take care of scalar unop
24502 insns with vec_merge. */
/* Expands a one-argument builtin whose insn is a scalar operation merged
   into a vector: the single source is used for both insn inputs.
   NOTE(review): extracted listing — the return-type line, braces, the
   line assigning op1 (presumably op1 = op0 — verify against full source),
   and the final emit/return lines are not visible.  */
24505 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24509 tree arg0 = CALL_EXPR_ARG (exp, 0);
24510 rtx op1, op0 = expand_normal (arg0);
24511 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24512 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24514 if (optimize || !target
24515 || GET_MODE (target) != tmode
24516 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24517 target = gen_reg_rtx (tmode);
24519 if (VECTOR_MODE_P (mode0))
24520 op0 = safe_vector_operand (op0, mode0);
24522 if ((optimize && !register_operand (op0, mode0))
24523 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24524 op0 = copy_to_mode_reg (mode0, op0);
/* Note both operand checks use mode0 — operands 1 and 2 share a mode.  */
24527 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24528 op1 = copy_to_mode_reg (mode0, op1)
24530 pat = GEN_FCN (icode) (target, op0, op1);
24537 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expands an SSE compare builtin: builds a comparison rtx with D's
   comparison code and emits a 4-operand pattern (target, op0, op1, cmp).
   SWAP requests exchanging the operands for comparisons the hardware only
   provides in one direction.
   NOTE(review): extracted listing — the return-type line, braces, the rest
   of the swap sequence, and the final emit/return lines are not visible.  */
24540 ix86_expand_sse_compare (const struct builtin_description *d,
24541 tree exp, rtx target, bool swap)
24544 tree arg0 = CALL_EXPR_ARG (exp, 0);
24545 tree arg1 = CALL_EXPR_ARG (exp, 1);
24546 rtx op0 = expand_normal (arg0);
24547 rtx op1 = expand_normal (arg1);
24549 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24550 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24551 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24552 enum rtx_code comparison = d->comparison;
24554 if (VECTOR_MODE_P (mode0))
24555 op0 = safe_vector_operand (op0, mode0);
24556 if (VECTOR_MODE_P (mode1))
24557 op1 = safe_vector_operand (op1, mode1);
/* Copy op1 through a fresh register while exchanging operands (the
   remainder of the swap is outside this extract).  */
24559 /* Swap operands if we have a comparison that isn't available in
24563 rtx tmp = gen_reg_rtx (mode1);
24564 emit_move_insn (tmp, op1);
24569 if (optimize || !target
24570 || GET_MODE (target) != tmode
24571 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24572 target = gen_reg_rtx (tmode);
24574 if ((optimize && !register_operand (op0, mode0))
24575 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24576 op0 = copy_to_mode_reg (mode0, op0);
24577 if ((optimize && !register_operand (op1, mode1))
24578 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24579 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison itself is passed as an extra rtx operand.  */
24581 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24582 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24589 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expands a comi/ucomi builtin: emits the compare (which sets the flags
   register) and then materializes the comparison result as a QImode
   SET of the low part of an SImode pseudo, returning that pseudo.
   NOTE(review): extracted listing — the return-type line, braces, the
   body of the operand-swap branch, the pat-failure check, and the flags
   register operand of the comparison rtx are not visible.  */
24592 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24596 tree arg0 = CALL_EXPR_ARG (exp, 0);
24597 tree arg1 = CALL_EXPR_ARG (exp, 1);
24598 rtx op0 = expand_normal (arg0);
24599 rtx op1 = expand_normal (arg1);
24600 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24601 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24602 enum rtx_code comparison = d->comparison;
24604 if (VECTOR_MODE_P (mode0))
24605 op0 = safe_vector_operand (op0, mode0);
24606 if (VECTOR_MODE_P (mode1))
24607 op1 = safe_vector_operand (op1, mode1);
24609 /* Swap operands if we have a comparison that isn't available in
24611 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Result register: zero an SImode pseudo, then write only its QImode
   low part so the upper bits stay zero.  */
24618 target = gen_reg_rtx (SImode);
24619 emit_move_insn (target, const0_rtx);
24620 target = gen_rtx_SUBREG (QImode, target, 0);
24622 if ((optimize && !register_operand (op0, mode0))
24623 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24624 op0 = copy_to_mode_reg (mode0, op0);
24625 if ((optimize && !register_operand (op1, mode1))
24626 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24627 op1 = copy_to_mode_reg (mode1, op1);
24629 pat = GEN_FCN (d->icode) (op0, op1);
/* Set the QImode subreg from the comparison of the flags (the flags-reg
   operand lines are outside this extract).  */
24633 emit_insn (gen_rtx_SET (VOIDmode,
24634 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24635 gen_rtx_fmt_ee (comparison, QImode,
24639 return SUBREG_REG (target);
24642 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expands a ptest-style builtin.  Mirrors ix86_expand_sse_comi: emit the
   flag-setting insn, then produce a 0/1 result by setting the QImode low
   part of a zeroed SImode pseudo from D's comparison of the flags.
   NOTE(review): extracted listing — the return-type line, braces, the
   pat-failure check, and the flags-reg operand lines are not visible.  */
24645 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24649 tree arg0 = CALL_EXPR_ARG (exp, 0);
24650 tree arg1 = CALL_EXPR_ARG (exp, 1);
24651 rtx op0 = expand_normal (arg0);
24652 rtx op1 = expand_normal (arg1);
24653 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24654 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24655 enum rtx_code comparison = d->comparison;
24657 if (VECTOR_MODE_P (mode0))
24658 op0 = safe_vector_operand (op0, mode0);
24659 if (VECTOR_MODE_P (mode1))
24660 op1 = safe_vector_operand (op1, mode1);
/* Zero an SImode pseudo and write only its QImode low part below.  */
24662 target = gen_reg_rtx (SImode);
24663 emit_move_insn (target, const0_rtx);
24664 target = gen_rtx_SUBREG (QImode, target, 0);
24666 if ((optimize && !register_operand (op0, mode0))
24667 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24668 op0 = copy_to_mode_reg (mode0, op0);
24669 if ((optimize && !register_operand (op1, mode1))
24670 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24671 op1 = copy_to_mode_reg (mode1, op1);
24673 pat = GEN_FCN (d->icode) (op0, op1);
24677 emit_insn (gen_rtx_SET (VOIDmode,
24678 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24679 gen_rtx_fmt_ee (comparison, QImode,
24683 return SUBREG_REG (target);
24686 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expands the five-argument SSE4.2 explicit-length string-compare
   builtins.  The insn has two outputs (index in tmode0, mask in tmode1);
   depending on D->code either output becomes the builtin's result, and for
   the flag-reading variants the result is instead a 0/1 value derived from
   the flags register identified by d->flag.
   NOTE(review): extracted listing — the return-type line, braces, the
   error-path return after the bad-immediate diagnostic, the pat-failure
   checks, and parts of the flags comparison are not visible.  */
24689 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24690 tree exp, rtx target)
24693 tree arg0 = CALL_EXPR_ARG (exp, 0);
24694 tree arg1 = CALL_EXPR_ARG (exp, 1);
24695 tree arg2 = CALL_EXPR_ARG (exp, 2);
24696 tree arg3 = CALL_EXPR_ARG (exp, 3);
24697 tree arg4 = CALL_EXPR_ARG (exp, 4);
24698 rtx scratch0, scratch1;
24699 rtx op0 = expand_normal (arg0);
24700 rtx op1 = expand_normal (arg1);
24701 rtx op2 = expand_normal (arg2);
24702 rtx op3 = expand_normal (arg3);
24703 rtx op4 = expand_normal (arg4);
/* Operand layout: 0/1 are the two outputs, 2/4 the vector inputs,
   3/5 the explicit lengths, 6 the immediate control byte.  */
24704 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24706 tmode0 = insn_data[d->icode].operand[0].mode;
24707 tmode1 = insn_data[d->icode].operand[1].mode;
24708 modev2 = insn_data[d->icode].operand[2].mode;
24709 modei3 = insn_data[d->icode].operand[3].mode;
24710 modev4 = insn_data[d->icode].operand[4].mode;
24711 modei5 = insn_data[d->icode].operand[5].mode;
24712 modeimm = insn_data[d->icode].operand[6].mode;
24714 if (VECTOR_MODE_P (modev2))
24715 op0 = safe_vector_operand (op0, modev2);
24716 if (VECTOR_MODE_P (modev4))
24717 op2 = safe_vector_operand (op2, modev4);
24719 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24720 op0 = copy_to_mode_reg (modev2, op0);
24721 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24722 op1 = copy_to_mode_reg (modei3, op1);
24723 if ((optimize && !register_operand (op2, modev4))
24724 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24725 op2 = copy_to_mode_reg (modev4, op2);
24726 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24727 op3 = copy_to_mode_reg (modei5, op3);
24729 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24731 error ("the fifth argument must be a 8-bit immediate");
/* pcmpestri: the index output is the result; mask goes to a scratch.  */
24735 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24737 if (optimize || !target
24738 || GET_MODE (target) != tmode0
24739 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24740 target = gen_reg_rtx (tmode0);
24742 scratch1 = gen_reg_rtx (tmode1);
24744 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* pcmpestrm: the mask output is the result; index goes to a scratch.  */
24746 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24748 if (optimize || !target
24749 || GET_MODE (target) != tmode1
24750 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24751 target = gen_reg_rtx (tmode1);
24753 scratch0 = gen_reg_rtx (tmode0);
24755 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-reading variants: both outputs are scratches; the result is the
   comparison of the flags register encoded in d->flag.  */
24759 gcc_assert (d->flag);
24761 scratch0 = gen_reg_rtx (tmode0);
24762 scratch1 = gen_reg_rtx (tmode1);
24764 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24774 target = gen_reg_rtx (SImode);
24775 emit_move_insn (target, const0_rtx);
24776 target = gen_rtx_SUBREG (QImode, target, 0);
24779 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24780 gen_rtx_fmt_ee (EQ, QImode,
24781 gen_rtx_REG ((enum machine_mode) d->flag,
24784 return SUBREG_REG (target);
24791 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expands the three-argument SSE4.2 implicit-length string-compare
   builtins.  Structure parallels ix86_expand_sse_pcmpestr, minus the two
   explicit length operands: operands 0/1 are the index/mask outputs,
   2/3 the vector inputs, 4 the immediate control byte.
   NOTE(review): extracted listing — the return-type line, braces, the
   error-path return, the pat-failure checks, and parts of the flags
   comparison are not visible.  */
24794 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24795 tree exp, rtx target)
24798 tree arg0 = CALL_EXPR_ARG (exp, 0);
24799 tree arg1 = CALL_EXPR_ARG (exp, 1);
24800 tree arg2 = CALL_EXPR_ARG (exp, 2);
24801 rtx scratch0, scratch1;
24802 rtx op0 = expand_normal (arg0);
24803 rtx op1 = expand_normal (arg1);
24804 rtx op2 = expand_normal (arg2);
24805 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24807 tmode0 = insn_data[d->icode].operand[0].mode;
24808 tmode1 = insn_data[d->icode].operand[1].mode;
24809 modev2 = insn_data[d->icode].operand[2].mode;
24810 modev3 = insn_data[d->icode].operand[3].mode;
24811 modeimm = insn_data[d->icode].operand[4].mode;
24813 if (VECTOR_MODE_P (modev2))
24814 op0 = safe_vector_operand (op0, modev2);
24815 if (VECTOR_MODE_P (modev3))
24816 op1 = safe_vector_operand (op1, modev3);
24818 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24819 op0 = copy_to_mode_reg (modev2, op0);
24820 if ((optimize && !register_operand (op1, modev3))
24821 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24822 op1 = copy_to_mode_reg (modev3, op1);
24824 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24826 error ("the third argument must be a 8-bit immediate");
/* pcmpistri: the index output is the result; mask goes to a scratch.  */
24830 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24832 if (optimize || !target
24833 || GET_MODE (target) != tmode0
24834 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24835 target = gen_reg_rtx (tmode0);
24837 scratch1 = gen_reg_rtx (tmode1);
24839 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* pcmpistrm: the mask output is the result; index goes to a scratch.  */
24841 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24843 if (optimize || !target
24844 || GET_MODE (target) != tmode1
24845 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24846 target = gen_reg_rtx (tmode1);
24848 scratch0 = gen_reg_rtx (tmode0);
24850 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-reading variants: result is EQ on the flags register in d->flag.  */
24854 gcc_assert (d->flag);
24856 scratch0 = gen_reg_rtx (tmode0);
24857 scratch1 = gen_reg_rtx (tmode1);
24859 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24869 target = gen_reg_rtx (SImode);
24870 emit_move_insn (target, const0_rtx);
24871 target = gen_rtx_SUBREG (QImode, target, 0);
24874 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24875 gen_rtx_fmt_ee (EQ, QImode,
24876 gen_rtx_REG ((enum machine_mode) d->flag,
24879 return SUBREG_REG (target);
24885 /* Subroutine of ix86_expand_builtin to take care of insns with
24886 variable number of operands. */
/* D describes the builtin (icode, prototype tag, comparison code); EXP is
   the CALL_EXPR being expanded; TARGET is a hint for the result register.
   NOTE(review): this excerpt is elided — some lines of the original
   function body are not shown here.  */
24889 ix86_expand_args_builtin (const struct builtin_description *d,
24890 tree exp, rtx target)
24892 rtx pat, real_target;
24893 unsigned int i, nargs;
24894 unsigned int nargs_constant = 0;
24895 int num_memory = 0;
24899 enum machine_mode mode;
24901 bool last_arg_count = false;
24902 enum insn_code icode = d->icode;
24903 const struct insn_data *insn_p = &insn_data[icode];
24904 enum machine_mode tmode = insn_p->operand[0].mode;
24905 enum machine_mode rmode = VOIDmode;
24907 enum rtx_code comparison = d->comparison;
/* Classify the builtin by its prototype tag: this determines the argument
   count, whether trailing arguments must be immediates (nargs_constant),
   and whether the last argument is a shift count (last_arg_count).  Some
   prototype families are delegated wholesale to dedicated helpers.  */
24909 switch ((enum ix86_builtin_type) d->flag)
/* PTEST-style two-vector predicates: expanded by a dedicated helper.  */
24911 case INT_FTYPE_V8SF_V8SF_PTEST:
24912 case INT_FTYPE_V4DI_V4DI_PTEST:
24913 case INT_FTYPE_V4DF_V4DF_PTEST:
24914 case INT_FTYPE_V4SF_V4SF_PTEST:
24915 case INT_FTYPE_V2DI_V2DI_PTEST:
24916 case INT_FTYPE_V2DF_V2DF_PTEST:
24917 return ix86_expand_sse_ptest (d, exp, target);
/* One-input forms (conversions, unary vector ops).  */
24918 case FLOAT128_FTYPE_FLOAT128:
24919 case FLOAT_FTYPE_FLOAT:
24920 case INT64_FTYPE_V4SF:
24921 case INT64_FTYPE_V2DF:
24922 case INT_FTYPE_V16QI:
24923 case INT_FTYPE_V8QI:
24924 case INT_FTYPE_V8SF:
24925 case INT_FTYPE_V4DF:
24926 case INT_FTYPE_V4SF:
24927 case INT_FTYPE_V2DF:
24928 case V16QI_FTYPE_V16QI:
24929 case V8SI_FTYPE_V8SF:
24930 case V8SI_FTYPE_V4SI:
24931 case V8HI_FTYPE_V8HI:
24932 case V8HI_FTYPE_V16QI:
24933 case V8QI_FTYPE_V8QI:
24934 case V8SF_FTYPE_V8SF:
24935 case V8SF_FTYPE_V8SI:
24936 case V8SF_FTYPE_V4SF:
24937 case V4SI_FTYPE_V4SI:
24938 case V4SI_FTYPE_V16QI:
24939 case V4SI_FTYPE_V4SF:
24940 case V4SI_FTYPE_V8SI:
24941 case V4SI_FTYPE_V8HI:
24942 case V4SI_FTYPE_V4DF:
24943 case V4SI_FTYPE_V2DF:
24944 case V4HI_FTYPE_V4HI:
24945 case V4DF_FTYPE_V4DF:
24946 case V4DF_FTYPE_V4SI:
24947 case V4DF_FTYPE_V4SF:
24948 case V4DF_FTYPE_V2DF:
24949 case V4SF_FTYPE_V4SF:
24950 case V4SF_FTYPE_V4SI:
24951 case V4SF_FTYPE_V8SF:
24952 case V4SF_FTYPE_V4DF:
24953 case V4SF_FTYPE_V2DF:
24954 case V2DI_FTYPE_V2DI:
24955 case V2DI_FTYPE_V16QI:
24956 case V2DI_FTYPE_V8HI:
24957 case V2DI_FTYPE_V4SI:
24958 case V2DF_FTYPE_V2DF:
24959 case V2DF_FTYPE_V4SI:
24960 case V2DF_FTYPE_V4DF:
24961 case V2DF_FTYPE_V4SF:
24962 case V2DF_FTYPE_V2SI:
24963 case V2SI_FTYPE_V2SI:
24964 case V2SI_FTYPE_V4SF:
24965 case V2SI_FTYPE_V2SF:
24966 case V2SI_FTYPE_V2DF:
24967 case V2SF_FTYPE_V2SF:
24968 case V2SF_FTYPE_V2SI:
/* Unary ops that merge the result into the destination's upper elements.  */
24971 case V4SF_FTYPE_V4SF_VEC_MERGE:
24972 case V2DF_FTYPE_V2DF_VEC_MERGE:
24973 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Two-input forms: expanded as a plain binop unless D carries a
   comparison code, in which case the generic path below handles it.  */
24974 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24975 case V16QI_FTYPE_V16QI_V16QI:
24976 case V16QI_FTYPE_V8HI_V8HI:
24977 case V8QI_FTYPE_V8QI_V8QI:
24978 case V8QI_FTYPE_V4HI_V4HI:
24979 case V8HI_FTYPE_V8HI_V8HI:
24980 case V8HI_FTYPE_V16QI_V16QI:
24981 case V8HI_FTYPE_V4SI_V4SI:
24982 case V8SF_FTYPE_V8SF_V8SF:
24983 case V8SF_FTYPE_V8SF_V8SI:
24984 case V4SI_FTYPE_V4SI_V4SI:
24985 case V4SI_FTYPE_V8HI_V8HI:
24986 case V4SI_FTYPE_V4SF_V4SF:
24987 case V4SI_FTYPE_V2DF_V2DF:
24988 case V4HI_FTYPE_V4HI_V4HI:
24989 case V4HI_FTYPE_V8QI_V8QI:
24990 case V4HI_FTYPE_V2SI_V2SI:
24991 case V4DF_FTYPE_V4DF_V4DF:
24992 case V4DF_FTYPE_V4DF_V4DI:
24993 case V4SF_FTYPE_V4SF_V4SF:
24994 case V4SF_FTYPE_V4SF_V4SI:
24995 case V4SF_FTYPE_V4SF_V2SI:
24996 case V4SF_FTYPE_V4SF_V2DF:
24997 case V4SF_FTYPE_V4SF_DI:
24998 case V4SF_FTYPE_V4SF_SI:
24999 case V2DI_FTYPE_V2DI_V2DI:
25000 case V2DI_FTYPE_V16QI_V16QI:
25001 case V2DI_FTYPE_V4SI_V4SI:
25002 case V2DI_FTYPE_V2DI_V16QI:
25003 case V2DI_FTYPE_V2DF_V2DF:
25004 case V2SI_FTYPE_V2SI_V2SI:
25005 case V2SI_FTYPE_V4HI_V4HI:
25006 case V2SI_FTYPE_V2SF_V2SF:
25007 case V2DF_FTYPE_V2DF_V2DF:
25008 case V2DF_FTYPE_V2DF_V4SF:
25009 case V2DF_FTYPE_V2DF_V2DI:
25010 case V2DF_FTYPE_V2DF_DI:
25011 case V2DF_FTYPE_V2DF_SI:
25012 case V2SF_FTYPE_V2SF_V2SF:
25013 case V1DI_FTYPE_V1DI_V1DI:
25014 case V1DI_FTYPE_V8QI_V8QI:
25015 case V1DI_FTYPE_V2SI_V2SI:
25016 if (comparison == UNKNOWN)
25017 return ix86_expand_binop_builtin (icode, exp, target);
/* Comparisons whose operands must be swapped before emitting.  */
25020 case V4SF_FTYPE_V4SF_V4SF_SWAP:
25021 case V2DF_FTYPE_V2DF_V2DF_SWAP:
25022 gcc_assert (comparison != UNKNOWN);
/* Shift-like insns: the last argument is a count (register or 8-bit
   immediate); flag it so the operand loop below can fix it up.  */
25026 case V8HI_FTYPE_V8HI_V8HI_COUNT:
25027 case V8HI_FTYPE_V8HI_SI_COUNT:
25028 case V4SI_FTYPE_V4SI_V4SI_COUNT:
25029 case V4SI_FTYPE_V4SI_SI_COUNT:
25030 case V4HI_FTYPE_V4HI_V4HI_COUNT:
25031 case V4HI_FTYPE_V4HI_SI_COUNT:
25032 case V2DI_FTYPE_V2DI_V2DI_COUNT:
25033 case V2DI_FTYPE_V2DI_SI_COUNT:
25034 case V2SI_FTYPE_V2SI_V2SI_COUNT:
25035 case V2SI_FTYPE_V2SI_SI_COUNT:
25036 case V1DI_FTYPE_V1DI_V1DI_COUNT:
25037 case V1DI_FTYPE_V1DI_SI_COUNT:
25039 last_arg_count = true;
25041 case UINT64_FTYPE_UINT64_UINT64:
25042 case UINT_FTYPE_UINT_UINT:
25043 case UINT_FTYPE_UINT_USHORT:
25044 case UINT_FTYPE_UINT_UCHAR:
/* Forms whose trailing argument(s) must be compile-time immediates;
   nargs_constant records how many.  */
25047 case V2DI2TI_FTYPE_V2DI_INT:
25050 nargs_constant = 1;
25052 case V8HI_FTYPE_V8HI_INT:
25053 case V8SF_FTYPE_V8SF_INT:
25054 case V4SI_FTYPE_V4SI_INT:
25055 case V4SI_FTYPE_V8SI_INT:
25056 case V4HI_FTYPE_V4HI_INT:
25057 case V4DF_FTYPE_V4DF_INT:
25058 case V4SF_FTYPE_V4SF_INT:
25059 case V4SF_FTYPE_V8SF_INT:
25060 case V2DI_FTYPE_V2DI_INT:
25061 case V2DF_FTYPE_V2DF_INT:
25062 case V2DF_FTYPE_V4DF_INT:
25064 nargs_constant = 1;
25066 case V16QI_FTYPE_V16QI_V16QI_V16QI:
25067 case V8SF_FTYPE_V8SF_V8SF_V8SF:
25068 case V4DF_FTYPE_V4DF_V4DF_V4DF:
25069 case V4SF_FTYPE_V4SF_V4SF_V4SF:
25070 case V2DF_FTYPE_V2DF_V2DF_V2DF:
25073 case V16QI_FTYPE_V16QI_V16QI_INT:
25074 case V8HI_FTYPE_V8HI_V8HI_INT:
25075 case V8SI_FTYPE_V8SI_V8SI_INT:
25076 case V8SI_FTYPE_V8SI_V4SI_INT:
25077 case V8SF_FTYPE_V8SF_V8SF_INT:
25078 case V8SF_FTYPE_V8SF_V4SF_INT:
25079 case V4SI_FTYPE_V4SI_V4SI_INT:
25080 case V4DF_FTYPE_V4DF_V4DF_INT:
25081 case V4DF_FTYPE_V4DF_V2DF_INT:
25082 case V4SF_FTYPE_V4SF_V4SF_INT:
25083 case V2DI_FTYPE_V2DI_V2DI_INT:
25084 case V2DF_FTYPE_V2DF_V2DF_INT:
25086 nargs_constant = 1;
25088 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
25091 nargs_constant = 1;
25093 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
25096 nargs_constant = 1;
25098 case V2DI_FTYPE_V2DI_UINT_UINT:
25100 nargs_constant = 2;
25102 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
25104 nargs_constant = 2;
25107 gcc_unreachable ();
25110 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Comparison builtins that fell through the switch are expanded by the
   SSE compare helper (two operands, possibly swapped).  */
25112 if (comparison != UNKNOWN)
25114 gcc_assert (nargs == 2);
25115 return ix86_expand_sse_compare (d, exp, target, swap);
/* Pick or create the destination register.  When the result mode RMODE
   differs from the insn's operand-0 mode TMODE, allocate in RMODE and
   access it through a paradoxical subreg (real_target).  */
25118 if (rmode == VOIDmode || rmode == tmode)
25122 || GET_MODE (target) != tmode
25123 || ! (*insn_p->operand[0].predicate) (target, tmode))
25124 target = gen_reg_rtx (tmode);
25125 real_target = target;
25129 target = gen_reg_rtx (rmode);
25130 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
/* Expand each call argument and coerce it to satisfy the insn's
   operand predicate for that position.  */
25133 for (i = 0; i < nargs; i++)
25135 tree arg = CALL_EXPR_ARG (exp, i);
25136 rtx op = expand_normal (arg);
25137 enum machine_mode mode = insn_p->operand[i + 1].mode;
25138 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
25140 if (last_arg_count && (i + 1) == nargs)
25142 /* SIMD shift insns take either an 8-bit immediate or
25143 register as count. But builtin functions take int as
25144 count. If count doesn't match, we put it in register. */
25147 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
25148 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
25149 op = copy_to_reg (op);
/* Trailing immediate argument failed its predicate: report the exact
   immediate width this particular insn requires.  */
25152 else if ((nargs - i) <= nargs_constant)
25157 case CODE_FOR_sse4_1_roundpd:
25158 case CODE_FOR_sse4_1_roundps:
25159 case CODE_FOR_sse4_1_roundsd:
25160 case CODE_FOR_sse4_1_roundss:
25161 case CODE_FOR_sse4_1_blendps:
25162 case CODE_FOR_avx_blendpd256:
25163 case CODE_FOR_avx_vpermilv4df:
25164 case CODE_FOR_avx_roundpd256:
25165 case CODE_FOR_avx_roundps256:
25166 error ("the last argument must be a 4-bit immediate")25169 case CODE_FOR_sse4_1_blendpd:
25170 case CODE_FOR_avx_vpermilv2df:
25171 error ("the last argument must be a 2-bit immediate");
25174 case CODE_FOR_avx_vextractf128v4df:
25175 case CODE_FOR_avx_vextractf128v8sf:
25176 case CODE_FOR_avx_vextractf128v8si:
25177 case CODE_FOR_avx_vinsertf128v4df:
25178 case CODE_FOR_avx_vinsertf128v8sf:
25179 case CODE_FOR_avx_vinsertf128v8si:
25180 error ("the last argument must be a 1-bit immediate");
25183 case CODE_FOR_avx_cmpsdv2df3:
25184 case CODE_FOR_avx_cmpssv4sf3:
25185 case CODE_FOR_avx_cmppdv2df3:
25186 case CODE_FOR_avx_cmppsv4sf3:
25187 case CODE_FOR_avx_cmppdv4df3:
25188 case CODE_FOR_avx_cmppsv8sf3:
25189 error ("the last argument must be a 5-bit immediate");
/* Default: 8-bit immediate; distinguish last vs. next-to-last when
   two immediates are required.  */
25193 switch (nargs_constant)
25196 if ((nargs - i) == nargs_constant)
25198 error ("the next to last argument must be an 8-bit immediate");
25202 error ("the last argument must be an 8-bit immediate");
25205 gcc_unreachable ();
25212 if (VECTOR_MODE_P (mode))
25213 op = safe_vector_operand (op, mode);
25215 /* If we aren't optimizing, only allow one memory operand to
25217 if (memory_operand (op, mode))
25220 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
25222 if (optimize || !match || num_memory > 1)
25223 op = copy_to_mode_reg (mode, op);
25227 op = copy_to_reg (op);
25228 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
25233 args[i].mode = mode;
/* Emit the insn with the collected operands; arity follows nargs.  */
25239 pat = GEN_FCN (icode) (real_target, args[0].op);
25242 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
25245 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25249 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25250 args[2].op, args[3].op);
25253 gcc_unreachable ();
25263 /* Subroutine of ix86_expand_builtin to take care of special insns
25264 with variable number of operands. */
/* "Special" builtins are those with a memory side effect: loads from or
   stores to a pointer argument.  D describes the builtin, EXP is the
   CALL_EXPR, TARGET is a result hint.  Returns 0 for stores.
   NOTE(review): this excerpt is elided; some original lines are not
   shown here.  */
25267 ix86_expand_special_args_builtin (const struct builtin_description *d,
25268 tree exp, rtx target)
25272 unsigned int i, nargs, arg_adjust, memory;
25276 enum machine_mode mode;
25278 enum insn_code icode = d->icode;
25279 bool last_arg_constant = false;
25280 const struct insn_data *insn_p = &insn_data[icode];
25281 enum machine_mode tmode = insn_p->operand[0].mode;
/* klass distinguishes load-style builtins (result in a register, some
   argument in memory) from store-style ones (target is the memory).  */
25282 enum { load, store } klass;
/* Classify by prototype: set nargs, klass, and which operand index is
   the memory reference.  */
25284 switch ((enum ix86_special_builtin_type) d->flag)
25286 case VOID_FTYPE_VOID:
25287 emit_insn (GEN_FCN (icode) (target));
25289 case V2DI_FTYPE_PV2DI:
25290 case V32QI_FTYPE_PCCHAR:
25291 case V16QI_FTYPE_PCCHAR:
25292 case V8SF_FTYPE_PCV4SF:
25293 case V8SF_FTYPE_PCFLOAT:
25294 case V4SF_FTYPE_PCFLOAT:
25295 case V4DF_FTYPE_PCV2DF:
25296 case V4DF_FTYPE_PCDOUBLE:
25297 case V2DF_FTYPE_PCDOUBLE:
25302 case VOID_FTYPE_PV2SF_V4SF:
25303 case VOID_FTYPE_PV4DI_V4DI:
25304 case VOID_FTYPE_PV2DI_V2DI:
25305 case VOID_FTYPE_PCHAR_V32QI:
25306 case VOID_FTYPE_PCHAR_V16QI:
25307 case VOID_FTYPE_PFLOAT_V8SF:
25308 case VOID_FTYPE_PFLOAT_V4SF:
25309 case VOID_FTYPE_PDOUBLE_V4DF:
25310 case VOID_FTYPE_PDOUBLE_V2DF:
25311 case VOID_FTYPE_PDI_DI:
25312 case VOID_FTYPE_PINT_INT:
25315 /* Reserve memory operand for target. */
25316 memory = ARRAY_SIZE (args);
25318 case V4SF_FTYPE_V4SF_PCV2SF:
25319 case V2DF_FTYPE_V2DF_PCDOUBLE:
25324 case V8SF_FTYPE_PCV8SF_V8SF:
25325 case V4DF_FTYPE_PCV4DF_V4DF:
25326 case V4SF_FTYPE_PCV4SF_V4SF:
25327 case V2DF_FTYPE_PCV2DF_V2DF:
25332 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25333 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25334 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25335 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25338 /* Reserve memory operand for target. */
25339 memory = ARRAY_SIZE (args);
25342 gcc_unreachable ();
25345 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, argument 0 is the destination address: build the MEM that
   becomes the insn's operand 0.  */
25347 if (klass == store)
25349 arg = CALL_EXPR_ARG (exp, 0);
25350 op = expand_normal (arg);
25351 gcc_assert (target == 0);
25352 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25360 || GET_MODE (target) != tmode
25361 || ! (*insn_p->operand[0].predicate) (target, tmode))
25362 target = gen_reg_rtx (tmode);
/* Expand the remaining arguments, turning the designated one into a
   MEM and forcing the others into registers.  */
25365 for (i = 0; i < nargs; i++)
25367 enum machine_mode mode = insn_p->operand[i + 1].mode;
25370 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25371 op = expand_normal (arg);
25372 match = (*insn_p->operand[i + 1].predicate) (op, mode);
25374 if (last_arg_constant && (i + 1) == nargs)
25380 error ("the last argument must be an 8-bit immediate");
25388 /* This must be the memory operand. */
25389 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25390 gcc_assert (GET_MODE (op) == mode
25391 || GET_MODE (op) == VOIDmode);
25395 /* This must be register. */
25396 if (VECTOR_MODE_P (mode))
25397 op = safe_vector_operand (op, mode);
25399 gcc_assert (GET_MODE (op) == mode
25400 || GET_MODE (op) == VOIDmode);
25401 op = copy_to_mode_reg (mode, op);
25406 args[i].mode = mode;
25412 pat = GEN_FCN (icode) (target, args[0].op);
25415 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25418 gcc_unreachable ();
/* Stores produce no value; loads return the destination register.  */
25424 return klass == store ? 0 : target;
25427 /* Return the integer constant in ARG. Constrain it to be in the range
25428 of the subparts of VEC_TYPE; issue an error if not. */
25431 get_element_number (tree vec_type, tree arg)
25433 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* ARG must be a host-representable unsigned integer constant no larger
   than the highest valid lane index.  */
25435 if (!host_integerp (arg, 1)
25436 || (elt = tree_low_cst (arg, 1), elt > max))
25438 error ("selector must be an integer constant in the range 0..%wi", max);
25445 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25446 ix86_expand_vector_init. We DO have language-level syntax for this, in
25447 the form of (type){ init-list }. Except that since we can't place emms
25448 instructions from inside the compiler, we can't allow the use of MMX
25449 registers unless the user explicitly asks for it. So we do *not* define
25450 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25451 we have builtins invoked by mmintrin.h that gives us license to emit
25452 these sorts of instructions. */
/* TYPE is the vector type being built; EXP supplies one scalar call
   argument per vector element; TARGET is a result-register hint.  */
25455 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25457 enum machine_mode tmode = TYPE_MODE (type);
25458 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25459 int i, n_elt = GET_MODE_NUNITS (tmode);
25460 rtvec v = rtvec_alloc (n_elt);
25462 gcc_assert (VECTOR_MODE_P (tmode));
25463 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each element expression and narrow it to the element mode.  */
25465 for (i = 0; i < n_elt; ++i)
25467 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25468 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25471 if (!target || !register_operand (target, tmode))
25472 target = gen_reg_rtx (tmode);
25474 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25478 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25479 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25480 had a language-level syntax for referencing vector elements. */
/* Arg 0 is the source vector; arg 1 is the (constant) lane selector,
   validated by get_element_number.  Returns the extracted element.  */
25483 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25485 enum machine_mode tmode, mode0;
25490 arg0 = CALL_EXPR_ARG (exp, 0);
25491 arg1 = CALL_EXPR_ARG (exp, 1);
25493 op0 = expand_normal (arg0);
25494 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode is the element mode, mode0 the whole-vector mode.  */
25496 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25497 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25498 gcc_assert (VECTOR_MODE_P (mode0));
25500 op0 = force_reg (mode0, op0);
25502 if (optimize || !target || !register_operand (target, tmode))
25503 target = gen_reg_rtx (tmode);
25505 ix86_expand_vector_extract (true, target, op0, elt);
25510 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25511 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25512 a language-level syntax for referencing vector elements. */
/* Arg 0 is the source vector, arg 1 the new element value, arg 2 the
   (constant) lane selector.  The source is not modified: the insert is
   done on a fresh copy, which is returned.  */
25515 ix86_expand_vec_set_builtin (tree exp)
25517 enum machine_mode tmode, mode1;
25518 tree arg0, arg1, arg2;
25520 rtx op0, op1, target;
25522 arg0 = CALL_EXPR_ARG (exp, 0);
25523 arg1 = CALL_EXPR_ARG (exp, 1);
25524 arg2 = CALL_EXPR_ARG (exp, 2);
/* tmode is the vector mode, mode1 the element mode.  */
25526 tmode = TYPE_MODE (TREE_TYPE (arg0));
25527 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25528 gcc_assert (VECTOR_MODE_P (tmode));
25530 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25531 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25532 elt = get_element_number (TREE_TYPE (arg0), arg2);
25534 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25535 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25537 op0 = force_reg (tmode, op0);
25538 op1 = force_reg (mode1, op1);
25540 /* OP0 is the source of these builtin functions and shouldn't be
25541 modified. Create a copy, use it and return it as target. */
25542 target = gen_reg_rtx (tmode);
25543 emit_move_insn (target, op0);
25544 ix86_expand_vector_set (true, target, op1, elt);
25549 /* Expand an expression EXP that calls a built-in function,
25550 with result going to TARGET if that's convenient
25551 (and in mode MODE if that's convenient).
25552 SUBTARGET may be used as the target for computing one of EXP's operands.
25553 IGNORE is nonzero if the value is to be ignored. */
/* Main entry point for expanding ix86 machine builtins: first verifies
   ISA availability, then handles a set of hand-written special cases,
   and finally dispatches through the builtin descriptor tables.
   NOTE(review): this excerpt is elided; some original lines are not
   shown here.  */
25556 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25557 enum machine_mode mode ATTRIBUTE_UNUSED,
25558 int ignore ATTRIBUTE_UNUSED)
25560 const struct builtin_description *d;
25562 enum insn_code icode;
25563 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25564 tree arg0, arg1, arg2;
25565 rtx op0, op1, op2, pat;
25566 enum machine_mode mode0, mode1, mode2;
25567 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25569 /* Determine whether the builtin function is available under the current ISA.
25570 Originally the builtin was not created if it wasn't applicable to the
25571 current ISA based on the command line switches. With function specific
25572 options, we need to check in the context of the function making the call
25573 whether it is supported. */
25574 if (ix86_builtins_isa[fcode].isa
25575 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25577 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25578 NULL, NULL, false);
25581 error ("%qE needs unknown isa option", fndecl);
25584 gcc_assert (opts != NULL);
25585 error ("%qE needs isa option %s", fndecl, opts);
/* Builtins that need individual, hand-written expansion.  */
25593 case IX86_BUILTIN_MASKMOVQ:
25594 case IX86_BUILTIN_MASKMOVDQU:
25595 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25596 ? CODE_FOR_mmx_maskmovq
25597 : CODE_FOR_sse2_maskmovdqu);
25598 /* Note the arg order is different from the operand order. */
25599 arg1 = CALL_EXPR_ARG (exp, 0);
25600 arg2 = CALL_EXPR_ARG (exp, 1);
25601 arg0 = CALL_EXPR_ARG (exp, 2);
25602 op0 = expand_normal (arg0);
25603 op1 = expand_normal (arg1);
25604 op2 = expand_normal (arg2);
25605 mode0 = insn_data[icode].operand[0].mode;
25606 mode1 = insn_data[icode].operand[1].mode;
25607 mode2 = insn_data[icode].operand[2].mode;
25609 op0 = force_reg (Pmode, op0);
25610 op0 = gen_rtx_MEM (mode1, op0);
25612 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25613 op0 = copy_to_mode_reg (mode0, op0);
25614 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25615 op1 = copy_to_mode_reg (mode1, op1);
25616 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25617 op2 = copy_to_mode_reg (mode2, op2);
25618 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR/STMXCSR go through a stack slot since the insns take a
   memory operand.  */
25624 case IX86_BUILTIN_LDMXCSR:
25625 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25626 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25627 emit_move_insn (target, op0);
25628 emit_insn (gen_sse_ldmxcsr (target));
25631 case IX86_BUILTIN_STMXCSR:
25632 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25633 emit_insn (gen_sse_stmxcsr (target));
25634 return copy_to_mode_reg (SImode, target);
25636 case IX86_BUILTIN_CLFLUSH:
25637 arg0 = CALL_EXPR_ARG (exp, 0);
25638 op0 = expand_normal (arg0);
25639 icode = CODE_FOR_sse2_clflush;
25640 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25641 op0 = copy_to_mode_reg (Pmode, op0);
25643 emit_insn (gen_sse2_clflush (op0));
25646 case IX86_BUILTIN_MONITOR:
25647 arg0 = CALL_EXPR_ARG (exp, 0);
25648 arg1 = CALL_EXPR_ARG (exp, 1);
25649 arg2 = CALL_EXPR_ARG (exp, 2);
25650 op0 = expand_normal (arg0);
25651 op1 = expand_normal (arg1);
25652 op2 = expand_normal (arg2);
25654 op0 = copy_to_mode_reg (Pmode, op0);
25656 op1 = copy_to_mode_reg (SImode, op1);
25658 op2 = copy_to_mode_reg (SImode, op2);
25659 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25662 case IX86_BUILTIN_MWAIT:
25663 arg0 = CALL_EXPR_ARG (exp, 0);
25664 arg1 = CALL_EXPR_ARG (exp, 1);
25665 op0 = expand_normal (arg0);
25666 op1 = expand_normal (arg1);
25668 op0 = copy_to_mode_reg (SImode, op0);
25670 op1 = copy_to_mode_reg (SImode, op1);
25671 emit_insn (gen_sse3_mwait (op0, op1));
/* vec_init / vec_ext / vec_set builtins delegate to their wrappers.  */
25674 case IX86_BUILTIN_VEC_INIT_V2SI:
25675 case IX86_BUILTIN_VEC_INIT_V4HI:
25676 case IX86_BUILTIN_VEC_INIT_V8QI:
25677 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25679 case IX86_BUILTIN_VEC_EXT_V2DF:
25680 case IX86_BUILTIN_VEC_EXT_V2DI:
25681 case IX86_BUILTIN_VEC_EXT_V4SF:
25682 case IX86_BUILTIN_VEC_EXT_V4SI:
25683 case IX86_BUILTIN_VEC_EXT_V8HI:
25684 case IX86_BUILTIN_VEC_EXT_V2SI:
25685 case IX86_BUILTIN_VEC_EXT_V4HI:
25686 case IX86_BUILTIN_VEC_EXT_V16QI:
25687 return ix86_expand_vec_ext_builtin (exp, target);
25689 case IX86_BUILTIN_VEC_SET_V2DI:
25690 case IX86_BUILTIN_VEC_SET_V4SF:
25691 case IX86_BUILTIN_VEC_SET_V4SI:
25692 case IX86_BUILTIN_VEC_SET_V8HI:
25693 case IX86_BUILTIN_VEC_SET_V4HI:
25694 case IX86_BUILTIN_VEC_SET_V16QI:
25695 return ix86_expand_vec_set_builtin (exp);
/* __float128 infinity: materialized from the constant pool.  */
25697 case IX86_BUILTIN_INFQ:
25698 case IX86_BUILTIN_HUGE_VALQ:
25700 REAL_VALUE_TYPE inf;
25704 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25706 tmp = validize_mem (force_const_mem (mode, tmp));
25709 target = gen_reg_rtx (mode);
25711 emit_move_insn (target, tmp);
/* Everything else: search the descriptor tables in order.  */
25719 for (i = 0, d = bdesc_special_args;
25720 i < ARRAY_SIZE (bdesc_special_args);
25722 if (d->code == fcode)
25723 return ix86_expand_special_args_builtin (d, exp, target);
25725 for (i = 0, d = bdesc_args;
25726 i < ARRAY_SIZE (bdesc_args);
25728 if (d->code == fcode)
25731 case IX86_BUILTIN_FABSQ:
25732 case IX86_BUILTIN_COPYSIGNQ:
25734 /* Emit a normal call if SSE2 isn't available. */
25735 return expand_call (exp, target, ignore);
25737 return ix86_expand_args_builtin (d, exp, target);
25740 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25741 if (d->code == fcode)
25742 return ix86_expand_sse_comi (d, exp, target);
25744 for (i = 0, d = bdesc_pcmpestr;
25745 i < ARRAY_SIZE (bdesc_pcmpestr);
25747 if (d->code == fcode)
25748 return ix86_expand_sse_pcmpestr (d, exp, target);
25750 for (i = 0, d = bdesc_pcmpistr;
25751 i < ARRAY_SIZE (bdesc_pcmpistr);
25753 if (d->code == fcode)
25754 return ix86_expand_sse_pcmpistr (d, exp, target);
25756 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25757 if (d->code == fcode)
25758 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25759 (enum multi_arg_type)d->flag,
/* Every valid builtin code must have been handled above.  */
25762 gcc_unreachable ();
25765 /* Returns a function decl for a vectorized version of the builtin function
25766 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25767 if it is not available. */
/* TYPE_OUT / TYPE_IN are the vector result and argument types the
   vectorizer wants; the match is checked on element mode and lane
   count before returning the corresponding ix86 builtin decl.  */
25770 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25773 enum machine_mode in_mode, out_mode;
25776 if (TREE_CODE (type_out) != VECTOR_TYPE
25777 || TREE_CODE (type_in) != VECTOR_TYPE)
25780 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25781 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25782 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25783 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25787 case BUILT_IN_SQRT:
25788 if (out_mode == DFmode && out_n == 2
25789 && in_mode == DFmode && in_n == 2)
25790 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25793 case BUILT_IN_SQRTF:
25794 if (out_mode == SFmode && out_n == 4
25795 && in_mode == SFmode && in_n == 4)
25796 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25799 case BUILT_IN_LRINT:
25800 if (out_mode == SImode && out_n == 4
25801 && in_mode == DFmode && in_n == 2)
25802 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25805 case BUILT_IN_LRINTF:
25806 if (out_mode == SImode && out_n == 4
25807 && in_mode == SFmode && in_n == 4)
25808 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25815 /* Dispatch to a handler for a vectorization library. */
25816 if (ix86_veclib_handler)
25817 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
25823 /* Handler for an SVML-style interface to
25824 a library with vectorized intrinsics. */
/* Maps a scalar math builtin FN onto an external SVML routine operating
   on TYPE_OUT/TYPE_IN vectors, building and returning a FUNCTION_DECL
   for it, when a suitable vector variant exists.
   NOTE(review): this excerpt is elided; some original lines are not
   shown here.  */
25827 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25830 tree fntype, new_fndecl, args;
25833 enum machine_mode el_mode, in_mode;
25836 /* The SVML is suitable for unsafe math only. */
25837 if (!flag_unsafe_math_optimizations)
25840 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25841 n = TYPE_VECTOR_SUBPARTS (type_out);
25842 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25843 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25844 if (el_mode != in_mode
/* Double-precision variants require a V2DF shape ... */
25852 case BUILT_IN_LOG10:
25854 case BUILT_IN_TANH:
25856 case BUILT_IN_ATAN:
25857 case BUILT_IN_ATAN2:
25858 case BUILT_IN_ATANH:
25859 case BUILT_IN_CBRT:
25860 case BUILT_IN_SINH:
25862 case BUILT_IN_ASINH:
25863 case BUILT_IN_ASIN:
25864 case BUILT_IN_COSH:
25866 case BUILT_IN_ACOSH:
25867 case BUILT_IN_ACOS:
25868 if (el_mode != DFmode || n != 2)
/* ... and single-precision variants a V4SF shape.  */
25872 case BUILT_IN_EXPF:
25873 case BUILT_IN_LOGF:
25874 case BUILT_IN_LOG10F:
25875 case BUILT_IN_POWF:
25876 case BUILT_IN_TANHF:
25877 case BUILT_IN_TANF:
25878 case BUILT_IN_ATANF:
25879 case BUILT_IN_ATAN2F:
25880 case BUILT_IN_ATANHF:
25881 case BUILT_IN_CBRTF:
25882 case BUILT_IN_SINHF:
25883 case BUILT_IN_SINF:
25884 case BUILT_IN_ASINHF:
25885 case BUILT_IN_ASINF:
25886 case BUILT_IN_COSHF:
25887 case BUILT_IN_COSF:
25888 case BUILT_IN_ACOSHF:
25889 case BUILT_IN_ACOSF:
25890 if (el_mode != SFmode || n != 4)
/* Derive the SVML symbol from the scalar builtin's name: "vmls"/"vmld"
   prefix plus lane-count suffix; log gets an irregular "Ln" spelling.  */
25898 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25900 if (fn == BUILT_IN_LOGF)
25901 strcpy (name, "vmlsLn4");
25902 else if (fn == BUILT_IN_LOG)
25903 strcpy (name, "vmldLn2");
25906 sprintf (name, "vmls%s", bname+10);
25907 name[strlen (name)-1] = '4';
25910 sprintf (name, "vmld%s2", bname+10);
25912 /* Convert to uppercase. */
/* Count the scalar builtin's parameters to pick a 1- or 2-argument
   vector function type.  */
25916 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25917 args = TREE_CHAIN (args))
25921 fntype = build_function_type_list (type_out, type_in, NULL);
25923 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25925 /* Build a function declaration for the vectorized function. */
25926 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25927 TREE_PUBLIC (new_fndecl) = 1;
25928 DECL_EXTERNAL (new_fndecl) = 1;
25929 DECL_IS_NOVOPS (new_fndecl) = 1;
25930 TREE_READONLY (new_fndecl) = 1;
25935 /* Handler for an ACML-style interface to
25936 a library with vectorized intrinsics. */
/* Like ix86_veclibabi_svml, but for AMD's ACML naming scheme
   ("__vr..._<name>"); returns a FUNCTION_DECL for the vector routine
   or bails out when no variant matches.  NOTE(review): this excerpt is
   elided; some original lines are not shown here.  */
25939 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Name template: the "..." slots are overwritten with the lane count
   and mode letter below.  */
25941 char name[20] = "__vr.._";
25942 tree fntype, new_fndecl, args;
25945 enum machine_mode el_mode, in_mode;
25948 /* The ACML is 64bits only and suitable for unsafe math only as
25949 it does not correctly support parts of IEEE with the required
25950 precision such as denormals. */
25952 || !flag_unsafe_math_optimizations)
25955 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25956 n = TYPE_VECTOR_SUBPARTS (type_out);
25957 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25958 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25959 if (el_mode != in_mode
/* Double-precision builtins must come in with DFmode elements.  */
25969 case BUILT_IN_LOG2:
25970 case BUILT_IN_LOG10:
25973 if (el_mode != DFmode
/* Single-precision builtins must come in with SFmode elements.  */
25978 case BUILT_IN_SINF:
25979 case BUILT_IN_COSF:
25980 case BUILT_IN_EXPF:
25981 case BUILT_IN_POWF:
25982 case BUILT_IN_LOGF:
25983 case BUILT_IN_LOG2F:
25984 case BUILT_IN_LOG10F:
25987 if (el_mode != SFmode
/* Splice the scalar builtin's name (minus its "__builtin_" prefix)
   after the "__vr.._" template.  */
25996 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25997 sprintf (name + 7, "%s", bname+10);
26000 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26001 args = TREE_CHAIN (args))
26005 fntype = build_function_type_list (type_out, type_in, NULL);
26007 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26009 /* Build a function declaration for the vectorized function. */
26010 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
26011 TREE_PUBLIC (new_fndecl) = 1;
26012 DECL_EXTERNAL (new_fndecl) = 1;
26013 DECL_IS_NOVOPS (new_fndecl) = 1;
26014 TREE_READONLY (new_fndecl) = 1;
26020 /* Returns a decl of a function that implements conversion of an integer vector
26021 into a floating-point vector, or vice-versa. TYPE is the type of the integer
26022 side of the conversion.
26023 Return NULL_TREE if it is not available. */
26026 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
26028 if (TREE_CODE (type) != VECTOR_TYPE
26029 /* There are only conversions from/to signed integers. */
26030 || TYPE_UNSIGNED (TREE_TYPE (type)))
/* int -> float direction: cvtdq2ps.  */
26036 switch (TYPE_MODE (type))
26039 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int with truncation: cvttps2dq.  */
26044 case FIX_TRUNC_EXPR:
26045 switch (TYPE_MODE (type))
26048 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
26058 /* Returns a code for a target-specific builtin that implements
26059 reciprocal of the function, or NULL_TREE if not available. */
26062 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
26063 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations are only valid under -mrecip with SSE math
   and the full unsafe/finite/no-trapping math set, and not when
   optimizing for size.  */
26065 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
26066 && flag_finite_math_only && !flag_trapping_math
26067 && flag_unsafe_math_optimizations))
26071 /* Machine dependent builtins. */
26074 /* Vectorized version of sqrt to rsqrt conversion. */
26075 case IX86_BUILTIN_SQRTPS_NR:
26076 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
26082 /* Normal builtins. */
26085 /* Sqrt to rsqrt conversion. */
26086 case BUILT_IN_SQRTF:
26087 return ix86_builtins[IX86_BUILTIN_RSQRTF];
26094 /* Store OPERAND to the memory after reload is completed. This means
26095 that we can't easily use assign_stack_local. */
/* Returns a MEM of MODE referring to where OPERAND was stored: in the
   red zone below the stack pointer when available, otherwise pushed
   onto the stack.  NOTE(review): this excerpt is elided; some original
   lines are not shown here.  */
26097 ix86_force_to_memory (enum machine_mode mode, rtx operand)
26101 gcc_assert (reload_completed);
/* With a red zone we can store below RSP without adjusting it.  */
26102 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
26104 result = gen_rtx_MEM (mode,
26105 gen_rtx_PLUS (Pmode,
26107 GEN_INT (-RED_ZONE_SIZE)));
26108 emit_move_insn (result, operand);
/* 64-bit without a red zone: push the (widened) value.  */
26110 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
26116 operand = gen_lowpart (DImode, operand);
26120 gen_rtx_SET (VOIDmode,
26121 gen_rtx_MEM (DImode,
26122 gen_rtx_PRE_DEC (DImode,
26123 stack_pointer_rtx)),
26127 gcc_unreachable ();
26129 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit: push DImode values as two SImode halves ...  */
26138 split_di (&operand, 1, operands, operands + 1);
26140 gen_rtx_SET (VOIDmode,
26141 gen_rtx_MEM (SImode,
26142 gen_rtx_PRE_DEC (Pmode,
26143 stack_pointer_rtx)),
26146 gen_rtx_SET (VOIDmode,
26147 gen_rtx_MEM (SImode,
26148 gen_rtx_PRE_DEC (Pmode,
26149 stack_pointer_rtx)),
26154 /* Store HImodes as SImodes. */
26155 operand = gen_lowpart (SImode, operand);
26159 gen_rtx_SET (VOIDmode,
26160 gen_rtx_MEM (GET_MODE (operand),
26161 gen_rtx_PRE_DEC (SImode,
26162 stack_pointer_rtx)),
26166 gcc_unreachable ();
26168 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26173 /* Free operand from the memory. */
/* Undoes ix86_force_to_memory: when the value was pushed (no red zone),
   pop the stack space back off via an SP adjustment.  */
26175 ix86_free_from_memory (enum machine_mode mode)
26177 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
26181 if (mode == DImode || TARGET_64BIT)
26185 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26186 to pop or add instruction if registers are available. */
26187 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26188 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26193 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26194 QImode must go into class Q_REGS.
26195 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26196 movdf to do mem-to-mem moves through integer regs. */
/* Implements PREFERRED_RELOAD_CLASS: returns a (possibly narrowed)
   subclass of REGCLASS suitable for loading X, or NO_REGS to force X
   into memory.  NOTE(review): this excerpt is elided; some original
   lines are not shown here.  */
26198 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26200 enum machine_mode mode = GET_MODE (x);
26202 /* We're only allowed to return a subclass of CLASS. Many of the
26203 following checks fail for NO_REGS, so eliminate that early. */
26204 if (regclass == NO_REGS)
26207 /* All classes can load zeros. */
26208 if (x == CONST0_RTX (mode))
26211 /* Force constants into memory if we are loading a (nonzero) constant into
26212 an MMX or SSE register. This is because there are no MMX/SSE instructions
26213 to load from a constant. */
26215 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26218 /* Prefer SSE regs only, if we can use them for math. */
26219 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26220 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26222 /* Floating-point constants need more complex checks. */
26223 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26225 /* General regs can load everything. */
26226 if (reg_class_subset_p (regclass, GENERAL_REGS))
26229 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26230 zero above. We only want to wind up preferring 80387 registers if
26231 we plan on doing computation with them. */
26233 && standard_80387_constant_p (x))
26235 /* Limit class to non-sse. */
26236 if (regclass == FLOAT_SSE_REGS)
26238 if (regclass == FP_TOP_SSE_REGS)
26240 if (regclass == FP_SECOND_SSE_REGS)
26241 return FP_SECOND_REG;
26242 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26249 /* Generally when we see PLUS here, it's the function invariant
26250 (plus soft-fp const_int). Which can only be computed into general
26252 if (GET_CODE (x) == PLUS)
26253 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26255 /* QImode constants are easy to load, but non-constant QImode data
26256 must go into Q_REGS. */
26257 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26259 if (reg_class_subset_p (regclass, Q_REGS))
26261 if (reg_class_subset_p (Q_REGS, regclass))
26269 /* Discourage putting floating-point values in SSE registers unless
26270 SSE math is being used, and likewise for the 387 registers. */
/* Implements PREFERRED_OUTPUT_RELOAD_CLASS: narrow REGCLASS to the
   register bank we actually do FP math in, or NO_REGS to reject the
   alternative.  */
26272 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26274 enum machine_mode mode = GET_MODE (x);
26276 /* Restrict the output reload class to the register bank that we are doing
26277 math on. If we would like not to return a subset of CLASS, reject this
26278 alternative: if reload cannot do this, it will still use its choice. */
26279 mode = GET_MODE (x);
26280 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26281 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26283 if (X87_FLOAT_MODE_P (mode))
26285 if (regclass == FP_TOP_SSE_REGS)
26287 else if (regclass == FP_SECOND_SSE_REGS)
26288 return FP_SECOND_REG;
26290 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Implementation of the TARGET_SECONDARY_RELOAD hook: report an
   intermediate register class where a direct reload is impossible.
   The visible case: storing (IN_P false) a QImode value out of a register
   class that contains non-QI-capable registers on 32-bit targets requires
   a Q_REGS intermediate.  NOTE(review): the tail of this function (the
   declaration of REGNO, the Q_REGS/NO_REGS returns and the closing brace)
   is missing from this chunk -- consult the complete upstream source.  */
26296 static enum reg_class
26297 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
26298 enum machine_mode mode,
26299 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26301 /* QImode spills from non-QI registers require
26302 intermediate register on 32bit targets. */
26303 if (!in_p && mode == QImode && !TARGET_64BIT
26304 && (rclass == GENERAL_REGS
26305 || rclass == LEGACY_REGS
26306 || rclass == INDEX_REGS))
/* Resolve pseudos and SUBREGs to the underlying hard register number.  */
26315 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26316 regno = true_regnum (x);
26318 /* Return Q_REGS if the operand is in memory. */
26326 /* If we are copying between general and FP registers, we need a memory
26327 location. The same is true for SSE and MMX registers.
26329 To optimize register_move_cost performance, allow inline variant.
26331 The macro can't work reliably when one of the CLASSES is class containing
26332 registers from multiple units (SSE, MMX, integer). We avoid this by never
26333 combining those units in single alternative in the machine description.
26334 Ensure that this constraint holds to avoid unexpected surprises.
26336 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26337 enforce these sanity checks. */
/* NOTE(review): this chunk is truncated -- the return-type line, braces
   and the individual "return true/false" statements of the branches below
   are missing relative to upstream; keep the control flow intact when
   editing.  */
26340 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26341 enum machine_mode mode, int strict)
/* Sanity check: neither class may mix register units (float vs SSE vs
   MMX).  Under STRICT this is a hard assert; otherwise it is tolerated
   because register_move_cost probes arbitrary class pairs.  */
26343 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26344 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26345 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26346 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26347 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26348 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26350 gcc_assert (!strict);
26354 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26357 /* ??? This is a lie. We do have moves between mmx/general, and for
26358 mmx/sse2. But by saying we need secondary memory we discourage the
26359 register allocator from using the mmx registers unless needed. */
26360 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26363 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26365 /* SSE1 doesn't have any direct moves from other classes. */
26369 /* If the target says that inter-unit moves are more expensive
26370 than moving through memory, then don't generate them. */
26371 if (!TARGET_INTER_UNIT_MOVES)
26374 /* Between SSE and general, we have moves no larger than word size. */
26375 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper for the inline variant above; this is what the
   SECONDARY_MEMORY_NEEDED target macro actually calls.  */
26383 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26384 enum machine_mode mode, int strict)
26386 return inline_secondary_memory_needed (class1, class2, mode, strict);
26389 /* Return true if the registers in CLASS cannot represent the change from
26390 modes FROM to TO. */
/* NOTE(review): truncated -- the function's return-type line, braces and
   the "return true/false" statements of each branch are missing from this
   chunk.  */
26393 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26394 enum reg_class regclass)
26399 /* x87 registers can't do subreg at all, as all values are reformatted
26400 to extended precision. */
26401 if (MAYBE_FLOAT_CLASS_P (regclass))
26404 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26406 /* Vector registers do not support QI or HImode loads. If we don't
26407 disallow a change to these modes, reload will assume it's ok to
26408 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26409 the vec_dupv4hi pattern. */
26410 if (GET_MODE_SIZE (from) < 4)
26413 /* Vector registers do not support subreg with nonzero offsets, which
26414 are otherwise valid for integer registers. Since we can't see
26415 whether we have a nonzero offset from here, prohibit all
26416 nonparadoxical subregs changing size. */
26417 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26424 /* Return the cost of moving data of mode M between a
26425 register and memory. A value of 2 is the default; this cost is
26426 relative to those in `REGISTER_MOVE_COST'.
26428 This function is used extensively by register_move_cost that is used to
26429 build tables at startup. Make it inline in this case.
26430 When IN is 2, return maximum of in and out move cost.
26432 If moving between registers and memory is more expensive than
26433 between two registers, you should define this macro to express the
26436 Model also increased moving costs of QImode registers in non
/* NOTE(review): truncated -- the IN parameter declaration, the per-class
   index computations (fp/sse/mmx mode-size switches) and several braces
   are missing from this chunk; the surviving lines show only the
   load/store cost selection per register class.  */
26440 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* x87 classes: cost from the fp_load/fp_store tables.  */
26444 if (FLOAT_CLASS_P (regclass))
26462 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26463 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: cost from the sse_load/sse_store tables, indexed by size.  */
26465 if (SSE_CLASS_P (regclass))
26468 switch (GET_MODE_SIZE (mode))
26483 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26484 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX classes: cost from the mmx_load/mmx_store tables.  */
26486 if (MMX_CLASS_P (regclass))
26489 switch (GET_MODE_SIZE (mode))
26501 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26502 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* General-purpose registers, by mode size.  */
26504 switch (GET_MODE_SIZE (mode))
/* Byte moves: Q regs (or 64-bit) are cheap; otherwise model the
   partial-register-stall penalty via movzbl.  */
26507 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26510 return ix86_cost->int_store[0];
26511 if (TARGET_PARTIAL_REG_DEPENDENCY
26512 && optimize_function_for_speed_p (cfun))
26513 cost = ix86_cost->movzbl_load;
26515 cost = ix86_cost->int_load[0];
26517 return MAX (cost, ix86_cost->int_store[0]);
26523 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26525 return ix86_cost->movzbl_load;
26527 return ix86_cost->int_store[0] + 4;
26532 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26533 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26535 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26536 if (mode == TFmode)
26539 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26541 cost = ix86_cost->int_load[2];
26543 cost = ix86_cost->int_store[2];
26544 return (cost * (((int) GET_MODE_SIZE (mode)
26545 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper used as the MEMORY_MOVE_COST target macro.  */
26550 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26552 return inline_memory_move_cost (mode, regclass, in);
26556 /* Return the cost of moving data from a register in class CLASS1 to
26557 one in class CLASS2.
26559 It is not required that the cost always equal 2 when FROM is the same as TO;
26560 on some machines it is expensive to move between registers if they are not
26561 general registers. */
/* NOTE(review): truncated -- the return-type line, the declaration of
   COST and several braces/returns are missing from this chunk.  */
26564 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26565 enum reg_class class2)
26567 /* In case we require secondary memory, compute cost of the store followed
26568 by load. In order to avoid bad register allocation choices, we need
26569 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26571 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN == 2 asks for max(load, store) in each direction.  */
26575 cost += inline_memory_move_cost (mode, class1, 2);
26576 cost += inline_memory_move_cost (mode, class2, 2);
26578 /* In case of copying from general_purpose_register we may emit multiple
26579 stores followed by single load causing memory size mismatch stall.
26580 Count this as arbitrarily high cost of 20. */
26581 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26584 /* In the case of FP/MMX moves, the registers actually overlap, and we
26585 have to switch modes in order to treat them differently. */
26586 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26587 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26593 /* Moves between SSE/MMX and integer unit are expensive. */
26594 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26595 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26597 /* ??? By keeping returned value relatively high, we limit the number
26598 of moves between integer and MMX/SSE registers for all targets.
26599 Additionally, high value prevents problem with x86_modes_tieable_p(),
26600 where integer modes in MMX/SSE registers are not tieable
26601 because of missing QImode and HImode moves to, from or between
26602 MMX/SSE registers. */
26603 return MAX (8, ix86_cost->mmxsse_to_integer);
26605 if (MAYBE_FLOAT_CLASS_P (class1))
26606 return ix86_cost->fp_move;
26607 if (MAYBE_SSE_CLASS_P (class1))
26608 return ix86_cost->sse_move;
26609 if (MAYBE_MMX_CLASS_P (class1))
26610 return ix86_cost->mmx_move;
26614 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): truncated -- the return-type line, braces and several
   "return 1/0" statements are missing from this chunk.  */
26617 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26619 /* Flags and only flags can only hold CCmode values. */
26620 if (CC_REGNO_P (regno))
26621 return GET_MODE_CLASS (mode) == MODE_CC;
26622 if (GET_MODE_CLASS (mode) == MODE_CC
26623 || GET_MODE_CLASS (mode) == MODE_RANDOM
26624 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26626 if (FP_REGNO_P (regno))
26627 return VALID_FP_MODE_P (mode);
26628 if (SSE_REGNO_P (regno))
26630 /* We implement the move patterns for all vector modes into and
26631 out of SSE registers, even when no operation instructions
26632 are available. OImode move is available only when AVX is
26634 return ((TARGET_AVX && mode == OImode)
26635 || VALID_AVX256_REG_MODE (mode)
26636 || VALID_SSE_REG_MODE (mode)
26637 || VALID_SSE2_REG_MODE (mode)
26638 || VALID_MMX_REG_MODE (mode)
26639 || VALID_MMX_REG_MODE_3DNOW (mode));
26641 if (MMX_REGNO_P (regno))
26643 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26644 so if the register is available at all, then we can move data of
26645 the given mode into or out of it. */
26646 return (VALID_MMX_REG_MODE (mode)
26647 || VALID_MMX_REG_MODE_3DNOW (mode));
/* From here on REGNO is a general-purpose register.  */
26650 if (mode == QImode)
26652 /* Take care for QImode values - they can be in non-QI regs,
26653 but then they do cause partial register stalls. */
26654 if (regno <= BX_REG || TARGET_64BIT)
26656 if (!TARGET_PARTIAL_REG_STALL)
26658 return reload_in_progress || reload_completed;
26660 /* We handle both integer and floats in the general purpose registers. */
26661 else if (VALID_INT_MODE_P (mode))
26663 else if (VALID_FP_MODE_P (mode))
26665 else if (VALID_DFP_MODE_P (mode))
26667 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26668 on to use that value in smaller contexts, this can easily force a
26669 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26670 supporting DImode, allow it. */
26671 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26677 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26678 tieable integer mode. */
/* NOTE(review): truncated -- the switch statement and case labels that
   select between these returns are missing from this chunk.  */
26681 ix86_tieable_integer_mode_p (enum machine_mode mode)
26690 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26693 return TARGET_64BIT;
26700 /* Return true if MODE1 is accessible in a register that can hold MODE2
26701 without copying. That is, all register classes that can hold MODE2
26702 can also hold MODE1. */
26705 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26707 if (mode1 == mode2)
26710 if (ix86_tieable_integer_mode_p (mode1)
26711 && ix86_tieable_integer_mode_p (mode2))
26714 /* MODE2 being XFmode implies fp stack or general regs, which means we
26715 can tie any smaller floating point modes to it. Note that we do not
26716 tie this with TFmode. */
26717 if (mode2 == XFmode)
26718 return mode1 == SFmode || mode1 == DFmode;
26720 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26721 that we can tie it with SFmode. */
26722 if (mode2 == DFmode)
26723 return mode1 == SFmode;
26725 /* If MODE2 is only appropriate for an SSE register, then tie with
26726 any other mode acceptable to SSE registers. */
26727 if (GET_MODE_SIZE (mode2) == 16
26728 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26729 return (GET_MODE_SIZE (mode1) == 16
26730 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26732 /* If MODE2 is appropriate for an MMX register, then tie
26733 with any other mode acceptable to MMX registers. */
26734 if (GET_MODE_SIZE (mode2) == 8
26735 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26736 return (GET_MODE_SIZE (mode1) == 8
26737 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26742 /* Compute a (partial) cost for rtx X. Return true if the complete
26743 cost has been computed, and false if subexpressions should be
26744 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): truncated -- the case labels of the big switch, many
   braces and the true/false returns are missing from this chunk; only the
   cost assignments per rtx code survive.  Compare with upstream i386.c
   before modifying.  */
26747 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26749 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26750 enum machine_mode mode = GET_MODE (x);
26751 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* CONST / LABEL_REF / SYMBOL_REF costs.  */
26759 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26761 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26763 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): BUG -- "!GET_CODE (x)" evaluates to 0, so this
   comparison is always true and the LABEL_REF test is a no-op.  This
   matches a known upstream GCC defect, later fixed by dropping the "!"
   so the line reads "(GET_CODE (x) != LABEL_REF".  Flagged only; not
   changed here because this chunk is incomplete.  */
26765 || (!GET_CODE (x) != LABEL_REF
26766 && (GET_CODE (x) != SYMBOL_REF
26767 || !SYMBOL_REF_LOCAL_P (x)))))
26774 if (mode == VOIDmode)
/* Floating-point constants: 0.0/1.0 and friends are cheap on the 387.  */
26777 switch (standard_80387_constant_p (x))
26782 default: /* Other constants */
26787 /* Start with (MEM (SYMBOL_REF)), since that's where
26788 it'll probably end up. Add a penalty for size. */
26789 *total = (COSTS_N_INSNS (1)
26790 + (flag_pic != 0 && !TARGET_64BIT)
26791 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26797 /* The zero extensions is often completely free on x86_64, so make
26798 it as cheap as possible. */
26799 if (TARGET_64BIT && mode == DImode
26800 && GET_MODE (XEXP (x, 0)) == SImode)
26802 else if (TARGET_ZERO_EXTEND_WITH_AND)
26803 *total = cost->add;
26805 *total = cost->movzx;
26809 *total = cost->movsx;
/* Shift by constant: an ASHIFT by 1 is an add; by 2 or 3 may be an lea.  */
26813 if (CONST_INT_P (XEXP (x, 1))
26814 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26816 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26819 *total = cost->add;
26822 if ((value == 2 || value == 3)
26823 && cost->lea <= cost->shift_const)
26825 *total = cost->lea;
/* DImode shifts on 32-bit need a two-insn (or worse) sequence.  */
26835 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26837 if (CONST_INT_P (XEXP (x, 1)))
26839 if (INTVAL (XEXP (x, 1)) > 32)
26840 *total = cost->shift_const + COSTS_N_INSNS (2);
26842 *total = cost->shift_const * 2;
26846 if (GET_CODE (XEXP (x, 1)) == AND)
26847 *total = cost->shift_var * 2;
26849 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26854 if (CONST_INT_P (XEXP (x, 1)))
26855 *total = cost->shift_const;
26857 *total = cost->shift_var;
/* MULT: FP multiply uses fmul cost; integer multiply is modeled as
   mult_init plus mult_bit per set bit of a constant operand.  */
26862 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26864 /* ??? SSE scalar cost should be used here. */
26865 *total = cost->fmul;
26868 else if (X87_FLOAT_MODE_P (mode))
26870 *total = cost->fmul;
26873 else if (FLOAT_MODE_P (mode))
26875 /* ??? SSE vector cost should be used here. */
26876 *total = cost->fmul;
26881 rtx op0 = XEXP (x, 0);
26882 rtx op1 = XEXP (x, 1);
26884 if (CONST_INT_P (XEXP (x, 1)))
26886 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
/* Kernighan popcount: one iteration per set bit.  */
26887 for (nbits = 0; value != 0; value &= value - 1)
26891 /* This is arbitrary. */
26894 /* Compute costs correctly for widening multiplication. */
26895 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26896 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26897 == GET_MODE_SIZE (mode))
26899 int is_mulwiden = 0;
26900 enum machine_mode inner_mode = GET_MODE (op0);
26902 if (GET_CODE (op0) == GET_CODE (op1))
26903 is_mulwiden = 1, op1 = XEXP (op1, 0);
26904 else if (CONST_INT_P (op1))
26906 if (GET_CODE (op0) == SIGN_EXTEND)
26907 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26910 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26914 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26917 *total = (cost->mult_init[MODE_INDEX (mode)]
26918 + nbits * cost->mult_bit
26919 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
/* DIV/MOD: FP division uses fdiv; integer uses the per-mode divide table.  */
26928 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26929 /* ??? SSE cost should be used here. */
26930 *total = cost->fdiv;
26931 else if (X87_FLOAT_MODE_P (mode))
26932 *total = cost->fdiv;
26933 else if (FLOAT_MODE_P (mode))
26934 /* ??? SSE vector cost should be used here. */
26935 *total = cost->fdiv;
26937 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize address-like (mult+add) shapes that an lea can do.  */
26941 if (GET_MODE_CLASS (mode) == MODE_INT
26942 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26944 if (GET_CODE (XEXP (x, 0)) == PLUS
26945 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26946 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26947 && CONSTANT_P (XEXP (x, 1)))
26949 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26950 if (val == 2 || val == 4 || val == 8)
26952 *total = cost->lea;
26953 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26954 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26955 outer_code, speed);
26956 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26960 else if (GET_CODE (XEXP (x, 0)) == MULT
26961 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26963 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26964 if (val == 2 || val == 4 || val == 8)
26966 *total = cost->lea;
26967 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26968 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26972 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26974 *total = cost->lea;
26975 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26976 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26977 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
/* PLUS/MINUS on FP modes.  */
26984 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26986 /* ??? SSE cost should be used here. */
26987 *total = cost->fadd;
26990 else if (X87_FLOAT_MODE_P (mode))
26992 *total = cost->fadd;
26995 else if (FLOAT_MODE_P (mode))
26997 /* ??? SSE vector cost should be used here. */
26998 *total = cost->fadd;
/* Double-word add/sub on 32-bit: two adds, plus conversion costs.  */
27006 if (!TARGET_64BIT && mode == DImode)
27008 *total = (cost->add * 2
27009 + (rtx_cost (XEXP (x, 0), outer_code, speed)
27010 << (GET_MODE (XEXP (x, 0)) != DImode))
27011 + (rtx_cost (XEXP (x, 1), outer_code, speed)
27012 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG on FP modes costs an fchs.  */
27018 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27020 /* ??? SSE cost should be used here. */
27021 *total = cost->fchs;
27024 else if (X87_FLOAT_MODE_P (mode))
27026 *total = cost->fchs;
27029 else if (FLOAT_MODE_P (mode))
27031 /* ??? SSE vector cost should be used here. */
27032 *total = cost->fchs;
/* NOT: two adds for a 32-bit DImode, otherwise one.  */
27038 if (!TARGET_64BIT && mode == DImode)
27039 *total = cost->add * 2;
27041 *total = cost->add;
/* COMPARE of a single bit extract against zero => test instruction.  */
27045 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
27046 && XEXP (XEXP (x, 0), 1) == const1_rtx
27047 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
27048 && XEXP (x, 1) == const0_rtx)
27050 /* This kind of construct is implemented using test[bwl].
27051 Treat it as if we had an AND. */
27052 *total = (cost->add
27053 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
27054 + rtx_cost (const1_rtx, outer_code, speed));
27060 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS / SQRT on FP modes.  */
27065 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27066 /* ??? SSE cost should be used here. */
27067 *total = cost->fabs;
27068 else if (X87_FLOAT_MODE_P (mode))
27069 *total = cost->fabs;
27070 else if (FLOAT_MODE_P (mode))
27071 /* ??? SSE vector cost should be used here. */
27072 *total = cost->fabs;
27076 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27077 /* ??? SSE cost should be used here. */
27078 *total = cost->fsqrt;
27079 else if (X87_FLOAT_MODE_P (mode))
27080 *total = cost->fsqrt;
27081 else if (FLOAT_MODE_P (mode))
27082 /* ??? SSE vector cost should be used here. */
27083 *total = cost->fsqrt;
/* UNSPEC: thread-pointer reference is free.  */
27087 if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique local labels for Mach-O stubs.  */
27098 static int current_machopic_label_num;
27100 /* Given a symbol name and its associated stub, write out the
27101 definition of the stub. */
/* Emits the Darwin/Mach-O lazy-binding stub for SYMB: the stub itself
   (PIC or non-PIC flavor), the binder helper, and the lazy pointer slot.
   NOTE(review): truncated -- the return-type line, braces and the
   MACHOPIC_ATOMIC_PIC_BASE / fprintf lines selecting between PIC and
   non-PIC variants are missing from this chunk; 32-bit only
   (gcc_assert below).  */
27104 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27106 unsigned int length;
27107 char *binder_name, *symbol_name, lazy_ptr_name[32];
27108 int label = ++current_machopic_label_num;
27110 /* For 64-bit we shouldn't get here. */
27111 gcc_assert (!TARGET_64BIT);
27113 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27114 symb = (*targetm.strip_name_encoding) (symb);
27116 length = strlen (stub);
27117 binder_name = XALLOCAVEC (char, length + 32);
27118 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27120 length = strlen (symb);
27121 symbol_name = XALLOCAVEC (char, length + 32);
27122 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27124 sprintf (lazy_ptr_name, "L%d$lz", label);
27127 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27129 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27131 fprintf (file, "%s:\n", stub);
27132 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC stub body: materialize the PC in %eax, load the lazy pointer.  */
27136 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27137 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27138 fprintf (file, "\tjmp\t*%%edx\n");
27141 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27143 fprintf (file, "%s:\n", binder_name);
27147 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27148 fprintf (file, "\tpushl\t%%eax\n");
27151 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27153 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer: initially points at the binder; dyld patches it.  */
27155 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27156 fprintf (file, "%s:\n", lazy_ptr_name);
27157 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27158 fprintf (file, "\t.long %s\n", binder_name);
/* TARGET_ASM_FILE_END for Darwin: delegate to the generic Darwin hook.  */
27162 darwin_x86_file_end (void)
27164 darwin_file_end ();
27167 #endif /* TARGET_MACHO */
27169 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: call-clobbered GPRs first, then call-saved
   GPRs, then whichever of x87/SSE matches the FP math mode in use, then
   MMX.  NOTE(review): truncated -- the return-type line, the declarations
   of I and POS, and braces are missing from this chunk.  */
27172 x86_order_regs_for_local_alloc (void)
27177 /* First allocate the local general purpose registers. */
27178 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27179 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27180 reg_alloc_order [pos++] = i;
27182 /* Global general purpose registers. */
27183 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27184 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27185 reg_alloc_order [pos++] = i;
27187 /* x87 registers come first in case we are doing FP math
27189 if (!TARGET_SSE_MATH)
27190 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27191 reg_alloc_order [pos++] = i;
27193 /* SSE registers. */
27194 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27195 reg_alloc_order [pos++] = i;
27196 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27197 reg_alloc_order [pos++] = i;
27199 /* x87 registers. */
27200 if (TARGET_SSE_MATH)
27201 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27202 reg_alloc_order [pos++] = i;
27204 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27205 reg_alloc_order [pos++] = i;
27207 /* Initialize the rest of array as we do not allocate some registers
27209 while (pos < FIRST_PSEUDO_REGISTER)
27210 reg_alloc_order [pos++] = 0;
27213 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
27214 struct attribute_spec.handler. */
/* NOTE(review): truncated -- the return-type lines, braces and the final
   "return NULL_TREE" statements of both handlers below are missing from
   this chunk.  */
27216 ix86_handle_abi_attribute (tree *node, tree name,
27217 tree args ATTRIBUTE_UNUSED,
27218 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Only meaningful on function-ish declarations.  */
27220 if (TREE_CODE (*node) != FUNCTION_TYPE
27221 && TREE_CODE (*node) != METHOD_TYPE
27222 && TREE_CODE (*node) != FIELD_DECL
27223 && TREE_CODE (*node) != TYPE_DECL)
27225 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27227 *no_add_attrs = true;
/* The calling-convention attributes only exist on 64-bit targets.  */
27232 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27234 *no_add_attrs = true;
27238 /* Can combine regparm with all attributes but fastcall. */
27239 if (is_attribute_p ("ms_abi", name))
27241 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27243 error ("ms_abi and sysv_abi attributes are not compatible");
27248 else if (is_attribute_p ("sysv_abi", name))
27250 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27252 error ("ms_abi and sysv_abi attributes are not compatible");
27261 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27262 struct attribute_spec.handler. */
27264 ix86_handle_struct_attribute (tree *node, tree name,
27265 tree args ATTRIBUTE_UNUSED,
27266 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Find the record/union type the attribute applies to.  */
27269 if (DECL_P (*node))
27271 if (TREE_CODE (*node) == TYPE_DECL)
27272 type = &TREE_TYPE (*node);
27277 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27278 || TREE_CODE (*type) == UNION_TYPE)))
27280 warning (OPT_Wattributes, "%qE attribute ignored",
27282 *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive.  */
27285 else if ((is_attribute_p ("ms_struct", name)
27286 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27287 || ((is_attribute_p ("gcc_struct", name)
27288 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27290 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27292 *no_add_attrs = true;
/* TARGET_MS_BITFIELD_LAYOUT_P hook: use MS bitfield layout when enabled
   globally (unless gcc_struct overrides) or requested via ms_struct.  */
27299 ix86_ms_bitfield_layout_p (const_tree record_type)
27301 return (TARGET_MS_BITFIELD_LAYOUT &&
27302 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27303 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
27306 /* Returns an expression indicating where the this parameter is
27307 located on entry to the FUNCTION. */
/* NOTE(review): truncated -- the return-type line, braces, the 64-bit
   TARGET_64BIT guard and the declarations of NREGS/REGNO are missing
   from this chunk.  */
27310 x86_this_parameter (tree function)
27312 tree type = TREE_TYPE (function);
/* AGGR: returning an aggregate shifts `this' to the second slot.  */
27313 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27318 const int *parm_regs;
27320 if (ix86_function_type_abi (type) == MS_ABI)
27321 parm_regs = x86_64_ms_abi_int_parameter_registers;
27323 parm_regs = x86_64_int_parameter_registers;
27324 return gen_rtx_REG (DImode, parm_regs[aggr]);
27327 nregs = ix86_function_regparm (type, function);
27329 if (nregs > 0 && !stdarg_p (type))
27333 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27334 regno = aggr ? DX_REG : CX_REG;
27342 return gen_rtx_MEM (SImode,
27343 plus_constant (stack_pointer_rtx, 4));
27346 return gen_rtx_REG (SImode, regno);
/* Default 32-bit case: `this' is on the stack.  */
27349 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27352 /* Determine whether x86_output_mi_thunk can succeed. */
27355 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27356 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27357 HOST_WIDE_INT vcall_offset, const_tree function)
27359 /* 64-bit can handle anything. */
27363 /* For 32-bit, everything's fine if we have one free register. */
27364 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27367 /* Need a free register for vcall_offset. */
27371 /* Need a free register for GOT references. */
27372 if (flag_pic && !(*targetm.binds_local_p) (function))
27375 /* Otherwise ok. */
27379 /* Output the assembler code for a thunk function. THUNK_DECL is the
27380 declaration for the thunk function itself, FUNCTION is the decl for
27381 the target function. DELTA is an immediate constant offset to be
27382 added to THIS. If VCALL_OFFSET is nonzero, the word at
27383 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): truncated -- the xops[] declaration, braces, several
   TARGET_64BIT / delta guards and the TARGET_MACHO alternative paths are
   partially missing from this chunk; compare with upstream before
   editing.  */
27386 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
27387 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27388 HOST_WIDE_INT vcall_offset, tree function)
27391 rtx this_param = x86_this_parameter (function);
27394 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27395 pull it in now and let DELTA benefit. */
27396 if (REG_P (this_param))
27397 this_reg = this_param;
27398 else if (vcall_offset)
27400 /* Put the this parameter into %eax. */
27401 xops[0] = this_param;
27402 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27403 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27406 this_reg = NULL_RTX;
27408 /* Adjust the this parameter by a fixed constant. */
27411 xops[0] = GEN_INT (delta);
27412 xops[1] = this_reg ? this_reg : this_param;
/* 64-bit: DELTA may not fit in an add immediate; stage it via %r10.  */
27415 if (!x86_64_general_operand (xops[0], DImode))
27417 tmp = gen_rtx_REG (DImode, R10_REG);
27419 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27421 xops[1] = this_param;
27423 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27426 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27429 /* Adjust the this parameter by a value stored in the vtable. */
27433 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit scratch register: %ecx, unless fastcall uses it for args.  */
27436 int tmp_regno = CX_REG;
27437 if (lookup_attribute ("fastcall",
27438 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27439 tmp_regno = AX_REG;
27440 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer (*this) into TMP.  */
27443 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27445 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27447 /* Adjust the this parameter. */
27448 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27449 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27451 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27452 xops[0] = GEN_INT (vcall_offset);
27454 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27455 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27457 xops[1] = this_reg;
27458 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27461 /* If necessary, drop THIS back to its stack slot. */
27462 if (this_reg && this_reg != this_param)
27464 xops[0] = this_reg;
27465 xops[1] = this_param;
27466 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
/* Finally, tail-call the real FUNCTION.  */
27469 xops[0] = XEXP (DECL_RTL (function), 0);
27472 if (!flag_pic || (*targetm.binds_local_p) (function))
27473 output_asm_insn ("jmp\t%P0", xops);
27474 /* All thunks should be in the same object as their target,
27475 and thus binds_local_p should be true. */
27476 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27477 gcc_unreachable ();
/* 64-bit PIC: jump through the GOT entry.  */
27480 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27481 tmp = gen_rtx_CONST (Pmode, tmp);
27482 tmp = gen_rtx_MEM (QImode, tmp);
27484 output_asm_insn ("jmp\t%A0", xops);
27489 if (!flag_pic || (*targetm.binds_local_p) (function))
27490 output_asm_insn ("jmp\t%P0", xops);
/* Darwin PIC: jump through the Mach-O symbol stub.  */
27495 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27496 tmp = (gen_rtx_SYMBOL_REF
27498 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27499 tmp = gen_rtx_MEM (QImode, tmp);
27501 output_asm_insn ("jmp\t%0", xops);
27504 #endif /* TARGET_MACHO */
/* Generic 32-bit PIC: set up the GOT base in %ecx and jump via GOT.  */
27506 tmp = gen_rtx_REG (SImode, CX_REG);
27507 output_set_got (tmp, NULL_RTX);
27510 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27511 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: standard prologue plus optional Darwin,
   .version, __fltused and Intel-syntax directives.  NOTE(review):
   truncated -- return-type lines, braces and #ifdef guards are missing
   from this chunk.  */
27517 x86_file_start (void)
27519 default_file_start ();
27521 darwin_file_start ();
27523 if (X86_FILE_START_VERSION_DIRECTIVE)
27524 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27525 if (X86_FILE_START_FLTUSED)
27526 fputs ("\t.global\t__fltused\n", asm_out_file);
27527 if (ix86_asm_dialect == ASM_INTEL)
27528 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: cap alignment of double/long-long-like
   fields at 32 bits on 32-bit non-align-double targets.  */
27532 x86_field_alignment (tree field, int computed)
27534 enum machine_mode mode;
27535 tree type = TREE_TYPE (field);
27537 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27539 mode = TYPE_MODE (strip_array_types (type));
27540 if (mode == DFmode || mode == DCmode
27541 || GET_MODE_CLASS (mode) == MODE_INT
27542 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27543 return MIN (32, computed);
27547 /* Output assembler code to FILE to increment profiler label # LABELNO
27548 for profiling a function entry. */
/* Emits the mcount call sequence -- 64-bit, 32-bit PIC and 32-bit
   non-PIC variants; counter setup is guarded by NO_PROFILE_COUNTERS.  */
27550 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27554 #ifndef NO_PROFILE_COUNTERS
27555 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27558 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27559 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27561 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27565 #ifndef NO_PROFILE_COUNTERS
27566 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27567 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27569 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
27573 #ifndef NO_PROFILE_COUNTERS
27574 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27575 PROFILE_COUNT_REGISTER);
27577 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27581 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27582 /* We don't have exact information about the insn sizes, but we may assume
27583 quite safely that we are informed about all 1 byte insns and memory
27584 address sizes. This is enough to eliminate unnecessary padding in
/* Conservative lower bound on the byte size of INSN, used by the K8
   jump-mispredict padding pass below.  NOTE(review): truncated -- the
   return-type line, braces and several "return N" statements (e.g. the
   fixed 5-byte call size) are missing from this chunk.  */
27588 min_insn_size (rtx insn)
27592 if (!INSN_P (insn) || !active_insn_p (insn))
27595 /* Discard alignments we've emit and jump instructions. */
27596 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27597 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27599 if (JUMP_TABLE_DATA_P (insn))
27602 /* Important case - calls are always 5 bytes.
27603 It is common to have many calls in the row. */
27605 && symbolic_reference_mentioned_p (PATTERN (insn))
27606 && !SIBLING_CALL_P (insn))
27608 len = get_attr_length (insn);
27612 /* For normal instructions we rely on get_attr_length being exact,
27613 with a few exceptions. */
27614 if (!JUMP_P (insn))
27616 enum attr_type type = get_attr_type (insn);
/* Inline asm lengths are only estimates; don't trust them as minimums.  */
27621 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27622 || asm_noperands (PATTERN (insn)) >= 0)
27629 /* Otherwise trust get_attr_length. */
27633 l = get_attr_length_address (insn);
27634 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27643 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Scans the insn stream maintaining a sliding window [START, INSN]; when a
   window smaller than 16 bytes contains 4 jumps, a pad insn is emitted so
   the jumps cannot share one 16-byte fetch block.  NOTE(review): elided
   listing -- braces, several conditions (e.g. the JUMP_P/CALL_P test whose
   continuation is at 27708), and loop scaffolding are missing.  */
27647 ix86_avoid_jump_mispredicts (void)
27649 rtx insn, start = get_insns ();
27650 int nbytes = 0, njumps = 0;
27653 /* Look for all minimal intervals of instructions containing 4 jumps.
27654 The intervals are bounded by START and INSN. NBYTES is the total
27655 size of instructions in the interval including INSN and not including
27656 START. When the NBYTES is smaller than 16 bytes, it is possible
27657 that the end of START and INSN ends up in the same 16byte page.
27659 The smallest offset in the page INSN can start is the case where START
27660 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
27661 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
27663 for (insn = start; insn; insn = NEXT_INSN (insn))
27667 if (LABEL_P (insn))
27669 int align = label_to_alignment (insn);
27670 int max_skip = label_to_max_skip (insn);
27674 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27675 already in the current 16 byte page, because otherwise
27676 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27677 bytes to reach 16 byte boundary. */
27679 || (align <= 3 && max_skip != (1 << align) - 1))
27682 fprintf (dump_file, "Label %i with max_skip %i\n",
27683 INSN_UID (insn), max_skip);
/* Shrink the window from the front until the label's max_skip fits.  */
27686 while (nbytes + max_skip >= 16)
27688 start = NEXT_INSN (start);
27689 if ((JUMP_P (start)
27690 && GET_CODE (PATTERN (start)) != ADDR_VEC
27691 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27693 njumps--, isjump = 1;
27696 nbytes -= min_insn_size (start);
27702 min_size = min_insn_size (insn);
27703 nbytes += min_size;
27705 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27706 INSN_UID (insn), min_size);
27708 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27709 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Second window-shrink loop: keep at most 4 jumps in the interval.  */
27717 start = NEXT_INSN (start);
27718 if ((JUMP_P (start)
27719 && GET_CODE (PATTERN (start)) != ADDR_VEC
27720 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27722 njumps--, isjump = 1;
27725 nbytes -= min_insn_size (start);
27727 gcc_assert (njumps >= 0);
27729 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27730 INSN_UID (start), INSN_UID (insn), nbytes);
27732 if (njumps == 3 && isjump && nbytes < 16)
27734 int padsize = 15 - nbytes + min_insn_size (insn);
27737 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27738 INSN_UID (insn), padsize);
27739 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27745 /* AMD Athlon works faster
27746 when RET is not destination of conditional jump or directly preceded
27747 by other jump instruction. We avoid the penalty by inserting NOP just
27748 before the RET instructions in such cases. */
/* Walks predecessors of the exit block; for each RETURN jump that follows
   a label or a conditional jump, replaces it with the long-return form.
   NOTE(review): elided listing -- braces, `continue` statements, and the
   `replace = true` assignments are missing from this excerpt.  */
27750 ix86_pad_returns (void)
27755 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27757 basic_block bb = e->src;
27758 rtx ret = BB_END (bb);
27760 bool replace = false;
27762 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27763 || optimize_bb_for_size_p (bb))
27765 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27766 if (active_insn_p (prev) || LABEL_P (prev))
27768 if (prev && LABEL_P (prev))
27773 FOR_EACH_EDGE (e, ei, bb->preds)
27774 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27775 && !(e->flags & EDGE_FALLTHRU))
27780 prev = prev_active_insn (ret);
27782 && ((JUMP_P (prev) && any_condjump_p (prev))
27785 /* Empty functions get branch mispredict even when the jump destination
27786 is not visible to us. */
27787 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27792 emit_jump_insn_before (gen_return_internal_long (), ret);
27798 /* Implement machine specific optimizations. We implement padding of returns
27799 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function header for this body is elided from the
   excerpt (original lines 27800-27802); it dispatches to the two passes
   above when optimizing for speed.  Confirm its name against the full
   source before relying on it.  */
27803 if (optimize && optimize_function_for_speed_p (cfun))
27805 if (TARGET_PAD_RETURNS)
27806 ix86_pad_returns ();
27807 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27808 if (TARGET_FOUR_JUMP_LIMIT)
27809 ix86_avoid_jump_mispredicts ();
27814 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the cached operands of INSN for a register numbered above BX_REG;
   such QImode registers need a REX prefix in 64-bit mode.
   NOTE(review): elided listing -- the QImode mode check and return
   statements are missing from this excerpt.  */
27817 x86_extended_QIreg_mentioned_p (rtx insn)
27820 extract_insn_cached (insn);
27821 for (i = 0; i < recog_data.n_operands; i++)
27822 if (REG_P (recog_data.operand[i])
27823 && REGNO (recog_data.operand[i]) > BX_REG)
27828 /* Return nonzero when P points to register encoded via REX prefix.
27829 Called via for_each_rtx. */
/* for_each_rtx callback; the REG_P guard before 27836 is elided in this
   excerpt -- confirm against the full source.  */
27831 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27833 unsigned int regno;
27836 regno = REGNO (*p);
27837 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27840 /* Return true when INSN mentions register that must be encoded using REX
/* Walks PATTERN (insn) (or INSN itself when it is not an insn) with the
   callback above.  */
27843 x86_extended_reg_mentioned_p (rtx insn)
27845 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27846 extended_reg_mentioned_1, NULL);
27849 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27850 optabs would emit if we didn't have TFmode patterns. */
/* Standard unsigned->float idiom: if the input is non-negative use a plain
   signed conversion; otherwise halve it (shift right, OR in the lost low
   bit), convert, and double the result.  NOTE(review): elided listing --
   the assignment of `out` (presumably from operands[0]) and some binop
   argument lines are missing from this excerpt.  */
27853 x86_emit_floatuns (rtx operands[2])
27855 rtx neglab, donelab, i0, i1, f0, in, out;
27856 enum machine_mode mode, inmode;
27858 inmode = GET_MODE (operands[1]);
27859 gcc_assert (inmode == SImode || inmode == DImode);
27862 in = force_reg (inmode, operands[1]);
27863 mode = GET_MODE (out);
27864 neglab = gen_label_rtx ();
27865 donelab = gen_label_rtx ();
27866 f0 = gen_reg_rtx (mode);
27868 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27870 expand_float (out, in, 0);
27872 emit_jump_insn (gen_jump (donelab));
27875 emit_label (neglab);
27877 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27879 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27881 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT)
27883 expand_float (f0, i0, 0);
27885 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27887 emit_label (donelab);
27890 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27891 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): elided listing -- the switch over MODE and several case
   labels/braces are missing; the visible fragments are the per-mode
   broadcast strategies (VEC_DUPLICATE, punpckl + pshufd for V8HI/V16QI,
   widen-and-recurse, and half-mode VEC_CONCAT).  */
27894 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27895 rtx target, rtx val)
27897 enum machine_mode hmode, smode, wsmode, wvmode;
/* Direct VEC_DUPLICATE broadcast.  */
27912 val = force_reg (GET_MODE_INNER (mode), val);
27913 x = gen_rtx_VEC_DUPLICATE (mode, val);
27914 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27920 if (TARGET_SSE || TARGET_3DNOW_A)
27922 val = gen_lowpart (SImode, val);
27923 x = gen_rtx_TRUNCATE (HImode, val);
27924 x = gen_rtx_VEC_DUPLICATE (mode, x);
27925 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27947 /* Extend HImode to SImode using a paradoxical SUBREG. */
27948 tmp1 = gen_reg_rtx (SImode);
27949 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27950 /* Insert the SImode value as low element of V4SImode vector. */
27951 tmp2 = gen_reg_rtx (V4SImode);
27952 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27953 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27954 CONST0_RTX (V4SImode),
27956 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27957 /* Cast the V4SImode vector back to a V8HImode vector. */
27958 tmp1 = gen_reg_rtx (V8HImode);
27959 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27960 /* Duplicate the low short through the whole low SImode word. */
27961 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27962 /* Cast the V8HImode vector back to a V4SImode vector. */
27963 tmp2 = gen_reg_rtx (V4SImode);
27964 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27965 /* Replicate the low element of the V4SImode vector. */
27966 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27967 /* Cast the V2SImode back to V8HImode, and store in target. */
27968 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27979 /* Extend QImode to SImode using a paradoxical SUBREG. */
27980 tmp1 = gen_reg_rtx (SImode);
27981 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27982 /* Insert the SImode value as low element of V4SImode vector. */
27983 tmp2 = gen_reg_rtx (V4SImode);
27984 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27985 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27986 CONST0_RTX (V4SImode),
27988 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27989 /* Cast the V4SImode vector back to a V16QImode vector. */
27990 tmp1 = gen_reg_rtx (V16QImode);
27991 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27992 /* Duplicate the low byte through the whole low SImode word. */
27993 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27994 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27995 /* Cast the V16QImode vector back to a V4SImode vector. */
27996 tmp2 = gen_reg_rtx (V4SImode);
27997 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27998 /* Replicate the low element of the V4SImode vector. */
27999 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
28000 /* Cast the V2SImode back to V16QImode, and store in target. */
28001 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
28009 /* Replicate the value once into the next wider mode and recurse. */
28010 val = convert_modes (wsmode, smode, val, true);
28011 x = expand_simple_binop (wsmode, ASHIFT, val,
28012 GEN_INT (GET_MODE_BITSIZE (smode)),
28013 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28014 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
28016 x = gen_reg_rtx (wvmode);
28017 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
28018 gcc_unreachable ();
28019 emit_move_insn (target, gen_lowpart (mode, x));
/* Broadcast into a half-width vector, then concatenate it with itself.  */
28042 rtx tmp = gen_reg_rtx (hmode);
28043 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
28044 emit_insn (gen_rtx_SET (VOIDmode, target,
28045 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
28054 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28055 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* NOTE(review): elided listing -- the switch over MODE, case labels, and
   several return statements are missing; the visible fragments choose
   between vec_set-style insertion, VEC_CONCAT with zero, and a
   VEC_MERGE-then-shuffle sequence for 4-element vectors.  */
28059 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28060 rtx target, rtx var, int one_var)
28062 enum machine_mode vsimode;
28065 bool use_vector_set = false;
28070 /* For SSE4.1, we normally use vector set. But if the second
28071 element is zero and inter-unit moves are OK, we use movq
28073 use_vector_set = (TARGET_64BIT
28075 && !(TARGET_INTER_UNIT_MOVES
28081 use_vector_set = TARGET_SSE4_1;
28084 use_vector_set = TARGET_SSE2;
28087 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28094 use_vector_set = TARGET_AVX;
28097 /* Use ix86_expand_vector_set in 64bit mode only. */
28098 use_vector_set = TARGET_AVX && TARGET_64BIT;
28104 if (use_vector_set)
/* Zero the whole vector, then insert VAR at ONE_VAR.  */
28106 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28107 var = force_reg (GET_MODE_INNER (mode), var);
28108 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28124 var = force_reg (GET_MODE_INNER (mode), var);
28125 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28126 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28131 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28132 new_target = gen_reg_rtx (mode);
28134 new_target = target;
28135 var = force_reg (GET_MODE_INNER (mode), var);
28136 x = gen_rtx_VEC_DUPLICATE (mode, var);
28137 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28138 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28141 /* We need to shuffle the value to the correct position, so
28142 create a new pseudo to store the intermediate result. */
28144 /* With SSE2, we can use the integer shuffle insns. */
28145 if (mode != V4SFmode && TARGET_SSE2)
28147 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28149 GEN_INT (one_var == 1 ? 0 : 1),
28150 GEN_INT (one_var == 2 ? 0 : 1),
28151 GEN_INT (one_var == 3 ? 0 : 1)));
28152 if (target != new_target)
28153 emit_move_insn (target, new_target);
28157 /* Otherwise convert the intermediate result to V4SFmode and
28158 use the SSE1 shuffle instructions. */
28159 if (mode != V4SFmode)
28161 tmp = gen_reg_rtx (V4SFmode);
28162 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28167 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28169 GEN_INT (one_var == 1 ? 0 : 1),
28170 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28171 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28173 if (mode != V4SFmode)
28174 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28175 else if (tmp != target)
28176 emit_move_insn (target, tmp);
28178 else if (target != new_target)
28179 emit_move_insn (target, new_target);
28184 vsimode = V4SImode;
28190 vsimode = V2SImode;
28196 /* Zero extend the variable element to SImode and recurse. */
28197 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28199 x = gen_reg_rtx (vsimode);
28200 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28202 gcc_unreachable ();
28204 emit_move_insn (target, gen_lowpart (mode, x));
28212 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28213 consisting of the values in VALS. It is known that all elements
28214 except ONE_VAR are constants. Return true if successful. */
/* NOTE(review): elided listing -- the switch over MODE and the V16QI/V8QI
   case structure (which computes WMODE) are partially missing.  */
28217 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28218 rtx target, rtx vals, int one_var)
28220 rtx var = XVECEXP (vals, 0, one_var);
28221 enum machine_mode wmode;
/* Build the constant vector with the variable slot zeroed.  */
28224 const_vec = copy_rtx (vals);
28225 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28226 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28234 /* For the two element vectors, it's just as easy to use
28235 the general case. */
28239 /* Use ix86_expand_vector_set in 64bit mode only. */
28262 /* There's no way to set one QImode entry easily. Combine
28263 the variable value with its adjacent constant value, and
28264 promote to an HImode set. */
28265 x = XVECEXP (vals, 0, one_var ^ 1);
28268 var = convert_modes (HImode, QImode, var, true);
28269 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28270 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28271 x = GEN_INT (INTVAL (x) & 0xff);
28275 var = convert_modes (HImode, QImode, var, true);
28276 x = gen_int_mode (INTVAL (x) << 8, HImode);
28278 if (x != const0_rtx)
28279 var = expand_simple_binop (HImode, IOR, var, x, var,
28280 1, OPTAB_LIB_WIDEN);
28282 x = gen_reg_rtx (wmode);
28283 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28284 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28286 emit_move_insn (target, gen_lowpart (mode, x));
/* General path: load the constant vector, then overwrite ONE_VAR.  */
28293 emit_move_insn (target, const_vec);
28294 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28298 /* A subroutine of ix86_expand_vector_init_general. Use vector
28299 concatenate to handle the most general case: all values variable,
28300 and none identical. */
/* NOTE(review): elided listing -- the switch over N (2/4/8 visible as
   fragments) and several case labels are missing.  For n == 2 the two
   operands are concatenated directly; for larger n the inputs are paired
   into half-mode vectors and the function recurses.  */
28303 ix86_expand_vector_init_concat (enum machine_mode mode,
28304 rtx target, rtx *ops, int n)
28306 enum machine_mode cmode, hmode = VOIDmode;
28307 rtx first[8], second[4];
28347 gcc_unreachable ();
28350 if (!register_operand (ops[1], cmode))
28351 ops[1] = force_reg (cmode, ops[1]);
28352 if (!register_operand (ops[0], cmode))
28353 ops[0] = force_reg (cmode, ops[0]);
28354 emit_insn (gen_rtx_SET (VOIDmode, target,
28355 gen_rtx_VEC_CONCAT (mode, ops[0],
28375 gcc_unreachable ();
28391 gcc_unreachable ();
28396 /* FIXME: We process inputs backward to help RA. PR 36222. */
28399 for (; i > 0; i -= 2, j--)
28401 first[j] = gen_reg_rtx (cmode);
28402 v = gen_rtvec (2, ops[i - 1], ops[i]);
28403 ix86_expand_vector_init (false, first[j],
28404 gen_rtx_PARALLEL (cmode, v));
28410 gcc_assert (hmode != VOIDmode);
28411 for (i = j = 0; i < n; i += 2, j++)
28413 second[j] = gen_reg_rtx (hmode);
28414 ix86_expand_vector_init_concat (hmode, second [j],
28418 ix86_expand_vector_init_concat (mode, target, second, n);
28421 ix86_expand_vector_init_concat (mode, target, first, n);
28425 gcc_unreachable ();
28429 /* A subroutine of ix86_expand_vector_init_general. Use vector
28430 interleave to handle the most general case: all values variable,
28431 and none identical. */
/* NOTE(review): elided listing -- the switch over MODE (V8HI/V16QI
   selectors visible), braces, and some argument lines are missing.
   Strategy: load element pairs into vectors via vec_set, then repeatedly
   interleave-low through progressively wider integer modes.  */
28434 ix86_expand_vector_init_interleave (enum machine_mode mode,
28435 rtx target, rtx *ops, int n)
28437 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28440 rtx (*gen_load_even) (rtx, rtx, rtx);
28441 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28442 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28447 gen_load_even = gen_vec_setv8hi;
28448 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28449 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28450 inner_mode = HImode;
28451 first_imode = V4SImode;
28452 second_imode = V2DImode;
28453 third_imode = VOIDmode;
28456 gen_load_even = gen_vec_setv16qi;
28457 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28458 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28459 inner_mode = QImode;
28460 first_imode = V8HImode;
28461 second_imode = V4SImode;
28462 third_imode = V2DImode;
28465 gcc_unreachable ();
28468 for (i = 0; i < n; i++)
28470 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
28471 op0 = gen_reg_rtx (SImode);
28472 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28474 /* Insert the SImode value as low element of V4SImode vector. */
28475 op1 = gen_reg_rtx (V4SImode);
28476 op0 = gen_rtx_VEC_MERGE (V4SImode,
28477 gen_rtx_VEC_DUPLICATE (V4SImode,
28479 CONST0_RTX (V4SImode),
28481 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28483 /* Cast the V4SImode vector back to a vector in orignal mode. */
28484 op0 = gen_reg_rtx (mode);
28485 emit_move_insn (op0, gen_lowpart (mode, op1));
28487 /* Load even elements into the second positon. */
28488 emit_insn ((*gen_load_even) (op0,
28489 force_reg (inner_mode,
28493 /* Cast vector to FIRST_IMODE vector. */
28494 ops[i] = gen_reg_rtx (first_imode);
28495 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28498 /* Interleave low FIRST_IMODE vectors. */
28499 for (i = j = 0; i < n; i += 2, j++)
28501 op0 = gen_reg_rtx (first_imode);
28502 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28504 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28505 ops[j] = gen_reg_rtx (second_imode);
28506 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28509 /* Interleave low SECOND_IMODE vectors. */
28510 switch (second_imode)
28513 for (i = j = 0; i < n / 2; i += 2, j++)
28515 op0 = gen_reg_rtx (second_imode);
28516 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28519 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28521 ops[j] = gen_reg_rtx (third_imode);
28522 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28524 second_imode = V2DImode;
28525 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28529 op0 = gen_reg_rtx (second_imode);
28530 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28533 /* Cast the SECOND_IMODE vector back to a vector on original
28535 emit_insn (gen_rtx_SET (VOIDmode, target,
28536 gen_lowpart (mode, op0)));
28540 gcc_unreachable ();
28544 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28545 all values variable, and none identical. */
/* NOTE(review): elided listing -- the switch over MODE and several case
   labels are missing.  Visible strategies: concat for wide modes,
   interleave for V8HI/V16QI halves, and a word-building fallback that
   packs elements into word_mode registers with shift/IOR.  */
28548 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28549 rtx target, rtx vals)
28551 rtx ops[32], op0, op1;
28552 enum machine_mode half_mode = VOIDmode;
28559 if (!mmx_ok && !TARGET_SSE)
28571 n = GET_MODE_NUNITS (mode);
28572 for (i = 0; i < n; i++)
28573 ops[i] = XVECEXP (vals, 0, i);
28574 ix86_expand_vector_init_concat (mode, target, ops, n);
28578 half_mode = V16QImode;
28582 half_mode = V8HImode;
28586 n = GET_MODE_NUNITS (mode);
28587 for (i = 0; i < n; i++)
28588 ops[i] = XVECEXP (vals, 0, i);
28589 op0 = gen_reg_rtx (half_mode);
28590 op1 = gen_reg_rtx (half_mode);
28591 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28593 ix86_expand_vector_init_interleave (half_mode, op1,
28594 &ops [n >> 1], n >> 2);
28595 emit_insn (gen_rtx_SET (VOIDmode, target,
28596 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28600 if (!TARGET_SSE4_1)
28608 /* Don't use ix86_expand_vector_init_interleave if we can't
28609 move from GPR to SSE register directly. */
28610 if (!TARGET_INTER_UNIT_MOVES)
28613 n = GET_MODE_NUNITS (mode);
28614 for (i = 0; i < n; i++)
28615 ops[i] = XVECEXP (vals, 0, i);
28616 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28624 gcc_unreachable ();
28628 int i, j, n_elts, n_words, n_elt_per_word;
28629 enum machine_mode inner_mode;
28630 rtx words[4], shift;
28632 inner_mode = GET_MODE_INNER (mode);
28633 n_elts = GET_MODE_NUNITS (mode);
28634 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28635 n_elt_per_word = n_elts / n_words;
28636 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28638 for (i = 0; i < n_words; ++i)
28640 rtx word = NULL_RTX;
28642 for (j = 0; j < n_elt_per_word; ++j)
28644 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28645 elt = convert_modes (word_mode, inner_mode, elt, true);
28651 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28652 word, 1, OPTAB_LIB_WIDEN);
28653 word = expand_simple_binop (word_mode, IOR, word, elt,
28654 word, 1, OPTAB_LIB_WIDEN);
28662 emit_move_insn (target, gen_lowpart (mode, words[0]));
28663 else if (n_words == 2)
28665 rtx tmp = gen_reg_rtx (mode);
28666 emit_clobber (tmp);
28667 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28668 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28669 emit_move_insn (target, tmp);
28671 else if (n_words == 4)
28673 rtx tmp = gen_reg_rtx (V4SImode);
28674 gcc_assert (word_mode == SImode);
28675 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28676 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28677 emit_move_insn (target, gen_lowpart (mode, tmp));
28680 gcc_unreachable ();
28684 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28685 instructions unless MMX_OK is true. */
/* Entry point: classifies VALS (all-constant, all-same, one variable,
   general) and dispatches to the specialized expanders above.
   NOTE(review): elided listing -- braces and a few return statements are
   missing from this excerpt.  */
28688 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28690 enum machine_mode mode = GET_MODE (target);
28691 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28692 int n_elts = GET_MODE_NUNITS (mode);
28693 int n_var = 0, one_var = -1;
28694 bool all_same = true, all_const_zero = true;
28698 for (i = 0; i < n_elts; ++i)
28700 x = XVECEXP (vals, 0, i);
28701 if (!(CONST_INT_P (x)
28702 || GET_CODE (x) == CONST_DOUBLE
28703 || GET_CODE (x) == CONST_FIXED))
28704 n_var++, one_var = i;
28705 else if (x != CONST0_RTX (inner_mode))
28706 all_const_zero = false;
28707 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28711 /* Constants are best loaded from the constant pool. */
28714 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28718 /* If all values are identical, broadcast the value. */
28720 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28721 XVECEXP (vals, 0, 0)))
28724 /* Values where only one field is non-constant are best loaded from
28725 the pool and overwritten via move later. */
28729 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28730 XVECEXP (vals, 0, one_var),
28734 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28738 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Set element ELT of vector TARGET to VAL.  Per-mode strategies are
   visible below: VEC_CONCAT for 2-element vectors, shufps juggling for
   V4SF, pshufd swap-set-swap for V4SI, AVX half-extract/insert for
   256-bit modes, pinsr-style VEC_MERGE when use_vec_merge, and a
   stack-memory fallback.  NOTE(review): elided listing -- the switch over
   MODE, many case labels, and break statements are missing.  */
28742 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28744 enum machine_mode mode = GET_MODE (target);
28745 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28746 enum machine_mode half_mode;
28747 bool use_vec_merge = false;
/* AVX 256-bit half extract/insert generator tables, indexed by mode row
   and low/high half.  */
28749 static rtx (*gen_extract[6][2]) (rtx, rtx)
28751 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28752 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28753 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28754 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28755 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28756 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28758 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28760 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28761 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28762 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28763 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28764 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28765 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element vector: extract the other element, re-concatenate.  */
28775 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28776 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28778 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28780 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28781 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28787 use_vec_merge = TARGET_SSE4_1;
28795 /* For the two element vectors, we implement a VEC_CONCAT with
28796 the extraction of the other element. */
28798 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28799 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28802 op0 = val, op1 = tmp;
28804 op0 = tmp, op1 = val;
28806 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28807 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28812 use_vec_merge = TARGET_SSE4_1;
28819 use_vec_merge = true;
28823 /* tmp = target = A B C D */
28824 tmp = copy_to_reg (target);
28825 /* target = A A B B */
28826 emit_insn (gen_sse_unpcklps (target, target, target));
28827 /* target = X A B B */
28828 ix86_expand_vector_set (false, target, val, 0);
28829 /* target = A X C D */
28830 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28831 GEN_INT (1), GEN_INT (0),
28832 GEN_INT (2+4), GEN_INT (3+4)));
28836 /* tmp = target = A B C D */
28837 tmp = copy_to_reg (target);
28838 /* tmp = X B C D */
28839 ix86_expand_vector_set (false, tmp, val, 0);
28840 /* target = A B X D */
28841 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28842 GEN_INT (0), GEN_INT (1),
28843 GEN_INT (0+4), GEN_INT (3+4)));
28847 /* tmp = target = A B C D */
28848 tmp = copy_to_reg (target);
28849 /* tmp = X B C D */
28850 ix86_expand_vector_set (false, tmp, val, 0);
28851 /* target = A B X D */
28852 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28853 GEN_INT (0), GEN_INT (1),
28854 GEN_INT (2+4), GEN_INT (0+4)));
28858 gcc_unreachable ();
28863 use_vec_merge = TARGET_SSE4_1;
28867 /* Element 0 handled by vec_merge below. */
28870 use_vec_merge = true;
28876 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28877 store into element 0, then shuffle them back. */
28881 order[0] = GEN_INT (elt);
28882 order[1] = const1_rtx;
28883 order[2] = const2_rtx;
28884 order[3] = GEN_INT (3);
28885 order[elt] = const0_rtx;
28887 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28888 order[1], order[2], order[3]));
28890 ix86_expand_vector_set (false, target, val, 0);
28892 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28893 order[1], order[2], order[3]));
28897 /* For SSE1, we have to reuse the V4SF code. */
28898 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28899 gen_lowpart (SFmode, val), elt);
28904 use_vec_merge = TARGET_SSE2;
28907 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28911 use_vec_merge = TARGET_SSE4_1;
28918 half_mode = V16QImode;
28924 half_mode = V8HImode;
28930 half_mode = V4SImode;
28936 half_mode = V2DImode;
28942 half_mode = V4SFmode;
28948 half_mode = V2DFmode;
28954 /* Compute offset. */
28958 gcc_assert (i <= 1);
28960 /* Extract the half. */
28961 tmp = gen_reg_rtx (half_mode);
28962 emit_insn ((*gen_extract[j][i]) (tmp, target));
28964 /* Put val in tmp at elt. */
28965 ix86_expand_vector_set (false, tmp, val, elt);
28968 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
/* Generic pinsr-style path: merge a broadcast of VAL into TARGET.  */
28977 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28978 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28979 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill to a stack slot, store the element, reload.  */
28983 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28985 emit_move_insn (mem, target);
28987 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28988 emit_move_insn (tmp, val);
28990 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into TARGET.  Visible strategies:
   shufps/unpckhps for V4SF, pshufd/punpckhdq for V4SI, VEC_SELECT with
   zero-extension for small inner modes, and a stack-memory fallback.
   NOTE(review): elided listing -- the switch over MODE, case labels, and
   break statements are missing from this excerpt.  */
28995 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28997 enum machine_mode mode = GET_MODE (vec);
28998 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28999 bool use_vec_extr = false;
29012 use_vec_extr = true;
29016 use_vec_extr = TARGET_SSE4_1;
/* V4SF: shuffle the wanted element into position (or unpckhps for the
   high half), then fall through to the scalar extract.  */
29028 tmp = gen_reg_rtx (mode);
29029 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
29030 GEN_INT (elt), GEN_INT (elt),
29031 GEN_INT (elt+4), GEN_INT (elt+4)));
29035 tmp = gen_reg_rtx (mode);
29036 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
29040 gcc_unreachable ();
29043 use_vec_extr = true;
29048 use_vec_extr = TARGET_SSE4_1;
29062 tmp = gen_reg_rtx (mode);
29063 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29064 GEN_INT (elt), GEN_INT (elt),
29065 GEN_INT (elt), GEN_INT (elt)));
29069 tmp = gen_reg_rtx (mode);
29070 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
29074 gcc_unreachable ();
29077 use_vec_extr = true;
29082 /* For SSE1, we have to reuse the V4SF code. */
29083 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29084 gen_lowpart (V4SFmode, vec), elt);
29090 use_vec_extr = TARGET_SSE2;
29093 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29097 use_vec_extr = TARGET_SSE4_1;
29101 /* ??? Could extract the appropriate HImode element and shift. */
29108 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29109 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29111 /* Let the rtl optimizers know about the zero extension performed. */
29112 if (inner_mode == QImode || inner_mode == HImode)
29114 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29115 target = gen_lowpart (SImode, target);
29118 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill VEC to a stack slot and load the element.  */
29122 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29124 emit_move_insn (mem, vec);
29126 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29127 emit_move_insn (target, tmp);
29131 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29132 pattern to reduce; DEST is the destination; IN is the input vector. */
/* Classic log2 reduction: combine high/low halves with movhlps, then
   combine the remaining pair via a shufps that replicates element 1.  */
29135 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29137 rtx tmp1, tmp2, tmp3;
29139 tmp1 = gen_reg_rtx (V4SFmode);
29140 tmp2 = gen_reg_rtx (V4SFmode);
29141 tmp3 = gen_reg_rtx (V4SFmode);
29143 emit_insn (gen_sse_movhlps (tmp1, in, in));
29144 emit_insn (fn (tmp2, tmp1, in));
29146 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29147 GEN_INT (1), GEN_INT (1),
29148 GEN_INT (1+4), GEN_INT (1+4)));
29149 emit_insn (fn (dest, tmp2, tmp3));
29152 /* Target hook for scalar_mode_supported_p. */
/* NOTE(review): elided listing -- the return values for the DFP and
   TFmode branches are on missing lines.  */
29154 ix86_scalar_mode_supported_p (enum machine_mode mode)
29156 if (DECIMAL_FLOAT_MODE_P (mode))
29158 else if (mode == TFmode)
29161 return default_scalar_mode_supported_p (mode);
29164 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when any enabled ISA level validates it;
   the `return true` lines after each test are elided in this excerpt.  */
29166 ix86_vector_mode_supported_p (enum machine_mode mode)
29168 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29170 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29172 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29174 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29176 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29181 /* Target hook for c_mode_for_suffix. */
/* NOTE(review): the entire body (original lines 29184-29192) is elided
   from this excerpt.  */
29182 static enum machine_mode
29183 ix86_c_mode_for_suffix (char suffix)
29193 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29195 We do this in the new i386 backend to maintain source compatibility
29196 with the old cc0-based compiler. */
/* Prepends "flags" and "fpsr" to the asm clobber list; the third clobber
   and the return are on elided lines -- confirm against full source.  */
29199 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29200 tree inputs ATTRIBUTE_UNUSED,
29203 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29205 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
/* Implements target vector targetm.asm.encode_section_info.  This
   is not used by netware.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
  default_encode_section_info (decl, rtl, first);

  /* Static/external variables placed in the large data section need
     far (64-bit) addressing under the medium/large code models.  */
  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
/* Worker function for REVERSE_CONDITION.  For FP compare modes the
   plain reversal is wrong in the presence of NaNs, so use the
   maybe-unordered variant there.  */
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  Returns the assembler template string.  */
output_387_reg_move (rtx insn, rtx *operands)
  if (REG_P (operands[0]))
      /* reg <- dying reg: a popping store is cheaper than fxch/fst.  */
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	  /* Storing st(0) onto itself: just pop the stack.  */
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
      /* Loading into st(0): a plain fld of the source.  */
      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1";
  else if (MEM_P (operands[0]))
      gcc_assert (REG_P (operands[1]));
      /* Source dies: use a popping store.  */
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0";
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
	  return "fst%Z0\t%y0";
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */
ix86_emit_fp_unordered_jump (rtx label)
  rtx reg = gen_reg_rtx (HImode);

  /* Copy the x87 status word into REG.  */
  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
      /* sahf transfers the status word into EFLAGS; test PF via
	 the UNORDERED condition.  */
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
      /* No sahf: test the C2 bit (0x04 of the high byte) directly.  */
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  /* Mark the branch as unlikely (10%).  */
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
/* Output code to perform a log1p XFmode calculation.
   Uses fyl2xp1 for |op1| < 1 - sqrt(2)/2 (where it is accurate) and
   falls back to fyl2x (1 + op1) otherwise.  */
void ix86_emit_i387_log1p (rtx op0, rtx op1)
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  /* 0.2928... = 1 - sqrt(2)/2: the range limit documented for fyl2xp1.  */
  test = gen_rtx_GE (VOIDmode, tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  /* Out-of-range: compute ln2 * log2 (1 + op1) explicitly.  */
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
  rtx x0, x1, e0, e1, two;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);

  /* For vector modes broadcast 2.0 into every lane.  */
  if (VECTOR_MODE_P (mode))
    two = ix86_build_const_vector (SFmode, true, two);

  two = force_reg (mode, two);

  /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, b),

  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, b)));

  /* e1 = 2.0 - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MINUS (mode, two, e0)));

  /* x1 = x0 * e1: refined reciprocal.  */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
			  gen_rtx_MULT (mode, x0, e1)));

  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, a, x1)));
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.
   NOTE(review): the listing omits the final parameter line — presumably
   "bool recip)" selecting rsqrt vs. sqrt — and the if/else around the
   two e3 assignments below; confirm against the full source.  */
void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
  rtx x0, e0, e1, e2, e3, mthree, mhalf;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  /* mthree = -3.0 (built from integer -3).  */
  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  /* mhalf = -0.5.  */
  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
      mthree = ix86_build_const_vector (SFmode, true, mthree);
      mhalf = ix86_build_const_vector (SFmode, true, mhalf);

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      /* mask = (a != 0) ? all-ones : 0; ANDing zeroes the estimate
	 where a == 0 so 0 * inf never occurs.  */
      zero = force_reg (mode, CONST0_RTX(mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
			      gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
			      gen_rtx_AND (mode, x0, mask)));

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3.0 */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  /* e3 = -.5 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e3,
			  gen_rtx_MULT (mode, x0, mhalf)));
  /* e3 = -.5 * e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e3,
			  gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, e2, e3)));
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void ATTRIBUTE_UNUSED
i386_solaris_elf_named_section (const char *name, unsigned int flags,
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
      && strcmp (name, ".eh_frame") == 0)
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
  /* Anything else uses the generic ELF section directive.  */
  default_elf_asm_named_section (name, flags, decl);
/* Return the mangling of TYPE if it is an extended fundamental type,
   else NULL (mangling deferred to the language front end).  */

static const char *
ix86_mangle_type (const_tree type)
  type = TYPE_MAIN_VARIANT (type);

  /* Only fundamental scalar types get target-specific manglings.  */
  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)

  switch (TYPE_MODE (type))
      /* __float128 is "g".  */
      /* "long double" or __float80 is "e".  */
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */
ix86_stack_protect_fail (void)
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
asm_preferred_eh_data_format (int code, int global)
  /* PIC case: pc-relative, indirect for global symbols; 8-byte data
     unless the code model guarantees 32-bit reach.  */
      int type = DW_EH_PE_sdata8;
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
  /* Non-PIC: absolute encodings.  */
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit; otherwise a fresh sign-bit mask is built here.  */
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
	  /* We need to generate a scalar mode mask in this case.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
	  mask = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    /* Caller-supplied MASK keeps the sign bit; invert it to select it.  */
    mask = gen_rtx_NOT (mode, mask);

  /* sgn = SIGN's sign bit; result = ABS_VALUE | sgn.  */
  emit_insn (gen_rtx_SET (VOIDmode, sgn,
			  gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
			  gen_rtx_IOR (mode, abs_value, sgn)));
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
ix86_expand_sse_fabs (rtx op0, rtx *smask)
  enum machine_mode mode = GET_MODE (op0);

  xa = gen_reg_rtx (mode);
  /* All-bits-except-sign mask.  */
  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
  /* xa = op0 & ~signbit.  */
  emit_insn (gen_rtx_SET (VOIDmode, xa,
			  gen_rtx_AND (mode, op0, mask)));
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
				  bool swap_operands)

  label = gen_label_rtx ();
  /* CCFPUmode: unordered-aware FP compare (ucomiss/ucomisd).  */
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask
   (all-ones when the comparison holds, all-zeros otherwise).  */
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
			      bool swap_operands)
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  /* cmpsd for DFmode, cmpss for SFmode.  */
  if (mode == DFmode)
    emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
				    gen_rtx_fmt_ee (code, mode, op0, op1)));
    emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
				   gen_rtx_fmt_ee (code, mode, op0, op1)));
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.
   (2^52 for double, 2^23 for float: the magnitude above which every
   representable value is already an integer.)  */
ix86_gen_TWO52 (enum machine_mode mode)
  REAL_VALUE_TYPE TWO52r;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
ix86_expand_lround (rtx op0, rtx op1)
  /* C code for the stuff we're doing below:
	tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
	return (long)tmp;  */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* load nextafter (0.5, 0.0): largest value strictly below 0.5, so
     truncation after adding it implements round-half-away-from-zero.  */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
/* Expand SSE2 sequence for computing lfloor or lceil of OP1,
   storing the result into OP0.  DO_FLOOR selects floor over ceil.  */
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	return xi;  */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1: truncation toward zero.  */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg
     (operands swapped and direction flipped for ceil).  */
  label = ix86_expand_sse_compare_and_jump (UNLE,
					    freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
ix86_expand_rint (rtx operand0, rtx operand1)
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);  */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label;
     (values >= 2^52 are already integral; also catches NaN).  */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* Adding and subtracting 2^52 rounds to nearest in the current
     rounding mode.  */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0, without relying on DImode truncation (32-bit safe).  */
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
	... compensate by +-1 and return x2;  */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; rounds to nearest integer.  */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
		   const_double_from_real_value (do_floor
						 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0)
     (comparison direction swapped for ceil).  */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0, using cvttsd2si truncation (needs DImode on 64-bit).  */
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	... compensate by +-1 ...
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;  */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x: truncation toward zero via cvttsd2si.  */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* Restore the sign so floor(-0.0) == -0.0 when signed zeros matter.  */
  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
	dxa = xa2 - xa;
	if (dxa <= -0.5) xa2 += 1;
	else if (dxa > 0.5) xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;  */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; rounds to nearest (current mode).  */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; rounding error to correct half-way cases.  */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,

  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0 (uses cvttsd2si, so needs DImode on 64-bit for DFmode).  */
ix86_expand_trunc (rtx operand0, rtx operand1)
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;  */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x: conversion truncates toward zero.  */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  /* Preserve -0.0 when signed zeros matter.  */
  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0, without DImode truncation (32-bit safe).  */
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
	if (xa2 > xa) xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;  */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; rounds to nearest integer.  */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0), turning
     round-to-nearest into truncation of the absolute value.  */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0 (uses cvttsd2si; round-half-away-from-zero).  */
ix86_expand_round (rtx operand0, rtx operand1)
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);  */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0): largest representable value < 0.5,
     so exact halves round away from zero after truncation.  */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
30113 /* Validate whether a SSE5 instruction is valid or not.
30114 OPERANDS is the array of operands.
30115 NUM is the number of operands.
30116 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
30117 NUM_MEMORY is the maximum number of memory operands to accept.
30118 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
30121 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
30122 bool uses_oc0, int num_memory, bool commutative)
30128 /* Count the number of memory arguments */
30131 for (i = 0; i < num; i++)
30133 enum machine_mode mode = GET_MODE (operands[i]);
30134 if (register_operand (operands[i], mode))
30137 else if (memory_operand (operands[i], mode))
30139 mem_mask |= (1 << i);
30145 rtx pattern = PATTERN (insn);
30147 /* allow 0 for pcmov */
30148 if (GET_CODE (pattern) != SET
30149 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
30151 || operands[i] != CONST0_RTX (mode))
30156 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
30157 a memory operation. */
30158 if (num_memory < 0)
30160 num_memory = -num_memory;
30161 if ((mem_mask & (1 << (num-1))) != 0)
30163 mem_mask &= ~(1 << (num-1));
30168 /* If there were no memory operations, allow the insn */
30172 /* Do not allow the destination register to be a memory operand. */
30173 else if (mem_mask & (1 << 0))
30176 /* If there are too many memory operations, disallow the instruction. While
30177 the hardware only allows 1 memory reference, before register allocation
30178 for some insns, we allow two memory operations sometimes in order to allow
30179 code like the following to be optimized:
30181 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
30183 or similar cases that are vectorized into using the fmaddss
30185 else if (mem_count > num_memory)
30188 /* Don't allow more than one memory operation if not optimizing. */
30189 else if (mem_count > 1 && !optimize)
30192 else if (num == 4 && mem_count == 1)
30194 /* formats (destination is the first argument), example fmaddss:
30195 xmm1, xmm1, xmm2, xmm3/mem
30196 xmm1, xmm1, xmm2/mem, xmm3
30197 xmm1, xmm2, xmm3/mem, xmm1
30198 xmm1, xmm2/mem, xmm3, xmm1 */
30200 return ((mem_mask == (1 << 1))
30201 || (mem_mask == (1 << 2))
30202 || (mem_mask == (1 << 3)));
30204 /* format, example pmacsdd:
30205 xmm1, xmm2, xmm3/mem, xmm1 */
30207 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
30209 return (mem_mask == (1 << 2));
30212 else if (num == 4 && num_memory == 2)
30214 /* If there are two memory operations, we can load one of the memory ops
30215 into the destination register. This is for optimizing the
30216 multiply/add ops, which the combiner has optimized both the multiply
30217 and the add insns to have a memory operation. We have to be careful
30218 that the destination doesn't overlap with the inputs. */
30219 rtx op0 = operands[0];
30221 if (reg_mentioned_p (op0, operands[1])
30222 || reg_mentioned_p (op0, operands[2])
30223 || reg_mentioned_p (op0, operands[3]))
30226 /* formats (destination is the first argument), example fmaddss:
30227 xmm1, xmm1, xmm2, xmm3/mem
30228 xmm1, xmm1, xmm2/mem, xmm3
30229 xmm1, xmm2, xmm3/mem, xmm1
30230 xmm1, xmm2/mem, xmm3, xmm1
30232 For the oc0 case, we will load either operands[1] or operands[3] into
30233 operands[0], so any combination of 2 memory operands is ok. */
30237 /* format, example pmacsdd:
30238 xmm1, xmm2, xmm3/mem, xmm1
30240 For the integer multiply/add instructions be more restrictive and
30241 require operands[2] and operands[3] to be the memory operands. */
30243 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
30245 return (mem_mask == ((1 << 2) | (1 << 3)));
30248 else if (num == 3 && num_memory == 1)
30250 /* formats, example protb:
30251 xmm1, xmm2, xmm3/mem
30252 xmm1, xmm2/mem, xmm3 */
30254 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
30256 /* format, example comeq:
30257 xmm1, xmm2, xmm3/mem */
30259 return (mem_mask == (1 << 2));
30263 gcc_unreachable ();
/* Fixup an SSE5 instruction that has 2 memory input references into a form the
   hardware will allow by using the destination register to load one of the
   memory operations.  Presently this is used by the multiply/add routines to
   allow 2 memory references.  */

void
ix86_expand_sse5_multiple_memory (rtx operands[],
				  enum machine_mode mode)
  rtx op0 = operands[0];
  /* The destination must be a register not mentioned by any input;
     otherwise the preload below would clobber an input value.  */
      || memory_operand (op0, mode)
      || reg_mentioned_p (op0, operands[1])
      || reg_mentioned_p (op0, operands[2])
      || reg_mentioned_p (op0, operands[3]))
    gcc_unreachable ();

  /* For 2 memory operands, pick either operands[1] or operands[3] to move into
     the destination register.  */
  if (memory_operand (operands[1], mode))
      emit_move_insn (op0, operands[1]);
  else if (memory_operand (operands[3], mode))
      emit_move_insn (op0, operands[3]);
    gcc_unreachable ();
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true,  ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  /* DLL import/export and shared-section attributes (Windows targets).  */
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
  /* Struct-layout attributes selecting MS vs. GCC field packing.  */
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
  /* End element.  */
  { NULL,        0, 0, false, false, false, NULL }
30344 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* NOTE(review): the `static int` line, braces, and the `if (runtime_test)`
   guard (with its `return 0` alternative) appear elided in this extract --
   the visible return is presumably the runtime_test branch.  TODO confirm.  */
30346 x86_builtin_vectorization_cost (bool runtime_test)
30348 /* If the branch of the runtime test is taken - i.e. - the vectorized
30349 version is skipped - this incurs a misprediction cost (because the
30350 vectorized version is expected to be the fall-through). So we subtract
30351 the latency of a mispredicted branch from the costs that are incurred
30352 when the vectorized version is executed.
30354 TODO: The values in individual target tables have to be tuned or new
30355 fields may be needed. For eg. on K8, the default branch path is the
30356 not-taken path. If the taken path is predicted correctly, the minimum
30357 penalty of going down the taken-path is 1 cycle. If the taken-path is
30358 not predicted correctly, then the minimum penalty is 10 cycles. */
/* Negative cost: credits back the mispredicted-branch latency from the
   per-processor cost table.  */
30362 return (-(ix86_cost->cond_taken_branch_cost));
30368 /* This function returns the calling abi specific va_list type node.
30369 It returns the FNDECL specific va_list type. */
/* NOTE(review): the guard that makes the first `return va_list_type_node;`
   conditional (presumably `if (!TARGET_64BIT)`) is elided here -- confirm
   against the original source.  */
30372 ix86_fn_abi_va_list (tree fndecl)
30375 return va_list_type_node;
30376 gcc_assert (fndecl != NULL_TREE);
/* On 64-bit, pick the va_list flavor matching FNDECL's calling ABI.  */
30378 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
30379 return ms_va_list_type_node;
30381 return sysv_va_list_type_node;
30384 /* Returns the canonical va_list type specified by TYPE. If there
30385 is no valid TYPE provided, it return NULL_TREE. */
/* NOTE(review): local declarations (wtype/htype), the `htype = type;`
   assignments, and the TARGET_64BIT guard appear elided in this extract.
   The structure visible below compares TYPE against the default,
   SysV, and MS va_list nodes in turn.  */
30388 ix86_canonical_va_list_type (tree type)
30392 /* Resolve references and pointers to va_list type. */
30393 if (INDIRECT_REF_P (type))
30394 type = TREE_TYPE (type);
30395 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
30396 type = TREE_TYPE (type);
/* First candidate: the default __builtin_va_list type.  */
30400 wtype = va_list_type_node;
30401 gcc_assert (wtype != NULL_TREE);
30403 if (TREE_CODE (wtype) == ARRAY_TYPE)
30405 /* If va_list is an array type, the argument may have decayed
30406 to a pointer type, e.g. by being passed to another function.
30407 In that case, unwrap both types so that we can compare the
30408 underlying records. */
30409 if (TREE_CODE (htype) == ARRAY_TYPE
30410 || POINTER_TYPE_P (htype))
30412 wtype = TREE_TYPE (wtype);
30413 htype = TREE_TYPE (htype);
30416 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30417 return va_list_type_node;
/* Second candidate: the SysV (64-bit Unix ABI) va_list type.  */
30418 wtype = sysv_va_list_type_node;
30419 gcc_assert (wtype != NULL_TREE);
30421 if (TREE_CODE (wtype) == ARRAY_TYPE)
30423 /* If va_list is an array type, the argument may have decayed
30424 to a pointer type, e.g. by being passed to another function.
30425 In that case, unwrap both types so that we can compare the
30426 underlying records. */
30427 if (TREE_CODE (htype) == ARRAY_TYPE
30428 || POINTER_TYPE_P (htype))
30430 wtype = TREE_TYPE (wtype);
30431 htype = TREE_TYPE (htype);
30434 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30435 return sysv_va_list_type_node;
/* Third candidate: the Microsoft (Win64 ABI) va_list type.  */
30436 wtype = ms_va_list_type_node;
30437 gcc_assert (wtype != NULL_TREE);
30439 if (TREE_CODE (wtype) == ARRAY_TYPE)
30441 /* If va_list is an array type, the argument may have decayed
30442 to a pointer type, e.g. by being passed to another function.
30443 In that case, unwrap both types so that we can compare the
30444 underlying records. */
30445 if (TREE_CODE (htype) == ARRAY_TYPE
30446 || POINTER_TYPE_P (htype))
30448 wtype = TREE_TYPE (wtype);
30449 htype = TREE_TYPE (htype);
30452 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30453 return ms_va_list_type_node;
/* No target-specific match: fall back to the generic handling.  */
30456 return std_canonical_va_list_type (type);
30459 /* Iterate through the target-specific builtin types for va_list.
30460 IDX denotes the iterator, *PTREE is set to the result type of
30461 the va_list builtin, and *PNAME to its internal type.
30462 Returns zero if there is no element for this index, otherwise
30463 IDX should be increased upon the next call.
30464 Note, do not iterate a base builtin's name like __builtin_va_list.
30465 Used from c_common_nodes_and_builtins. */
/* NOTE(review): the switch/case skeleton dispatching on IDX (and the
   TARGET_64BIT guard plus return statements) appears elided here --
   only the two output assignments per case are visible.  TODO confirm.  */
30468 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* Case 0: the MS-ABI va_list builtin.  */
30474 *ptree = ms_va_list_type_node;
30475 *pname = "__builtin_ms_va_list";
/* Case 1: the SysV-ABI va_list builtin.  */
30478 *ptree = sysv_va_list_type_node;
30479 *pname = "__builtin_sysv_va_list";
30487 /* Initialize the GCC target structure. */
/* Each pair below overrides one hook in the target vector; the `#undef`
   clears any default from target-def.h before the i386-specific
   definition is installed.  NOTE(review): several `#endif`/`#else` lines
   for the conditional sections appear elided in this extract.  */
30488 #undef TARGET_RETURN_IN_MEMORY
30489 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30491 #undef TARGET_LEGITIMIZE_ADDRESS
30492 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
30494 #undef TARGET_ATTRIBUTE_TABLE
30495 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30496 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30497 # undef TARGET_MERGE_DECL_ATTRIBUTES
30498 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30501 #undef TARGET_COMP_TYPE_ATTRIBUTES
30502 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
/* Builtin-function hooks.  */
30504 #undef TARGET_INIT_BUILTINS
30505 #define TARGET_INIT_BUILTINS ix86_init_builtins
30506 #undef TARGET_EXPAND_BUILTIN
30507 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
/* Vectorizer hooks.  */
30509 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30510 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30511 ix86_builtin_vectorized_function
30513 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30514 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30516 #undef TARGET_BUILTIN_RECIPROCAL
30517 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
/* Assembly output hooks.  */
30519 #undef TARGET_ASM_FUNCTION_EPILOGUE
30520 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30522 #undef TARGET_ENCODE_SECTION_INFO
30523 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30524 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30526 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30529 #undef TARGET_ASM_OPEN_PAREN
30530 #define TARGET_ASM_OPEN_PAREN ""
30531 #undef TARGET_ASM_CLOSE_PAREN
30532 #define TARGET_ASM_CLOSE_PAREN ""
30534 #undef TARGET_ASM_ALIGNED_HI_OP
30535 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30536 #undef TARGET_ASM_ALIGNED_SI_OP
30537 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30539 #undef TARGET_ASM_ALIGNED_DI_OP
30540 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* x86 has no alignment restrictions on data access, so the unaligned
   output ops are simply the aligned ones.  */
30543 #undef TARGET_ASM_UNALIGNED_HI_OP
30544 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30545 #undef TARGET_ASM_UNALIGNED_SI_OP
30546 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30547 #undef TARGET_ASM_UNALIGNED_DI_OP
30548 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Instruction scheduling hooks.  */
30550 #undef TARGET_SCHED_ADJUST_COST
30551 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30552 #undef TARGET_SCHED_ISSUE_RATE
30553 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30554 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30555 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30556 ia32_multipass_dfa_lookahead
30558 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30559 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30562 #undef TARGET_HAVE_TLS
30563 #define TARGET_HAVE_TLS true
30565 #undef TARGET_CANNOT_FORCE_CONST_MEM
30566 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30567 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30568 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30570 #undef TARGET_DELEGITIMIZE_ADDRESS
30571 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30573 #undef TARGET_MS_BITFIELD_LAYOUT_P
30574 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* TARGET_BINDS_LOCAL_P: last definition wins; the PE (dllimport)
   version overrides the Darwin one on Windows targets.  */
30577 #undef TARGET_BINDS_LOCAL_P
30578 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30580 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30581 #undef TARGET_BINDS_LOCAL_P
30582 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30585 #undef TARGET_ASM_OUTPUT_MI_THUNK
30586 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30587 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30588 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30590 #undef TARGET_ASM_FILE_START
30591 #define TARGET_ASM_FILE_START x86_file_start
30593 #undef TARGET_DEFAULT_TARGET_FLAGS
30594 #define TARGET_DEFAULT_TARGET_FLAGS \
30596 | TARGET_SUBTARGET_DEFAULT \
30597 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
30599 #undef TARGET_HANDLE_OPTION
30600 #define TARGET_HANDLE_OPTION ix86_handle_option
/* Cost-model hooks.  */
30602 #undef TARGET_RTX_COSTS
30603 #define TARGET_RTX_COSTS ix86_rtx_costs
30604 #undef TARGET_ADDRESS_COST
30605 #define TARGET_ADDRESS_COST ix86_address_cost
30607 #undef TARGET_FIXED_CONDITION_CODE_REGS
30608 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30609 #undef TARGET_CC_MODES_COMPATIBLE
30610 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
30612 #undef TARGET_MACHINE_DEPENDENT_REORG
30613 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30615 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30616 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
/* Varargs / va_list hooks (include the functions defined above).  */
30618 #undef TARGET_BUILD_BUILTIN_VA_LIST
30619 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30621 #undef TARGET_FN_ABI_VA_LIST
30622 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30624 #undef TARGET_CANONICAL_VA_LIST_TYPE
30625 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30627 #undef TARGET_EXPAND_BUILTIN_VA_START
30628 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
30630 #undef TARGET_MD_ASM_CLOBBERS
30631 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
/* Argument-passing hooks.  */
30633 #undef TARGET_PROMOTE_PROTOTYPES
30634 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30635 #undef TARGET_STRUCT_VALUE_RTX
30636 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30637 #undef TARGET_SETUP_INCOMING_VARARGS
30638 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30639 #undef TARGET_MUST_PASS_IN_STACK
30640 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30641 #undef TARGET_PASS_BY_REFERENCE
30642 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30643 #undef TARGET_INTERNAL_ARG_POINTER
30644 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
30645 #undef TARGET_UPDATE_STACK_BOUNDARY
30646 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30647 #undef TARGET_GET_DRAP_RTX
30648 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
30649 #undef TARGET_STRICT_ARGUMENT_NAMING
30650 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
30652 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
30653 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
30655 #undef TARGET_SCALAR_MODE_SUPPORTED_P
30656 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30658 #undef TARGET_VECTOR_MODE_SUPPORTED_P
30659 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30661 #undef TARGET_C_MODE_FOR_SUFFIX
30662 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
30665 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30666 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
30669 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30670 #undef TARGET_INSERT_ATTRIBUTES
30671 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30674 #undef TARGET_MANGLE_TYPE
30675 #define TARGET_MANGLE_TYPE ix86_mangle_type
30677 #undef TARGET_STACK_PROTECT_FAIL
30678 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30680 #undef TARGET_FUNCTION_VALUE
30681 #define TARGET_FUNCTION_VALUE ix86_function_value
30683 #undef TARGET_SECONDARY_RELOAD
30684 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30686 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30687 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
/* Per-function target-option ("target" attribute) hooks.  */
30689 #undef TARGET_SET_CURRENT_FUNCTION
30690 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30692 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30693 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30695 #undef TARGET_OPTION_SAVE
30696 #define TARGET_OPTION_SAVE ix86_function_specific_save
30698 #undef TARGET_OPTION_RESTORE
30699 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30701 #undef TARGET_OPTION_PRINT
30702 #define TARGET_OPTION_PRINT ix86_function_specific_print
30704 #undef TARGET_OPTION_CAN_INLINE_P
30705 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
30707 #undef TARGET_EXPAND_TO_RTL_HOOK
30708 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
30710 #undef TARGET_LEGITIMATE_ADDRESS_P
30711 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
/* The single global target vector, built from the macros above.  */
30713 struct gcc_target targetm = TARGET_INITIALIZER;
30715 #include "gt-i386.h"