1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
53 #include "tm-constrs.h"
/* Forward declarations for static helpers defined later in this file.  */
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Fallback stack-probe limit when the target headers do not define one.
   NOTE(review): the matching #endif is not visible in this excerpt —
   verify against the canonical file.  */
60 #ifndef CHECK_STACK_LIMIT
61 #define CHECK_STACK_LIMIT (-1)
64 /* Return index of given mode in mult and division cost tables. */
65 #define MODE_INDEX(mode) \
66 ((mode) == QImode ? 0 \
67 : (mode) == HImode ? 1 \
68 : (mode) == SImode ? 2 \
69 : (mode) == DImode ? 3 \
/* NOTE(review): the final arm of MODE_INDEX (the ": 4)" fallback used for
   all other modes) appears to be missing from this excerpt.  */
72 /* Processor costs (relative to an add) */
73 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
74 #define COSTS_N_BYTES(N) ((N) * 2)
76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when tuning for size (-Os): entries are byte counts
   (COSTS_N_BYTES), not cycle estimates.  NOTE(review): several initializer
   lines (e.g. "large" insn, MOVE_RATIO, branch cost) and the closing "};"
   appear to be missing from this excerpt — verify against the canonical
   i386.c before editing values.  */
79 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
/* memcpy/memset stringop strategy tables: rep movsb minimizes code size.  */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
/* Cycle-based cost table for the original Intel 386.
   NOTE(review): some initializer lines (e.g. MOVE_RATIO, branch cost) and
   the closing "};" appear to be missing from this excerpt.  */
148 /* Processor costs (relative to an add) */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
/* Cycle-based cost table for the Intel 486.
   NOTE(review): some initializer lines and the closing "};" appear to be
   missing from this excerpt.  */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
/* Cycle-based cost table for the Intel Pentium (P5).
   NOTE(review): some initializer lines and the closing "};" appear to be
   missing from this excerpt.  */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
/* Cycle-based cost table for the Intel PentiumPro / P6 family.
   NOTE(review): some initializer lines, the end of the stringop comment
   (orig. lines 417-419), and the closing "};" appear to be missing from
   this excerpt.  */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
/* Cycle-based cost table for the AMD Geode.
   NOTE(review): some initializer lines and the closing "};" appear to be
   missing from this excerpt.  */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
/* Cycle-based cost table for the AMD K6.
   NOTE(review): some initializer lines and the closing "};" appear to be
   missing from this excerpt.  */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
/* Cycle-based cost table for the AMD Athlon (K7).
   NOTE(review): some initializer lines and the closing "};" appear to be
   missing from this excerpt.  */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
/* Cycle-based cost table for the AMD K8 (Opteron/Athlon 64).
   NOTE(review): some initializer lines (including the end of the prefetch
   comment at orig. line 704) and the closing "};" appear to be missing
   from this excerpt.  */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
/* Cycle-based cost table for AMD Family 10h (Barcelona).
   NOTE(review): some initializer lines and the closing "};" appear to be
   missing from this excerpt; in particular the MOVD latency table at
   orig. lines 776-781 has lost its framing comment lines, so what remains
   is a fragment embedded mid-initializer — verify against the canonical
   i386.c.  */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
/* Cycle-based cost table for the Intel Pentium 4 (NetBurst).
   NOTE(review): some initializer lines (including part of the memset
   stringop table at orig. line 877) and the closing "};" appear to be
   missing from this excerpt.  */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
/* Cost table used when tuning for Nocona (64-bit Pentium 4 / Prescott).
   Field order follows struct processor_costs.
   NOTE(review): embedded line numbers skip (911->913, 937->939, 948->950)
   and no closing "};" is visible -- initializer lines were lost in
   extraction; verify against the original i386.c.  */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* memcpy / memset stringop strategy tables: {max_size, algorithm} pairs,
   32-bit variant first, 64-bit variant second.  */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model entries.  */
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
/* Cost table used when tuning for Core 2.  Field order follows
   struct processor_costs; costs relative to an add insn.
   NOTE(review): line numbers skip (984->986) and no closing "};" is
   visible -- some lines lost in extraction; verify against original.  */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
995 in SFmode, DFmode and XFmode */
996 2, /* cost of moving MMX register */
997 {6, 6}, /* cost of loading MMX registers
998 in SImode and DImode */
999 {4, 4}, /* cost of storing MMX registers
1000 in SImode and DImode */
1001 2, /* cost of moving SSE register */
1002 {6, 6, 6}, /* cost of loading SSE registers
1003 in SImode, DImode and TImode */
1004 {4, 4, 4}, /* cost of storing SSE registers
1005 in SImode, DImode and TImode */
1006 2, /* MMX or SSE register to integer */
1007 32, /* size of l1 cache. */
1008 2048, /* size of l2 cache. */
1009 128, /* size of prefetch block */
1010 8, /* number of parallel prefetches */
1011 3, /* Branch cost */
1012 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1013 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1014 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1015 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1016 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1017 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* memcpy / memset stringop strategy tables: {max_size, algorithm} pairs,
   32-bit variant first, 64-bit variant second.  */
1018 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1019 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 {{libcall, {{8, loop}, {15, unrolled_loop},
1022 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1023 {libcall, {{24, loop}, {32, unrolled_loop},
1024 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model entries.  */
1025 1, /* scalar_stmt_cost. */
1026 1, /* scalar load_cost. */
1027 1, /* scalar_store_cost. */
1028 1, /* vec_stmt_cost. */
1029 1, /* vec_to_scalar_cost. */
1030 1, /* scalar_to_vec_cost. */
1031 1, /* vec_align_load_cost. */
1032 2, /* vec_unalign_load_cost. */
1033 1, /* vec_store_cost. */
1034 3, /* cond_taken_branch_cost. */
1035 1, /* cond_not_taken_branch_cost. */
/* Cost table used when tuning for Intel Atom (in-order core).  Field
   order follows struct processor_costs.
   NOTE(review): no closing "};" is visible (line numbers jump 1108->1111)
   -- the struct terminator was lost in extraction.  */
1039 struct processor_costs atom_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1042 COSTS_N_INSNS (1), /* variable shift costs */
1043 COSTS_N_INSNS (1), /* constant shift costs */
1044 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1045 COSTS_N_INSNS (4), /* HI */
1046 COSTS_N_INSNS (3), /* SI */
1047 COSTS_N_INSNS (4), /* DI */
1048 COSTS_N_INSNS (2)}, /* other */
1049 0, /* cost of multiply per each bit set */
1050 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1051 COSTS_N_INSNS (26), /* HI */
1052 COSTS_N_INSNS (42), /* SI */
1053 COSTS_N_INSNS (74), /* DI */
1054 COSTS_N_INSNS (74)}, /* other */
1055 COSTS_N_INSNS (1), /* cost of movsx */
1056 COSTS_N_INSNS (1), /* cost of movzx */
1057 8, /* "large" insn */
1058 17, /* MOVE_RATIO */
1059 2, /* cost for loading QImode using movzbl */
1060 {4, 4, 4}, /* cost of loading integer registers
1061 in QImode, HImode and SImode.
1062 Relative to reg-reg move (2). */
1063 {4, 4, 4}, /* cost of storing integer registers */
1064 4, /* cost of reg,reg fld/fst */
1065 {12, 12, 12}, /* cost of loading fp registers
1066 in SFmode, DFmode and XFmode */
1067 {6, 6, 8}, /* cost of storing fp registers
1068 in SFmode, DFmode and XFmode */
1069 2, /* cost of moving MMX register */
1070 {8, 8}, /* cost of loading MMX registers
1071 in SImode and DImode */
1072 {8, 8}, /* cost of storing MMX registers
1073 in SImode and DImode */
1074 2, /* cost of moving SSE register */
1075 {8, 8, 8}, /* cost of loading SSE registers
1076 in SImode, DImode and TImode */
1077 {8, 8, 8}, /* cost of storing SSE registers
1078 in SImode, DImode and TImode */
1079 5, /* MMX or SSE register to integer */
1080 32, /* size of l1 cache. */
1081 256, /* size of l2 cache. */
1082 64, /* size of prefetch block */
1083 6, /* number of parallel prefetches */
1084 3, /* Branch cost */
1085 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1086 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1087 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1088 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1089 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1090 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy / memset stringop strategy tables: {max_size, algorithm} pairs,
   32-bit variant first, 64-bit variant second.  */
1091 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1092 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1093 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1094 {{libcall, {{8, loop}, {15, unrolled_loop},
1095 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1096 {libcall, {{24, loop}, {32, unrolled_loop},
1097 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model entries.  */
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1111 /* Generic64 should produce code tuned for Nocona and K8. */
/* Cost table for the -mtune=generic 64-bit target: a compromise model
   for the common CPUs of the era.  Field order follows
   struct processor_costs.
   NOTE(review): no closing "};" is visible (line numbers jump
   1185->1188) -- the struct terminator was lost in extraction.  */
1113 struct processor_costs generic64_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 /* On all chips taken into consideration lea is 2 cycles and more. With
1116 this cost however our current implementation of synth_mult results in
1117 use of unnecessary temporary registers causing regression on several
1118 SPECfp benchmarks. */
1119 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1120 COSTS_N_INSNS (1), /* variable shift costs */
1121 COSTS_N_INSNS (1), /* constant shift costs */
1122 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1123 COSTS_N_INSNS (4), /* HI */
1124 COSTS_N_INSNS (3), /* SI */
1125 COSTS_N_INSNS (4), /* DI */
1126 COSTS_N_INSNS (2)}, /* other */
1127 0, /* cost of multiply per each bit set */
1128 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1129 COSTS_N_INSNS (26), /* HI */
1130 COSTS_N_INSNS (42), /* SI */
1131 COSTS_N_INSNS (74), /* DI */
1132 COSTS_N_INSNS (74)}, /* other */
1133 COSTS_N_INSNS (1), /* cost of movsx */
1134 COSTS_N_INSNS (1), /* cost of movzx */
1135 8, /* "large" insn */
1136 17, /* MOVE_RATIO */
1137 4, /* cost for loading QImode using movzbl */
1138 {4, 4, 4}, /* cost of loading integer registers
1139 in QImode, HImode and SImode.
1140 Relative to reg-reg move (2). */
1141 {4, 4, 4}, /* cost of storing integer registers */
1142 4, /* cost of reg,reg fld/fst */
1143 {12, 12, 12}, /* cost of loading fp registers
1144 in SFmode, DFmode and XFmode */
1145 {6, 6, 8}, /* cost of storing fp registers
1146 in SFmode, DFmode and XFmode */
1147 2, /* cost of moving MMX register */
1148 {8, 8}, /* cost of loading MMX registers
1149 in SImode and DImode */
1150 {8, 8}, /* cost of storing MMX registers
1151 in SImode and DImode */
1152 2, /* cost of moving SSE register */
1153 {8, 8, 8}, /* cost of loading SSE registers
1154 in SImode, DImode and TImode */
1155 {8, 8, 8}, /* cost of storing SSE registers
1156 in SImode, DImode and TImode */
1157 5, /* MMX or SSE register to integer */
1158 32, /* size of l1 cache. */
1159 512, /* size of l2 cache. */
1160 64, /* size of prefetch block */
1161 6, /* number of parallel prefetches */
1162 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1163 is increased to perhaps more appropriate value of 5. */
1164 3, /* Branch cost */
1165 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1166 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1167 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1168 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1169 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1170 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* 64-bit-only stringop tables: the 32-bit slot is the dummy entry.  */
1171 {DUMMY_STRINGOP_ALGS,
1172 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1173 {DUMMY_STRINGOP_ALGS,
1174 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost model entries.  */
1175 1, /* scalar_stmt_cost. */
1176 1, /* scalar load_cost. */
1177 1, /* scalar_store_cost. */
1178 1, /* vec_stmt_cost. */
1179 1, /* vec_to_scalar_cost. */
1180 1, /* scalar_to_vec_cost. */
1181 1, /* vec_align_load_cost. */
1182 2, /* vec_unalign_load_cost. */
1183 1, /* vec_store_cost. */
1184 3, /* cond_taken_branch_cost. */
1185 1, /* cond_not_taken_branch_cost. */
1188 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* Cost table for the -mtune=generic 32-bit target.  Field order follows
   struct processor_costs.
   NOTE(review): no closing "};" is visible (line numbers jump
   1256->1259) -- the struct terminator was lost in extraction.  */
1190 struct processor_costs generic32_cost = {
1191 COSTS_N_INSNS (1), /* cost of an add instruction */
1192 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1193 COSTS_N_INSNS (1), /* variable shift costs */
1194 COSTS_N_INSNS (1), /* constant shift costs */
1195 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1196 COSTS_N_INSNS (4), /* HI */
1197 COSTS_N_INSNS (3), /* SI */
1198 COSTS_N_INSNS (4), /* DI */
1199 COSTS_N_INSNS (2)}, /* other */
1200 0, /* cost of multiply per each bit set */
1201 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1202 COSTS_N_INSNS (26), /* HI */
1203 COSTS_N_INSNS (42), /* SI */
1204 COSTS_N_INSNS (74), /* DI */
1205 COSTS_N_INSNS (74)}, /* other */
1206 COSTS_N_INSNS (1), /* cost of movsx */
1207 COSTS_N_INSNS (1), /* cost of movzx */
1208 8, /* "large" insn */
1209 17, /* MOVE_RATIO */
1210 4, /* cost for loading QImode using movzbl */
1211 {4, 4, 4}, /* cost of loading integer registers
1212 in QImode, HImode and SImode.
1213 Relative to reg-reg move (2). */
1214 {4, 4, 4}, /* cost of storing integer registers */
1215 4, /* cost of reg,reg fld/fst */
1216 {12, 12, 12}, /* cost of loading fp registers
1217 in SFmode, DFmode and XFmode */
1218 {6, 6, 8}, /* cost of storing fp registers
1219 in SFmode, DFmode and XFmode */
1220 2, /* cost of moving MMX register */
1221 {8, 8}, /* cost of loading MMX registers
1222 in SImode and DImode */
1223 {8, 8}, /* cost of storing MMX registers
1224 in SImode and DImode */
1225 2, /* cost of moving SSE register */
1226 {8, 8, 8}, /* cost of loading SSE registers
1227 in SImode, DImode and TImode */
1228 {8, 8, 8}, /* cost of storing SSE registers
1229 in SImode, DImode and TImode */
1230 5, /* MMX or SSE register to integer */
1231 32, /* size of l1 cache. */
1232 256, /* size of l2 cache. */
1233 64, /* size of prefetch block */
1234 6, /* number of parallel prefetches */
1235 3, /* Branch cost */
1236 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1237 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1238 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1239 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1240 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1241 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* 32-bit-only stringop tables: the 64-bit slot is the dummy entry.  */
1242 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1243 DUMMY_STRINGOP_ALGS},
1244 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1245 DUMMY_STRINGOP_ALGS},
/* Vectorizer cost model entries.  */
1246 1, /* scalar_stmt_cost. */
1247 1, /* scalar load_cost. */
1248 1, /* scalar_store_cost. */
1249 1, /* vec_stmt_cost. */
1250 1, /* vec_to_scalar_cost. */
1251 1, /* scalar_to_vec_cost. */
1252 1, /* vec_align_load_cost. */
1253 2, /* vec_unalign_load_cost. */
1254 1, /* vec_store_cost. */
1255 3, /* cond_taken_branch_cost. */
1256 1, /* cond_not_taken_branch_cost. */
1259 const struct processor_costs *ix86_cost = &pentium_cost;
1261 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator; the m_* masks below are OR-ed
   together in the tuning tables that follow to say "this tuning applies
   to these processors".  Compound masks group related CPU families.  */
1262 #define m_386 (1<<PROCESSOR_I386)
1263 #define m_486 (1<<PROCESSOR_I486)
1264 #define m_PENT (1<<PROCESSOR_PENTIUM)
1265 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1266 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1267 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1268 #define m_CORE2 (1<<PROCESSOR_CORE2)
1269 #define m_ATOM (1<<PROCESSOR_ATOM)
1271 #define m_GEODE (1<<PROCESSOR_GEODE)
1272 #define m_K6 (1<<PROCESSOR_K6)
1273 #define m_K6_GEODE (m_K6 | m_GEODE)
1274 #define m_K8 (1<<PROCESSOR_K8)
1275 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1276 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1277 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1278 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1280 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1281 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1283 /* Generic instruction choice should be common subset of supported CPUs
1284 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1285 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1287 /* Feature tests against the various tunings. */
/* Per-feature boolean array indexed by X86_TUNE_*; filled in from
   initial_ix86_tune_features for the active -mtune processor.  */
1288 unsigned char ix86_tune_features[X86_TUNE_LAST];
1290 /* Feature tests against the various tunings used to create ix86_tune_features
1291 based on the processor mask. */
/* Each entry is a processor bitmask (built from the m_* macros above)
   saying which CPUs the corresponding X86_TUNE_* feature is enabled for;
   a ~(...) entry enables the feature for all processors NOT in the mask.
   NOTE(review): many mask lines are missing in this extraction (several
   X86_TUNE_* comments are immediately followed by the next comment, e.g.
   ZERO_EXTEND_WITH_AND, USE_MOV0, SHIFT1), and the closing "};" is not
   visible -- entry/index alignment cannot be trusted as captured; verify
   against the original i386.c.  */
1292 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1293 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1294 negatively, so enabling for Generic64 seems like good code size
1295 tradeoff. We can't enable it for 32bit generic because it does not
1296 work well with PPro base chips. */
1297 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1299 /* X86_TUNE_PUSH_MEMORY */
1300 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1301 | m_NOCONA | m_CORE2 | m_GENERIC,
1303 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1306 /* X86_TUNE_UNROLL_STRLEN */
1307 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1308 | m_CORE2 | m_GENERIC,
1310 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1311 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1313 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1314 on simulation result. But after P4 was made, no performance benefit
1315 was observed with branch hints. It also increases the code size.
1316 As a result, icc never generates branch hints. */
1319 /* X86_TUNE_DOUBLE_WITH_ADD */
1322 /* X86_TUNE_USE_SAHF */
1323 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1324 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1327 partial dependencies. */
1328 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1329 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1331 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1332 register stalls on Generic32 compilation setting as well. However
1333 in current implementation the partial register stalls are not eliminated
1334 very well - they can be introduced via subregs synthesized by combine
1335 and can happen in caller/callee saving sequences. Because this option
1336 pays back little on PPro based chips and is in conflict with partial reg
1337 dependencies used by Athlon/P4 based chips, it is better to leave it off
1338 for generic32 for now. */
1341 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1342 m_CORE2 | m_GENERIC,
1344 /* X86_TUNE_USE_HIMODE_FIOP */
1345 m_386 | m_486 | m_K6_GEODE,
1347 /* X86_TUNE_USE_SIMODE_FIOP */
1348 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1350 /* X86_TUNE_USE_MOV0 */
1353 /* X86_TUNE_USE_CLTD */
1354 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1356 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1359 /* X86_TUNE_SPLIT_LONG_MOVES */
1362 /* X86_TUNE_READ_MODIFY_WRITE */
1365 /* X86_TUNE_READ_MODIFY */
1368 /* X86_TUNE_PROMOTE_QIMODE */
1369 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1370 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1372 /* X86_TUNE_FAST_PREFIX */
1373 ~(m_PENT | m_486 | m_386),
1375 /* X86_TUNE_SINGLE_STRINGOP */
1376 m_386 | m_PENT4 | m_NOCONA,
1378 /* X86_TUNE_QIMODE_MATH */
1381 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1382 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1383 might be considered for Generic32 if our scheme for avoiding partial
1384 stalls was more effective. */
1387 /* X86_TUNE_PROMOTE_QI_REGS */
1390 /* X86_TUNE_PROMOTE_HI_REGS */
1393 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1394 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1395 | m_CORE2 | m_GENERIC,
1397 /* X86_TUNE_ADD_ESP_8 */
1398 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1399 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_SUB_ESP_4 */
1402 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1405 /* X86_TUNE_SUB_ESP_8 */
1406 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1407 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1409 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1410 for DFmode copies */
1411 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1412 | m_GENERIC | m_GEODE),
1414 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1415 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1417 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1418 conflict here in between PPro/Pentium4 based chips that thread 128bit
1419 SSE registers as single units versus K8 based chips that divide SSE
1420 registers to two 64bit halves. This knob promotes all store destinations
1421 to be 128bit to allow register renaming on 128bit SSE units, but usually
1422 results in one extra microop on 64bit SSE units. Experimental results
1423 shows that disabling this option on P4 brings over 20% SPECfp regression,
1424 while enabling it on K8 brings roughly 2.4% regression that can be partly
1425 masked by careful scheduling of moves. */
1426 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1429 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1432 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1433 are resolved on SSE register parts instead of whole registers, so we may
1434 maintain just lower part of scalar values in proper format leaving the
1435 upper part undefined. */
1438 /* X86_TUNE_SSE_TYPELESS_STORES */
1441 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1442 m_PPRO | m_PENT4 | m_NOCONA,
1444 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1445 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1447 /* X86_TUNE_PROLOGUE_USING_MOVE */
1448 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1450 /* X86_TUNE_EPILOGUE_USING_MOVE */
1451 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1453 /* X86_TUNE_SHIFT1 */
1456 /* X86_TUNE_USE_FFREEP */
1459 /* X86_TUNE_INTER_UNIT_MOVES */
1460 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1462 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1465 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1466 than 4 branch instructions in the 16 byte window. */
1467 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1470 /* X86_TUNE_SCHEDULE */
1471 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1474 /* X86_TUNE_USE_BT */
1475 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1477 /* X86_TUNE_USE_INCDEC */
1478 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1480 /* X86_TUNE_PAD_RETURNS */
1481 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1483 /* X86_TUNE_EXT_80387_CONSTANTS */
1484 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_SHORTEN_X87_SSE */
1490 /* X86_TUNE_AVOID_VECTOR_DECODE */
1493 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1494 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1497 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1498 vector path on AMD machines. */
1499 m_K8 | m_GENERIC64 | m_AMDFAM10,
1501 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1503 m_K8 | m_GENERIC64 | m_AMDFAM10,
1505 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1509 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1510 but one byte longer. */
1513 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1514 operand that cannot be represented using a modRM byte. The XOR
1515 replacement is long decoded, so this split helps here as well. */
1518 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1520 m_AMDFAM10 | m_GENERIC,
1522 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1523 from integer to FP. */
1526 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1527 with a subsequent conditional jump instruction into a single
1528 compare-and-branch uop. */
1531 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1532 will impact LEA instruction selection. */
1536 /* Feature tests against the various architecture variations. */
/* Per-feature boolean array indexed by X86_ARCH_*; filled in from
   initial_ix86_arch_features for the active -march processor.  */
1537 unsigned char ix86_arch_features[X86_ARCH_LAST];
1539 /* Feature tests against the various architecture variations, used to create
1540 ix86_arch_features based on the processor mask. */
/* NOTE(review): the mask lines for the CMPXCHG/CMPXCHG8B/XADD/BSWAP
   entries and the closing "};" are missing from this extraction (only
   the comments survive); verify against the original i386.c.  */
1541 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1542 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1543 ~(m_386 | m_486 | m_PENT | m_K6),
1545 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1548 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1551 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1554 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Processors for which outgoing argument space is accumulated in the
   prologue rather than pushed per call.
   NOTE(review): this initializer looks truncated -- the line ends in
   "| m_CORE2" with no terminating ";" visible (line numbers jump
   1559->1562); the trailing mask term(s) were lost in extraction.  */
1558 static const unsigned int x86_accumulate_outgoing_args
1559 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
/* Processors where the fancy (387-based) math path is always used.  */
1562 static const unsigned int x86_arch_always_fancy_math_387
1563 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1564 | m_NOCONA | m_CORE2 | m_GENERIC;
1566 static enum stringop_alg stringop_alg = no_stringop;
1568 /* In case the average insn count for single function invocation is
1569 lower than this constant, emit fast (but longer) prologue and
/* NOTE(review): the tail of this comment (original line 1570) is
   missing from the extraction.  */
1571 #define FAST_PROLOGUE_INSN_COUNT 20
1573 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* Name tables come from the QI_/HI_REGISTER_NAMES macros in i386.h.  */
1574 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1575 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1576 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1578 /* Array of the smallest class containing reg number REGNO, indexed by
1579 REGNO. Used by REGNO_REG_CLASS in i386.h. */
/* NOTE(review): the opening "{" (original line 1582), several row
   comments, and the closing "};" are missing from this extraction --
   the rows for FP, SSE, MMX, REX-integer and REX-SSE registers appear
   without their headers; verify row alignment against the original.  */
1581 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1583 /* ax, dx, cx, bx */
1584 AREG, DREG, CREG, BREG,
1585 /* si, di, bp, sp */
1586 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1588 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1589 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1592 /* flags, fpsr, fpcr, frame */
1593 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1595 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1598 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1601 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 /* SSE REX registers */
1604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1608 /* The "default" register map used in 32bit mode. */
/* Maps gcc regno -> debugger (DBX/DWARF) register number; -1 marks
   registers with no debugger number.
   NOTE(review): the opening "{" and closing "};" lines are missing from
   this extraction (line numbers skip 1610->1612 and 1618->1621).  */
1610 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1612 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1613 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1614 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1615 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1616 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1617 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1621 /* The "default" register map used in 64bit mode. */
/* Maps gcc regno -> debugger register number for 64-bit targets; note
   the REX registers get real numbers here, unlike the 32-bit map.
   NOTE(review): opening "{" and closing "};" lines missing in this
   extraction (1623->1625, 1631->1634).  */
1623 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1625 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1626 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1627 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1628 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1629 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1630 8,9,10,11,12,13,14,15, /* extended integer registers */
1631 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1634 /* Define the register numbers to be used in Dwarf debugging information.
1635 The SVR4 reference port C compiler uses the following register numbers
1636 in its Dwarf output code:
1637 0 for %eax (gcc regno = 0)
1638 1 for %ecx (gcc regno = 2)
1639 2 for %edx (gcc regno = 1)
1640 3 for %ebx (gcc regno = 3)
1641 4 for %esp (gcc regno = 7)
1642 5 for %ebp (gcc regno = 6)
1643 6 for %esi (gcc regno = 4)
1644 7 for %edi (gcc regno = 5)
1645 The following three DWARF register numbers are never generated by
1646 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1647 believes these numbers have these meanings.
1648 8 for %eip (no gcc equivalent)
1649 9 for %eflags (gcc regno = 17)
1650 10 for %trapno (no gcc equivalent)
1651 It is not at all clear how we should number the FP stack registers
1652 for the x86 architecture. If the version of SDB on x86/svr4 were
1653 a bit less brain dead with respect to floating-point then we would
1654 have a precedent to follow with respect to DWARF register numbers
1655 for x86 FP registers, but the SDB on x86/svr4 is so completely
1656 broken with respect to FP registers that it is hardly worth thinking
1657 of it as something to strive for compatibility with.
1658 The version of x86/svr4 SDB I have at the moment does (partially)
1659 seem to believe that DWARF register number 11 is associated with
1660 the x86 register %st(0), but that's about all. Higher DWARF
1661 register numbers don't seem to be associated with anything in
1662 particular, and even for DWARF regno 11, SDB only seems to under-
1663 stand that it should say that a variable lives in %st(0) (when
1664 asked via an `=' command) if we said it was in DWARF regno 11,
1665 but SDB still prints garbage when asked for the value of the
1666 variable in question (via a `/' command).
1667 (Also note that the labels SDB prints for various FP stack regs
1668 when doing an `x' command are all wrong.)
1669 Note that these problems generally don't affect the native SVR4
1670 C compiler because it doesn't allow the use of -O with -g and
1671 because when it is *not* optimizing, it allocates a memory
1672 location for each floating-point variable, and the memory
1673 location is what gets described in the DWARF AT_location
1674 attribute for the variable in question.
1675 Regardless of the severe mental illness of the x86/svr4 SDB, we
1676 do something sensible here and we use the following DWARF
1677 register numbers. Note that these are all stack-top-relative
1679 11 for %st(0) (gcc regno = 8)
1680 12 for %st(1) (gcc regno = 9)
1681 13 for %st(2) (gcc regno = 10)
1682 14 for %st(3) (gcc regno = 11)
1683 15 for %st(4) (gcc regno = 12)
1684 16 for %st(5) (gcc regno = 13)
1685 17 for %st(6) (gcc regno = 14)
1686 18 for %st(7) (gcc regno = 15)
1688 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1690 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1691 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1692 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1693 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1694 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1695 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1699 /* Test and compare insns in i386.md store the information needed to
1700 generate branch and scc insns here. */
1702 rtx ix86_compare_op0 = NULL_RTX;
1703 rtx ix86_compare_op1 = NULL_RTX;
/* Define parameter passing and return registers.  */

/* Integer registers used for passing arguments, in allocation order.
   This is the non-Microsoft 64-bit convention (contrast with the
   *_ms_abi_* table below): %rdi, %rsi, %rdx, %rcx, %r8, %r9.  */
static int const x86_64_int_parameter_registers[6] =
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG

/* Integer argument registers for the Microsoft 64-bit ABI:
   %rcx, %rdx, %r8, %r9.  */
static int const x86_64_ms_abi_int_parameter_registers[4] =
  CX_REG, DX_REG, R8_REG, R9_REG

/* Registers in which 64-bit integer values may be returned.  */
static int const x86_64_int_return_registers[4] =
  AX_REG, DX_REG, DI_REG, SI_REG
1722 /* Define the structure for the machine field in struct function. */
1724 struct GTY(()) stack_local_entry {
1725 unsigned short mode;
1728 struct stack_local_entry *next;
1731 /* Structure describing stack frame layout.
1732 Stack grows downward:
1738 saved frame pointer if frame_pointer_needed
1739 <- HARD_FRAME_POINTER
1748 [va_arg registers] (
1749 > to_allocate <- FRAME_POINTER
1761 HOST_WIDE_INT frame;
1763 int outgoing_arguments_size;
1766 HOST_WIDE_INT to_allocate;
1767 /* The offsets relative to ARG_POINTER. */
1768 HOST_WIDE_INT frame_pointer_offset;
1769 HOST_WIDE_INT hard_frame_pointer_offset;
1770 HOST_WIDE_INT stack_pointer_offset;
1772 /* When save_regs_using_mov is set, emit prologue using
1773 move instead of push instructions. */
1774 bool save_regs_using_mov;
1777 /* Code model option. */
1778 enum cmodel ix86_cmodel;
1780 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1782 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1784 /* Which unit we are generating floating point math for. */
1785 enum fpmath_unit ix86_fpmath;
1787 /* Which cpu are we scheduling for. */
1788 enum attr_cpu ix86_schedule;
1790 /* Which cpu are we optimizing for. */
1791 enum processor_type ix86_tune;
1793 /* Which instruction set architecture to use. */
1794 enum processor_type ix86_arch;
1796 /* true if sse prefetch instruction is not NOOP. */
1797 int x86_prefetch_sse;
1799 /* ix86_regparm_string as a number */
1800 static int ix86_regparm;
1802 /* -mstackrealign option */
1803 extern int ix86_force_align_arg_pointer;
1804 static const char ix86_force_align_arg_pointer_string[]
1805 = "force_align_arg_pointer";
1807 static rtx (*ix86_gen_leave) (void);
1808 static rtx (*ix86_gen_pop1) (rtx);
1809 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1810 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1811 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1812 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1813 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1814 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1816 /* Preferred alignment for stack boundary in bits. */
1817 unsigned int ix86_preferred_stack_boundary;
1819 /* Alignment for incoming stack boundary in bits specified at
1821 static unsigned int ix86_user_incoming_stack_boundary;
1823 /* Default alignment for incoming stack boundary in bits. */
1824 static unsigned int ix86_default_incoming_stack_boundary;
1826 /* Alignment for incoming stack boundary in bits. */
1827 unsigned int ix86_incoming_stack_boundary;
1829 /* The abi used by target. */
1830 enum calling_abi ix86_abi;
1832 /* Values 1-5: see jump.c */
1833 int ix86_branch_cost;
1835 /* Calling abi specific va_list type nodes. */
1836 static GTY(()) tree sysv_va_list_type_node;
1837 static GTY(()) tree ms_va_list_type_node;
1839 /* Variables which are this size or smaller are put in the data/bss
1840 or ldata/lbss sections. */
1842 int ix86_section_threshold = 65536;
1844 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1845 char internal_label_prefix[16];
1846 int internal_label_prefix_len;
1848 /* Fence to use after loop using movnt. */
1851 /* Register class used for passing given 64bit part of the argument.
1852 These represent classes as documented by the PS ABI, with the exception
1853 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1854 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1856 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1857 whenever possible (upper half does contain padding). */
1858 enum x86_64_reg_class
1861 X86_64_INTEGER_CLASS,
1862 X86_64_INTEGERSI_CLASS,
1869 X86_64_COMPLEX_X87_CLASS,
1873 #define MAX_CLASSES 4
1875 /* Table of constants used by fldpi, fldln2, etc.... */
1876 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1877 static bool ext_80387_constants_init = 0;
1880 static struct machine_function * ix86_init_machine_status (void);
1881 static rtx ix86_function_value (const_tree, const_tree, bool);
1882 static int ix86_function_regparm (const_tree, const_tree);
1883 static void ix86_compute_frame_layout (struct ix86_frame *);
1884 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1886 static void ix86_add_new_builtins (int);
1888 enum ix86_function_specific_strings
1890 IX86_FUNCTION_SPECIFIC_ARCH,
1891 IX86_FUNCTION_SPECIFIC_TUNE,
1892 IX86_FUNCTION_SPECIFIC_FPMATH,
1893 IX86_FUNCTION_SPECIFIC_MAX
1896 static char *ix86_target_string (int, int, const char *, const char *,
1897 const char *, bool);
1898 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1899 static void ix86_function_specific_save (struct cl_target_option *);
1900 static void ix86_function_specific_restore (struct cl_target_option *);
1901 static void ix86_function_specific_print (FILE *, int,
1902 struct cl_target_option *);
1903 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1904 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1905 static bool ix86_can_inline_p (tree, tree);
1906 static void ix86_set_current_function (tree);
1908 static enum calling_abi ix86_function_abi (const_tree);
1911 /* The svr4 ABI for the i386 says that records and unions are returned
1913 #ifndef DEFAULT_PCC_STRUCT_RETURN
1914 #define DEFAULT_PCC_STRUCT_RETURN 1
1917 /* Whether -mtune= or -march= were specified */
1918 static int ix86_tune_defaulted;
1919 static int ix86_arch_specified;
1921 /* Bit flags that specify the ISA we are compiling for. */
1922 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1924 /* A mask of ix86_isa_flags that includes bit X if X
1925 was set or cleared on the command line. */
1926 static int ix86_isa_flags_explicit;
/* Define a set of ISAs which are available when a given ISA is
   enabled.  MMX and SSE ISAs are handled separately.  */

#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
/* Enabling 3DNow! also enables MMX, which it implies.  */
#define OPTION_MASK_ISA_3DNOW_SET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)

/* Each SSE level implies every lower SSE level, so each *_SET mask
   chains onto the previous one.  */
#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
#define OPTION_MASK_ISA_SSE2_SET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
#define OPTION_MASK_ISA_SSE3_SET \
  (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
#define OPTION_MASK_ISA_SSSE3_SET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
#define OPTION_MASK_ISA_SSE4_1_SET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
#define OPTION_MASK_ISA_SSE4_2_SET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
#define OPTION_MASK_ISA_AVX_SET \
  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
#define OPTION_MASK_ISA_FMA_SET \
  (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)

/* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
   as -msse4.2.  */
#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET

/* SSE4A (and SSE5, which implies it) builds on SSE3, not SSSE3.  */
#define OPTION_MASK_ISA_SSE4A_SET \
  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
#define OPTION_MASK_ISA_SSE5_SET \
  (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)

/* AES and PCLMUL need SSE2 because they use xmm registers.  */
#define OPTION_MASK_ISA_AES_SET \
  (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
#define OPTION_MASK_ISA_PCLMUL_SET \
  (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)

/* ABM includes POPCNT, so enabling ABM enables POPCNT too.  */
#define OPTION_MASK_ISA_ABM_SET \
  (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)

#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
#define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE

/* Define a set of ISAs which aren't available when a given ISA is
   disabled.  MMX and SSE ISAs are handled separately.  */

/* Disabling MMX also disables 3DNow!, which depends on it.  */
#define OPTION_MASK_ISA_MMX_UNSET \
  (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
#define OPTION_MASK_ISA_3DNOW_UNSET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A

/* Disabling an SSE level also disables every higher level; from SSE3
   down this includes the SSE4A/SSE5 branch as well.  */
#define OPTION_MASK_ISA_SSE_UNSET \
  (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
#define OPTION_MASK_ISA_SSE2_UNSET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
#define OPTION_MASK_ISA_SSE3_UNSET \
  (OPTION_MASK_ISA_SSE3 \
   | OPTION_MASK_ISA_SSSE3_UNSET \
   | OPTION_MASK_ISA_SSE4A_UNSET )
#define OPTION_MASK_ISA_SSSE3_UNSET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE4_1_UNSET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
#define OPTION_MASK_ISA_SSE4_2_UNSET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
#define OPTION_MASK_ISA_AVX_UNSET \
  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA

/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
   as -mno-sse4.1.  */
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET

#define OPTION_MASK_ISA_SSE4A_UNSET \
  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
#define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
#define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2016 /* Vectorization library interface and handlers. */
2017 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2018 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2019 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2021 /* Processor target table, indexed by processor number */
2024 const struct processor_costs *cost; /* Processor costs */
2025 const int align_loop; /* Default alignments. */
2026 const int align_loop_max_skip;
2027 const int align_jump;
2028 const int align_jump_max_skip;
2029 const int align_func;
2032 static const struct ptt processor_target_table[PROCESSOR_max] =
2034 {&i386_cost, 4, 3, 4, 3, 4},
2035 {&i486_cost, 16, 15, 16, 15, 16},
2036 {&pentium_cost, 16, 7, 16, 7, 16},
2037 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2038 {&geode_cost, 0, 0, 0, 0, 0},
2039 {&k6_cost, 32, 7, 32, 7, 32},
2040 {&athlon_cost, 16, 7, 16, 7, 16},
2041 {&pentium4_cost, 0, 0, 0, 0, 0},
2042 {&k8_cost, 16, 7, 16, 7, 16},
2043 {&nocona_cost, 0, 0, 0, 0, 0},
2044 {&core2_cost, 16, 10, 16, 10, 16},
2045 {&generic32_cost, 16, 7, 16, 7, 16},
2046 {&generic64_cost, 16, 10, 16, 10, 16},
2047 {&amdfam10_cost, 32, 24, 32, 7, 32},
2048 {&atom_cost, 16, 7, 16, 7, 16}
2051 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* Implement TARGET_HANDLE_OPTION.

   CODE identifies the -m option being processed, ARG its string
   argument (unused here), and VALUE is nonzero when the option is
   being turned on.  For each ISA option the handler ORs in the
   corresponding OPTION_MASK_ISA_*_SET mask (the ISA plus everything
   it implies) when enabling, or clears the OPTION_MASK_ISA_*_UNSET
   mask (the ISA plus everything that depends on it) when disabling.
   In both cases the touched bits are also recorded in
   ix86_isa_flags_explicit so that CPU defaults applied later in
   override_options do not override an explicit user choice.  */
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)

      /* -mmmx / -mno-mmx  */
      ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;

      /* -m3dnow / -mno-3dnow  */
      ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;

      /* -msse / -mno-sse  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;

      /* -msse2 / -mno-sse2  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;

      /* -msse3 / -mno-sse3  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;

      /* -mssse3 / -mno-ssse3  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;

      /* -msse4.1 / -mno-sse4.1  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;

      /* -msse4.2 / -mno-sse4.2  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;

      /* -mavx / -mno-avx  */
      ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;

      /* -mfma / -mno-fma  */
      ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;

      /* -msse4 / -mno-sse4 (aliases for SSE4.2 / no-SSE4.1)  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;

      /* -msse4a / -mno-sse4a  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;

      /* -msse5 / -mno-sse5  */
      ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;

      /* -mabm / -mno-abm  */
      ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;

      /* -mpopcnt / -mno-popcnt  */
      ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;

      /* -msahf / -mno-sahf  */
      ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;

      /* -mcx16 / -mno-cx16  */
      ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;

      /* -mmovbe / -mno-movbe  */
      ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;

      /* -maes / -mno-aes  */
      ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;

      /* -mpclmul / -mno-pclmul  */
      ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;

      ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2349 /* Return a string the documents the current -m options. The caller is
2350 responsible for freeing the string. */
2353 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2354 const char *fpmath, bool add_nl_p)
2356 struct ix86_target_opts
2358 const char *option; /* option string */
2359 int mask; /* isa mask options */
2362 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2363 preceding options while match those first. */
2364 static struct ix86_target_opts isa_opts[] =
2366 { "-m64", OPTION_MASK_ISA_64BIT },
2367 { "-msse5", OPTION_MASK_ISA_SSE5 },
2368 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2369 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2370 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2371 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2372 { "-msse3", OPTION_MASK_ISA_SSE3 },
2373 { "-msse2", OPTION_MASK_ISA_SSE2 },
2374 { "-msse", OPTION_MASK_ISA_SSE },
2375 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2376 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2377 { "-mmmx", OPTION_MASK_ISA_MMX },
2378 { "-mabm", OPTION_MASK_ISA_ABM },
2379 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2380 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2381 { "-maes", OPTION_MASK_ISA_AES },
2382 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2386 static struct ix86_target_opts flag_opts[] =
2388 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2389 { "-m80387", MASK_80387 },
2390 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2391 { "-malign-double", MASK_ALIGN_DOUBLE },
2392 { "-mcld", MASK_CLD },
2393 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2394 { "-mieee-fp", MASK_IEEE_FP },
2395 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2396 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2397 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2398 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2399 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2400 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2401 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2402 { "-mno-red-zone", MASK_NO_RED_ZONE },
2403 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2404 { "-mrecip", MASK_RECIP },
2405 { "-mrtd", MASK_RTD },
2406 { "-msseregparm", MASK_SSEREGPARM },
2407 { "-mstack-arg-probe", MASK_STACK_PROBE },
2408 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2411 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2414 char target_other[40];
2423 memset (opts, '\0', sizeof (opts));
2425 /* Add -march= option. */
2428 opts[num][0] = "-march=";
2429 opts[num++][1] = arch;
2432 /* Add -mtune= option. */
2435 opts[num][0] = "-mtune=";
2436 opts[num++][1] = tune;
2439 /* Pick out the options in isa options. */
2440 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2442 if ((isa & isa_opts[i].mask) != 0)
2444 opts[num++][0] = isa_opts[i].option;
2445 isa &= ~ isa_opts[i].mask;
2449 if (isa && add_nl_p)
2451 opts[num++][0] = isa_other;
2452 sprintf (isa_other, "(other isa: 0x%x)", isa);
2455 /* Add flag options. */
2456 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2458 if ((flags & flag_opts[i].mask) != 0)
2460 opts[num++][0] = flag_opts[i].option;
2461 flags &= ~ flag_opts[i].mask;
2465 if (flags && add_nl_p)
2467 opts[num++][0] = target_other;
2468 sprintf (target_other, "(other flags: 0x%x)", isa);
2471 /* Add -fpmath= option. */
2474 opts[num][0] = "-mfpmath=";
2475 opts[num++][1] = fpmath;
2482 gcc_assert (num < ARRAY_SIZE (opts));
2484 /* Size the string. */
2486 sep_len = (add_nl_p) ? 3 : 1;
2487 for (i = 0; i < num; i++)
2490 for (j = 0; j < 2; j++)
2492 len += strlen (opts[i][j]);
2495 /* Build the string. */
2496 ret = ptr = (char *) xmalloc (len);
2499 for (i = 0; i < num; i++)
2503 for (j = 0; j < 2; j++)
2504 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2511 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2519 for (j = 0; j < 2; j++)
2522 memcpy (ptr, opts[i][j], len2[j]);
2524 line_len += len2[j];
2529 gcc_assert (ret + len >= ptr);
/* Function that is callable from the debugger to print the current
   options.  */
ix86_debug_options (void)
  /* Render the current ISA bits, target flags and arch/tune/fpmath
     selections into a single malloc'd string.  */
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath_string, true);

    fprintf (stderr, "%s\n\n", opts);

    fprintf (stderr, "<no options>\n\n");
2554 /* Sometimes certain combinations of command options do not make
2555 sense on a particular target machine. You can define a macro
2556 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2557 defined, is executed once just after all the command options have
2560 Don't use this macro to turn on various extra optimizations for
2561 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2564 override_options (bool main_args_p)
2567 unsigned int ix86_arch_mask, ix86_tune_mask;
2572 /* Comes from final.c -- no real reason to change it. */
2573 #define MAX_CODE_ALIGN 16
2581 PTA_PREFETCH_SSE = 1 << 4,
2583 PTA_3DNOW_A = 1 << 6,
2587 PTA_POPCNT = 1 << 10,
2589 PTA_SSE4A = 1 << 12,
2590 PTA_NO_SAHF = 1 << 13,
2591 PTA_SSE4_1 = 1 << 14,
2592 PTA_SSE4_2 = 1 << 15,
2595 PTA_PCLMUL = 1 << 18,
2603 const char *const name; /* processor name or nickname. */
2604 const enum processor_type processor;
2605 const enum attr_cpu schedule;
2606 const unsigned /*enum pta_flags*/ flags;
2608 const processor_alias_table[] =
2610 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2611 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2612 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2613 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2614 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2615 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2616 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2617 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2618 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2619 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2620 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2621 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2622 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2624 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2626 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2627 PTA_MMX | PTA_SSE | PTA_SSE2},
2628 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2629 PTA_MMX |PTA_SSE | PTA_SSE2},
2630 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2631 PTA_MMX | PTA_SSE | PTA_SSE2},
2632 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2633 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2634 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2635 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2636 | PTA_CX16 | PTA_NO_SAHF},
2637 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2638 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2639 | PTA_SSSE3 | PTA_CX16},
2640 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2641 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2642 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2643 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2644 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2645 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2646 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2647 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2648 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2649 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2650 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2651 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2652 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2653 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2654 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2655 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2656 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2657 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2658 {"x86-64", PROCESSOR_K8, CPU_K8,
2659 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2660 {"k8", PROCESSOR_K8, CPU_K8,
2661 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2662 | PTA_SSE2 | PTA_NO_SAHF},
2663 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2664 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2665 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2666 {"opteron", PROCESSOR_K8, CPU_K8,
2667 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2668 | PTA_SSE2 | PTA_NO_SAHF},
2669 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2670 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2671 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2672 {"athlon64", PROCESSOR_K8, CPU_K8,
2673 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2674 | PTA_SSE2 | PTA_NO_SAHF},
2675 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2676 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2677 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2678 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2679 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2680 | PTA_SSE2 | PTA_NO_SAHF},
2681 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2682 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2683 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2684 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2685 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2686 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2687 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2688 0 /* flags are only used for -march switch. */ },
2689 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2690 PTA_64BIT /* flags are only used for -march switch. */ },
2693 int const pta_size = ARRAY_SIZE (processor_alias_table);
2695 /* Set up prefix/suffix so the error messages refer to either the command
2696 line argument, or the attribute(target). */
2705 prefix = "option(\"";
2710 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2711 SUBTARGET_OVERRIDE_OPTIONS;
2714 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2715 SUBSUBTARGET_OVERRIDE_OPTIONS;
2718 /* -fPIC is the default for x86_64. */
2719 if (TARGET_MACHO && TARGET_64BIT)
2722 /* Set the default values for switches whose default depends on TARGET_64BIT
2723 in case they weren't overwritten by command line options. */
2726 /* Mach-O doesn't support omitting the frame pointer for now. */
2727 if (flag_omit_frame_pointer == 2)
2728 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2729 if (flag_asynchronous_unwind_tables == 2)
2730 flag_asynchronous_unwind_tables = 1;
2731 if (flag_pcc_struct_return == 2)
2732 flag_pcc_struct_return = 0;
2736 if (flag_omit_frame_pointer == 2)
2737 flag_omit_frame_pointer = 0;
2738 if (flag_asynchronous_unwind_tables == 2)
2739 flag_asynchronous_unwind_tables = 0;
2740 if (flag_pcc_struct_return == 2)
2741 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2744 /* Need to check -mtune=generic first. */
2745 if (ix86_tune_string)
2747 if (!strcmp (ix86_tune_string, "generic")
2748 || !strcmp (ix86_tune_string, "i686")
2749 /* As special support for cross compilers we read -mtune=native
2750 as -mtune=generic. With native compilers we won't see the
2751 -mtune=native, as it was changed by the driver. */
2752 || !strcmp (ix86_tune_string, "native"))
2755 ix86_tune_string = "generic64";
2757 ix86_tune_string = "generic32";
2759 /* If this call is for setting the option attribute, allow the
2760 generic32/generic64 that was previously set. */
2761 else if (!main_args_p
2762 && (!strcmp (ix86_tune_string, "generic32")
2763 || !strcmp (ix86_tune_string, "generic64")))
2765 else if (!strncmp (ix86_tune_string, "generic", 7))
2766 error ("bad value (%s) for %stune=%s %s",
2767 ix86_tune_string, prefix, suffix, sw);
2771 if (ix86_arch_string)
2772 ix86_tune_string = ix86_arch_string;
2773 if (!ix86_tune_string)
2775 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2776 ix86_tune_defaulted = 1;
2779 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2780 need to use a sensible tune option. */
2781 if (!strcmp (ix86_tune_string, "generic")
2782 || !strcmp (ix86_tune_string, "x86-64")
2783 || !strcmp (ix86_tune_string, "i686"))
2786 ix86_tune_string = "generic64";
2788 ix86_tune_string = "generic32";
2791 if (ix86_stringop_string)
2793 if (!strcmp (ix86_stringop_string, "rep_byte"))
2794 stringop_alg = rep_prefix_1_byte;
2795 else if (!strcmp (ix86_stringop_string, "libcall"))
2796 stringop_alg = libcall;
2797 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2798 stringop_alg = rep_prefix_4_byte;
2799 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2801 /* rep; movq isn't available in 32-bit code. */
2802 stringop_alg = rep_prefix_8_byte;
2803 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2804 stringop_alg = loop_1_byte;
2805 else if (!strcmp (ix86_stringop_string, "loop"))
2806 stringop_alg = loop;
2807 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2808 stringop_alg = unrolled_loop;
2810 error ("bad value (%s) for %sstringop-strategy=%s %s",
2811 ix86_stringop_string, prefix, suffix, sw);
2813 if (!strcmp (ix86_tune_string, "x86-64"))
2814 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2815 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2816 prefix, suffix, prefix, suffix, prefix, suffix);
2818 if (!ix86_arch_string)
2819 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2821 ix86_arch_specified = 1;
2823 if (!strcmp (ix86_arch_string, "generic"))
2824 error ("generic CPU can be used only for %stune=%s %s",
2825 prefix, suffix, sw);
2826 if (!strncmp (ix86_arch_string, "generic", 7))
2827 error ("bad value (%s) for %sarch=%s %s",
2828 ix86_arch_string, prefix, suffix, sw);
2830 /* Validate -mabi= value. */
2831 if (ix86_abi_string)
2833 if (strcmp (ix86_abi_string, "sysv") == 0)
2834 ix86_abi = SYSV_ABI;
2835 else if (strcmp (ix86_abi_string, "ms") == 0)
2838 error ("unknown ABI (%s) for %sabi=%s %s",
2839 ix86_abi_string, prefix, suffix, sw);
2842 ix86_abi = DEFAULT_ABI;
2844 if (ix86_cmodel_string != 0)
2846 if (!strcmp (ix86_cmodel_string, "small"))
2847 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2848 else if (!strcmp (ix86_cmodel_string, "medium"))
2849 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2850 else if (!strcmp (ix86_cmodel_string, "large"))
2851 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2853 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2854 else if (!strcmp (ix86_cmodel_string, "32"))
2855 ix86_cmodel = CM_32;
2856 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2857 ix86_cmodel = CM_KERNEL;
2859 error ("bad value (%s) for %scmodel=%s %s",
2860 ix86_cmodel_string, prefix, suffix, sw);
2864 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2865 use of rip-relative addressing. This eliminates fixups that
2866 would otherwise be needed if this object is to be placed in a
2867 DLL, and is essentially just as efficient as direct addressing. */
2868 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2869 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2870 else if (TARGET_64BIT)
2871 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2873 ix86_cmodel = CM_32;
2875 if (ix86_asm_string != 0)
2878 && !strcmp (ix86_asm_string, "intel"))
2879 ix86_asm_dialect = ASM_INTEL;
2880 else if (!strcmp (ix86_asm_string, "att"))
2881 ix86_asm_dialect = ASM_ATT;
2883 error ("bad value (%s) for %sasm=%s %s",
2884 ix86_asm_string, prefix, suffix, sw);
2886 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2887 error ("code model %qs not supported in the %s bit mode",
2888 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2889 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2890 sorry ("%i-bit mode not compiled in",
2891 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2893 for (i = 0; i < pta_size; i++)
2894 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2896 ix86_schedule = processor_alias_table[i].schedule;
2897 ix86_arch = processor_alias_table[i].processor;
2898 /* Default cpu tuning to the architecture. */
2899 ix86_tune = ix86_arch;
2901 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2902 error ("CPU you selected does not support x86-64 "
2905 if (processor_alias_table[i].flags & PTA_MMX
2906 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2907 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2908 if (processor_alias_table[i].flags & PTA_3DNOW
2909 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2910 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2911 if (processor_alias_table[i].flags & PTA_3DNOW_A
2912 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2913 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2914 if (processor_alias_table[i].flags & PTA_SSE
2915 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2916 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2917 if (processor_alias_table[i].flags & PTA_SSE2
2918 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2919 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2920 if (processor_alias_table[i].flags & PTA_SSE3
2921 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2922 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2923 if (processor_alias_table[i].flags & PTA_SSSE3
2924 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2925 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2926 if (processor_alias_table[i].flags & PTA_SSE4_1
2927 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2928 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2929 if (processor_alias_table[i].flags & PTA_SSE4_2
2930 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2931 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2932 if (processor_alias_table[i].flags & PTA_AVX
2933 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2934 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2935 if (processor_alias_table[i].flags & PTA_FMA
2936 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2937 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2938 if (processor_alias_table[i].flags & PTA_SSE4A
2939 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2940 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2941 if (processor_alias_table[i].flags & PTA_SSE5
2942 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2943 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2944 if (processor_alias_table[i].flags & PTA_ABM
2945 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2946 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2947 if (processor_alias_table[i].flags & PTA_CX16
2948 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2949 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2950 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2951 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2952 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2953 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2954 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2955 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2956 if (processor_alias_table[i].flags & PTA_MOVBE
2957 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
2958 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
2959 if (processor_alias_table[i].flags & PTA_AES
2960 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2961 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2962 if (processor_alias_table[i].flags & PTA_PCLMUL
2963 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2964 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2965 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2966 x86_prefetch_sse = true;
2972 error ("bad value (%s) for %sarch=%s %s",
2973 ix86_arch_string, prefix, suffix, sw);
2975 ix86_arch_mask = 1u << ix86_arch;
2976 for (i = 0; i < X86_ARCH_LAST; ++i)
2977 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2979 for (i = 0; i < pta_size; i++)
2980 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2982 ix86_schedule = processor_alias_table[i].schedule;
2983 ix86_tune = processor_alias_table[i].processor;
2984 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2986 if (ix86_tune_defaulted)
2988 ix86_tune_string = "x86-64";
2989 for (i = 0; i < pta_size; i++)
2990 if (! strcmp (ix86_tune_string,
2991 processor_alias_table[i].name))
2993 ix86_schedule = processor_alias_table[i].schedule;
2994 ix86_tune = processor_alias_table[i].processor;
2997 error ("CPU you selected does not support x86-64 "
3000 /* Intel CPUs have always interpreted SSE prefetch instructions as
3001 NOPs; so, we can enable SSE prefetch instructions even when
3002 -mtune (rather than -march) points us to a processor that has them.
3003 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3004 higher processors. */
3006 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3007 x86_prefetch_sse = true;
3011 error ("bad value (%s) for %stune=%s %s",
3012 ix86_tune_string, prefix, suffix, sw);
3014 ix86_tune_mask = 1u << ix86_tune;
3015 for (i = 0; i < X86_TUNE_LAST; ++i)
3016 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3019 ix86_cost = &ix86_size_cost;
3021 ix86_cost = processor_target_table[ix86_tune].cost;
3023 /* Arrange to set up i386_stack_locals for all functions. */
3024 init_machine_status = ix86_init_machine_status;
3026 /* Validate -mregparm= value. */
3027 if (ix86_regparm_string)
3030 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3031 i = atoi (ix86_regparm_string);
3032 if (i < 0 || i > REGPARM_MAX)
3033 error ("%sregparm=%d%s is not between 0 and %d",
3034 prefix, i, suffix, REGPARM_MAX);
3039 ix86_regparm = REGPARM_MAX;
3041 /* If the user has provided any of the -malign-* options,
3042 warn and use that value only if -falign-* is not set.
3043 Remove this code in GCC 3.2 or later. */
3044 if (ix86_align_loops_string)
3046 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3047 prefix, suffix, suffix);
3048 if (align_loops == 0)
3050 i = atoi (ix86_align_loops_string);
3051 if (i < 0 || i > MAX_CODE_ALIGN)
3052 error ("%salign-loops=%d%s is not between 0 and %d",
3053 prefix, i, suffix, MAX_CODE_ALIGN);
3055 align_loops = 1 << i;
3059 if (ix86_align_jumps_string)
3061 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3062 prefix, suffix, suffix);
3063 if (align_jumps == 0)
3065 i = atoi (ix86_align_jumps_string);
3066 if (i < 0 || i > MAX_CODE_ALIGN)
3067 error ("%salign-loops=%d%s is not between 0 and %d",
3068 prefix, i, suffix, MAX_CODE_ALIGN);
3070 align_jumps = 1 << i;
3074 if (ix86_align_funcs_string)
3076 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3077 prefix, suffix, suffix);
3078 if (align_functions == 0)
3080 i = atoi (ix86_align_funcs_string);
3081 if (i < 0 || i > MAX_CODE_ALIGN)
3082 error ("%salign-loops=%d%s is not between 0 and %d",
3083 prefix, i, suffix, MAX_CODE_ALIGN);
3085 align_functions = 1 << i;
3089 /* Default align_* from the processor table. */
3090 if (align_loops == 0)
3092 align_loops = processor_target_table[ix86_tune].align_loop;
3093 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3095 if (align_jumps == 0)
3097 align_jumps = processor_target_table[ix86_tune].align_jump;
3098 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3100 if (align_functions == 0)
3102 align_functions = processor_target_table[ix86_tune].align_func;
3105 /* Validate -mbranch-cost= value, or provide default. */
3106 ix86_branch_cost = ix86_cost->branch_cost;
3107 if (ix86_branch_cost_string)
3109 i = atoi (ix86_branch_cost_string);
3111 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3113 ix86_branch_cost = i;
3115 if (ix86_section_threshold_string)
3117 i = atoi (ix86_section_threshold_string);
3119 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3121 ix86_section_threshold = i;
3124 if (ix86_tls_dialect_string)
3126 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3127 ix86_tls_dialect = TLS_DIALECT_GNU;
3128 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3129 ix86_tls_dialect = TLS_DIALECT_GNU2;
3130 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3131 ix86_tls_dialect = TLS_DIALECT_SUN;
3133 error ("bad value (%s) for %stls-dialect=%s %s",
3134 ix86_tls_dialect_string, prefix, suffix, sw);
3137 if (ix87_precision_string)
3139 i = atoi (ix87_precision_string);
3140 if (i != 32 && i != 64 && i != 80)
3141 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3146 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3148 /* Enable by default the SSE and MMX builtins. Do allow the user to
3149 explicitly disable any of these. In particular, disabling SSE and
3150 MMX for kernel code is extremely useful. */
3151 if (!ix86_arch_specified)
3153 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3154 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3157 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3161 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3163 if (!ix86_arch_specified)
3165 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3167 /* i386 ABI does not specify red zone. It still makes sense to use it
3168 when programmer takes care to stack from being destroyed. */
3169 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3170 target_flags |= MASK_NO_RED_ZONE;
3173 /* Keep nonleaf frame pointers. */
3174 if (flag_omit_frame_pointer)
3175 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3176 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3177 flag_omit_frame_pointer = 1;
3179 /* If we're doing fast math, we don't care about comparison order
3180 wrt NaNs. This lets us use a shorter comparison sequence. */
3181 if (flag_finite_math_only)
3182 target_flags &= ~MASK_IEEE_FP;
3184 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3185 since the insns won't need emulation. */
3186 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3187 target_flags &= ~MASK_NO_FANCY_MATH_387;
3189 /* Likewise, if the target doesn't have a 387, or we've specified
3190 software floating point, don't use 387 inline intrinsics. */
3192 target_flags |= MASK_NO_FANCY_MATH_387;
3194 /* Turn on MMX builtins for -msse. */
3197 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3198 x86_prefetch_sse = true;
3201 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3202 if (TARGET_SSE4_2 || TARGET_ABM)
3203 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3205 /* Validate -mpreferred-stack-boundary= value or default it to
3206 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3207 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3208 if (ix86_preferred_stack_boundary_string)
3210 i = atoi (ix86_preferred_stack_boundary_string);
3211 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3212 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3213 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3215 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3218 /* Set the default value for -mstackrealign. */
3219 if (ix86_force_align_arg_pointer == -1)
3220 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3222 /* Validate -mincoming-stack-boundary= value or default it to
3223 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3224 if (ix86_force_align_arg_pointer)
3225 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3227 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3228 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3229 if (ix86_incoming_stack_boundary_string)
3231 i = atoi (ix86_incoming_stack_boundary_string);
3232 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3233 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3234 i, TARGET_64BIT ? 4 : 2);
3237 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3238 ix86_incoming_stack_boundary
3239 = ix86_user_incoming_stack_boundary;
3243 /* Accept -msseregparm only if at least SSE support is enabled. */
3244 if (TARGET_SSEREGPARM
3246 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3248 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3249 if (ix86_fpmath_string != 0)
3251 if (! strcmp (ix86_fpmath_string, "387"))
3252 ix86_fpmath = FPMATH_387;
3253 else if (! strcmp (ix86_fpmath_string, "sse"))
3257 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3258 ix86_fpmath = FPMATH_387;
3261 ix86_fpmath = FPMATH_SSE;
3263 else if (! strcmp (ix86_fpmath_string, "387,sse")
3264 || ! strcmp (ix86_fpmath_string, "387+sse")
3265 || ! strcmp (ix86_fpmath_string, "sse,387")
3266 || ! strcmp (ix86_fpmath_string, "sse+387")
3267 || ! strcmp (ix86_fpmath_string, "both"))
3271 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3272 ix86_fpmath = FPMATH_387;
3274 else if (!TARGET_80387)
3276 warning (0, "387 instruction set disabled, using SSE arithmetics");
3277 ix86_fpmath = FPMATH_SSE;
3280 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3283 error ("bad value (%s) for %sfpmath=%s %s",
3284 ix86_fpmath_string, prefix, suffix, sw);
3287 /* If the i387 is disabled, then do not return values in it. */
3289 target_flags &= ~MASK_FLOAT_RETURNS;
3291 /* Use external vectorized library in vectorizing intrinsics. */
3292 if (ix86_veclibabi_string)
3294 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3295 ix86_veclib_handler = ix86_veclibabi_svml;
3296 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3297 ix86_veclib_handler = ix86_veclibabi_acml;
3299 error ("unknown vectorization library ABI type (%s) for "
3300 "%sveclibabi=%s %s", ix86_veclibabi_string,
3301 prefix, suffix, sw);
3304 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3305 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3307 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3309 /* ??? Unwind info is not correct around the CFG unless either a frame
3310 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3311 unwind info generation to be aware of the CFG and propagating states
3313 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3314 || flag_exceptions || flag_non_call_exceptions)
3315 && flag_omit_frame_pointer
3316 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3318 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3319 warning (0, "unwind tables currently require either a frame pointer "
3320 "or %saccumulate-outgoing-args%s for correctness",
3322 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3325 /* If stack probes are required, the space used for large function
3326 arguments on the stack must also be probed, so enable
3327 -maccumulate-outgoing-args so this happens in the prologue. */
3328 if (TARGET_STACK_PROBE
3329 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3331 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3332 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3333 "for correctness", prefix, suffix);
3334 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3337 /* For sane SSE instruction set generation we need fcomi instruction.
3338 It is safe to enable all CMOVE instructions. */
3342 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3345 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3346 p = strchr (internal_label_prefix, 'X');
3347 internal_label_prefix_len = p - internal_label_prefix;
3351 /* When scheduling description is not available, disable scheduler pass
3352 so it won't slow down the compilation and make x87 code slower. */
3353 if (!TARGET_SCHEDULE)
3354 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3356 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3357 set_param_value ("simultaneous-prefetches",
3358 ix86_cost->simultaneous_prefetches);
3359 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3360 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3361 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3362 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3363 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3364 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3366 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3367 can be optimized to ap = __builtin_next_arg (0). */
3369 targetm.expand_builtin_va_start = NULL;
3373 ix86_gen_leave = gen_leave_rex64;
3374 ix86_gen_pop1 = gen_popdi1;
3375 ix86_gen_add3 = gen_adddi3;
3376 ix86_gen_sub3 = gen_subdi3;
3377 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3378 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3379 ix86_gen_monitor = gen_sse3_monitor64;
3380 ix86_gen_andsp = gen_anddi3;
3384 ix86_gen_leave = gen_leave;
3385 ix86_gen_pop1 = gen_popsi1;
3386 ix86_gen_add3 = gen_addsi3;
3387 ix86_gen_sub3 = gen_subsi3;
3388 ix86_gen_sub3_carry = gen_subsi3_carry;
3389 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3390 ix86_gen_monitor = gen_sse3_monitor;
3391 ix86_gen_andsp = gen_andsi3;
3395 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3397 target_flags |= MASK_CLD & ~target_flags_explicit;
3400 /* Save the initial options in case the user does function specific options */
3402 target_option_default_node = target_option_current_node
3403 = build_target_option_node ();
3406 /* Save the current options */
/* TARGET_OPTION_SAVE hook: snapshot the current global x86 option state
   into PTR so it can later be reinstated by
   ix86_function_specific_restore (used by the function-specific
   attribute((target(...))) machinery).
   NOTE(review): this listing elides some lines (return-type line and
   braces) of the original definition.  */
3409 ix86_function_specific_save (struct cl_target_option *ptr)
/* The assertions guard the narrowing assignments below: the values are
   stored into cl_target_option fields that — judging by the 0..255
   range — are presumably byte-wide; confirm against the struct
   definition.  */
3411   gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3412   gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3413   gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3414   gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3415   gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
/* Copy every option global that override_options and the attribute
   parser may change, including the "explicit" masks that record which
   flags the user set on the command line.  */
3417   ptr->arch = ix86_arch;
3418   ptr->schedule = ix86_schedule;
3419   ptr->tune = ix86_tune;
3420   ptr->fpmath = ix86_fpmath;
3421   ptr->branch_cost = ix86_branch_cost;
3422   ptr->tune_defaulted = ix86_tune_defaulted;
3423   ptr->arch_specified = ix86_arch_specified;
3424   ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3425   ptr->target_flags_explicit = target_flags_explicit;
3428 /* Restore the current options */
/* TARGET_OPTION_RESTORE hook: reinstate the option globals previously
   saved into PTR by ix86_function_specific_save, and rebuild the
   derived per-arch / per-tune feature tables when the arch or tune
   selection actually changed.
   NOTE(review): braces and a few declaration lines are elided in this
   listing.  */
3431 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Remember the old selections so we only recompute the (relatively
   expensive) feature tables when they really changed.  */
3433   enum processor_type old_tune = ix86_tune;
3434   enum processor_type old_arch = ix86_arch;
3435   unsigned int ix86_arch_mask, ix86_tune_mask;
/* The saved fields are narrow integers; cast back to the enum types the
   globals use.  */
3438   ix86_arch = (enum processor_type) ptr->arch;
3439   ix86_schedule = (enum attr_cpu) ptr->schedule;
3440   ix86_tune = (enum processor_type) ptr->tune;
3441   ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3442   ix86_branch_cost = ptr->branch_cost;
3443   ix86_tune_defaulted = ptr->tune_defaulted;
3444   ix86_arch_specified = ptr->arch_specified;
3445   ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3446   target_flags_explicit = ptr->target_flags_explicit;
3448   /* Recreate the arch feature tests if the arch changed.  Each entry of
     initial_ix86_arch_features is a bitmask over processor types; the
     feature is on iff the bit for the current arch is set.  */
3449   if (old_arch != ix86_arch)
3451       ix86_arch_mask = 1u << ix86_arch;
3452       for (i = 0; i < X86_ARCH_LAST; ++i)
3453 	ix86_arch_features[i]
3454 	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3457   /* Recreate the tune optimization tests, same bitmask scheme as the
     arch features above.  */
3458   if (old_tune != ix86_tune)
3460       ix86_tune_mask = 1u << ix86_tune;
3461       for (i = 0; i < X86_TUNE_LAST; ++i)
3462 	ix86_tune_features[i]
3463 	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3467 /* Print the current options */
/* TARGET_OPTION_PRINT hook: dump the saved option set PTR to FILE for
   debugging, each line indented by INDENT columns (the "%*s" with an
   empty string produces the indentation).
   NOTE(review): this listing elides the lines that print the fallback
   name for an out-of-range arch/tune index.  */
3470 ix86_function_specific_print (FILE *file, int indent,
3471 			      struct cl_target_option *ptr)
/* Build a human-readable "-m..." style summary of the saved ISA and
   target flags; printed (and freed) at the end.  */
3474     = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3475 			  NULL, NULL, NULL, false);
3477   fprintf (file, "%*sarch = %d (%s)\n",
/* Only index cpu_names when the saved value is a valid table index.  */
3480 	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
3481 	    ? cpu_names[ptr->arch]
3484   fprintf (file, "%*stune = %d (%s)\n",
3487 	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
3488 	    ? cpu_names[ptr->tune]
/* fpmath is a bitmask; print each enabled unit as a suffix.  */
3491   fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3492 	   (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3493 	   (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3494   fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3498     fprintf (file, "%*s%s\n", indent, "", target_string);
3499   free (target_string);
3504 /* Inner function to process the attribute((target(...))), take an argument and
3505 set the current options from the argument. If we have a list, recursively go
/* Worker for attribute((target(...))): parse ARGS (a TREE_LIST or a
   STRING_CST of comma-separated options) and apply each recognized
   option to the global option state.  String-valued options (arch=,
   tune=, fpmath=) are not applied directly; their values are stored in
   P_STRINGS, indexed by IX86_FUNCTION_SPECIFIC_*, for the caller to
   process.  Returns whether parsing succeeded.
   NOTE(review): many structural lines (braces, returns, some table
   entries and loop bookkeeping) are elided in this listing.  */
3509 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Table-entry constructors: S is the option name, O the corresponding
   OPT_* enumerator, M a target_flags mask for yes/no options.  The
   length is precomputed so matching below can use memcmp.  */
3514 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3515 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3516 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3517 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3532     enum ix86_opt_type type;
/* ISA options: routed through ix86_handle_option, same as the
   equivalent -m command-line switches.  */
3537     IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3538     IX86_ATTR_ISA ("abm", OPT_mabm),
3539     IX86_ATTR_ISA ("aes", OPT_maes),
3540     IX86_ATTR_ISA ("avx", OPT_mavx),
3541     IX86_ATTR_ISA ("mmx", OPT_mmmx),
3542     IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3543     IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3544     IX86_ATTR_ISA ("sse", OPT_msse),
3545     IX86_ATTR_ISA ("sse2", OPT_msse2),
3546     IX86_ATTR_ISA ("sse3", OPT_msse3),
3547     IX86_ATTR_ISA ("sse4", OPT_msse4),
3548     IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3549     IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3550     IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3551     IX86_ATTR_ISA ("sse5", OPT_msse5),
3552     IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3554     /* string options: value is remembered in p_strings[] rather than
       applied immediately.  */
3555     IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3556     IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3557     IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Boolean options that set/clear a mask in target_flags.  The _NO
   variants invert the sense because the mask itself is negative
   (e.g. MASK_NO_FANCY_MATH_387).  */
3560     IX86_ATTR_YES ("cld",
3564     IX86_ATTR_NO ("fancy-math-387",
3565 		  OPT_mfancy_math_387,
3566 		  MASK_NO_FANCY_MATH_387),
3568     IX86_ATTR_NO ("fused-madd",
3570 		  MASK_NO_FUSED_MADD),
3572     IX86_ATTR_YES ("ieee-fp",
3576     IX86_ATTR_YES ("inline-all-stringops",
3577 		   OPT_minline_all_stringops,
3578 		   MASK_INLINE_ALL_STRINGOPS),
3580     IX86_ATTR_YES ("inline-stringops-dynamically",
3581 		   OPT_minline_stringops_dynamically,
3582 		   MASK_INLINE_STRINGOPS_DYNAMICALLY),
3584     IX86_ATTR_NO ("align-stringops",
3585 		  OPT_mno_align_stringops,
3586 		  MASK_NO_ALIGN_STRINGOPS),
3588     IX86_ATTR_YES ("recip",
3594   /* If this is a list, recurse to get the options.  */
3595   if (TREE_CODE (args) == TREE_LIST)
3599       for (; args; args = TREE_CHAIN (args))
3600 	if (TREE_VALUE (args)
3601 	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3607   else if (TREE_CODE (args) != STRING_CST)
3610   /* Handle multiple arguments separated by commas.  ASTRDUP gives a
     writable copy on the obstack; the loop carves it up in place.  */
3611   next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3613   while (next_optstr && *next_optstr != '\0')
3615       char *p = next_optstr;
3617       char *comma = strchr (next_optstr, ',');
3618       const char *opt_string;
3619       size_t len, opt_len;
3624       enum ix86_opt_type type = ix86_opt_unknown;
/* Terminate the current token at the comma and advance past it.  */
3630 	  len = comma - next_optstr;
3631 	  next_optstr = comma + 1;
3639       /* Recognize no-xxx.  */
3640       if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3649       /* Find the option.  Compare the first character before the memcmp
         as a cheap reject; string options match on prefix ("arch=..."),
         all others must match exactly.  */
3652       for (i = 0; i < ARRAY_SIZE (attrs); i++)
3654 	  type = attrs[i].type;
3655 	  opt_len = attrs[i].len;
3656 	  if (ch == attrs[i].string[0]
3657 	      && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3658 	      && memcmp (p, attrs[i].string, opt_len) == 0)
3661 	      mask = attrs[i].mask;
3662 	      opt_string = attrs[i].string;
3667       /* Process the option.  */
3670 	  error ("attribute(target(\"%s\")) is unknown", orig_p);
3674       else if (type == ix86_opt_isa)
3675 	ix86_handle_option (opt, p, opt_set_p);
3677       else if (type == ix86_opt_yes || type == ix86_opt_no)
/* For _NO options the stored mask has inverted polarity, so flip the
   requested sense before applying it.  */
3679 	  if (type == ix86_opt_no)
3680 	    opt_set_p = !opt_set_p;
3683 	    target_flags |= mask;
3685 	    target_flags &= ~mask;
3688       else if (type == ix86_opt_str)
3692 	      error ("option(\"%s\") was already specified", opt_string);
/* Remember the value part (text after "arch="/"tune="/"fpmath=") for
   the caller; ownership of the xstrdup'ed string passes to p_strings.  */
3696 	    p_strings[opt] = xstrdup (p + opt_len);
3706 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.
   Parses ARGS via ix86_valid_target_attribute_inner_p, and if anything
   differs from the command-line defaults, re-runs override_options with
   the attribute's arch/tune/fpmath strings temporarily installed, then
   captures the resulting option state in a tree node.  The original
   string globals are restored before returning.
   NOTE(review): braces, the early-return on parse failure and some
   declarations are elided in this listing.  */
3709 ix86_valid_target_attribute_tree (tree args)
/* Save the string-typed option globals so they can be restored after
   the temporary override below.  */
3711   const char *orig_arch_string = ix86_arch_string;
3712   const char *orig_tune_string = ix86_tune_string;
3713   const char *orig_fpmath_string = ix86_fpmath_string;
3714   int orig_tune_defaulted = ix86_tune_defaulted;
3715   int orig_arch_specified = ix86_arch_specified;
3716   char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3719   struct cl_target_option *def
3720     = TREE_TARGET_OPTION (target_option_default_node);
3722   /* Process each of the options on the chain.  */
3723   if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3726   /* If the changed options are different from the default, rerun override_options,
3727      and then save the options away.  The string options are attribute options,
3728      and will be undone when we copy the save structure.  */
3729   if (ix86_isa_flags != def->ix86_isa_flags
3730       || target_flags != def->target_flags
3731       || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3732       || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3733       || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3735       /* If we are using the default tune= or arch=, undo the string assigned,
3736 	 and use the default.  */
3737       if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3738 	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3739       else if (!orig_arch_specified)
3740 	ix86_arch_string = NULL;
3742       if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3743 	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3744       else if (orig_tune_defaulted)
3745 	ix86_tune_string = NULL;
3747       /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
3748       if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3749 	ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3750       else if (!TARGET_64BIT && TARGET_SSE)
3751 	ix86_fpmath_string = "sse,387";
3753       /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
3754       override_options (false);
3756       /* Add any builtin functions with the new isa if any.  */
3757       ix86_add_new_builtins (ix86_isa_flags);
3759       /* Save the current options unless we are validating options for
	 (NOTE(review): the tail of this comment is elided in this
	 listing).  */
3761       t = build_target_option_node ();
/* Restore the globals clobbered for the override_options call.  */
3763       ix86_arch_string = orig_arch_string;
3764       ix86_tune_string = orig_tune_string;
3765       ix86_fpmath_string = orig_fpmath_string;
3767   /* Free up memory allocated to hold the strings.  (The NULL check is
     redundant — free (NULL) is a no-op — but harmless.)  */
3768   for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3769     if (option_strings[i])
3770       free (option_strings[i]);
3776 /* Hook to validate attribute((target("string"))). */
/* Hook to validate attribute((target("string"))) on FNDECL.  Builds the
   target-option tree for ARGS, attaches it (and any changed
   optimization node) to FNDECL, then restores the caller's target and
   optimization state so validation has no lasting side effects.
   NOTE(review): braces, the return statement and the check guarding the
   DECL_FUNCTION_SPECIFIC_TARGET assignment are elided in this
   listing.  */
3779 ix86_valid_target_attribute_p (tree fndecl,
3780 			       tree ARG_UNUSED (name),
3782 			       int ARG_UNUSED (flags))
3784   struct cl_target_option cur_target;
3786   tree old_optimize = build_optimization_node ();
3787   tree new_target, new_optimize;
3788   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3790   /* If the function changed the optimization levels as well as setting target
3791      options, start with the optimizations specified.  */
3792   if (func_optimize && func_optimize != old_optimize)
3793     cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3795   /* The target attributes may also change some optimization flags, so update
3796      the optimization options if necessary.  */
3797   cl_target_option_save (&cur_target);
3798   new_target = ix86_valid_target_attribute_tree (args);
3799   new_optimize = build_optimization_node ();
/* Record the parsed target (and, if changed, optimization) settings on
   the function declaration.  */
3806       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3808       if (old_optimize != new_optimize)
3809 	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary option changes made during validation.  */
3812   cl_target_option_restore (&cur_target);
3814   if (old_optimize != new_optimize)
3815     cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3821 /* Hook to determine if one function can safely inline another. */
/* Hook to determine if one function can safely inline another, based on
   their function-specific target options.  The callee may be inlined
   when its ISA requirements are a subset of the caller's and the
   remaining saved options match exactly.
   NOTE(review): braces and the individual return statements for each
   comparison are elided in this listing.  */
3824 ix86_can_inline_p (tree caller, tree callee)
3827   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3828   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3830   /* If callee has no option attributes, then it is ok to inline.  */
3834   /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
3836   else if (!caller_tree)
3841       struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3842       struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3844       /* Callee's isa options should be a subset of the caller's, i.e. a SSE5 function
3845 	 can inline a SSE2 function but a SSE2 function can't inline a SSE5
	 function.  */
3847       if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3848 	  != callee_opts->ix86_isa_flags)
3851       /* See if we have the same non-isa options.  */
3852       else if (caller_opts->target_flags != callee_opts->target_flags)
3855       /* See if arch, tune, etc. are the same.  */
3856       else if (caller_opts->arch != callee_opts->arch)
3859       else if (caller_opts->tune != callee_opts->tune)
3862       else if (caller_opts->fpmath != callee_opts->fpmath)
3865       else if (caller_opts->branch_cost != callee_opts->branch_cost)
3876 /* Remember the last target of ix86_set_current_function. */
3877 static GTY(()) tree ix86_previous_fndecl;
3879 /* Establish appropriate back-end context for processing the function
3880 FNDECL. The argument might be NULL to indicate processing at top
3881 level, outside of any function scope. */
/* TARGET_SET_CURRENT_FUNCTION hook: establish back-end context for
   FNDECL.  Restores that function's saved target options (or the
   current defaults when it has none) whenever the function being
   compiled actually changes.
   NOTE(review): braces, the else branches and a target_reinit call are
   elided in this listing.  */
3883 ix86_set_current_function (tree fndecl)
3885   /* Only change the context if the function changes.  This hook is called
3886      several times in the course of compiling a function, and we don't want to
3887      slow things down too much or call target_reinit when it isn't safe.  */
3888   if (fndecl && fndecl != ix86_previous_fndecl)
3890       tree old_tree = (ix86_previous_fndecl
3891 		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3894       tree new_tree = (fndecl
3895 		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3898       ix86_previous_fndecl = fndecl;
/* Same option tree as before: nothing to restore.  */
3899       if (old_tree == new_tree)
/* New function carries its own target options: install them.  */
3904 	  cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* Otherwise fall back to the options saved in
   target_option_current_node.  */
3910 	  struct cl_target_option *def
3911 	    = TREE_TARGET_OPTION (target_option_current_node);
3913 	  cl_target_option_restore (def);
3920 /* Return true if this goes in large data/bss. */
/* Return true if EXP (a decl or constant) goes in large data/bss.  Only
   relevant for the medium code models; functions never qualify, and a
   variable qualifies if it is explicitly placed in .ldata/.lbss or its
   size exceeds ix86_section_threshold.
   NOTE(review): braces and the return statements are elided in this
   listing.  */
3923 ix86_in_large_data_p (tree exp)
/* Only the medium code models split data into small and large
   sections.  */
3925   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3928   /* Functions are never large data.  */
3929   if (TREE_CODE (exp) == FUNCTION_DECL)
/* Honor an explicit section placement into the large-data sections.  */
3932   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3934       const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3935       if (strcmp (section, ".ldata") == 0
3936 	  || strcmp (section, ".lbss") == 0)
3942       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3944       /* If this is an incomplete type with size 0, then we can't put it
3945 	 in data because it might be too big when completed.  */
3946       if (!size || size > ix86_section_threshold)
3953 /* Switch to the appropriate section for output of DECL.
3954 DECL is either a `VAR_DECL' node or a constant of some sort.
3955 RELOC indicates whether forming the initial value of DECL requires
3956 link-time relocations. */
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  For medium-model large data, the normal
   section categories are mapped onto ".l"-prefixed large sections;
   everything else defers to the generic ELF selector.
   NOTE(review): several switch cases, break statements and the sname
   assignments for bss/rodata are elided in this listing.  */
3958 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3962 x86_64_elf_select_section (tree decl, int reloc,
3963 			   unsigned HOST_WIDE_INT align)
3965   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3966       && ix86_in_large_data_p (decl))
3968       const char *sname = NULL;
3969       unsigned int flags = SECTION_WRITE;
/* Map each generic section category onto its large-model (".l"-prefixed)
   counterpart.  */
3970       switch (categorize_decl_for_section (decl, reloc))
3975 	case SECCAT_DATA_REL:
3976 	  sname = ".ldata.rel";
3978 	case SECCAT_DATA_REL_LOCAL:
3979 	  sname = ".ldata.rel.local";
3981 	case SECCAT_DATA_REL_RO:
3982 	  sname = ".ldata.rel.ro";
3984 	case SECCAT_DATA_REL_RO_LOCAL:
3985 	  sname = ".ldata.rel.ro.local";
3989 	  flags |= SECTION_BSS;
3992 	case SECCAT_RODATA_MERGE_STR:
3993 	case SECCAT_RODATA_MERGE_STR_INIT:
3994 	case SECCAT_RODATA_MERGE_CONST:
3998 	case SECCAT_SRODATA:
4005 	  /* We don't split these for medium model.  Place them into
4006 	     default sections and hope for best.  */
4008 	case SECCAT_EMUTLS_VAR:
4009 	case SECCAT_EMUTLS_TMPL:
4014 	  /* We might get called with string constants, but get_named_section
4015 	     doesn't like them as they are not DECLs.  Also, we need to set
4016 	     flags in that case.  */
4018 	    return get_section (sname, flags, NULL);
4019 	  return get_named_section (decl, sname, reloc);
/* Not large data (or not a medium model): use the generic logic.  */
4022   return default_elf_select_section (decl, reloc, align);
4025 /* Build up a unique section name, expressed as a
4026 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4027 RELOC indicates whether the initial value of EXP requires
4028 link-time relocations. */
/* NOTE(review): interior lines (braces, `break's, the default path) are
   elided in this excerpt — original numbering is non-contiguous.  */
4030 static void ATTRIBUTE_UNUSED
4031 x86_64_elf_unique_section (tree decl, int reloc)
/* Large-model data gets a ".l"-prefixed unique section name; everything
   else falls through to default_unique_section at the bottom.  */
4033 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4034 && ix86_in_large_data_p (decl))
4036 const char *prefix = NULL;
4037 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4038 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4040 switch (categorize_decl_for_section (decl, reloc))
4043 case SECCAT_DATA_REL:
4044 case SECCAT_DATA_REL_LOCAL:
4045 case SECCAT_DATA_REL_RO:
4046 case SECCAT_DATA_REL_RO_LOCAL:
4047 prefix = one_only ? ".ld" : ".ldata";
4050 prefix = one_only ? ".lb" : ".lbss";
4053 case SECCAT_RODATA_MERGE_STR:
4054 case SECCAT_RODATA_MERGE_STR_INIT:
4055 case SECCAT_RODATA_MERGE_CONST:
4056 prefix = one_only ? ".lr" : ".lrodata";
4058 case SECCAT_SRODATA:
4065 /* We don't split these for medium model. Place them into
4066 default sections and hope for best. */
4068 case SECCAT_EMUTLS_VAR:
4069 prefix = targetm.emutls.var_section;
4071 case SECCAT_EMUTLS_TMPL:
4072 prefix = targetm.emutls.tmpl_section;
4077 const char *name, *linkonce;
/* Use the assembler name with any target encoding stripped as the
   unique suffix.  */
4080 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4081 name = targetm.strip_name_encoding (name);
4083 /* If we're using one_only, then there needs to be a .gnu.linkonce
4084 prefix to the section name. */
4085 linkonce = one_only ? ".gnu.linkonce" : "";
4087 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4089 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4093 default_unique_section (decl, reloc);
4096 #ifdef COMMON_ASM_OP
4097 /* This says how to output assembler code to declare an
4098 uninitialized external linkage data object.
4100 For medium model x86-64 we need to use .largecomm opcode for
/* ... large objects (comment continuation elided in this excerpt).
   Emits either ".largecomm" (medium model, object above the section
   threshold) or the normal COMMON_ASM_OP, followed by "name,size,align"
   with alignment converted from bits to bytes.  */
4103 x86_elf_aligned_common (FILE *file,
4104 const char *name, unsigned HOST_WIDE_INT size,
4107 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4108 && size > (unsigned int)ix86_section_threshold)
4109 fprintf (file, ".largecomm\t");
4111 fprintf (file, "%s", COMMON_ASM_OP);
4112 assemble_name (file, name);
4113 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4114 size, align / BITS_PER_UNIT);
4118 /* Utility function for targets to use in implementing
4119 ASM_OUTPUT_ALIGNED_BSS. */
/* Picks .lbss for medium-model objects above the section threshold,
   otherwise the regular bss section; then aligns, emits the object
   label (via ASM_DECLARE_OBJECT_NAME when available) and reserves
   SIZE bytes (at least 1, so the label refers to real storage).  */
4122 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4123 const char *name, unsigned HOST_WIDE_INT size,
4126 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4127 && size > (unsigned int)ix86_section_threshold)
4128 switch_to_section (get_named_section (decl, ".lbss", 0));
4130 switch_to_section (bss_section);
4131 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4132 #ifdef ASM_DECLARE_OBJECT_NAME
4133 last_assemble_variable_decl = decl;
4134 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4136 /* Standard thing is just output label for the object. */
4137 ASM_OUTPUT_LABEL (file, name);
4138 #endif /* ASM_DECLARE_OBJECT_NAME */
4139 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set target-specific optimization defaults for -O LEVEL.
   Values set to 2 are "unset" markers resolved later in
   override_options once TARGET_64BIT is known.  NOTE(review): the
   conditionals guarding several assignments are elided in this
   excerpt (original numbering is non-contiguous).  */
4143 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4145 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4146 make the problem with not enough registers even worse. */
4147 #ifdef INSN_SCHEDULING
4149 flag_schedule_insns = 0;
4153 /* The Darwin libraries never set errno, so we might as well
4154 avoid calling them when that's the only reason we would. */
4155 flag_errno_math = 0;
4157 /* The default values of these switches depend on the TARGET_64BIT
4158 that is not known at this moment. Mark these values with 2 and
4159 let user the to override these. In case there is no command line option
4160 specifying them, we will set the defaults in override_options. */
4162 flag_omit_frame_pointer = 2;
4163 flag_pcc_struct_return = 2;
4164 flag_asynchronous_unwind_tables = 2;
4165 flag_vect_cost_model = 1;
4166 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4167 SUBTARGET_OPTIMIZATION_OPTIONS;
4171 /* Decide whether we can make a sibling call to a function. DECL is the
4172 declaration of the function being targeted by the call and EXP is the
4173 CALL_EXPR representing the call. */
/* Returns false when any of the checks below rejects the sibcall;
   the `return false'/`return true' statements themselves are elided
   in this excerpt (original numbering is non-contiguous).  */
4176 ix86_function_ok_for_sibcall (tree decl, tree exp)
4181 /* If we are generating position-independent code, we cannot sibcall
4182 optimize any indirect call, or a direct call to a global function,
4183 as the PLT requires %ebx be live. */
4184 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Recover the callee's function type from the CALL_EXPR's function
   expression, stripping a level of pointer if present.  */
4191 func = TREE_TYPE (CALL_EXPR_FN (exp));
4192 if (POINTER_TYPE_P (func))
4193 func = TREE_TYPE (func);
4196 /* Check that the return value locations are the same. Like
4197 if we are returning floats on the 80387 register stack, we cannot
4198 make a sibcall from a function that doesn't return a float to a
4199 function that does or, conversely, from a function that does return
4200 a float to a function that doesn't; the necessary stack adjustment
4201 would not be executed. This is also the place we notice
4202 differences in the return value ABI. Note that it is ok for one
4203 of the functions to have void return type as long as the return
4204 value of the other is passed in a register. */
4205 a = ix86_function_value (TREE_TYPE (exp), func, false);
4206 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4208 if (STACK_REG_P (a) || STACK_REG_P (b))
4210 if (!rtx_equal_p (a, b))
4213 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4215 else if (!rtx_equal_p (a, b))
4218 /* If this call is indirect, we'll need to be able to use a call-clobbered
4219 register for the address of the target function. Make sure that all
4220 such registers are not used for passing parameters. */
4221 if (!decl && !TARGET_64BIT)
4225 /* We're looking at the CALL_EXPR, we need the type of the function. */
4226 type = CALL_EXPR_FN (exp); /* pointer expression */
4227 type = TREE_TYPE (type); /* pointer type */
4228 type = TREE_TYPE (type); /* function type */
4230 if (ix86_function_regparm (type, NULL) >= 3)
4232 /* ??? Need to count the actual number of registers to be used,
4233 not the possible number of registers. Fix later. */
4238 /* Dllimport'd functions are also called indirectly. */
4239 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4241 && decl && DECL_DLLIMPORT_P (decl)
4242 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4245 /* If we need to align the outgoing stack, then sibcalling would
4246 unalign the stack, which may break the called function. */
4247 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4250 /* Otherwise okay. That also includes certain types of indirect calls. */
4254 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4255 calling convention attributes;
4256 arguments as in struct attribute_spec.handler. */
/* Validates the attribute named NAME on *NODE: rejects non-function
   targets, checks regparm's integer argument, and reports conflicts
   between mutually-exclusive conventions.  `return NULL_TREE' and
   several 64-bit-mode guards are elided in this excerpt.  */
4259 ix86_handle_cconv_attribute (tree *node, tree name,
4261 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function types/decls.  */
4264 if (TREE_CODE (*node) != FUNCTION_TYPE
4265 && TREE_CODE (*node) != METHOD_TYPE
4266 && TREE_CODE (*node) != FIELD_DECL
4267 && TREE_CODE (*node) != TYPE_DECL)
4269 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4271 *no_add_attrs = true;
4275 /* Can combine regparm with all attributes but fastcall. */
4276 if (is_attribute_p ("regparm", name))
4280 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4282 error ("fastcall and regparm attributes are not compatible");
/* regparm takes a single integer constant argument, bounded by
   REGPARM_MAX.  */
4285 cst = TREE_VALUE (args);
4286 if (TREE_CODE (cst) != INTEGER_CST)
4288 warning (OPT_Wattributes,
4289 "%qE attribute requires an integer constant argument",
4291 *no_add_attrs = true;
4293 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4295 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4297 *no_add_attrs = true;
4305 /* Do not warn when emulating the MS ABI. */
4306 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4307 warning (OPT_Wattributes, "%qE attribute ignored",
4309 *no_add_attrs = true;
4313 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4314 if (is_attribute_p ("fastcall", name))
4316 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4318 error ("fastcall and cdecl attributes are not compatible");
4320 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4322 error ("fastcall and stdcall attributes are not compatible");
4324 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4326 error ("fastcall and regparm attributes are not compatible");
4330 /* Can combine stdcall with fastcall (redundant), regparm and
4332 else if (is_attribute_p ("stdcall", name))
4334 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4336 error ("stdcall and cdecl attributes are not compatible");
4338 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4340 error ("stdcall and fastcall attributes are not compatible");
4344 /* Can combine cdecl with regparm and sseregparm. */
4345 else if (is_attribute_p ("cdecl", name))
4347 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4349 error ("stdcall and cdecl attributes are not compatible");
4351 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4353 error ("fastcall and cdecl attributes are not compatible");
4357 /* Can combine sseregparm with all attributes. */
4362 /* Return 0 if the attributes for two types are incompatible, 1 if they
4363 are compatible, and 2 if they are nearly compatible (which causes a
4364 warning to be generated). */
/* The `return' statements for each mismatch case are elided in this
   excerpt; only the comparison conditions are visible.  */
4367 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4369 /* Check for mismatch of non-default calling convention. */
4370 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Only function/method types carry calling-convention attributes.  */
4372 if (TREE_CODE (type1) != FUNCTION_TYPE
4373 && TREE_CODE (type1) != METHOD_TYPE)
4376 /* Check for mismatched fastcall/regparm types. */
4377 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4378 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4379 || (ix86_function_regparm (type1, NULL)
4380 != ix86_function_regparm (type2, NULL)))
4383 /* Check for mismatched sseregparm types. */
4384 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4385 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4388 /* Check for mismatched return types (cdecl vs stdcall). */
4389 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4390 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4396 /* Return the regparm value for a function with the indicated TYPE and DECL.
4397 DECL may be NULL when calling function indirectly
4398 or considering a libcall. */
/* NOTE(review): the TARGET_64BIT guard, loop bodies and several braces
   are elided in this excerpt (original numbering is non-contiguous).  */
4401 ix86_function_regparm (const_tree type, const_tree decl)
/* Latch so the nested-function regparm(3) error is emitted once.  */
4406 static bool error_issued;
/* 64-bit: register-parameter count is fixed by the ABI in use.  */
4409 return (ix86_function_type_abi (type) == SYSV_ABI
4410 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
/* 32-bit: start from the -mregparm setting, then let an explicit
   regparm attribute override it.  */
4412 regparm = ix86_regparm;
4413 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4417 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4419 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4421 /* We can't use regparm(3) for nested functions because
4422 these pass static chain pointer in %ecx register. */
4423 if (!error_issued && regparm == 3
4424 && decl_function_context (decl)
4425 && !DECL_NO_STATIC_CHAIN (decl))
4427 error ("nested functions are limited to 2 register parameters");
4428 error_issued = true;
/* fastcall implies a fixed register-passing scheme; the returned
   count for it is elided in this excerpt.  */
4436 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4439 /* Use register calling convention for local functions when possible. */
4441 && TREE_CODE (decl) == FUNCTION_DECL
4445 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4446 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4449 int local_regparm, globals = 0, regno;
4452 /* Make sure no regparm register is taken by a
4453 fixed register variable. */
4454 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4455 if (fixed_regs[local_regparm])
4458 /* We can't use regparm(3) for nested functions as these use
4459 static chain pointer in third argument. */
4460 if (local_regparm == 3
4461 && decl_function_context (decl)
4462 && !DECL_NO_STATIC_CHAIN (decl))
4465 /* If the function realigns its stackpointer, the prologue will
4466 clobber %ecx. If we've already generated code for the callee,
4467 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4468 scanning the attributes for the self-realigning property. */
4469 f = DECL_STRUCT_FUNCTION (decl);
4470 /* Since current internal arg pointer won't conflict with
4471 parameter passing regs, so no need to change stack
4472 realignment and adjust regparm number.
4474 Each fixed register usage increases register pressure,
4475 so less registers should be used for argument passing.
4476 This functionality can be overriden by an explicit
4478 for (regno = 0; regno <= DI_REG; regno++)
4479 if (fixed_regs[regno])
/* Reduce the local regparm count by the number of globally fixed
   registers, never going below zero.  */
4483 = globals < local_regparm ? local_regparm - globals : 0;
4485 if (local_regparm > regparm)
4486 regparm = local_regparm;
4493 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4494 DFmode (2) arguments in SSE registers for a function with the
4495 indicated TYPE and DECL. DECL may be NULL when calling function
4496 indirectly or considering a libcall. Otherwise return 0. */
/* The return statements and the !TARGET_SSE guard around the error
   branch are elided in this excerpt.  */
4499 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* This path is 32-bit only; 64-bit SSE passing is decided by the ABI.  */
4501 gcc_assert (!TARGET_64BIT);
4503 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4504 by the sseregparm attribute. */
4505 if (TARGET_SSEREGPARM
4506 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE enabled cannot be honored — diagnose with
   the decl when available, else the type.  */
4513 error ("Calling %qD with attribute sseregparm without "
4514 "SSE/SSE2 enabled", decl);
4516 error ("Calling %qT with attribute sseregparm without "
4517 "SSE/SSE2 enabled", type);
4525 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4526 (and DFmode for SSE2) arguments in SSE registers. */
4527 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4529 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4530 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4532 return TARGET_SSE2 ? 2 : 1;
4538 /* Return true if EAX is live at the start of the function. Used by
4539 ix86_expand_prologue to determine if we need special help before
4540 calling allocate_stack_worker. */
4543 ix86_eax_live_at_start_p (void)
4545 /* Cheat. Don't bother working forward from ix86_function_regparm
4546 to the function type to whether an actual argument is located in
4547 eax. Instead just look at cfg info, which is still close enough
4548 to correct at this point. This gives false positives for broken
4549 functions that might use uninitialized data that happens to be
4550 allocated in eax, but who cares? */
/* Register 0 is %eax; test its membership in the entry block's
   live-out set from the dataflow framework.  */
4551 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4554 /* Value is the number of bytes of arguments automatically
4555 popped when returning from a subroutine call.
4556 FUNDECL is the declaration node of the function (as a tree),
4557 FUNTYPE is the data type of the function (as a tree),
4558 or for a library call it is an identifier node for the subroutine name.
4559 SIZE is the number of bytes of arguments passed on the stack.
4561 On the 80386, the RTD insn may be used to pop them if the number
4562 of args is fixed, but if the number is variable then the caller
4563 must pop them all. RTD can't be used for library calls now
4564 because the library is compiled with the Unix compiler.
4565 Use of RTD is a selectable option, since it is incompatible with
4566 standard Unix calling sequences. If the option is not selected,
4567 the caller must always pop the args.
4569 The attribute stdcall is equivalent to RTD on a per module basis. */
/* Several `return' statements (returning SIZE or 0) are elided in
   this excerpt; only the decision conditions are visible.  */
4572 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4576 /* None of the 64-bit ABIs pop arguments. */
/* -mrtd applies only to real decls, never to library-call
   identifier nodes.  */
4580 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4582 /* Cdecl functions override -mrtd, and never pop the stack. */
4583 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4585 /* Stdcall and fastcall functions will pop the stack if not
4587 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4588 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* RTD pops only for fixed-argument functions.  */
4591 if (rtd && ! stdarg_p (funtype))
4595 /* Lose any fake structure return argument if it is passed on the stack. */
4596 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4597 && !KEEP_AGGREGATE_RETURN_POINTER)
4599 int nregs = ix86_function_regparm (funtype, fundecl);
4601 return GET_MODE_SIZE (Pmode);
4607 /* Argument support functions. */
4609 /* Return true when register may be used to pass function parameters. */
/* The TARGET_64BIT branch structure and several `return' statements
   are elided in this excerpt.  */
4611 ix86_function_arg_regno_p (int regno)
4614 const int *parm_regs;
/* 32-bit cases: integer regparm registers, plus MMX/SSE argument
   registers when those ISAs are enabled.  */
4619 return (regno < REGPARM_MAX
4620 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4622 return (regno < REGPARM_MAX
4623 || (TARGET_MMX && MMX_REGNO_P (regno)
4624 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4625 || (TARGET_SSE && SSE_REGNO_P (regno)
4626 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4631 if (SSE_REGNO_P (regno) && TARGET_SSE)
4636 if (TARGET_SSE && SSE_REGNO_P (regno)
4637 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4641 /* TODO: The function should depend on current function ABI but
4642 builtins.c would need updating then. Therefore we use the
4645 /* RAX is used as hidden argument to va_arg functions. */
4646 if (ix86_abi == SYSV_ABI && regno == AX_REG)
/* 64-bit: scan the ABI-specific integer parameter register table.  */
4649 if (ix86_abi == MS_ABI)
4650 parm_regs = x86_64_ms_abi_int_parameter_registers;
4652 parm_regs = x86_64_int_parameter_registers;
4653 for (i = 0; i < (ix86_abi == MS_ABI
4654 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
4655 if (regno == parm_regs[i])
4660 /* Return if we do not know how to pass TYPE solely in registers. */
4663 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
/* Defer first to the generic variable-size/padding check.  */
4665 if (must_pass_in_stack_var_size_or_pad (mode, type))
4668 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4669 The layout_type routine is crafty and tries to trick us into passing
4670 currently unsupported vector types on the stack by using TImode. */
4671 return (!TARGET_64BIT && mode == TImode
4672 && type && TREE_CODE (type) != VECTOR_TYPE);
4675 /* It returns the size, in bytes, of the area reserved for arguments passed
4676 in registers for the function represented by fndecl dependent to the used
/* ...ABI (comment continuation elided).  MS ABI reserves a shadow
   space for register arguments; the returned byte counts themselves
   are elided in this excerpt.  */
4679 ix86_reg_parm_stack_space (const_tree fndecl)
4681 enum calling_abi call_abi = SYSV_ABI;
/* FNDECL may be a decl or (presumably, via the else branch) a function
   type — TODO confirm against callers.  */
4682 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4683 call_abi = ix86_function_abi (fndecl);
4685 call_abi = ix86_function_type_abi (fndecl);
4686 if (call_abi == MS_ABI)
4691 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* ...call abi (comment continuation elided).  Starts from the global
   ix86_abi and flips it when the type carries the opposite ABI
   attribute; the return statements are elided in this excerpt.  */
4694 ix86_function_type_abi (const_tree fntype)
4696 if (TARGET_64BIT && fntype != NULL)
4698 enum calling_abi abi = ix86_abi;
4699 if (abi == SYSV_ABI)
4701 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4704 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* Return the calling ABI of function decl FNDECL by classifying its
   type.  NOTE(review): a guard branch between the signature and the
   visible return is elided in this excerpt.  */
4711 static enum calling_abi
4712 ix86_function_abi (const_tree fndecl)
4716 return ix86_function_type_abi (TREE_TYPE (fndecl));
4719 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* ...current function's call abi (comment continuation elided).
   Without a current function, or in 32-bit mode, the fallback return
   (elided here) applies.  */
4722 ix86_cfun_abi (void)
4724 if (! cfun || ! TARGET_64BIT)
4726 return cfun->machine->call_abi;
/* Forward declaration; init_regs re-derives register sets after an
   ABI switch (see ix86_maybe_switch_abi below).  */
4730 extern void init_regs (void);
4732 /* Implementation of call abi switching target hook. Specific to FNDECL
4733 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4734 for more details. */
4736 ix86_call_abi_override (const_tree fndecl)
/* NULL decl means an indirect call/libcall context: fall back to the
   command-line default ABI.  */
4738 if (fndecl == NULL_TREE)
4739 cfun->machine->call_abi = ix86_abi;
4741 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4744 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4745 re-initialization of init_regs each time we switch function context since
4746 this is needed only during RTL expansion. */
4748 ix86_maybe_switch_abi (void)
/* %esi is call-used under SysV but call-saved under the MS ABI, so its
   current call_used_regs entry reveals which ABI the register tables
   were last initialized for; re-init only on mismatch (the init_regs
   call itself is elided in this excerpt).  */
4751 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4755 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4756 for a call to a function whose data type is FNTYPE.
4757 For a library call, FNTYPE is 0. */
/* NOTE(review): the TARGET_64BIT/32-bit branch structure and several
   braces are elided in this excerpt (original numbering is
   non-contiguous).  */
4760 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4761 tree fntype, /* tree ptr for function decl */
4762 rtx libname, /* SYMBOL_REF of library name or 0 */
/* Local cgraph info lets us use the more aggressive register
   conventions for purely local functions.  */
4765 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4766 memset (cum, 0, sizeof (*cum));
4769 cum->call_abi = ix86_function_abi (fndecl);
4771 cum->call_abi = ix86_function_type_abi (fntype);
4772 /* Set up the number of registers to use for passing arguments. */
4774 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4775 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it");
4776 cum->nregs = ix86_regparm;
/* When the call's ABI differs from the compile-time default, pick the
   register counts of the *other* ABI.  */
4779 if (cum->call_abi != ix86_abi)
4780 cum->nregs = (ix86_abi != SYSV_ABI
4781 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4785 cum->sse_nregs = SSE_REGPARM_MAX;
4788 if (cum->call_abi != ix86_abi)
4789 cum->sse_nregs = (ix86_abi != SYSV_ABI
4790 ? X86_64_SSE_REGPARM_MAX
4791 : X86_64_MS_SSE_REGPARM_MAX);
4795 cum->mmx_nregs = MMX_REGPARM_MAX;
/* Warn once per call site about ABI-affecting vector arguments.  */
4796 cum->warn_avx = true;
4797 cum->warn_sse = true;
4798 cum->warn_mmx = true;
4800 /* Because type might mismatch in between caller and callee, we need to
4801 use actual type of function for local calls.
4802 FIXME: cgraph_analyze can be told to actually record if function uses
4803 va_start so for local functions maybe_vaarg can be made aggressive
4805 FIXME: once typesytem is fixed, we won't need this code anymore. */
4807 fntype = TREE_TYPE (fndecl);
4808 cum->maybe_vaarg = (fntype
4809 ? (!prototype_p (fntype) || stdarg_p (fntype))
4814 /* If there are variable arguments, then we won't pass anything
4815 in registers in 32-bit mode. */
4816 if (stdarg_p (fntype))
4827 /* Use ecx and edx registers if function has fastcall attribute,
4828 else look for regparm information. */
4831 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4837 cum->nregs = ix86_function_regparm (fntype, fndecl);
4840 /* Set up the number of SSE registers used for passing SFmode
4841 and DFmode arguments. Warn for mismatching ABI. */
4842 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4846 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4847 But in the case of vector types, it is some vector mode.
4849 When we have only some of our vector isa extensions enabled, then there
4850 are some modes for which vector_mode_supported_p is false. For these
4851 modes, the generic vector support in gcc will choose some non-vector mode
4852 in order to implement the type. By computing the natural mode, we'll
4853 select the proper ABI location for the operand and not depend on whatever
4854 the middle-end decides to do with these vector types.
4856 The midde-end can't deal with the vector types > 16 bytes. In this
4857 case, we return the original mode and warn ABI change if CUM isn't
/* ...NULL (comment continuation elided).  The braces and the final
   `return mode' are elided in this excerpt.  */
4860 static enum machine_mode
4861 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4863 enum machine_mode mode = TYPE_MODE (type);
/* Only reconstruct a mode for vector types the middle-end lowered to
   a non-vector (e.g. BLK) mode.  */
4865 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4867 HOST_WIDE_INT size = int_size_in_bytes (type);
4868 if ((size == 8 || size == 16 || size == 32)
4869 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4870 && TYPE_VECTOR_SUBPARTS (type) > 1)
4872 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Start the search at the first float or int vector mode, matching
   the element type's class.  */
4874 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4875 mode = MIN_MODE_VECTOR_FLOAT;
4877 mode = MIN_MODE_VECTOR_INT;
4879 /* Get the mode which has this inner mode and number of units. */
4880 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4881 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4882 && GET_MODE_INNER (mode) == innermode)
/* A 32-byte match without AVX changes the ABI; warn once.  */
4884 if (size == 32 && !TARGET_AVX)
4886 static bool warnedavx;
4893 warning (0, "AVX vector argument without AVX "
4894 "enabled changes the ABI");
4896 return TYPE_MODE (type);
4909 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4910 this may not agree with the mode that the type system has chosen for the
4911 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4912 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4915 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
/* Non-BLK original mode: a plain REG in that mode suffices.  */
4920 if (orig_mode != BLKmode)
4921 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap a single REG in MODE at offset 0 inside a PARALLEL.  */
4924 tmp = gen_rtx_REG (mode, regno);
4925 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4926 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4932 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4933 of this code is to classify each 8bytes of incoming argument by the register
4934 class and assign registers accordingly. */
4936 /* Return the union class of CLASS1 and CLASS2.
4937 See the x86-64 PS ABI for details. */
/* Implements the psABI's pairwise class-merge rules; the `return'
   values for rules #1 and #2 are on the elided lines following each
   condition.  */
4939 static enum x86_64_reg_class
4940 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4942 /* Rule #1: If both classes are equal, this is the resulting class. */
4943 if (class1 == class2)
4946 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4948 if (class1 == X86_64_NO_CLASS)
4950 if (class2 == X86_64_NO_CLASS)
4953 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4954 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4955 return X86_64_MEMORY_CLASS;
4957 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays INTEGERSI (both halves fit in 32 bits);
   any other INTEGER pairing widens to the full INTEGER class.  */
4958 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4959 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4960 return X86_64_INTEGERSI_CLASS;
4961 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4962 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4963 return X86_64_INTEGER_CLASS;
4965 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4967 if (class1 == X86_64_X87_CLASS
4968 || class1 == X86_64_X87UP_CLASS
4969 || class1 == X86_64_COMPLEX_X87_CLASS
4970 || class2 == X86_64_X87_CLASS
4971 || class2 == X86_64_X87UP_CLASS
4972 || class2 == X86_64_COMPLEX_X87_CLASS)
4973 return X86_64_MEMORY_CLASS;
4975 /* Rule #6: Otherwise class SSE is used. */
4976 return X86_64_SSE_CLASS;
4979 /* Classify the argument of type TYPE and mode MODE.
4980 CLASSES will be filled by the register class used to pass each word
4981 of the operand. The number of words is returned. In case the parameter
4982 should be passed in memory, 0 is returned. As a special case for zero
4983 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4985 BIT_OFFSET is used internally for handling records and specifies offset
4986 of the offset in bits modulo 256 to avoid overflow cases.
4988 See the x86-64 PS ABI for details.
4992 classify_argument (enum machine_mode mode, const_tree type,
4993 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4995 HOST_WIDE_INT bytes =
4996 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4997 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4999 /* Variable sized entities are always passed/returned in memory. */
5003 if (mode != VOIDmode
5004 && targetm.calls.must_pass_in_stack (mode, type))
5007 if (type && AGGREGATE_TYPE_P (type))
5011 enum x86_64_reg_class subclasses[MAX_CLASSES];
5013 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5017 for (i = 0; i < words; i++)
5018 classes[i] = X86_64_NO_CLASS;
5020 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5021 signalize memory class, so handle it as special case. */
5024 classes[0] = X86_64_NO_CLASS;
5028 /* Classify each field of record and merge classes. */
5029 switch (TREE_CODE (type))
5032 /* And now merge the fields of structure. */
5033 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5035 if (TREE_CODE (field) == FIELD_DECL)
5039 if (TREE_TYPE (field) == error_mark_node)
5042 /* Bitfields are always classified as integer. Handle them
5043 early, since later code would consider them to be
5044 misaligned integers. */
5045 if (DECL_BIT_FIELD (field))
5047 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5048 i < ((int_bit_position (field) + (bit_offset % 64))
5049 + tree_low_cst (DECL_SIZE (field), 0)
5052 merge_classes (X86_64_INTEGER_CLASS,
5059 type = TREE_TYPE (field);
5061 /* Flexible array member is ignored. */
5062 if (TYPE_MODE (type) == BLKmode
5063 && TREE_CODE (type) == ARRAY_TYPE
5064 && TYPE_SIZE (type) == NULL_TREE
5065 && TYPE_DOMAIN (type) != NULL_TREE
5066 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5071 if (!warned && warn_psabi)
5074 inform (input_location,
5075 "The ABI of passing struct with"
5076 " a flexible array member has"
5077 " changed in GCC 4.4");
5081 num = classify_argument (TYPE_MODE (type), type,
5083 (int_bit_position (field)
5084 + bit_offset) % 256);
5087 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5088 for (i = 0; i < num && (i + pos) < words; i++)
5090 merge_classes (subclasses[i], classes[i + pos]);
5097 /* Arrays are handled as small records. */
5100 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5101 TREE_TYPE (type), subclasses, bit_offset);
5105 /* The partial classes are now full classes. */
5106 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5107 subclasses[0] = X86_64_SSE_CLASS;
5108 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5109 && !((bit_offset % 64) == 0 && bytes == 4))
5110 subclasses[0] = X86_64_INTEGER_CLASS;
5112 for (i = 0; i < words; i++)
5113 classes[i] = subclasses[i % num];
5118 case QUAL_UNION_TYPE:
5119 /* Unions are similar to RECORD_TYPE but offset is always 0.
5121 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5123 if (TREE_CODE (field) == FIELD_DECL)
5127 if (TREE_TYPE (field) == error_mark_node)
5130 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5131 TREE_TYPE (field), subclasses,
5135 for (i = 0; i < num; i++)
5136 classes[i] = merge_classes (subclasses[i], classes[i]);
5147 /* When size > 16 bytes, if the first one isn't
5148 X86_64_SSE_CLASS or any other ones aren't
5149 X86_64_SSEUP_CLASS, everything should be passed in
5151 if (classes[0] != X86_64_SSE_CLASS)
5154 for (i = 1; i < words; i++)
5155 if (classes[i] != X86_64_SSEUP_CLASS)
5159 /* Final merger cleanup. */
5160 for (i = 0; i < words; i++)
5162 /* If one class is MEMORY, everything should be passed in
5164 if (classes[i] == X86_64_MEMORY_CLASS)
5167 /* The X86_64_SSEUP_CLASS should be always preceded by
5168 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5169 if (classes[i] == X86_64_SSEUP_CLASS
5170 && classes[i - 1] != X86_64_SSE_CLASS
5171 && classes[i - 1] != X86_64_SSEUP_CLASS)
5173 /* The first one should never be X86_64_SSEUP_CLASS. */
5174 gcc_assert (i != 0);
5175 classes[i] = X86_64_SSE_CLASS;
5178 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5179 everything should be passed in memory. */
5180 if (classes[i] == X86_64_X87UP_CLASS
5181 && (classes[i - 1] != X86_64_X87_CLASS))
5185 /* The first one should never be X86_64_X87UP_CLASS. */
5186 gcc_assert (i != 0);
5187 if (!warned && warn_psabi)
5190 inform (input_location,
5191 "The ABI of passing union with long double"
5192 " has changed in GCC 4.4");
5200 /* Compute alignment needed. We align all types to natural boundaries with
5201 exception of XFmode that is aligned to 64bits. */
5202 if (mode != VOIDmode && mode != BLKmode)
5204 int mode_alignment = GET_MODE_BITSIZE (mode);
5207 mode_alignment = 128;
5208 else if (mode == XCmode)
5209 mode_alignment = 256;
5210 if (COMPLEX_MODE_P (mode))
5211 mode_alignment /= 2;
5212 /* Misaligned fields are always returned in memory. */
5213 if (bit_offset % mode_alignment)
5217 /* for V1xx modes, just use the base mode */
5218 if (VECTOR_MODE_P (mode) && mode != V1DImode
5219 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5220 mode = GET_MODE_INNER (mode);
5222 /* Classification of atomic types. */
5227 classes[0] = X86_64_SSE_CLASS;
5230 classes[0] = X86_64_SSE_CLASS;
5231 classes[1] = X86_64_SSEUP_CLASS;
5241 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5245 classes[0] = X86_64_INTEGERSI_CLASS;
5248 else if (size <= 64)
5250 classes[0] = X86_64_INTEGER_CLASS;
5253 else if (size <= 64+32)
5255 classes[0] = X86_64_INTEGER_CLASS;
5256 classes[1] = X86_64_INTEGERSI_CLASS;
5259 else if (size <= 64+64)
5261 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5269 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5273 /* OImode shouldn't be used directly. */
5278 if (!(bit_offset % 64))
5279 classes[0] = X86_64_SSESF_CLASS;
5281 classes[0] = X86_64_SSE_CLASS;
5284 classes[0] = X86_64_SSEDF_CLASS;
5287 classes[0] = X86_64_X87_CLASS;
5288 classes[1] = X86_64_X87UP_CLASS;
5291 classes[0] = X86_64_SSE_CLASS;
5292 classes[1] = X86_64_SSEUP_CLASS;
5295 classes[0] = X86_64_SSE_CLASS;
5296 if (!(bit_offset % 64))
5302 if (!warned && warn_psabi)
5305 inform (input_location,
5306 "The ABI of passing structure with complex float"
5307 " member has changed in GCC 4.4");
5309 classes[1] = X86_64_SSESF_CLASS;
5313 classes[0] = X86_64_SSEDF_CLASS;
5314 classes[1] = X86_64_SSEDF_CLASS;
5317 classes[0] = X86_64_COMPLEX_X87_CLASS;
5320 /* This mode is larger than 16 bytes. */
5328 classes[0] = X86_64_SSE_CLASS;
5329 classes[1] = X86_64_SSEUP_CLASS;
5330 classes[2] = X86_64_SSEUP_CLASS;
5331 classes[3] = X86_64_SSEUP_CLASS;
5339 classes[0] = X86_64_SSE_CLASS;
5340 classes[1] = X86_64_SSEUP_CLASS;
5347 classes[0] = X86_64_SSE_CLASS;
5353 gcc_assert (VECTOR_MODE_P (mode));
5358 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5360 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5361 classes[0] = X86_64_INTEGERSI_CLASS;
5363 classes[0] = X86_64_INTEGER_CLASS;
5364 classes[1] = X86_64_INTEGER_CLASS;
5365 return 1 + (bytes > 8);
5369 /* Examine the argument and return set number of register required in each
5370 class. Return 0 iff parameter should be passed in memory. */
5372 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5373 int *int_nregs, int *sse_nregs)
5375 enum x86_64_reg_class regclass[MAX_CLASSES];
/* Split the value into 8-byte chunks, each tagged with an x86-64 ABI
   register class; classify_argument returns 0 when the value must be
   passed in memory.  */
5376 int n = classify_argument (mode, type, regclass, 0);
/* Tally the registers required, scanning the chunks from last to first.  */
5382 for (n--; n >= 0; n--)
5383 switch (regclass[n])
/* Integer-classified chunks consume general-purpose registers
   (reported through *int_nregs).  */
5385 case X86_64_INTEGER_CLASS:
5386 case X86_64_INTEGERSI_CLASS:
/* SSE-classified chunks consume XMM registers (reported through
   *sse_nregs).  */
5389 case X86_64_SSE_CLASS:
5390 case X86_64_SSESF_CLASS:
5391 case X86_64_SSEDF_CLASS:
/* NO_CLASS and SSEUP chunks add no register requirement of their own;
   SSEUP only widens a preceding SSE chunk.  */
5394 case X86_64_NO_CLASS:
5395 case X86_64_SSEUP_CLASS:
5397 case X86_64_X87_CLASS:
5398 case X86_64_X87UP_CLASS:
/* Complex x87 values can only be *returned* in registers (a pair of
   x87 regs); as an argument they must go to memory, hence 0.  */
5402 case X86_64_COMPLEX_X87_CLASS:
5403 return in_return ? 2 : 0;
5404 case X86_64_MEMORY_CLASS:
5410 /* Construct container for the argument used by GCC interface. See
5411 FUNCTION_ARG for the detailed description. */
5414 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5415 const_tree type, int in_return, int nintregs, int nsseregs,
5416 const int *intreg, int sse_regno)
5418 /* The following variables hold the static issued_error state. */
/* Emit each "register ... with ... disabled" diagnostic at most once
   per compilation.  */
5419 static bool issued_sse_arg_error;
5420 static bool issued_sse_ret_error;
5421 static bool issued_x87_ret_error;
5423 enum machine_mode tmpmode;
/* Size of the value in bytes; BLKmode values take their size from the
   type, everything else from the mode.  */
5425 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5426 enum x86_64_reg_class regclass[MAX_CLASSES];
5430 int needed_sseregs, needed_intregs;
5431 rtx exp[MAX_CLASSES];
/* Classify the value into per-8-byte register classes; a result of 0
   means it must live in memory.  */
5434 n = classify_argument (mode, type, regclass, 0);
5437 if (!examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of the right kind left: pass in memory.  */
5440 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5443 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5444 some less clueful developer tries to use floating-point anyway. */
5445 if (needed_sseregs && !TARGET_SSE)
5449 if (!issued_sse_ret_error)
5451 error ("SSE register return with SSE disabled");
5452 issued_sse_ret_error = true;
5455 else if (!issued_sse_arg_error)
5457 error ("SSE register argument with SSE disabled");
5458 issued_sse_arg_error = true;
5463 /* Likewise, error if the ABI requires us to return values in the
5464 x87 registers and the user specified -mno-80387. */
5465 if (!TARGET_80387 && in_return)
5466 for (i = 0; i < n; i++)
5467 if (regclass[i] == X86_64_X87_CLASS
5468 || regclass[i] == X86_64_X87UP_CLASS
5469 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5471 if (!issued_x87_ret_error)
5473 error ("x87 register return with x87 disabled");
5474 issued_x87_ret_error = true;
5479 /* First construct simple cases. Avoid SCmode, since we want to use
5480 single register to pass this type. */
5481 if (n == 1 && mode != SCmode)
5482 switch (regclass[0])
5484 case X86_64_INTEGER_CLASS:
5485 case X86_64_INTEGERSI_CLASS:
5486 return gen_rtx_REG (mode, intreg[0]);
5487 case X86_64_SSE_CLASS:
5488 case X86_64_SSESF_CLASS:
5489 case X86_64_SSEDF_CLASS:
5490 if (mode != BLKmode)
5491 return gen_reg_or_parallel (mode, orig_mode,
5492 SSE_REGNO (sse_regno))
5494 case X86_64_X87_CLASS:
5495 case X86_64_COMPLEX_X87_CLASS:
5496 return gen_rtx_REG (mode, FIRST_STACK_REG);
5497 case X86_64_NO_CLASS:
5498 /* Zero sized array, struct or class. */
/* Two chunks of SSE + SSEUP form one 16-byte value in a single XMM
   register.  */
5503 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5504 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5505 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
/* SSE + 3*SSEUP: a 32-byte (AVX) value in a single YMM register.  */
5507 && regclass[0] == X86_64_SSE_CLASS
5508 && regclass[1] == X86_64_SSEUP_CLASS
5509 && regclass[2] == X86_64_SSEUP_CLASS
5510 && regclass[3] == X86_64_SSEUP_CLASS
5512 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
/* X87 + X87UP is a single 80-bit long double in st(0).  */
5515 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5516 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* Two consecutive integer registers can carry a 16-byte integer-ish
   value directly when the register pair is adjacent.  */
5517 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5518 && regclass[1] == X86_64_INTEGER_CLASS
5519 && (mode == CDImode || mode == TImode || mode == TFmode)
5520 && intreg[0] + 1 == intreg[1])
5521 return gen_rtx_REG (mode, intreg[0]);
5523 /* Otherwise figure out the entries of the PARALLEL. */
5524 for (i = 0; i < n; i++)
5528 switch (regclass[i])
5530 case X86_64_NO_CLASS:
5532 case X86_64_INTEGER_CLASS:
5533 case X86_64_INTEGERSI_CLASS:
5534 /* Merge TImodes on aligned occasions here too. */
/* For a trailing partial chunk, pick the narrowest integer mode that
   still covers the remaining bytes.  */
5535 if (i * 8 + 8 > bytes)
5536 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5537 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5541 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5542 if (tmpmode == BLKmode)
5544 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5545 gen_rtx_REG (tmpmode, *intreg),
5549 case X86_64_SSESF_CLASS:
5550 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5551 gen_rtx_REG (SFmode,
5552 SSE_REGNO (sse_regno)),
5556 case X86_64_SSEDF_CLASS:
5557 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5558 gen_rtx_REG (DFmode,
5559 SSE_REGNO (sse_regno)),
5563 case X86_64_SSE_CLASS:
/* An SSE chunk followed by SSEUP chunk(s) is emitted as one wider
   SSE register entry rather than several 8-byte pieces.  */
5571 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5581 && regclass[1] == X86_64_SSEUP_CLASS
5582 && regclass[2] == X86_64_SSEUP_CLASS
5583 && regclass[3] == X86_64_SSEUP_CLASS);
5590 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5591 gen_rtx_REG (tmpmode,
5592 SSE_REGNO (sse_regno)),
5601 /* Empty aligned struct, union or class. */
/* Wrap all collected (reg, offset) pairs in a PARALLEL describing the
   scattered register layout of the value.  */
5605 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5606 for (i = 0; i < nexps; i++)
5607 XVECEXP (ret, 0, i) = exp [i];
5611 /* Update the data in CUM to advance over an argument of mode MODE
5612 and data type TYPE. (TYPE is null for libcalls where that information
5613 may not be available.) */
/* 32-bit variant: arguments may consume i386 GPRs (regparm/fastcall),
   SSE registers, or MMX registers, each tracked by its own counter
   triple in CUM (words used, regs left, next regno).  */
5616 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5617 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer argument: consumes WORDS general-purpose registers.  */
5633 cum->words += words;
5634 cum->nregs -= words;
5635 cum->regno += words;
/* Out of GPRs: remaining arguments of this kind go on the stack.  */
5637 if (cum->nregs <= 0)
5645 /* OImode shouldn't be used directly. */
/* float_in_sse controls whether SFmode/DFmode scalars use SSE regs
   (2 = both, 1 = SFmode only) per the sseregparm conventions.  */
5649 if (cum->float_in_sse < 2)
5652 if (cum->float_in_sse < 1)
/* Vector/float passed in an SSE register: aggregates are excluded.  */
5669 if (!type || !AGGREGATE_TYPE_P (type))
5671 cum->sse_words += words;
5672 cum->sse_nregs -= 1;
5673 cum->sse_regno += 1;
5674 if (cum->sse_nregs <= 0)
/* 8-byte vector passed in an MMX register: aggregates are excluded.  */
5687 if (!type || !AGGREGATE_TYPE_P (type))
5689 cum->mmx_words += words;
5690 cum->mmx_nregs -= 1;
5691 cum->mmx_regno += 1;
5692 if (cum->mmx_nregs <= 0)
/* Advance CUM past one argument under the 64-bit SysV ABI.  */
5703 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5704 tree type, HOST_WIDE_INT words, int named)
5706 int int_nregs, sse_nregs;
5708 /* Unnamed 256bit vector mode parameters are passed on stack. */
5709 if (!named && VALID_AVX256_REG_MODE (mode))
/* examine_argument returns 0 when the value goes to memory; then the
   argument only consumes stack words, no registers.  */
5712 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5713 cum->words += words;
/* Registers are consumed only when BOTH register files have room;
   the SysV ABI never splits one argument between regs and stack.  */
5714 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5716 cum->nregs -= int_nregs;
5717 cum->sse_nregs -= sse_nregs;
5718 cum->regno += int_nregs;
5719 cum->sse_regno += sse_nregs;
/* Otherwise the whole argument is passed on the stack.  */
5722 cum->words += words;
/* Advance CUM past one argument under the Microsoft x64 ABI, where
   every by-value argument occupies exactly one 64-bit slot.  */
5726 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5727 HOST_WIDE_INT words)
5729 /* Otherwise, this should be passed indirect. */
/* MS ABI passes only 1/2/4/8-byte values directly; anything else must
   already have been converted to pass-by-reference.  */
5730 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5732 cum->words += words;
/* Top-level argument-advance hook: compute the argument's size and
   dispatch to the ABI-specific worker (MS x64, SysV x86-64, or ia32).  */
5741 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5742 tree type, int named)
5744 HOST_WIDE_INT bytes, words;
/* BLKmode carries no size; take it from the type instead.  */
5746 if (mode == BLKmode)
5747 bytes = int_size_in_bytes (type);
5749 bytes = GET_MODE_SIZE (mode);
/* Round the byte size up to whole words.  */
5750 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Re-derive the natural mode so vectors advance consistently with how
   function_arg will later assign them.  */
5753 mode = type_natural_mode (type, NULL);
5755 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5756 function_arg_advance_ms_64 (cum, bytes, words);
5757 else if (TARGET_64BIT)
5758 function_arg_advance_64 (cum, mode, type, words, named);
5760 function_arg_advance_32 (cum, mode, type, bytes, words);
5763 /* Define where to put the arguments to a function.
5764 Value is zero to push the argument on the stack,
5765 or a hard register in which to store the argument.
5767 MODE is the argument's machine mode.
5768 TYPE is the data type of the argument (as a tree).
5769 This is null for libcalls where that information may
5771 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5772 the preceding args and about the function being called.
5773 NAMED is nonzero if this argument is a named parameter
5774 (otherwise it is an extra parameter matching an ellipsis). */
/* 32-bit worker: returns the register RTX for the argument, or a value
   meaning "push on the stack".  */
5777 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5778 enum machine_mode orig_mode, tree type,
5779 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Emit each missing-ISA warning only once per compilation.  */
5781 static bool warnedsse, warnedmmx;
5783 /* Avoid the AL settings for the Unix64 ABI. */
5784 if (mode == VOIDmode)
/* Integer argument in a GPR, if enough regparm registers remain.  */
5800 if (words <= cum->nregs)
5802 int regno = cum->regno;
5804 /* Fastcall allocates the first two DWORD (SImode) or
5805 smaller arguments to ECX and EDX if it isn't an
5811 || (type && AGGREGATE_TYPE_P (type)))
5814 /* ECX not EAX is the first allocated register. */
5815 if (regno == AX_REG)
5818 return gen_rtx_REG (mode, regno);
/* float_in_sse: 2 allows DFmode+SFmode in SSE regs, 1 only SFmode.  */
5823 if (cum->float_in_sse < 2)
5826 if (cum->float_in_sse < 1)
5830 /* In 32bit, we pass TImode in xmm registers. */
/* SSE-sized vectors: only non-aggregates go in XMM registers.  */
5837 if (!type || !AGGREGATE_TYPE_P (type))
5839 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5842 warning (0, "SSE vector argument without SSE enabled "
5846 return gen_reg_or_parallel (mode, orig_mode,
5847 cum->sse_regno + FIRST_SSE_REG);
5852 /* OImode shouldn't be used directly. */
5861 if (!type || !AGGREGATE_TYPE_P (type))
5864 return gen_reg_or_parallel (mode, orig_mode,
5865 cum->sse_regno + FIRST_SSE_REG);
/* 8-byte vectors: only non-aggregates go in MMX registers.  */
5874 if (!type || !AGGREGATE_TYPE_P (type))
5876 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5879 warning (0, "MMX vector argument without MMX enabled "
5883 return gen_reg_or_parallel (mode, orig_mode,
5884 cum->mmx_regno + FIRST_MMX_REG);
/* 64-bit SysV worker for FUNCTION_ARG: return the register (or register
   PARALLEL) carrying the argument, via construct_container.  */
5893 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5894 enum machine_mode orig_mode, tree type, int named)
5896 /* Handle a hidden AL argument containing number of registers
5897 for varargs x86-64 functions. */
5898 if (mode == VOIDmode)
/* The constant encodes the number of SSE registers used by the call;
   sse_nregs < 0 means the count had to be capped at the ABI maximum.  */
5899 return GEN_INT (cum->maybe_vaarg
5900 ? (cum->sse_nregs < 0
5901 ? (cum->call_abi == ix86_abi
5903 : (ix86_abi != SYSV_ABI
5904 ? X86_64_SSE_REGPARM_MAX
5905 : X86_64_MS_SSE_REGPARM_MAX))
5920 /* Unnamed 256bit vector mode parameters are passed on stack. */
/* Delegate the actual class-by-class register assignment.  */
5926 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5928 &x86_64_int_parameter_registers [cum->regno],
/* Microsoft x64 worker for FUNCTION_ARG: one argument per slot, in
   either a GPR or (for scalar floats) the matching XMM register.  */
5933 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5934 enum machine_mode orig_mode, int named,
5935 HOST_WIDE_INT bytes)
5939 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5940 We use value of -2 to specify that current function call is MSABI. */
5941 if (mode == VOIDmode)
5942 return GEN_INT (-2);
5944 /* If we've run out of registers, it goes on the stack. */
5945 if (cum->nregs == 0)
/* MS ABI: argument slot N always maps to parameter register N.  */
5948 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5950 /* Only floating point modes are passed in anything but integer regs. */
5951 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5954 regno = cum->regno + FIRST_SSE_REG;
5959 /* Unnamed floating parameters are passed in both the
5960 SSE and integer registers. */
/* Build a two-entry PARALLEL so the value is live in both register
   files, as the varargs callee may read either one.  */
5961 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5962 t2 = gen_rtx_REG (mode, regno);
5963 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5964 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5965 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5968 /* Handle aggregated types passed in register. */
5969 if (orig_mode == BLKmode)
/* Small aggregates (<= 8 bytes) travel in an integer register of the
   smallest covering mode.  */
5971 if (bytes > 0 && bytes <= 8)
5972 mode = (bytes > 4 ? DImode : SImode);
5973 if (mode == BLKmode)
5977 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Top-level FUNCTION_ARG hook: compute sizes, normalize vector modes,
   and dispatch to the ABI-specific worker.  */
5981 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5982 tree type, int named)
5984 enum machine_mode mode = omode;
5985 HOST_WIDE_INT bytes, words;
/* BLKmode carries no size; take it from the type instead.  */
5987 if (mode == BLKmode)
5988 bytes = int_size_in_bytes (type);
5990 bytes = GET_MODE_SIZE (mode);
5991 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5993 /* To simplify the code below, represent vector types with a vector mode
5994 even if MMX/SSE are not active. */
5995 if (type && TREE_CODE (type) == VECTOR_TYPE)
5996 mode = type_natural_mode (type, cum);
5998 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5999 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6000 else if (TARGET_64BIT)
6001 return function_arg_64 (cum, mode, omode, type, named);
6003 return function_arg_32 (cum, mode, omode, type, bytes, words);
6006 /* A C expression that indicates when an argument must be passed by
6007 reference. If nonzero for an argument, a copy of that argument is
6008 made in memory and a pointer to the argument is passed instead of
6009 the argument itself. The pointer is passed in whatever way is
6010 appropriate for passing a pointer to that type. */
6013 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6014 enum machine_mode mode ATTRIBUTE_UNUSED,
6015 const_tree type, bool named ATTRIBUTE_UNUSED)
6017 /* See Windows x64 Software Convention. */
6018 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6020 int msize = (int) GET_MODE_SIZE (mode);
6023 /* Arrays are passed by reference. */
6024 if (TREE_CODE (type) == ARRAY_TYPE)
6027 if (AGGREGATE_TYPE_P (type))
6029 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6030 are passed by reference. */
6031 msize = int_size_in_bytes (type);
6035 /* __m128 is passed by reference. */
/* Only power-of-two sizes up to a word pass by value under MS x64.  */
6037 case 1: case 2: case 4: case 8:
/* SysV x86-64: variable-sized types (int_size_in_bytes == -1) must be
   passed by reference since their size is unknown at compile time.  */
6043 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6049 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* (i.e. it is, or recursively contains, a 16-byte-aligned value such as
   an SSE vector).  */
6052 contains_aligned_value_p (tree type)
6054 enum machine_mode mode = TYPE_MODE (type);
/* Direct hit: an SSE-register mode (unless user alignment lowered it
   below 128 bits).  */
6055 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6059 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6061 if (TYPE_ALIGN (type) < 128)
6064 if (AGGREGATE_TYPE_P (type))
6066 /* Walk the aggregates recursively. */
6067 switch (TREE_CODE (type))
6071 case QUAL_UNION_TYPE:
6075 /* Walk all the structure fields. */
6076 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6078 if (TREE_CODE (field) == FIELD_DECL
6079 && contains_aligned_value_p (TREE_TYPE (field)))
6086 /* Just for use if some languages passes arrays by value. */
/* Arrays: recurse into the element type.  */
6087 if (contains_aligned_value_p (TREE_TYPE (type)))
6098 /* Gives the alignment boundary, in bits, of an argument with the
6099 specified mode and type. */
6102 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6107 /* Since canonical type is used for call, we convert it to
6108 canonical type if needed. */
6109 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6110 type = TYPE_CANONICAL (type);
6111 align = TYPE_ALIGN (type);
/* No type available (libcall): fall back to the mode's alignment.  */
6114 align = GET_MODE_ALIGNMENT (mode);
/* Never align an argument to less than the stack parameter boundary.  */
6115 if (align < PARM_BOUNDARY)
6116 align = PARM_BOUNDARY;
6117 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6118 natural boundaries. */
6119 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6121 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6122 make an exception for SSE modes since these require 128bit
6125 The handling here differs from field_alignment. ICC aligns MMX
6126 arguments to 4 byte boundaries, while structure fields are aligned
6127 to 8 byte boundaries. */
6130 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6131 align = PARM_BOUNDARY;
/* Aggregates keep extra alignment only if they actually contain a
   128-bit-aligned (SSE) member.  */
6135 if (!contains_aligned_value_p (type))
6136 align = PARM_BOUNDARY;
/* Cap at the platform's maximum useful alignment.  */
6139 if (align > BIGGEST_ALIGNMENT)
6140 align = BIGGEST_ALIGNMENT;
6144 /* Return true if N is a possible register number of function value. */
6147 ix86_function_value_regno_p (int regno)
6154 case FIRST_FLOAT_REG:
6155 /* TODO: The function should depend on current function ABI but
6156 builtins.c would need updating then. Therefore we use the
/* MS x64 never returns values in x87 registers.  */
6158 if (TARGET_64BIT && ix86_abi == MS_ABI)
6160 return TARGET_FLOAT_RETURNS_IN_80387;
/* NOTE(review): this arm appears to cover MMX/SSE return registers,
   which are usable on Darwin and in 64-bit mode — the case labels are
   not visible here, confirm against the full switch.  */
6166 if (TARGET_MACHO || TARGET_64BIT)
6174 /* Define how to find the value returned by a function.
6175 VALTYPE is the data type of the value (as a tree).
6176 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6177 otherwise, FUNC is 0. */
/* 32-bit worker: pick the hard register for the return value.  */
6180 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6181 const_tree fntype, const_tree fn)
6185 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6186 we normally prevent this case when mmx is not available. However
6187 some ABIs may require the result to be returned like DImode. */
6188 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6189 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6191 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6192 we prevent this case when sse is not available. However some ABIs
6193 may require the result to be returned like integer TImode. */
6194 else if (mode == TImode
6195 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6196 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6198 /* 32-byte vector modes in %ymm0. */
6199 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6200 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6202 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6203 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6204 regno = FIRST_FLOAT_REG;
6206 /* Most things go in %eax. */
6209 /* Override FP return register with %xmm0 for local functions when
6210 SSE math is enabled or for functions with sseregparm attribute. */
6211 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6213 int sse_level = ix86_function_sseregparm (fntype, fn, false);
/* sseregparm level 1 covers SFmode only; level 2 also DFmode.  */
6214 if ((sse_level >= 1 && mode == SFmode)
6215 || (sse_level == 2 && mode == DFmode))
6216 regno = FIRST_SSE_REG;
6219 /* OImode shouldn't be used directly. */
6220 gcc_assert (mode != OImode);
6222 return gen_rtx_REG (orig_mode, regno);
/* 64-bit SysV worker: compute the return-value location via the ABI
   classification in construct_container.  */
6226 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6231 /* Handle libcalls, which don't provide a type node. */
6232 if (valtype == NULL)
/* Scalar float libcall results come back in %xmm0 ...  */
6244 return gen_rtx_REG (mode, FIRST_SSE_REG);
/* ... x87-mode results in %st(0) ...  */
6247 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
/* ... and everything else in %rax.  */
6251 return gen_rtx_REG (mode, AX_REG);
/* in_return = 1: classify as a return value with the full register
   budget for returns.  */
6255 ret = construct_container (mode, orig_mode, valtype, 1,
6256 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6257 x86_64_int_return_registers, 0);
6259 /* For zero sized structures, construct_container returns NULL, but we
6260 need to keep rest of compiler happy by returning meaningful value. */
6262 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Microsoft x64 worker: return values come back in %rax, except
   scalar floats and 16-byte non-complex values, which use %xmm0.  */
6268 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6270 unsigned int regno = AX_REG;
6274 switch (GET_MODE_SIZE (mode))
/* 16-byte case: __m128-style values (integer or vector, not complex)
   are returned in %xmm0.  */
6277 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6278 && !COMPLEX_MODE_P (mode))
6279 regno = FIRST_SSE_REG;
/* 4/8-byte scalar floats also return in %xmm0.  */
6283 if (mode == SFmode || mode == DFmode)
6284 regno = FIRST_SSE_REG;
6290 return gen_rtx_REG (orig_mode, regno);
/* Common helper for ix86_function_value / ix86_libcall_value: resolve
   FNTYPE_OR_DECL into (fn, fntype) and dispatch on the effective ABI.  */
6294 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6295 enum machine_mode orig_mode, enum machine_mode mode)
6297 const_tree fn, fntype;
/* FNTYPE_OR_DECL may be a FUNCTION_DECL or just a function type.  */
6300 if (fntype_or_decl && DECL_P (fntype_or_decl))
6301 fn = fntype_or_decl;
6302 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6304 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6305 return function_value_ms_64 (orig_mode, mode);
6306 else if (TARGET_64BIT)
6307 return function_value_64 (orig_mode, mode, valtype);
6309 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: where a function's return value of type
   VALTYPE lives.  Uses the type's natural mode for classification but
   the original TYPE_MODE for the resulting register RTX.  */
6313 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6314 bool outgoing ATTRIBUTE_UNUSED)
6316 enum machine_mode mode, orig_mode;
6318 orig_mode = TYPE_MODE (valtype);
6319 mode = type_natural_mode (valtype, NULL);
6320 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* Where a libcall returns a value of mode MODE; no type information is
   available, so classify by mode alone.  */
6324 ix86_libcall_value (enum machine_mode mode)
6326 return ix86_function_value_1 (NULL, NULL, mode, mode);
6329 /* Return true iff type is returned in memory. */
/* 32-bit variant of the return-in-memory decision.  */
6331 static int ATTRIBUTE_UNUSED
6332 return_in_memory_32 (const_tree type, enum machine_mode mode)
6336 if (mode == BLKmode)
6339 size = int_size_in_bytes (type);
/* Some 32-bit ABIs return small aggregates (<= 8 bytes) in registers.  */
6341 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6344 if (VECTOR_MODE_P (mode) || mode == TImode)
6346 /* User-created vectors small enough to fit in EAX. */
6350 /* MMX/3dNow values are returned in MM0,
6351 except when it doesn't exist. */
6353 return (TARGET_MMX ? 0 : 1);
6355 /* SSE values are returned in XMM0, except when it doesn't exist. */
6357 return (TARGET_SSE ? 0 : 1);
6359 /* AVX values are returned in YMM0, except when it doesn't exist. */
6361 return TARGET_AVX ? 0 : 1;
6370 /* OImode shouldn't be used directly. */
6371 gcc_assert (mode != OImode);
/* 64-bit SysV variant: a value is returned in memory exactly when the
   ABI classification says it cannot be placed in registers.  */
6376 static int ATTRIBUTE_UNUSED
6377 return_in_memory_64 (const_tree type, enum machine_mode mode)
6379 int needed_intregs, needed_sseregs;
6380 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Microsoft x64 variant: only 1/2/4/8-byte values and __m128-style
   16-byte values are returned in registers.  */
6383 static int ATTRIBUTE_UNUSED
6384 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6386 HOST_WIDE_INT size = int_size_in_bytes (type);
6388 /* __m128 is returned in xmm0. */
6389 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6390 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6393 /* Otherwise, the size must be exactly in [1248]. */
6394 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: dispatch to the ABI-specific predicate
   (subtargets may override the whole decision).  */
6398 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6400 #ifdef SUBTARGET_RETURN_IN_MEMORY
6401 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6403 const enum machine_mode mode = type_natural_mode (type, NULL);
6407 if (ix86_function_type_abi (fntype) == MS_ABI)
6408 return return_in_memory_ms_64 (type, mode);
6410 return return_in_memory_64 (type, mode);
6413 return return_in_memory_32 (type, mode);
6417 /* Return false iff TYPE is returned in memory. This version is used
6418 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6419 but differs notably in that when MMX is available, 8-byte vectors
6420 are returned in memory, rather than in MMX registers. */
6423 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6426 enum machine_mode mode = type_natural_mode (type, NULL);
/* 64-bit Solaris follows the standard SysV classification.  */
6429 return return_in_memory_64 (type, mode);
6431 if (mode == BLKmode)
6434 size = int_size_in_bytes (type);
6436 if (VECTOR_MODE_P (mode))
6438 /* Return in memory only if MMX registers *are* available. This
6439 seems backwards, but it is consistent with the existing
/* 16-byte values (TImode) and 80-bit long double (XFmode) get their
   own decisions below.  */
6446 else if (mode == TImode)
6448 else if (mode == XFmode)
6454 /* When returning SSE vector types, we have a choice of either
6455 (1) being abi incompatible with a -march switch, or
6456 (2) generating an error.
6457 Given no good solution, I think the safest thing is one warning.
6458 The user won't be able to use -Werror, but....
6460 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6461 called in response to actually generating a caller or callee that
6462 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6463 via aggregate_value_p for general type probing from tree-ssa. */
6466 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* Emit each warning at most once per compilation.  */
6468 static bool warnedsse, warnedmmx;
6470 if (!TARGET_64BIT && type)
6472 /* Look at the return type of the function, not the function type. */
6473 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
/* Warn when a 16-byte SSE vector is returned but SSE is disabled.  */
6475 if (!TARGET_SSE && !warnedsse)
6478 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6481 warning (0, "SSE vector return without SSE enabled "
/* Likewise for 8-byte MMX vector returns without MMX.  */
6486 if (!TARGET_MMX && !warnedmmx)
6488 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6491 warning (0, "MMX vector return without MMX enabled "
6501 /* Create the va_list data type. */
6503 /* Returns the calling convention specific va_list date type.
6504 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6507 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6509 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6511 /* For i386 we use plain pointer to argument area. */
6512 if (!TARGET_64BIT || abi == MS_ABI)
6513 return build_pointer_type (char_type_node);
/* 64-bit SysV va_list is the four-field __va_list_tag record mandated
   by the psABI: gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
6515 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6516 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6518 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6519 unsigned_type_node);
6520 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6521 unsigned_type_node);
6522 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6524 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so the va_list optimizations can track
   how much of the save area is actually used.  */
6527 va_list_gpr_counter_field = f_gpr;
6528 va_list_fpr_counter_field = f_fpr;
6530 DECL_FIELD_CONTEXT (f_gpr) = record;
6531 DECL_FIELD_CONTEXT (f_fpr) = record;
6532 DECL_FIELD_CONTEXT (f_ovf) = record;
6533 DECL_FIELD_CONTEXT (f_sav) = record;
/* Chain the fields into the record and lay it out.  */
6535 TREE_CHAIN (record) = type_decl;
6536 TYPE_NAME (record) = type_decl;
6537 TYPE_FIELDS (record) = f_gpr;
6538 TREE_CHAIN (f_gpr) = f_fpr;
6539 TREE_CHAIN (f_fpr) = f_ovf;
6540 TREE_CHAIN (f_ovf) = f_sav;
6542 layout_type (record);
6544 /* The correct type is an array type of one element. */
6545 return build_array_type (record, build_index_type (size_zero_node));
6548 /* Setup the builtin va_list data type and for 64-bit the additional
6549 calling convention specific va_list data types. */
6552 ix86_build_builtin_va_list (void)
/* The default va_list follows the current (command-line selected) ABI.  */
6554 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6556 /* Initialize abi specific va_list builtin types. */
/* Both __builtin_sysv_va_list and __builtin_ms_va_list must exist so
   cross-ABI calls can use the foreign variant; whichever matches the
   default ABI just aliases (a variant copy of) the default type.  */
6560 if (ix86_abi == MS_ABI)
6562 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
/* Make a distinct variant so the two type nodes stay distinguishable.  */
6563 if (TREE_CODE (t) != RECORD_TYPE)
6564 t = build_variant_type_copy (t);
6565 sysv_va_list_type_node = t;
6570 if (TREE_CODE (t) != RECORD_TYPE)
6571 t = build_variant_type_copy (t);
6572 sysv_va_list_type_node = t;
6574 if (ix86_abi != MS_ABI)
6576 t = ix86_build_builtin_va_list_abi (MS_ABI);
6577 if (TREE_CODE (t) != RECORD_TYPE)
6578 t = build_variant_type_copy (t);
6579 ms_va_list_type_node = t;
6584 if (TREE_CODE (t) != RECORD_TYPE)
6585 t = build_variant_type_copy (t);
6586 ms_va_list_type_node = t;
6593 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* 64-bit SysV: spill the unnamed-argument registers (GPRs and, guarded
   by the hidden %al count, SSE regs) into the register save area so
   va_arg can find them.  */
6596 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6605 int regparm = ix86_regparm;
/* A function whose ABI differs from the target default uses that
   ABI's register-parameter maximum instead.  */
6607 if (cum->call_abi != ix86_abi)
6608 regparm = (ix86_abi != SYSV_ABI
6609 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6611 /* GPR size of varargs save area. */
6612 if (cfun->va_list_gpr_size)
6613 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6615 ix86_varargs_gpr_size = 0;
6617 /* FPR size of varargs save area. We don't need it if we don't pass
6618 anything in SSE registers. */
6619 if (cum->sse_nregs && cfun->va_list_fpr_size)
6620 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6622 ix86_varargs_fpr_size = 0;
/* Nothing to save: the function never reads unnamed register args.  */
6624 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6627 save_area = frame_pointer_rtx;
6628 set = get_varargs_alias_set ();
/* Save only the GPRs beyond the named parameters, up to what the
   function's va_list usage requires.  */
6630 for (i = cum->regno;
6632 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6635 mem = gen_rtx_MEM (Pmode,
6636 plus_constant (save_area, i * UNITS_PER_WORD));
6637 MEM_NOTRAP_P (mem) = 1;
6638 set_mem_alias_set (mem, set);
6639 emit_move_insn (mem, gen_rtx_REG (Pmode,
6640 x86_64_int_parameter_registers[i]));
6643 if (ix86_varargs_fpr_size)
6645 /* Now emit code to save SSE registers. The AX parameter contains number
6646 of SSE parameter registers used to call this function. We use
6647 sse_prologue_save insn template that produces computed jump across
6648 SSE saves. We need some preparation work to get this working. */
6650 label = gen_label_rtx ();
6651 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6653 /* Compute address to jump to :
6654 label - eax*4 + nnamed_sse_arguments*4 Or
6655 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6656 tmp_reg = gen_reg_rtx (Pmode);
6657 nsse_reg = gen_reg_rtx (Pmode);
/* %al holds the number of SSE registers actually used by the caller;
   zero-extend it into a full register for the address arithmetic.  */
6658 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6659 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6660 gen_rtx_MULT (Pmode, nsse_reg,
6663 /* vmovaps is one byte longer than movaps. */
6665 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6666 gen_rtx_PLUS (Pmode, tmp_reg,
6672 gen_rtx_CONST (DImode,
6673 gen_rtx_PLUS (DImode,
6675 GEN_INT (cum->sse_regno
6676 * (TARGET_AVX ? 5 : 4)))));
6678 emit_move_insn (nsse_reg, label_ref);
6679 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6681 /* Compute address of memory block we save into. We always use pointer
6682 pointing 127 bytes after first byte to store - this is needed to keep
6683 instruction size limited by 4 bytes (5 bytes for AVX) with one
6684 byte displacement. */
6685 tmp_reg = gen_reg_rtx (Pmode);
6686 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6687 plus_constant (save_area,
6688 ix86_varargs_gpr_size + 127)));
6689 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6690 MEM_NOTRAP_P (mem) = 1;
6691 set_mem_alias_set (mem, set);
6692 set_mem_align (mem, BITS_PER_WORD);
6694 /* And finally do the dirty job! */
6695 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6696 GEN_INT (cum->sse_regno), label));
/* Microsoft x64 varargs setup: spill every remaining parameter GPR into
   its caller-allocated home slot on the stack so va_arg can walk them.  */
6701 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6703 alias_set_type set = get_varargs_alias_set ();
/* Start after the named parameters already consumed by CUM.  */
6706 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
/* MS ABI home area: slot I sits at offset I words in the incoming
   argument area.  */
6710 mem = gen_rtx_MEM (Pmode,
6711 plus_constant (virtual_incoming_args_rtx,
6712 i * UNITS_PER_WORD));
6713 MEM_NOTRAP_P (mem) = 1;
6714 set_mem_alias_set (mem, set);
6716 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6717 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: advance past the last named
   argument, then dispatch to the ABI-specific register-spill worker.  */
6722 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6723 tree type, int *pretend_size ATTRIBUTE_UNUSED,
/* Work on a copy so the caller's CUM is left untouched.  */
6726 CUMULATIVE_ARGS next_cum;
6729 /* This argument doesn't appear to be used anymore. Which is good,
6730 because the old code here didn't suppress rtl generation. */
6731 gcc_assert (!no_rtl);
6736 fntype = TREE_TYPE (current_function_decl);
6738 /* For varargs, we do not want to skip the dummy va_dcl argument.
6739 For stdargs, we do want to skip the last named argument. */
6741 if (stdarg_p (fntype))
6742 function_arg_advance (&next_cum, mode, type, 1);
6744 if (cum->call_abi == MS_ABI)
6745 setup_incoming_varargs_ms_64 (&next_cum);
6747 setup_incoming_varargs_64 (&next_cum);
6750 /* Checks if TYPE is of kind va_list char *. */
6753 is_va_list_char_pointer (tree type)
6757 /* For 32-bit it is always true. */
6760 canonic = ix86_canonical_va_list_type (type);
6761 return (canonic == ms_va_list_type_node
6762 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6765 /* Implement va_start. */
6768 ix86_va_start (tree valist, rtx nextarg)
6770 HOST_WIDE_INT words, n_gpr, n_fpr;
6771 tree f_gpr, f_fpr, f_ovf, f_sav;
6772 tree gpr, fpr, ovf, sav, t;
6775 /* Only 64bit target needs something special. */
6776 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6778 std_expand_builtin_va_start (valist, nextarg);
6782 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6783 f_fpr = TREE_CHAIN (f_gpr);
6784 f_ovf = TREE_CHAIN (f_fpr);
6785 f_sav = TREE_CHAIN (f_ovf);
6787 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6788 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6789 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6790 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6791 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6793 /* Count number of gp and fp argument registers used. */
6794 words = crtl->args.info.words;
6795 n_gpr = crtl->args.info.regno;
6796 n_fpr = crtl->args.info.sse_regno;
6798 if (cfun->va_list_gpr_size)
6800 type = TREE_TYPE (gpr);
6801 t = build2 (MODIFY_EXPR, type,
6802 gpr, build_int_cst (type, n_gpr * 8));
6803 TREE_SIDE_EFFECTS (t) = 1;
6804 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6807 if (TARGET_SSE && cfun->va_list_fpr_size)
6809 type = TREE_TYPE (fpr);
6810 t = build2 (MODIFY_EXPR, type, fpr,
6811 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6812 TREE_SIDE_EFFECTS (t) = 1;
6813 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6816 /* Find the overflow area. */
6817 type = TREE_TYPE (ovf);
6818 t = make_tree (type, crtl->args.internal_arg_pointer);
6820 t = build2 (POINTER_PLUS_EXPR, type, t,
6821 size_int (words * UNITS_PER_WORD));
6822 t = build2 (MODIFY_EXPR, type, ovf, t);
6823 TREE_SIDE_EFFECTS (t) = 1;
6824 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6826 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6828 /* Find the register save area.
6829 Prologue of the function save it right above stack frame. */
6830 type = TREE_TYPE (sav);
6831 t = make_tree (type, frame_pointer_rtx);
6832 if (!ix86_varargs_gpr_size)
6833 t = build2 (POINTER_PLUS_EXPR, type, t,
6834 size_int (-8 * X86_64_REGPARM_MAX));
6835 t = build2 (MODIFY_EXPR, type, sav, t);
6836 TREE_SIDE_EFFECTS (t) = 1;
6837 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6841 /* Implement va_arg. */
6844 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6847 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6848 tree f_gpr, f_fpr, f_ovf, f_sav;
6849 tree gpr, fpr, ovf, sav, t;
6851 tree lab_false, lab_over = NULL_TREE;
6856 enum machine_mode nat_mode;
6859 /* Only 64bit target needs something special. */
6860 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6861 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6863 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6864 f_fpr = TREE_CHAIN (f_gpr);
6865 f_ovf = TREE_CHAIN (f_fpr);
6866 f_sav = TREE_CHAIN (f_ovf);
6868 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6869 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6870 valist = build_va_arg_indirect_ref (valist);
6871 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6872 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6873 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6875 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6877 type = build_pointer_type (type);
6878 size = int_size_in_bytes (type);
6879 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6881 nat_mode = type_natural_mode (type, NULL);
6890 /* Unnamed 256bit vector mode parameters are passed on stack. */
6891 if (ix86_cfun_abi () == SYSV_ABI)
6898 container = construct_container (nat_mode, TYPE_MODE (type),
6899 type, 0, X86_64_REGPARM_MAX,
6900 X86_64_SSE_REGPARM_MAX, intreg,
6905 /* Pull the value out of the saved registers. */
6907 addr = create_tmp_var (ptr_type_node, "addr");
6908 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6912 int needed_intregs, needed_sseregs;
6914 tree int_addr, sse_addr;
6916 lab_false = create_artificial_label ();
6917 lab_over = create_artificial_label ();
6919 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6921 need_temp = (!REG_P (container)
6922 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6923 || TYPE_ALIGN (type) > 128));
6925 /* In case we are passing structure, verify that it is consecutive block
6926 on the register save area. If not we need to do moves. */
6927 if (!need_temp && !REG_P (container))
6929 /* Verify that all registers are strictly consecutive */
6930 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6934 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6936 rtx slot = XVECEXP (container, 0, i);
6937 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6938 || INTVAL (XEXP (slot, 1)) != i * 16)
6946 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6948 rtx slot = XVECEXP (container, 0, i);
6949 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6950 || INTVAL (XEXP (slot, 1)) != i * 8)
6962 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6963 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6964 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6965 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6968 /* First ensure that we fit completely in registers. */
6971 t = build_int_cst (TREE_TYPE (gpr),
6972 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6973 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6974 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6975 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6976 gimplify_and_add (t, pre_p);
6980 t = build_int_cst (TREE_TYPE (fpr),
6981 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6982 + X86_64_REGPARM_MAX * 8);
6983 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6984 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6985 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6986 gimplify_and_add (t, pre_p);
6989 /* Compute index to start of area used for integer regs. */
6992 /* int_addr = gpr + sav; */
6993 t = fold_convert (sizetype, gpr);
6994 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6995 gimplify_assign (int_addr, t, pre_p);
6999 /* sse_addr = fpr + sav; */
7000 t = fold_convert (sizetype, fpr);
7001 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7002 gimplify_assign (sse_addr, t, pre_p);
7007 tree temp = create_tmp_var (type, "va_arg_tmp");
7010 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7011 gimplify_assign (addr, t, pre_p);
7013 for (i = 0; i < XVECLEN (container, 0); i++)
7015 rtx slot = XVECEXP (container, 0, i);
7016 rtx reg = XEXP (slot, 0);
7017 enum machine_mode mode = GET_MODE (reg);
7018 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
7019 tree addr_type = build_pointer_type (piece_type);
7020 tree daddr_type = build_pointer_type_for_mode (piece_type,
7024 tree dest_addr, dest;
7026 if (SSE_REGNO_P (REGNO (reg)))
7028 src_addr = sse_addr;
7029 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7033 src_addr = int_addr;
7034 src_offset = REGNO (reg) * 8;
7036 src_addr = fold_convert (addr_type, src_addr);
7037 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7038 size_int (src_offset));
7039 src = build_va_arg_indirect_ref (src_addr);
7041 dest_addr = fold_convert (daddr_type, addr);
7042 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7043 size_int (INTVAL (XEXP (slot, 1))));
7044 dest = build_va_arg_indirect_ref (dest_addr);
7046 gimplify_assign (dest, src, pre_p);
7052 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7053 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7054 gimplify_assign (gpr, t, pre_p);
7059 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7060 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7061 gimplify_assign (fpr, t, pre_p);
7064 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7066 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7069 /* ... otherwise out of the overflow area. */
7071 /* When we align parameter on stack for caller, if the parameter
7072 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7073 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
7074 here with caller. */
7075 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7076 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7077 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7079 /* Care for on-stack alignment if needed. */
7080 if (arg_boundary <= 64
7081 || integer_zerop (TYPE_SIZE (type)))
7085 HOST_WIDE_INT align = arg_boundary / 8;
7086 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7087 size_int (align - 1));
7088 t = fold_convert (sizetype, t);
7089 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7091 t = fold_convert (TREE_TYPE (ovf), t);
7093 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7094 gimplify_assign (addr, t, pre_p);
7096 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7097 size_int (rsize * UNITS_PER_WORD));
7098 gimplify_assign (unshare_expr (ovf), t, pre_p);
7101 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7103 ptrtype = build_pointer_type (type);
7104 addr = fold_convert (ptrtype, addr);
7107 addr = build_va_arg_indirect_ref (addr);
7108 return build_va_arg_indirect_ref (addr);
7111 /* Return nonzero if OPNUM's MEM should be matched
7112 in movabs* patterns. */
7115 ix86_check_movabs (rtx insn, int opnum)
7119 set = PATTERN (insn);
7120 if (GET_CODE (set) == PARALLEL)
7121 set = XVECEXP (set, 0, 0);
7122 gcc_assert (GET_CODE (set) == SET);
7123 mem = XEXP (set, opnum);
7124 while (GET_CODE (mem) == SUBREG)
7125 mem = SUBREG_REG (mem);
7126 gcc_assert (MEM_P (mem));
7127 return (volatile_ok || !MEM_VOLATILE_P (mem));
7130 /* Initialize the table of extra 80387 mathematical constants. */
7133 init_ext_80387_constants (void)
7135 static const char * cst[5] =
7137 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7138 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7139 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7140 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7141 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7145 for (i = 0; i < 5; i++)
7147 real_from_string (&ext_80387_constants_table[i], cst[i]);
7148 /* Ensure each constant is rounded to XFmode precision. */
7149 real_convert (&ext_80387_constants_table[i],
7150 XFmode, &ext_80387_constants_table[i]);
7153 ext_80387_constants_init = 1;
7156 /* Return true if the constant is something that can be loaded with
7157 a special instruction. */
7160 standard_80387_constant_p (rtx x)
7162 enum machine_mode mode = GET_MODE (x);
7166 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7169 if (x == CONST0_RTX (mode))
7171 if (x == CONST1_RTX (mode))
7174 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7176 /* For XFmode constants, try to find a special 80387 instruction when
7177 optimizing for size or on those CPUs that benefit from them. */
7179 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7183 if (! ext_80387_constants_init)
7184 init_ext_80387_constants ();
7186 for (i = 0; i < 5; i++)
7187 if (real_identical (&r, &ext_80387_constants_table[i]))
7191 /* Load of the constant -0.0 or -1.0 will be split as
7192 fldz;fchs or fld1;fchs sequence. */
7193 if (real_isnegzero (&r))
7195 if (real_identical (&r, &dconstm1))
7201 /* Return the opcode of the special instruction to be used to load
7205 standard_80387_constant_opcode (rtx x)
7207 switch (standard_80387_constant_p (x))
7231 /* Return the CONST_DOUBLE representing the 80387 constant that is
7232 loaded by the specified special instruction. The argument IDX
7233 matches the return value from standard_80387_constant_p. */
7236 standard_80387_constant_rtx (int idx)
7240 if (! ext_80387_constants_init)
7241 init_ext_80387_constants ();
7257 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7261 /* Return 1 if mode is a valid mode for sse. */
7263 standard_sse_mode_p (enum machine_mode mode)
7280 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
7281 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7282 modes and AVX is enabled. */
7285 standard_sse_constant_p (rtx x)
7287 enum machine_mode mode = GET_MODE (x);
7289 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7291 if (vector_all_ones_operand (x, mode))
7293 if (standard_sse_mode_p (mode))
7294 return TARGET_SSE2 ? 2 : -2;
7295 else if (VALID_AVX256_REG_MODE (mode))
7296 return TARGET_AVX ? 3 : -3;
7302 /* Return the opcode of the special instruction to be used to load
7306 standard_sse_constant_opcode (rtx insn, rtx x)
7308 switch (standard_sse_constant_p (x))
7311 switch (get_attr_mode (insn))
7314 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7316 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7318 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7320 return "vxorps\t%x0, %x0, %x0";
7322 return "vxorpd\t%x0, %x0, %x0";
7324 return "vpxor\t%x0, %x0, %x0";
7330 switch (get_attr_mode (insn))
7335 return "vpcmpeqd\t%0, %0, %0";
7341 return "pcmpeqd\t%0, %0";
7346 /* Returns 1 if OP contains a symbol reference */
7349 symbolic_reference_mentioned_p (rtx op)
7354 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7357 fmt = GET_RTX_FORMAT (GET_CODE (op));
7358 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7364 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7365 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7369 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7376 /* Return 1 if it is appropriate to emit `ret' instructions in the
7377 body of a function. Do this only if the epilogue is simple, needing a
7378 couple of insns. Prior to reloading, we can't tell how many registers
7379 must be saved, so return 0 then. Return 0 if there is no frame
7380 marker to de-allocate. */
7383 ix86_can_use_return_insn_p (void)
7385 struct ix86_frame frame;
7387 if (! reload_completed || frame_pointer_needed)
7390 /* Don't allow more than 32 pop, since that's all we can do
7391 with one instruction. */
7392 if (crtl->args.pops_args
7393 && crtl->args.size >= 32768)
7396 ix86_compute_frame_layout (&frame);
7397 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7400 /* Value should be nonzero if functions must have frame pointers.
7401 Zero means the frame pointer need not be set up (and parms may
7402 be accessed via the stack pointer) in functions that seem suitable. */
7405 ix86_frame_pointer_required (void)
7407 /* If we accessed previous frames, then the generated code expects
7408 to be able to access the saved ebp value in our frame. */
7409 if (cfun->machine->accesses_prev_frame)
7412 /* Several x86 os'es need a frame pointer for other reasons,
7413 usually pertaining to setjmp. */
7414 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7417 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7418 the frame pointer by default. Turn it back on now if we've not
7419 got a leaf function. */
7420 if (TARGET_OMIT_LEAF_FRAME_POINTER
7421 && (!current_function_is_leaf
7422 || ix86_current_function_calls_tls_descriptor))
7431 /* Record that the current function accesses previous call frames. */
7434 ix86_setup_frame_addresses (void)
7436 cfun->machine->accesses_prev_frame = 1;
7439 #ifndef USE_HIDDEN_LINKONCE
7440 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7441 # define USE_HIDDEN_LINKONCE 1
7443 # define USE_HIDDEN_LINKONCE 0
7447 static int pic_labels_used;
7449 /* Fills in the label name that should be used for a pc thunk for
7450 the given register. */
7453 get_pc_thunk_name (char name[32], unsigned int regno)
7455 gcc_assert (!TARGET_64BIT);
7457 if (USE_HIDDEN_LINKONCE)
7458 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7460 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7464 /* This function generates code for -fpic that loads %ebx with
7465 the return address of the caller and then returns. */
7468 ix86_file_end (void)
7473 for (regno = 0; regno < 8; ++regno)
7477 if (! ((pic_labels_used >> regno) & 1))
7480 get_pc_thunk_name (name, regno);
7485 switch_to_section (darwin_sections[text_coal_section]);
7486 fputs ("\t.weak_definition\t", asm_out_file);
7487 assemble_name (asm_out_file, name);
7488 fputs ("\n\t.private_extern\t", asm_out_file);
7489 assemble_name (asm_out_file, name);
7490 fputs ("\n", asm_out_file);
7491 ASM_OUTPUT_LABEL (asm_out_file, name);
7495 if (USE_HIDDEN_LINKONCE)
7499 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7501 TREE_PUBLIC (decl) = 1;
7502 TREE_STATIC (decl) = 1;
7503 DECL_ONE_ONLY (decl) = 1;
7505 (*targetm.asm_out.unique_section) (decl, 0);
7506 switch_to_section (get_named_section (decl, NULL, 0));
7508 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7509 fputs ("\t.hidden\t", asm_out_file);
7510 assemble_name (asm_out_file, name);
7511 fputc ('\n', asm_out_file);
7512 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7516 switch_to_section (text_section);
7517 ASM_OUTPUT_LABEL (asm_out_file, name);
7520 xops[0] = gen_rtx_REG (Pmode, regno);
7521 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7522 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7523 output_asm_insn ("ret", xops);
7526 if (NEED_INDICATE_EXEC_STACK)
7527 file_end_indicate_exec_stack ();
7530 /* Emit code for the SET_GOT patterns. */
7533 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7539 if (TARGET_VXWORKS_RTP && flag_pic)
7541 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7542 xops[2] = gen_rtx_MEM (Pmode,
7543 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7544 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7546 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7547 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7548 an unadorned address. */
7549 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7550 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7551 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7555 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7557 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7559 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7562 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7564 output_asm_insn ("call\t%a2", xops);
7567 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7568 is what will be referenced by the Mach-O PIC subsystem. */
7570 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7573 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7574 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7577 output_asm_insn ("pop%z0\t%0", xops);
7582 get_pc_thunk_name (name, REGNO (dest));
7583 pic_labels_used |= 1 << REGNO (dest);
7585 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7586 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7587 output_asm_insn ("call\t%X2", xops);
7588 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7589 is what will be referenced by the Mach-O PIC subsystem. */
7592 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7594 targetm.asm_out.internal_label (asm_out_file, "L",
7595 CODE_LABEL_NUMBER (label));
7602 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7603 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7605 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7610 /* Generate an "push" pattern for input ARG. */
7615 if (ix86_cfa_state->reg == stack_pointer_rtx)
7616 ix86_cfa_state->offset += UNITS_PER_WORD;
7618 return gen_rtx_SET (VOIDmode,
7620 gen_rtx_PRE_DEC (Pmode,
7621 stack_pointer_rtx)),
7625 /* Return >= 0 if there is an unused call-clobbered register available
7626 for the entire function. */
7629 ix86_select_alt_pic_regnum (void)
7631 if (current_function_is_leaf && !crtl->profile
7632 && !ix86_current_function_calls_tls_descriptor)
7635 /* Can't use the same register for both PIC and DRAP. */
7637 drap = REGNO (crtl->drap_reg);
7640 for (i = 2; i >= 0; --i)
7641 if (i != drap && !df_regs_ever_live_p (i))
7645 return INVALID_REGNUM;
7648 /* Return 1 if we need to save REGNO. */
7650 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7652 if (pic_offset_table_rtx
7653 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7654 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7656 || crtl->calls_eh_return
7657 || crtl->uses_const_pool))
7659 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7664 if (crtl->calls_eh_return && maybe_eh_return)
7669 unsigned test = EH_RETURN_DATA_REGNO (i);
7670 if (test == INVALID_REGNUM)
7677 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
7680 return (df_regs_ever_live_p (regno)
7681 && !call_used_regs[regno]
7682 && !fixed_regs[regno]
7683 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7686 /* Return number of saved general prupose registers. */
7689 ix86_nsaved_regs (void)
7694 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7695 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7700 /* Return number of saved SSE registrers. */
7703 ix86_nsaved_sseregs (void)
7708 if (ix86_cfun_abi () != MS_ABI)
7710 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7711 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7716 /* Given FROM and TO register numbers, say whether this elimination is
7717 allowed. If stack alignment is needed, we can only replace argument
7718 pointer with hard frame pointer, or replace frame pointer with stack
7719 pointer. Otherwise, frame pointer elimination is automatically
7720 handled and all other eliminations are valid. */
7723 ix86_can_eliminate (int from, int to)
7725 if (stack_realign_fp)
7726 return ((from == ARG_POINTER_REGNUM
7727 && to == HARD_FRAME_POINTER_REGNUM)
7728 || (from == FRAME_POINTER_REGNUM
7729 && to == STACK_POINTER_REGNUM));
7731 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7734 /* Return the offset between two registers, one to be eliminated, and the other
7735 its replacement, at the start of a routine. */
7738 ix86_initial_elimination_offset (int from, int to)
7740 struct ix86_frame frame;
7741 ix86_compute_frame_layout (&frame);
7743 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7744 return frame.hard_frame_pointer_offset;
7745 else if (from == FRAME_POINTER_REGNUM
7746 && to == HARD_FRAME_POINTER_REGNUM)
7747 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7750 gcc_assert (to == STACK_POINTER_REGNUM);
7752 if (from == ARG_POINTER_REGNUM)
7753 return frame.stack_pointer_offset;
7755 gcc_assert (from == FRAME_POINTER_REGNUM);
7756 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7760 /* In a dynamically-aligned function, we can't know the offset from
7761 stack pointer to frame pointer, so we must ensure that setjmp
7762 eliminates fp against the hard fp (%ebp) rather than trying to
7763 index from %esp up to the top of the frame across a gap that is
7764 of unknown (at compile-time) size. */
7766 ix86_builtin_setjmp_frame_value (void)
7768 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7771 /* Fill structure ix86_frame about frame of currently computed function. */
7774 ix86_compute_frame_layout (struct ix86_frame *frame)
7776 HOST_WIDE_INT total_size;
7777 unsigned int stack_alignment_needed;
7778 HOST_WIDE_INT offset;
7779 unsigned int preferred_alignment;
7780 HOST_WIDE_INT size = get_frame_size ();
7782 frame->nregs = ix86_nsaved_regs ();
7783 frame->nsseregs = ix86_nsaved_sseregs ();
7786 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7787 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7789 /* MS ABI seem to require stack alignment to be always 16 except for function
7791 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7793 preferred_alignment = 16;
7794 stack_alignment_needed = 16;
7795 crtl->preferred_stack_boundary = 128;
7796 crtl->stack_alignment_needed = 128;
7799 gcc_assert (!size || stack_alignment_needed);
7800 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7801 gcc_assert (preferred_alignment <= stack_alignment_needed);
7803 /* During reload iteration the amount of registers saved can change.
7804 Recompute the value as needed. Do not recompute when amount of registers
7805 didn't change as reload does multiple calls to the function and does not
7806 expect the decision to change within single iteration. */
7807 if (!optimize_function_for_size_p (cfun)
7808 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7810 int count = frame->nregs;
7812 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7813 /* The fast prologue uses move instead of push to save registers. This
7814 is significantly longer, but also executes faster as modern hardware
7815 can execute the moves in parallel, but can't do that for push/pop.
7817 Be careful about choosing what prologue to emit: When function takes
7818 many instructions to execute we may use slow version as well as in
7819 case function is known to be outside hot spot (this is known with
7820 feedback only). Weight the size of function by number of registers
7821 to save as it is cheap to use one or two push instructions but very
7822 slow to use many of them. */
7824 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7825 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7826 || (flag_branch_probabilities
7827 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7828 cfun->machine->use_fast_prologue_epilogue = false;
7830 cfun->machine->use_fast_prologue_epilogue
7831 = !expensive_function_p (count);
7833 if (TARGET_PROLOGUE_USING_MOVE
7834 && cfun->machine->use_fast_prologue_epilogue)
7835 frame->save_regs_using_mov = true;
7837 frame->save_regs_using_mov = false;
7840 /* Skip return address and saved base pointer. */
7841 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7843 frame->hard_frame_pointer_offset = offset;
7845 /* Set offset to aligned because the realigned frame starts from
7847 if (stack_realign_fp)
7848 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7850 /* Register save area */
7851 offset += frame->nregs * UNITS_PER_WORD;
7853 /* Align SSE reg save area. */
7854 if (frame->nsseregs)
7855 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7857 frame->padding0 = 0;
7859 /* SSE register save area. */
7860 offset += frame->padding0 + frame->nsseregs * 16;
7863 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7864 offset += frame->va_arg_size;
7866 /* Align start of frame for local function. */
7867 frame->padding1 = ((offset + stack_alignment_needed - 1)
7868 & -stack_alignment_needed) - offset;
7870 offset += frame->padding1;
7872 /* Frame pointer points here. */
7873 frame->frame_pointer_offset = offset;
7877 /* Add outgoing arguments area. Can be skipped if we eliminated
7878 all the function calls as dead code.
7879 Skipping is however impossible when function calls alloca. Alloca
7880 expander assumes that last crtl->outgoing_args_size
7881 of stack frame are unused. */
7882 if (ACCUMULATE_OUTGOING_ARGS
7883 && (!current_function_is_leaf || cfun->calls_alloca
7884 || ix86_current_function_calls_tls_descriptor))
7886 offset += crtl->outgoing_args_size;
7887 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7890 frame->outgoing_arguments_size = 0;
7892 /* Align stack boundary. Only needed if we're calling another function
7894 if (!current_function_is_leaf || cfun->calls_alloca
7895 || ix86_current_function_calls_tls_descriptor)
7896 frame->padding2 = ((offset + preferred_alignment - 1)
7897 & -preferred_alignment) - offset;
7899 frame->padding2 = 0;
7901 offset += frame->padding2;
7903 /* We've reached end of stack frame. */
7904 frame->stack_pointer_offset = offset;
7906 /* Size prologue needs to allocate. */
7907 frame->to_allocate =
7908 (size + frame->padding1 + frame->padding2
7909 + frame->outgoing_arguments_size + frame->va_arg_size);
7911 if ((!frame->to_allocate && frame->nregs <= 1)
7912 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7913 frame->save_regs_using_mov = false;
7915 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7916 && current_function_is_leaf
7917 && !ix86_current_function_calls_tls_descriptor)
7919 frame->red_zone_size = frame->to_allocate;
7920 if (frame->save_regs_using_mov)
7921 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7922 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7923 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7926 frame->red_zone_size = 0;
7927 frame->to_allocate -= frame->red_zone_size;
7928 frame->stack_pointer_offset -= frame->red_zone_size;
7930 fprintf (stderr, "\n");
7931 fprintf (stderr, "size: %ld\n", (long)size);
7932 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7933 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7934 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7935 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7936 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7937 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7938 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7939 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7940 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7941 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7942 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7943 (long)frame->hard_frame_pointer_offset);
7944 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7945 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7946 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7947 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7951 /* Emit code to save registers in the prologue. */
7954 ix86_emit_save_regs (void)
7959 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7960 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7962 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7963 RTX_FRAME_RELATED_P (insn) = 1;
7967 /* Emit code to save registers using MOV insns. First register
7968 is restored from POINTER + OFFSET. */
7970 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7975 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7976 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7978 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7980 gen_rtx_REG (Pmode, regno));
7981 RTX_FRAME_RELATED_P (insn) = 1;
7982 offset += UNITS_PER_WORD;
7986 /* Emit code to save SSE registers using MOV insns.  First register
7987 is stored at POINTER + OFFSET.  */
7989 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7995 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7996 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7998 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
/* SSE saves are 16 bytes each and must be 128-bit aligned.  */
7999 set_mem_align (mem, 128);
8000 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8001 RTX_FRAME_RELATED_P (insn) = 1;
/* Chain of REG_CFA_RESTORE notes waiting to be attached to the next
   stack manipulation insn; see ix86_add_queued_cfa_restore_notes.  */
8006 static GTY(()) rtx queued_cfa_restores;
8008 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
8009 manipulation insn.  Don't add it if the previously
8010 saved value will be left untouched within stack red-zone till return,
8011 as unwinders can find the same value in the register and
8015 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
/* Suppress the note when the save slot stays valid inside the red
   zone until return and the return does not pop a huge (>= 64K)
   argument block.  */
8018 && !TARGET_64BIT_MS_ABI
8019 && red_offset + RED_ZONE_SIZE >= 0
8020 && crtl->args.pops_args < 65536)
8025 add_reg_note (insn, REG_CFA_RESTORE, reg);
8026 RTX_FRAME_RELATED_P (insn) = 1;
/* No insn supplied: queue the note for a later stack-adjust insn.  */
8030 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8033 /* Add queued REG_CFA_RESTORE notes if any to INSN.  */
8036 ix86_add_queued_cfa_restore_notes (rtx insn)
/* Nothing queued: nothing to do.  */
8039 if (!queued_cfa_restores)
/* Find the tail of the queued list, then splice INSN's existing
   notes after it so the whole queue becomes part of INSN's notes.  */
8041 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8043 XEXP (last, 1) = REG_NOTES (insn);
8044 REG_NOTES (insn) = queued_cfa_restores;
8045 queued_cfa_restores = NULL_RTX;
8046 RTX_FRAME_RELATED_P (insn) = 1;
8049 /* Expand prologue or epilogue stack adjustment.
8050 The pattern exists to put a dependency on all ebp-based memory accesses.
8051 STYLE should be negative if instructions should be marked as frame related,
8052 zero if %r11 register is live and cannot be freely used and positive
8056 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8057 int style, bool set_cfa)
8062 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
/* 64-bit: the offset fits in a sign-extended 32-bit immediate.  */
8063 else if (x86_64_immediate_operand (offset, DImode))
8064 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
8068 /* r11 is used by indirect sibcall return as well, set before the
8069 epilogue and used after the epilogue.  ATM indirect sibcall
8070 shouldn't be used together with huge frame sizes in one
8071 function because of the frame_size check in sibcall.c.  */
/* Offset too large for an immediate: materialize it in r11 first.  */
8073 r11 = gen_rtx_REG (DImode, R11_REG);
8074 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8076 RTX_FRAME_RELATED_P (insn) = 1;
8077 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8082 ix86_add_queued_cfa_restore_notes (insn);
/* When this adjustment moves the CFA, track the new CFA register and
   offset and attach an explicit REG_CFA_ADJUST_CFA note for unwind.  */
8088 gcc_assert (ix86_cfa_state->reg == src);
8089 ix86_cfa_state->offset += INTVAL (offset);
8090 ix86_cfa_state->reg = dest;
8092 r = gen_rtx_PLUS (Pmode, src, offset);
8093 r = gen_rtx_SET (VOIDmode, dest, r);
8094 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8095 RTX_FRAME_RELATED_P (insn) = 1;
/* STYLE < 0: caller wants the insn marked frame-related anyway.  */
8098 RTX_FRAME_RELATED_P (insn) = 1;
8101 /* Find an available register to be used as dynamic realign argument
8102 pointer register.  Such a register will be written in prologue and
8103 used in begin of body, so it must not be
8104 1. parameter passing register.
8106 We reuse static-chain register if it is available.  Otherwise, we
8107 use DI for i386 and R13 for x86-64.  We chose R13 since it has
8110 Return: the regno of chosen register.  */
8113 find_drap_reg (void)
8115 tree decl = cfun->decl;
8119 /* Use R13 for nested function or function need static chain.
8120 Since function with tail call may use any caller-saved
8121 registers in epilogue, DRAP must not use caller-saved
8122 register in such case.  */
8123 if ((decl_function_context (decl)
8124 && !DECL_NO_STATIC_CHAIN (decl))
8125 || crtl->tail_call_emit)
/* 32-bit path below mirrors the 64-bit logic with DI instead of R13.  */
8132 /* Use DI for nested function or function need static chain.
8133 Since function with tail call may use any caller-saved
8134 registers in epilogue, DRAP must not use caller-saved
8135 register in such case.  */
8136 if ((decl_function_context (decl)
8137 && !DECL_NO_STATIC_CHAIN (decl))
8138 || crtl->tail_call_emit)
8141 /* Reuse static chain register if it isn't used for parameter
/* regparm <= 2 and no fastcall means ECX is free for DRAP use.  */
8143 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8144 && !lookup_attribute ("fastcall",
8145 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8152 /* Update incoming stack boundary and estimated stack alignment.  */
8155 ix86_update_stack_boundary (void)
8157 /* Prefer the one specified at command line.  */
8158 ix86_incoming_stack_boundary
8159 = (ix86_user_incoming_stack_boundary
8160 ? ix86_user_incoming_stack_boundary
8161 : ix86_default_incoming_stack_boundary);
8163 /* Incoming stack alignment can be changed on individual functions
8164 via force_align_arg_pointer attribute.  We use the smallest
8165 incoming stack boundary.  */
8166 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8167 && lookup_attribute (ix86_force_align_arg_pointer_string,
8168 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8169 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8171 /* The incoming stack frame has to be aligned at least at
8172 parm_stack_boundary.  */
8173 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8174 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8176 /* Stack at entrance of main is aligned by runtime.  We use the
8177 smallest incoming stack boundary.  */
8178 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8179 && DECL_NAME (current_function_decl)
8180 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8181 && DECL_FILE_SCOPE_P (current_function_decl))
8182 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8184 /* x86_64 vararg needs 16byte stack alignment for register save
/* 128 bits = 16 bytes, required for the SSE register save area.  */
8188 && crtl->stack_alignment_estimated < 128)
8189 crtl->stack_alignment_estimated = 128;
8192 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
8193 needed or an rtx for DRAP otherwise.  */
8196 ix86_get_drap_rtx (void)
/* -mforce-drap or pushing outgoing args forces DRAP bookkeeping.  */
8198 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8199 crtl->need_drap = true;
8201 if (stack_realign_drap)
8203 /* Assign DRAP to vDRAP and returns vDRAP */
8204 unsigned int regno = find_drap_reg ();
8209 arg_ptr = gen_rtx_REG (Pmode, regno);
8210 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP into a pseudo (vDRAP) for use in the body.  */
8213 drap_vreg = copy_to_reg (arg_ptr);
/* Insert the copy right after the function entry so the DRAP value
   is captured before anything clobbers it.  */
8217 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8218 RTX_FRAME_RELATED_P (insn) = 1;
8225 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
8228 ix86_internal_arg_pointer (void)
/* Plain virtual incoming-args pointer; DRAP handling, if any,
   happens in ix86_get_drap_rtx.  */
8230 return virtual_incoming_args_rtx;
8233 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8234 to be generated in correct form.  */
8236 ix86_finalize_stack_realign_flags (void)
8238 /* Check if stack realign is really needed after reload, and
8239 stores result in cfun */
/* The effective incoming boundary is the larger of the parameter
   boundary and the incoming stack boundary.  */
8240 unsigned int incoming_stack_boundary
8241 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8242 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
/* Realign when the frame needs more alignment than the caller
   guarantees; leaf functions only need max_used_stack_slot_alignment.  */
8243 unsigned int stack_realign = (incoming_stack_boundary
8244 < (current_function_is_leaf
8245 ? crtl->max_used_stack_slot_alignment
8246 : crtl->stack_alignment_needed))
8248 if (crtl->stack_realign_finalized)
8250 /* After stack_realign_needed is finalized, we can no longer
8252 gcc_assert (crtl->stack_realign_needed == stack_realign);
8256 crtl->stack_realign_needed = stack_realign;
8257 crtl->stack_realign_finalized = true;
8261 /* Expand the prologue into a bunch of separate insns.  */
8264 ix86_expand_prologue (void)
8268 struct ix86_frame frame;
8269 HOST_WIDE_INT allocate;
8271 ix86_finalize_stack_realign_flags ();
8273 /* DRAP should not coexist with stack_realign_fp */
8274 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8276 /* Initialize CFA state for before the prologue.  */
8277 ix86_cfa_state->reg = stack_pointer_rtx;
8278 ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
8280 ix86_compute_frame_layout (&frame);
8282 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8283 of DRAP is needed and stack realignment is really needed after reload */
8284 if (crtl->drap_reg && crtl->stack_realign_needed)
8287 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
/* If the DRAP register is call-saved it must be pushed first, which
   shifts the argument pointer by one extra word.  */
8288 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8289 ? 0 : UNITS_PER_WORD);
8291 gcc_assert (stack_realign_drap);
8293 /* Grab the argument pointer.  */
8294 x = plus_constant (stack_pointer_rtx,
8295 (UNITS_PER_WORD + param_ptr_offset));
8298 /* Only need to push parameter pointer reg if it is caller
8300 if (!call_used_regs[REGNO (crtl->drap_reg)])
8302 /* Push arg pointer reg */
8303 insn = emit_insn (gen_push (y));
8304 RTX_FRAME_RELATED_P (insn) = 1;
8307 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8308 RTX_FRAME_RELATED_P (insn) = 1;
/* From here on the CFA is expressed relative to the DRAP register.  */
8309 ix86_cfa_state->reg = crtl->drap_reg;
8311 /* Align the stack.  */
8312 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8314 GEN_INT (-align_bytes)));
8315 RTX_FRAME_RELATED_P (insn) = 1;
8317 /* Replicate the return address on the stack so that return
8318 address can be reached via (argp - 1) slot.  This is needed
8319 to implement macro RETURN_ADDR_RTX and intrinsic function
8320 expand_builtin_return_addr etc.  */
8322 x = gen_frame_mem (Pmode,
8323 plus_constant (x, -UNITS_PER_WORD));
8324 insn = emit_insn (gen_push (x));
8325 RTX_FRAME_RELATED_P (insn) = 1;
8328 /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8329 slower on all targets.  Also sdb doesn't like it.  */
8331 if (frame_pointer_needed)
/* Standard frame-pointer setup: push %ebp; mov %esp, %ebp.  */
8333 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8334 RTX_FRAME_RELATED_P (insn) = 1;
8336 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8337 RTX_FRAME_RELATED_P (insn) = 1;
8339 if (ix86_cfa_state->reg == stack_pointer_rtx)
8340 ix86_cfa_state->reg = hard_frame_pointer_rtx;
8343 if (stack_realign_fp)
8345 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8346 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8348 /* Align the stack.  */
8349 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8351 GEN_INT (-align_bytes)));
8352 RTX_FRAME_RELATED_P (insn) = 1;
/* Bytes to allocate beyond the integer register save area.  */
8355 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8357 if (!frame.save_regs_using_mov)
8358 ix86_emit_save_regs ();
/* Registers saved with MOV are part of the allocation instead.  */
8360 allocate += frame.nregs * UNITS_PER_WORD;
8362 /* When using red zone we may start register saving before allocating
8363 the stack frame saving one cycle of the prologue.  However I will
8364 avoid doing this if I am going to have to probe the stack since
8365 at least on x86_64 the stack probe can turn into a call that clobbers
8366 a red zone location */
8367 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8368 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8369 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8370 && !crtl->stack_realign_needed)
8371 ? hard_frame_pointer_rtx
8372 : stack_pointer_rtx,
8373 -frame.nregs * UNITS_PER_WORD);
/* Small allocation: a single sub/lea on the stack pointer suffices.  */
8377 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8378 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8379 GEN_INT (-allocate), -1,
8380 ix86_cfa_state->reg == stack_pointer_rtx);
/* Large allocation with stack probing (Windows-style): go through
   the allocate_stack_worker pattern with the size in %eax/%rax.  */
8383 /* Only valid for Win32.  */
8384 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8388 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8390 if (cfun->machine->call_abi == MS_ABI)
/* If %eax carries an incoming argument, preserve it around the
   probe call by pushing it first.  */
8393 eax_live = ix86_eax_live_at_start_p ();
8397 emit_insn (gen_push (eax));
8398 allocate -= UNITS_PER_WORD;
8401 emit_move_insn (eax, GEN_INT (allocate));
8404 insn = gen_allocate_stack_worker_64 (eax, eax);
8406 insn = gen_allocate_stack_worker_32 (eax, eax);
8407 insn = emit_insn (insn);
8409 if (ix86_cfa_state->reg == stack_pointer_rtx)
8411 ix86_cfa_state->offset += allocate;
8412 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8413 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8414 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8415 RTX_FRAME_RELATED_P (insn) = 1;
/* Reload the saved %eax value from its stack slot.  */
8420 if (frame_pointer_needed)
8421 t = plus_constant (hard_frame_pointer_rtx,
8424 - frame.nregs * UNITS_PER_WORD);
8426 t = plus_constant (stack_pointer_rtx, allocate);
8427 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* MOV-style register saves that were not emitted early (no red
   zone, or stack probing was required).  */
8431 if (frame.save_regs_using_mov
8432 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8433 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8435 if (!frame_pointer_needed
8436 || !frame.to_allocate
8437 || crtl->stack_realign_needed)
8438 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8440 + frame.nsseregs * 16 + frame.padding0);
8442 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8443 -frame.nregs * UNITS_PER_WORD);
8445 if (!frame_pointer_needed
8446 || !frame.to_allocate
8447 || crtl->stack_realign_needed)
8448 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8451 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8452 - frame.nregs * UNITS_PER_WORD
8453 - frame.nsseregs * 16
/* Set up the PIC register (GOT pointer) if this function needs it.  */
8456 pic_reg_used = false;
8457 if (pic_offset_table_rtx
8458 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8461 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8463 if (alt_pic_reg_used != INVALID_REGNUM)
8464 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8466 pic_reg_used = true;
8473 if (ix86_cmodel == CM_LARGE_PIC)
/* Large PIC model: compute GOT address as %rip-label + offset.  */
8475 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8476 rtx label = gen_label_rtx ();
8478 LABEL_PRESERVE_P (label) = 1;
8479 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8480 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8481 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8482 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8483 pic_offset_table_rtx, tmp_reg));
8486 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8489 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8492 /* In the pic_reg_used case, make sure that the got load isn't deleted
8493 when mcount needs it.  Blockage to avoid call movement across mcount
8494 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8496 if (crtl->profile && pic_reg_used)
8497 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8499 if (crtl->drap_reg && !crtl->stack_realign_needed)
8501 /* vDRAP is setup but after reload it turns out stack realign
8502 isn't necessary, here we will emit prologue to setup DRAP
8503 without stack realign adjustment */
8504 int drap_bp_offset = UNITS_PER_WORD * 2;
8505 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8506 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8509 /* Prevent instructions from being scheduled into register save push
8510 sequence when access to the redzone area is done through frame pointer.
8511 The offset between the frame pointer and the stack pointer is calculated
8512 relative to the value of the stack pointer at the end of the function
8513 prologue, and moving instructions that access redzone area via frame
8514 pointer inside push sequence violates this assumption.  */
8515 if (frame_pointer_needed && frame.red_zone_size)
8516 emit_insn (gen_memory_blockage ());
8518 /* Emit cld instruction if stringops are used in the function.  */
8519 if (TARGET_CLD && ix86_current_function_needs_cld)
8520 emit_insn (gen_cld ())
8523 /* Emit code to restore REG using a POP insn.  */
8526 ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
8528 rtx insn = emit_insn (ix86_gen_pop1 (reg));
8530 if (ix86_cfa_state->reg == crtl->drap_reg
8531 && REGNO (reg) == REGNO (crtl->drap_reg))
8533 /* Previously we'd represented the CFA as an expression
8534 like *(%ebp - 8).  We've just popped that value from
8535 the stack, which means we need to reset the CFA to
8536 the drap register.  This will remain until we restore
8537 the stack pointer.  */
8538 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8539 RTX_FRAME_RELATED_P (insn) = 1;
/* Popping moves the stack pointer up a word; when the CFA is
   SP-relative the CFA offset must shrink accordingly.  */
8543 if (ix86_cfa_state->reg == stack_pointer_rtx)
8545 ix86_cfa_state->offset -= UNITS_PER_WORD;
8546 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8547 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
8548 RTX_FRAME_RELATED_P (insn) = 1;
8551 /* When the frame pointer is the CFA, and we pop it, we are
8552 swapping back to the stack pointer as the CFA.  This happens
8553 for stack frames that don't allocate other data, so we assume
8554 the stack pointer is now pointing at the return address, i.e.
8555 the function entry state, which makes the offset be 1 word.  */
8556 else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
8557 && reg == hard_frame_pointer_rtx)
8559 ix86_cfa_state->reg = stack_pointer_rtx;
8560 ix86_cfa_state->offset = UNITS_PER_WORD;
8562 add_reg_note (insn, REG_CFA_DEF_CFA,
8563 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8564 GEN_INT (UNITS_PER_WORD)));
8565 RTX_FRAME_RELATED_P (insn) = 1;
/* Record (or queue) the REG_CFA_RESTORE note for this register.  */
8568 ix86_add_cfa_restore_note (insn, reg, red_offset);
8571 /* Emit code to restore saved registers using POP insns.  */
8574 ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
/* Pop in ascending regno order — the reverse of the descending
   push order used by ix86_emit_save_regs.  */
8578 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8579 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8581 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
/* Track the red-zone-relative offset of each restored slot.  */
8583 red_offset += UNITS_PER_WORD;
8587 /* Emit code and notes for the LEAVE instruction.  */
8590 ix86_emit_leave (HOST_WIDE_INT red_offset)
8592 rtx insn = emit_insn (ix86_gen_leave ());
/* LEAVE adjusts the stack, so flush any queued restore notes here.  */
8594 ix86_add_queued_cfa_restore_notes (insn);
8596 if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
/* LEAVE both restores %ebp and moves %esp; describe the CFA change
   and the frame pointer restore for the unwinder.  */
8598 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8599 copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
8600 RTX_FRAME_RELATED_P (insn) = 1;
8601 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
8605 /* Emit code to restore saved registers using MOV insns.  First register
8606 is restored from POINTER + OFFSET.  */
8608 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8609 HOST_WIDE_INT red_offset,
8610 int maybe_eh_return)
8613 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8616 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8617 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8619 rtx reg = gen_rtx_REG (Pmode, regno);
8621 /* Ensure that adjust_address won't be forced to produce pointer
8622 out of range allowed by x86-64 instruction set.  */
8623 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset exceeds the signed 32-bit displacement range: rebase the
   address in r11 so remaining offsets are small.  */
8627 r11 = gen_rtx_REG (DImode, R11_REG);
8628 emit_move_insn (r11, GEN_INT (offset));
8629 emit_insn (gen_adddi3 (r11, r11, pointer));
8630 base_address = gen_rtx_MEM (Pmode, r11);
8633 insn = emit_move_insn (reg,
8634 adjust_address (base_address, Pmode, offset));
8635 offset += UNITS_PER_WORD;
8637 if (ix86_cfa_state->reg == crtl->drap_reg
8638 && regno == REGNO (crtl->drap_reg))
8640 /* Previously we'd represented the CFA as an expression
8641 like *(%ebp - 8).  We've just reloaded that value from
8642 the stack, which means we need to reset the CFA to
8643 the drap register.  This will remain until we restore
8644 the stack pointer.  */
8645 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8646 RTX_FRAME_RELATED_P (insn) = 1;
/* Queue the restore note; the stack pointer has not moved yet.  */
8649 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8651 red_offset += UNITS_PER_WORD;
8655 /* Emit code to restore saved SSE registers using MOV insns.  First register
8656 is restored from POINTER + OFFSET.  */
8658 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8659 HOST_WIDE_INT red_offset,
8660 int maybe_eh_return)
8663 rtx base_address = gen_rtx_MEM (TImode, pointer);
8666 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8667 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8669 rtx reg = gen_rtx_REG (TImode, regno);
8671 /* Ensure that adjust_address won't be forced to produce pointer
8672 out of range allowed by x86-64 instruction set.  */
8673 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Rebase in r11 when the displacement doesn't fit in 32 bits.  */
8677 r11 = gen_rtx_REG (DImode, R11_REG);
8678 emit_move_insn (r11, GEN_INT (offset));
8679 emit_insn (gen_adddi3 (r11, r11, pointer));
8680 base_address = gen_rtx_MEM (TImode, r11);
/* SSE slots are 16 bytes and 128-bit aligned, matching the saves.  */
8683 mem = adjust_address (base_address, TImode, offset);
8684 set_mem_align (mem, 128);
8685 insn = emit_move_insn (reg, mem);
8688 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8694 /* Restore function stack, frame, and registers.  */
8697 ix86_expand_epilogue (int style)
8700 struct ix86_frame frame;
8701 HOST_WIDE_INT offset, red_offset;
/* Saved so sibcall/eh epilogues can rewind CFA bookkeeping at the end.  */
8702 struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
8705 ix86_finalize_stack_realign_flags ();
8707 /* When stack is realigned, SP must be valid.  */
8708 sp_valid = (!frame_pointer_needed
8709 || current_function_sp_is_unchanging
8710 || stack_realign_fp);
8712 ix86_compute_frame_layout (&frame);
8714 /* See the comment about red zone and frame
8715 pointer usage in ix86_expand_prologue.  */
8716 if (frame_pointer_needed && frame.red_zone_size)
8717 emit_insn (gen_memory_blockage ());
8719 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
8720 gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
8722 /* Calculate start of saved registers relative to ebp.  Special care
8723 must be taken for the normal return case of a function using
8724 eh_return: the eax and edx registers are marked as saved, but not
8725 restored along this path.  */
8726 offset = frame.nregs;
8727 if (crtl->calls_eh_return && style != 2)
8729 offset *= -UNITS_PER_WORD;
8730 offset -= frame.nsseregs * 16 + frame.padding0;
8732 /* Calculate start of saved registers relative to esp on entry of the
8733 function.  When realigning stack, this needs to be the most negative
8734 value possible at runtime.  */
8735 red_offset = offset;
8737 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8739 else if (stack_realign_fp)
8740 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8742 if (frame_pointer_needed)
8743 red_offset -= UNITS_PER_WORD;
8745 /* If we're only restoring one register and sp is not valid then
8746 use a move instruction to restore the register since it's
8747 less work than reloading sp and popping the register.
8749 The default code results in stack adjustment using add/lea instruction,
8750 while this code results in LEAVE instruction (or discrete equivalent),
8751 so it is profitable in some other cases as well.  Especially when there
8752 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
8753 and there is exactly one register to pop.  This heuristic may need some
8754 tuning in future.  */
8755 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8756 || (TARGET_EPILOGUE_USING_MOVE
8757 && cfun->machine->use_fast_prologue_epilogue
8758 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8759 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
8760 && frame.to_allocate)
8761 || (frame_pointer_needed && TARGET_USE_LEAVE
8762 && cfun->machine->use_fast_prologue_epilogue
8763 && (frame.nregs + frame.nsseregs) == 1)
8764 || crtl->calls_eh_return)
8766 /* Restore registers.  We can use ebp or esp to address the memory
8767 locations.  If both are available, default to ebp, since offsets
8768 are known to be small.  Only exception is esp pointing directly
8769 to the end of block of saved registers, where we may simplify
8772 If we are realigning stack with bp and sp, regs restore can't
8773 be addressed by bp.  sp must be used instead.  */
8775 if (!frame_pointer_needed
8776 || (sp_valid && !frame.to_allocate)
8777 || stack_realign_fp)
8779 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8780 frame.to_allocate, red_offset,
8782 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8784 + frame.nsseregs * 16
8787 + frame.nsseregs * 16
8788 + frame.padding0, style == 2);
/* Frame-pointer-relative restores: offsets are negative from %ebp.  */
8792 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8795 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8797 + frame.nsseregs * 16
8800 + frame.nsseregs * 16
8801 + frame.padding0, style == 2);
8804 red_offset -= offset;
8806 /* eh_return epilogues need %ecx added to the stack pointer.  */
8809 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8811 /* Stack align doesn't work with eh_return.  */
8812 gcc_assert (!crtl->stack_realign_needed);
8814 if (frame_pointer_needed)
8816 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8817 tmp = plus_constant (tmp, UNITS_PER_WORD);
8818 tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8820 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8821 tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
8823 /* Note that we use SA as a temporary CFA, as the return
8824 address is at the proper place relative to it.  We
8825 pretend this happens at the FP restore insn because
8826 prior to this insn the FP would be stored at the wrong
8827 offset relative to SA, and after this insn we have no
8828 other reasonable register to use for the CFA.  We don't
8829 bother resetting the CFA to the SP for the duration of
8831 add_reg_note (tmp, REG_CFA_DEF_CFA,
8832 plus_constant (sa, UNITS_PER_WORD));
8833 ix86_add_queued_cfa_restore_notes (tmp);
8834 add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8835 RTX_FRAME_RELATED_P (tmp) = 1;
8836 ix86_cfa_state->reg = sa;
8837 ix86_cfa_state->offset = UNITS_PER_WORD;
8839 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8840 const0_rtx, style, false);
/* No frame pointer: add the whole frame size plus SA to %esp.  */
8844 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8845 tmp = plus_constant (tmp, (frame.to_allocate
8846 + frame.nregs * UNITS_PER_WORD
8847 + frame.nsseregs * 16
8849 tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8850 ix86_add_queued_cfa_restore_notes (tmp);
8852 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
8853 if (ix86_cfa_state->offset != UNITS_PER_WORD)
8855 ix86_cfa_state->offset = UNITS_PER_WORD;
8856 add_reg_note (tmp, REG_CFA_DEF_CFA,
8857 plus_constant (stack_pointer_rtx,
8859 RTX_FRAME_RELATED_P (tmp) = 1;
8863 else if (!frame_pointer_needed)
8864 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8865 GEN_INT (frame.to_allocate
8866 + frame.nregs * UNITS_PER_WORD
8867 + frame.nsseregs * 16
8869 style, !using_drap);
8870 /* If not an i386, mov & pop is faster than "leave".  */
8871 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8872 || !cfun->machine->use_fast_prologue_epilogue)
8873 ix86_emit_leave (red_offset);
/* Discrete equivalent of LEAVE: mov %ebp,%esp; pop %ebp.  */
8876 pro_epilogue_adjust_stack (stack_pointer_rtx,
8877 hard_frame_pointer_rtx,
8878 const0_rtx, style, !using_drap);
8880 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
8885 /* First step is to deallocate the stack frame so that we can
8888 If we realign stack with frame pointer, then stack pointer
8889 won't be able to recover via lea $offset(%bp), %sp, because
8890 there is a padding area between bp and sp for realign.
8891 "add $to_allocate, %sp" must be used instead.  */
8894 gcc_assert (frame_pointer_needed);
8895 gcc_assert (!stack_realign_fp);
8896 pro_epilogue_adjust_stack (stack_pointer_rtx,
8897 hard_frame_pointer_rtx,
8898 GEN_INT (offset), style, false);
8899 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8900 frame.to_allocate, red_offset,
8902 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8903 GEN_INT (frame.nsseregs * 16),
8906 else if (frame.to_allocate || frame.nsseregs)
8908 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8909 frame.to_allocate, red_offset,
8911 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8912 GEN_INT (frame.to_allocate
8913 + frame.nsseregs * 16
8914 + frame.padding0), style,
8915 !using_drap && !frame_pointer_needed);
/* Integer registers come off the stack with plain POPs.  */
8918 ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
8920 red_offset -= offset;
8922 if (frame_pointer_needed)
8924 /* Leave results in shorter dependency chains on CPUs that are
8925 able to grok it fast.  */
8926 if (TARGET_USE_LEAVE)
8927 ix86_emit_leave (red_offset);
8930 /* When stack realignment really happens, recovering the stack
8931 pointer to hard frame pointer is a must, if not using
8933 if (stack_realign_fp)
8934 pro_epilogue_adjust_stack (stack_pointer_rtx,
8935 hard_frame_pointer_rtx,
8936 const0_rtx, style, !using_drap);
8937 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
/* DRAP epilogue: recover %esp from the DRAP register and restore
   the DRAP register itself if it was call-saved.  */
8945 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8946 ? 0 : UNITS_PER_WORD);
8949 gcc_assert (stack_realign_drap);
8951 insn = emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8953 GEN_INT (-(UNITS_PER_WORD
8954 + param_ptr_offset))));
8956 ix86_cfa_state->reg = stack_pointer_rtx;
8957 ix86_cfa_state->offset = UNITS_PER_WORD + param_ptr_offset;
8959 add_reg_note (insn, REG_CFA_DEF_CFA,
8960 gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
8961 GEN_INT (ix86_cfa_state->offset)));
8962 RTX_FRAME_RELATED_P (insn) = 1;
8964 if (param_ptr_offset)
8965 ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
8968 /* Sibcall epilogues don't want a return instruction.  */
/* Rewind the CFA state: the sibcall path never executes a return.  */
8971 *ix86_cfa_state = cfa_state_save;
8975 if (crtl->args.pops_args && crtl->args.size)
8977 rtx popc = GEN_INT (crtl->args.pops_args);
8979 /* i386 can only pop 64K bytes.  If asked to pop more, pop return
8980 address, do explicit add, and jump indirectly to the caller.  */
8982 if (crtl->args.pops_args >= 65536)
8984 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8987 /* There is no "pascal" calling convention in any 64bit ABI.  */
8988 gcc_assert (!TARGET_64BIT);
8990 insn = emit_insn (gen_popsi1 (ecx));
8991 ix86_cfa_state->offset -= UNITS_PER_WORD;
8993 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8994 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
8995 add_reg_note (insn, REG_CFA_REGISTER,
8996 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
8997 RTX_FRAME_RELATED_P (insn) = 1;
8999 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9001 emit_jump_insn (gen_return_indirect_internal (ecx));
/* <= 64K: use "ret $n" to pop the arguments.  */
9004 emit_jump_insn (gen_return_pop_internal (popc));
9007 emit_jump_insn (gen_return_internal ());
9009 /* Restore the state back to the state from the prologue,
9010 so that it's correct for the next epilogue.  */
9011 *ix86_cfa_state = cfa_state_save;
9014 /* Reset from the function's potential modifications.  */
9017 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
9018 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
/* Undo any alternate PIC register selection made by the prologue.  */
9020 if (pic_offset_table_rtx)
9021 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM)
9023 /* Mach-O doesn't support labels at the end of objects, so if
9024 it looks like we might want one, insert a NOP.  */
9026 rtx insn = get_last_insn ();
/* Skip trailing deleted-label notes when scanning backwards.  */
9029 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9030 insn = PREV_INSN (insn);
9034 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
9035 fputs ("\tnop\n", file);
9041 /* Extract the parts of an RTL expression that is a valid memory address
9042 for an instruction.  Return 0 if the structure of the address is
9043 grossly off.  Return -1 if the address contains ASHIFT, so it is not
9044 strictly valid, but still used for computing length of lea instruction.  */
9047 ix86_decompose_address (rtx addr, struct ix86_address *out)
9049 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9050 rtx base_reg, index_reg;
9051 HOST_WIDE_INT scale = 1;
9052 rtx scale_rtx = NULL_RTX;
9054 enum ix86_address_seg seg = SEG_DEFAULT;
/* Bare register (or SUBREG of one): it is the base, nothing else.  */
9056 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
9058 else if (GET_CODE (addr) == PLUS)
/* Flatten the (possibly nested) PLUS tree into an addend list.  */
9068 addends[n++] = XEXP (op, 1);
9071 while (GET_CODE (op) == PLUS);
/* Classify each addend as index*scale, segment unspec, base, or
   displacement.  */
9076 for (i = n; i >= 0; --i)
9079 switch (GET_CODE (op))
9084 index = XEXP (op, 0);
9085 scale_rtx = XEXP (op, 1);
/* TLS segment override (%fs on 64-bit, %gs on 32-bit).  */
9089 if (XINT (op, 1) == UNSPEC_TP
9090 && TARGET_TLS_DIRECT_SEG_REFS
9091 && seg == SEG_DEFAULT)
9092 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
9121 else if (GET_CODE (addr) == MULT)
9123 index = XEXP (addr, 0); /* index*scale */
9124 scale_rtx = XEXP (addr, 1);
9126 else if (GET_CODE (addr) == ASHIFT)
9130 /* We're called for lea too, which implements ashift on occasion.  */
9131 index = XEXP (addr, 0);
9132 tmp = XEXP (addr, 1);
9133 if (!CONST_INT_P (tmp))
9135 scale = INTVAL (tmp);
/* Shift counts above 3 cannot be represented as a scale (1/2/4/8).  */
9136 if ((unsigned HOST_WIDE_INT) scale > 3)
9142 disp = addr; /* displacement */
9144 /* Extract the integral value of scale.  */
9147 if (!CONST_INT_P (scale_rtx))
9149 scale = INTVAL (scale_rtx);
9152 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
9153 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
9155 /* Avoid useless 0 displacement.  */
9156 if (disp == const0_rtx && (base || index))
9159 /* Allow arg pointer and stack pointer as index if there is not scaling.  */
9160 if (base_reg && index_reg && scale == 1
9161 && (index_reg == arg_pointer_rtx
9162 || index_reg == frame_pointer_rtx
9163 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* Swap so the special pointer ends up in the base position.  */
9166 tmp = base, base = index, index = tmp;
9167 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
9170 /* Special case: %ebp cannot be encoded as a base without a displacement.
9174 && (base_reg == hard_frame_pointer_rtx
9175 || base_reg == frame_pointer_rtx
9176 || base_reg == arg_pointer_rtx
9177 || (REG_P (base_reg)
9178 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
9179 || REGNO (base_reg) == R13_REG))))
9182 /* Special case: on K6, [%esi] makes the instruction vector decoded.
9183 Avoid this by transforming to [%esi+0].
9184 Reload calls address legitimization without cfun defined, so we need
9185 to test cfun for being non-NULL.  */
9186 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9187 && base_reg && !index_reg && !disp
9189 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
9192 /* Special case: encode reg+reg instead of reg*2.  */
9193 if (!base && index && scale == 2)
9194 base = index, base_reg = index_reg, scale = 1;
9196 /* Special case: scaling cannot be encoded without base or displacement.  */
9197 if (!base && !disp && index && scale != 1)
9209 /* Return cost of the memory address x.
9210 For i386, it is better to use a complex address than let gcc copy
9211 the address into a reg and make a new pseudo. But not if the address
9212 requires to two regs - that would mean more pseudos with longer
9215 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
9217 struct ix86_address parts;
9219 int ok = ix86_decompose_address (x, &parts);
9223 if (parts.base && GET_CODE (parts.base) == SUBREG)
9224 parts.base = SUBREG_REG (parts.base);
9225 if (parts.index && GET_CODE (parts.index) == SUBREG)
9226 parts.index = SUBREG_REG (parts.index);
9228 /* Attempt to minimize number of registers in the address. */
9230 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
9232 && (!REG_P (parts.index)
9233 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
9237 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9239 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9240 && parts.base != parts.index)
9243 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
9244 since it's predecode logic can't detect the length of instructions
9245 and it degenerates to vector decoded. Increase cost of such
9246 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
9247 to split such addresses or even refuse such addresses at all.
9249 Following addressing modes are affected:
9254 The first and last case may be avoidable by explicitly coding the zero in
9255 memory address, but I don't have AMD-K6 machine handy to check this
9259 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9260 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9261 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9267 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9268 this is used for to form addresses to local data when -fPIC is in
9272 darwin_local_data_pic (rtx disp)
9274 return (GET_CODE (disp) == UNSPEC
9275 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9278 /* Determine if a given RTX is a valid constant. We already know this
9279 satisfies CONSTANT_P. */
9282 legitimate_constant_p (rtx x)
9284 switch (GET_CODE (x))
9289 if (GET_CODE (x) == PLUS)
9291 if (!CONST_INT_P (XEXP (x, 1)))
9296 if (TARGET_MACHO && darwin_local_data_pic (x))
9299 /* Only some unspecs are valid as "constants". */
9300 if (GET_CODE (x) == UNSPEC)
9301 switch (XINT (x, 1))
9306 return TARGET_64BIT;
9309 x = XVECEXP (x, 0, 0);
9310 return (GET_CODE (x) == SYMBOL_REF
9311 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9313 x = XVECEXP (x, 0, 0);
9314 return (GET_CODE (x) == SYMBOL_REF
9315 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9320 /* We must have drilled down to a symbol. */
9321 if (GET_CODE (x) == LABEL_REF)
9323 if (GET_CODE (x) != SYMBOL_REF)
9328 /* TLS symbols are never valid. */
9329 if (SYMBOL_REF_TLS_MODEL (x))
9332 /* DLLIMPORT symbols are never valid. */
9333 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9334 && SYMBOL_REF_DLLIMPORT_P (x))
9339 if (GET_MODE (x) == TImode
9340 && x != CONST0_RTX (TImode)
9346 if (!standard_sse_constant_p (x))
9353 /* Otherwise we handle everything else in the move patterns. */
9357 /* Determine if it's legal to put X into the constant pool. This
9358 is not possible for the address of thread-local symbols, which
9359 is checked above. */
9362 ix86_cannot_force_const_mem (rtx x)
9364 /* We can always put integral constants and vectors in memory. */
9365 switch (GET_CODE (x))
9375 return !legitimate_constant_p (x);
9379 /* Nonzero if the constant value X is a legitimate general operand
9380 when generating PIC code. It is given that flag_pic is on and
9381 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
9384 legitimate_pic_operand_p (rtx x)
9388 switch (GET_CODE (x))
9391 inner = XEXP (x, 0);
9392 if (GET_CODE (inner) == PLUS
9393 && CONST_INT_P (XEXP (inner, 1)))
9394 inner = XEXP (inner, 0);
9396 /* Only some unspecs are valid as "constants". */
9397 if (GET_CODE (inner) == UNSPEC)
9398 switch (XINT (inner, 1))
9403 return TARGET_64BIT;
9405 x = XVECEXP (inner, 0, 0);
9406 return (GET_CODE (x) == SYMBOL_REF
9407 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9408 case UNSPEC_MACHOPIC_OFFSET:
9409 return legitimate_pic_address_disp_p (x);
9417 return legitimate_pic_address_disp_p (x);
9424 /* Determine if a given CONST RTX is a valid memory displacement
9428 legitimate_pic_address_disp_p (rtx disp)
9432 /* In 64bit mode we can allow direct addresses of symbols and labels
9433 when they are not dynamic symbols. */
9436 rtx op0 = disp, op1;
9438 switch (GET_CODE (disp))
9444 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9446 op0 = XEXP (XEXP (disp, 0), 0);
9447 op1 = XEXP (XEXP (disp, 0), 1);
9448 if (!CONST_INT_P (op1)
9449 || INTVAL (op1) >= 16*1024*1024
9450 || INTVAL (op1) < -16*1024*1024)
9452 if (GET_CODE (op0) == LABEL_REF)
9454 if (GET_CODE (op0) != SYMBOL_REF)
9459 /* TLS references should always be enclosed in UNSPEC. */
9460 if (SYMBOL_REF_TLS_MODEL (op0))
9462 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9463 && ix86_cmodel != CM_LARGE_PIC)
9471 if (GET_CODE (disp) != CONST)
9473 disp = XEXP (disp, 0);
9477 /* We are unsafe to allow PLUS expressions. This limit allowed distance
9478 of GOT tables. We should not need these anyway. */
9479 if (GET_CODE (disp) != UNSPEC
9480 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9481 && XINT (disp, 1) != UNSPEC_GOTOFF
9482 && XINT (disp, 1) != UNSPEC_PLTOFF))
9485 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9486 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9492 if (GET_CODE (disp) == PLUS)
9494 if (!CONST_INT_P (XEXP (disp, 1)))
9496 disp = XEXP (disp, 0);
9500 if (TARGET_MACHO && darwin_local_data_pic (disp))
9503 if (GET_CODE (disp) != UNSPEC)
9506 switch (XINT (disp, 1))
9511 /* We need to check for both symbols and labels because VxWorks loads
9512 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9514 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9515 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9517 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9518 While ABI specify also 32bit relocation but we don't produce it in
9519 small PIC model at all. */
9520 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9521 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9523 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9525 case UNSPEC_GOTTPOFF:
9526 case UNSPEC_GOTNTPOFF:
9527 case UNSPEC_INDNTPOFF:
9530 disp = XVECEXP (disp, 0, 0);
9531 return (GET_CODE (disp) == SYMBOL_REF
9532 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9534 disp = XVECEXP (disp, 0, 0);
9535 return (GET_CODE (disp) == SYMBOL_REF
9536 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9538 disp = XVECEXP (disp, 0, 0);
9539 return (GET_CODE (disp) == SYMBOL_REF
9540 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9546 /* Recognizes RTL expressions that are valid memory addresses for an
9547 instruction. The MODE argument is the machine mode for the MEM
9548 expression that wants to use this address.
9550 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9551 convert common non-canonical forms to canonical form so that they will
9555 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9556 rtx addr, bool strict)
9558 struct ix86_address parts;
9559 rtx base, index, disp;
9560 HOST_WIDE_INT scale;
9561 const char *reason = NULL;
9562 rtx reason_rtx = NULL_RTX;
9564 if (ix86_decompose_address (addr, &parts) <= 0)
9566 reason = "decomposition failed";
9571 index = parts.index;
9573 scale = parts.scale;
9575 /* Validate base register.
9577 Don't allow SUBREG's that span more than a word here. It can lead to spill
9578 failures when the base is one word out of a two word structure, which is
9579 represented internally as a DImode int. */
9588 else if (GET_CODE (base) == SUBREG
9589 && REG_P (SUBREG_REG (base))
9590 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9592 reg = SUBREG_REG (base);
9595 reason = "base is not a register";
9599 if (GET_MODE (base) != Pmode)
9601 reason = "base is not in Pmode";
9605 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9606 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9608 reason = "base is not valid";
9613 /* Validate index register.
9615 Don't allow SUBREG's that span more than a word here -- same as above. */
9624 else if (GET_CODE (index) == SUBREG
9625 && REG_P (SUBREG_REG (index))
9626 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9628 reg = SUBREG_REG (index);
9631 reason = "index is not a register";
9635 if (GET_MODE (index) != Pmode)
9637 reason = "index is not in Pmode";
9641 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9642 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9644 reason = "index is not valid";
9649 /* Validate scale factor. */
9652 reason_rtx = GEN_INT (scale);
9655 reason = "scale without index";
9659 if (scale != 2 && scale != 4 && scale != 8)
9661 reason = "scale is not a valid multiplier";
9666 /* Validate displacement. */
9671 if (GET_CODE (disp) == CONST
9672 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9673 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9674 switch (XINT (XEXP (disp, 0), 1))
9676 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9677 used. While ABI specify also 32bit relocations, we don't produce
9678 them at all and use IP relative instead. */
9681 gcc_assert (flag_pic);
9683 goto is_legitimate_pic;
9684 reason = "64bit address unspec";
9687 case UNSPEC_GOTPCREL:
9688 gcc_assert (flag_pic);
9689 goto is_legitimate_pic;
9691 case UNSPEC_GOTTPOFF:
9692 case UNSPEC_GOTNTPOFF:
9693 case UNSPEC_INDNTPOFF:
9699 reason = "invalid address unspec";
9703 else if (SYMBOLIC_CONST (disp)
9707 && MACHOPIC_INDIRECT
9708 && !machopic_operand_p (disp)
9714 if (TARGET_64BIT && (index || base))
9716 /* foo@dtpoff(%rX) is ok. */
9717 if (GET_CODE (disp) != CONST
9718 || GET_CODE (XEXP (disp, 0)) != PLUS
9719 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9720 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9721 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9722 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9724 reason = "non-constant pic memory reference";
9728 else if (! legitimate_pic_address_disp_p (disp))
9730 reason = "displacement is an invalid pic construct";
9734 /* This code used to verify that a symbolic pic displacement
9735 includes the pic_offset_table_rtx register.
9737 While this is good idea, unfortunately these constructs may
9738 be created by "adds using lea" optimization for incorrect
9747 This code is nonsensical, but results in addressing
9748 GOT table with pic_offset_table_rtx base. We can't
9749 just refuse it easily, since it gets matched by
9750 "addsi3" pattern, that later gets split to lea in the
9751 case output register differs from input. While this
9752 can be handled by separate addsi pattern for this case
9753 that never results in lea, this seems to be easier and
9754 correct fix for crash to disable this test. */
9756 else if (GET_CODE (disp) != LABEL_REF
9757 && !CONST_INT_P (disp)
9758 && (GET_CODE (disp) != CONST
9759 || !legitimate_constant_p (disp))
9760 && (GET_CODE (disp) != SYMBOL_REF
9761 || !legitimate_constant_p (disp)))
9763 reason = "displacement is not constant";
9766 else if (TARGET_64BIT
9767 && !x86_64_immediate_operand (disp, VOIDmode))
9769 reason = "displacement is out of range";
9774 /* Everything looks valid. */
9781 /* Determine if a given RTX is a valid constant address. */
9784 constant_address_p (rtx x)
9786 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
9789 /* Return a unique alias set for the GOT. */
9791 static alias_set_type
9792 ix86_GOT_alias_set (void)
9794 static alias_set_type set = -1;
9796 set = new_alias_set ();
9800 /* Return a legitimate reference for ORIG (an address) using the
9801 register REG. If REG is 0, a new pseudo is generated.
9803 There are two types of references that must be handled:
9805 1. Global data references must load the address from the GOT, via
9806 the PIC reg. An insn is emitted to do this load, and the reg is
9809 2. Static data references, constant pool addresses, and code labels
9810 compute the address as an offset from the GOT, whose base is in
9811 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9812 differentiate them from global data objects. The returned
9813 address is the PIC reg + an unspec constant.
9815 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
9816 reg also appears in the address. */
9819 legitimize_pic_address (rtx orig, rtx reg)
9826 if (TARGET_MACHO && !TARGET_64BIT)
9829 reg = gen_reg_rtx (Pmode);
9830 /* Use the generic Mach-O PIC machinery. */
9831 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9835 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9837 else if (TARGET_64BIT
9838 && ix86_cmodel != CM_SMALL_PIC
9839 && gotoff_operand (addr, Pmode))
9842 /* This symbol may be referenced via a displacement from the PIC
9843 base address (@GOTOFF). */
9845 if (reload_in_progress)
9846 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9847 if (GET_CODE (addr) == CONST)
9848 addr = XEXP (addr, 0);
9849 if (GET_CODE (addr) == PLUS)
9851 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9853 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9856 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9857 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9859 tmpreg = gen_reg_rtx (Pmode);
9862 emit_move_insn (tmpreg, new_rtx);
9866 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9867 tmpreg, 1, OPTAB_DIRECT);
9870 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9872 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9874 /* This symbol may be referenced via a displacement from the PIC
9875 base address (@GOTOFF). */
9877 if (reload_in_progress)
9878 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9879 if (GET_CODE (addr) == CONST)
9880 addr = XEXP (addr, 0);
9881 if (GET_CODE (addr) == PLUS)
9883 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9885 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9888 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9889 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9890 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9894 emit_move_insn (reg, new_rtx);
9898 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9899 /* We can't use @GOTOFF for text labels on VxWorks;
9900 see gotoff_operand. */
9901 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9903 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9905 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9906 return legitimize_dllimport_symbol (addr, true);
9907 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9908 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9909 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9911 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9912 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
9916 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9918 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9919 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9920 new_rtx = gen_const_mem (Pmode, new_rtx);
9921 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9924 reg = gen_reg_rtx (Pmode);
9925 /* Use directly gen_movsi, otherwise the address is loaded
9926 into register for CSE. We don't want to CSE this addresses,
9927 instead we CSE addresses from the GOT table, so skip this. */
9928 emit_insn (gen_movsi (reg, new_rtx));
9933 /* This symbol must be referenced via a load from the
9934 Global Offset Table (@GOT). */
9936 if (reload_in_progress)
9937 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9938 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9939 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9941 new_rtx = force_reg (Pmode, new_rtx);
9942 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9943 new_rtx = gen_const_mem (Pmode, new_rtx);
9944 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9947 reg = gen_reg_rtx (Pmode);
9948 emit_move_insn (reg, new_rtx);
9954 if (CONST_INT_P (addr)
9955 && !x86_64_immediate_operand (addr, VOIDmode))
9959 emit_move_insn (reg, addr);
9963 new_rtx = force_reg (Pmode, addr);
9965 else if (GET_CODE (addr) == CONST)
9967 addr = XEXP (addr, 0);
9969 /* We must match stuff we generate before. Assume the only
9970 unspecs that can get here are ours. Not that we could do
9971 anything with them anyway.... */
9972 if (GET_CODE (addr) == UNSPEC
9973 || (GET_CODE (addr) == PLUS
9974 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9976 gcc_assert (GET_CODE (addr) == PLUS);
9978 if (GET_CODE (addr) == PLUS)
9980 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9982 /* Check first to see if this is a constant offset from a @GOTOFF
9983 symbol reference. */
9984 if (gotoff_operand (op0, Pmode)
9985 && CONST_INT_P (op1))
9989 if (reload_in_progress)
9990 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9991 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9993 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9994 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9995 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9999 emit_move_insn (reg, new_rtx);
10005 if (INTVAL (op1) < -16*1024*1024
10006 || INTVAL (op1) >= 16*1024*1024)
10008 if (!x86_64_immediate_operand (op1, Pmode))
10009 op1 = force_reg (Pmode, op1);
10010 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
10016 base = legitimize_pic_address (XEXP (addr, 0), reg);
10017 new_rtx = legitimize_pic_address (XEXP (addr, 1),
10018 base == reg ? NULL_RTX : reg);
10020 if (CONST_INT_P (new_rtx))
10021 new_rtx = plus_constant (base, INTVAL (new_rtx));
10024 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
10026 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
10027 new_rtx = XEXP (new_rtx, 1);
10029 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
10037 /* Load the thread pointer. If TO_REG is true, force it into a register. */
10040 get_thread_pointer (int to_reg)
10044 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
10048 reg = gen_reg_rtx (Pmode);
10049 insn = gen_rtx_SET (VOIDmode, reg, tp);
10050 insn = emit_insn (insn);
10055 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
10056 false if we expect this to be used for a memory address and true if
10057 we expect to load the address into a register. */
10060 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
10062 rtx dest, base, off, pic, tp;
10067 case TLS_MODEL_GLOBAL_DYNAMIC:
10068 dest = gen_reg_rtx (Pmode);
10069 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10071 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10073 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
10076 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
10077 insns = get_insns ();
10080 RTL_CONST_CALL_P (insns) = 1;
10081 emit_libcall_block (insns, dest, rax, x);
10083 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10084 emit_insn (gen_tls_global_dynamic_64 (dest, x));
10086 emit_insn (gen_tls_global_dynamic_32 (dest, x));
10088 if (TARGET_GNU2_TLS)
10090 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
10092 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10096 case TLS_MODEL_LOCAL_DYNAMIC:
10097 base = gen_reg_rtx (Pmode);
10098 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10100 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10102 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
10105 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
10106 insns = get_insns ();
10109 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
10110 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
10111 RTL_CONST_CALL_P (insns) = 1;
10112 emit_libcall_block (insns, base, rax, note);
10114 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10115 emit_insn (gen_tls_local_dynamic_base_64 (base));
10117 emit_insn (gen_tls_local_dynamic_base_32 (base));
10119 if (TARGET_GNU2_TLS)
10121 rtx x = ix86_tls_module_base ();
10123 set_unique_reg_note (get_last_insn (), REG_EQUIV,
10124 gen_rtx_MINUS (Pmode, x, tp));
10127 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
10128 off = gen_rtx_CONST (Pmode, off);
10130 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
10132 if (TARGET_GNU2_TLS)
10134 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
10136 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10141 case TLS_MODEL_INITIAL_EXEC:
10145 type = UNSPEC_GOTNTPOFF;
10149 if (reload_in_progress)
10150 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10151 pic = pic_offset_table_rtx;
10152 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
10154 else if (!TARGET_ANY_GNU_TLS)
10156 pic = gen_reg_rtx (Pmode);
10157 emit_insn (gen_set_got (pic));
10158 type = UNSPEC_GOTTPOFF;
10163 type = UNSPEC_INDNTPOFF;
10166 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
10167 off = gen_rtx_CONST (Pmode, off);
10169 off = gen_rtx_PLUS (Pmode, pic, off);
10170 off = gen_const_mem (Pmode, off);
10171 set_mem_alias_set (off, ix86_GOT_alias_set ());
10173 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10175 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10176 off = force_reg (Pmode, off);
10177 return gen_rtx_PLUS (Pmode, base, off);
10181 base = get_thread_pointer (true);
10182 dest = gen_reg_rtx (Pmode);
10183 emit_insn (gen_subsi3 (dest, base, off));
10187 case TLS_MODEL_LOCAL_EXEC:
10188 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
10189 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10190 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
10191 off = gen_rtx_CONST (Pmode, off);
10193 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10195 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10196 return gen_rtx_PLUS (Pmode, base, off);
10200 base = get_thread_pointer (true);
10201 dest = gen_reg_rtx (Pmode);
10202 emit_insn (gen_subsi3 (dest, base, off));
10207 gcc_unreachable ();
10213 /* Create or return the unique __imp_DECL dllimport symbol corresponding
10216 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
10217 htab_t dllimport_map;
10220 get_dllimport_decl (tree decl)
10222 struct tree_map *h, in;
10225 const char *prefix;
10226 size_t namelen, prefixlen;
10231 if (!dllimport_map)
10232 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
10234 in.hash = htab_hash_pointer (decl);
10235 in.base.from = decl;
10236 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
10237 h = (struct tree_map *) *loc;
10241 *loc = h = GGC_NEW (struct tree_map);
10243 h->base.from = decl;
10244 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
10245 DECL_ARTIFICIAL (to) = 1;
10246 DECL_IGNORED_P (to) = 1;
10247 DECL_EXTERNAL (to) = 1;
10248 TREE_READONLY (to) = 1;
10250 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
10251 name = targetm.strip_name_encoding (name);
10252 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
10253 ? "*__imp_" : "*__imp__";
10254 namelen = strlen (name);
10255 prefixlen = strlen (prefix);
10256 imp_name = (char *) alloca (namelen + prefixlen + 1);
10257 memcpy (imp_name, prefix, prefixlen);
10258 memcpy (imp_name + prefixlen, name, namelen + 1);
10260 name = ggc_alloc_string (imp_name, namelen + prefixlen);
10261 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10262 SET_SYMBOL_REF_DECL (rtl, to);
10263 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
10265 rtl = gen_const_mem (Pmode, rtl);
10266 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10268 SET_DECL_RTL (to, rtl);
10269 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
10274 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
10275 true if we require the result be a register. */
10278 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
10283 gcc_assert (SYMBOL_REF_DECL (symbol));
10284 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10286 x = DECL_RTL (imp_decl);
10288 x = force_reg (Pmode, x);
10292 /* Try machine-dependent ways of modifying an illegitimate address
10293 to be legitimate. If we find one, return the new, valid address.
10294 This macro is used in only one place: `memory_address' in explow.c.
10296 OLDX is the address as it was before break_out_memory_refs was called.
10297 In some cases it is useful to look at this to decide what needs to be done.
10299 It is always safe for this macro to do nothing. It exists to recognize
10300 opportunities to optimize the output.
10302 For the 80386, we handle X+REG by loading X into a register R and
10303 using R+REG. R will go in a general reg and indexing will be used.
10304 However, if REG is a broken-out memory address or multiplication,
10305 nothing needs to be done because REG can certainly go in a general reg.
10307 When -fpic is used, special handling is needed for symbolic references.
10308 See comments by legitimize_pic_address in i386.c for details. */
10311 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10312 enum machine_mode mode)
10317 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10319 return legitimize_tls_address (x, (enum tls_model) log, false);
10320 if (GET_CODE (x) == CONST
10321 && GET_CODE (XEXP (x, 0)) == PLUS
10322 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10323 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10325 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10326 (enum tls_model) log, false);
10327 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10330 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10332 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10333 return legitimize_dllimport_symbol (x, true);
10334 if (GET_CODE (x) == CONST
10335 && GET_CODE (XEXP (x, 0)) == PLUS
10336 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10337 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10339 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10340 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10344 if (flag_pic && SYMBOLIC_CONST (x))
10345 return legitimize_pic_address (x, 0);
10347 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10348 if (GET_CODE (x) == ASHIFT
10349 && CONST_INT_P (XEXP (x, 1))
10350 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10353 log = INTVAL (XEXP (x, 1));
10354 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10355 GEN_INT (1 << log));
10358 if (GET_CODE (x) == PLUS)
10360 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10362 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10363 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10364 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10367 log = INTVAL (XEXP (XEXP (x, 0), 1));
10368 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10369 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10370 GEN_INT (1 << log));
10373 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10374 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10375 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10378 log = INTVAL (XEXP (XEXP (x, 1), 1));
10379 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10380 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10381 GEN_INT (1 << log));
10384 /* Put multiply first if it isn't already. */
10385 if (GET_CODE (XEXP (x, 1)) == MULT)
10387 rtx tmp = XEXP (x, 0);
10388 XEXP (x, 0) = XEXP (x, 1);
10393 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10394 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10395 created by virtual register instantiation, register elimination, and
10396 similar optimizations. */
10397 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10400 x = gen_rtx_PLUS (Pmode,
10401 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10402 XEXP (XEXP (x, 1), 0)),
10403 XEXP (XEXP (x, 1), 1));
10407 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10408 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10409 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10410 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10411 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10412 && CONSTANT_P (XEXP (x, 1)))
10415 rtx other = NULL_RTX;
10417 if (CONST_INT_P (XEXP (x, 1)))
10419 constant = XEXP (x, 1);
10420 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10422 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10424 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10425 other = XEXP (x, 1);
10433 x = gen_rtx_PLUS (Pmode,
10434 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10435 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10436 plus_constant (other, INTVAL (constant)));
10440 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10443 if (GET_CODE (XEXP (x, 0)) == MULT)
10446 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10449 if (GET_CODE (XEXP (x, 1)) == MULT)
10452 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10456 && REG_P (XEXP (x, 1))
10457 && REG_P (XEXP (x, 0)))
10460 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10463 x = legitimize_pic_address (x, 0);
10466 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10469 if (REG_P (XEXP (x, 0)))
10471 rtx temp = gen_reg_rtx (Pmode);
10472 rtx val = force_operand (XEXP (x, 1), temp);
10474 emit_move_insn (temp, val);
10476 XEXP (x, 1) = temp;
10480 else if (REG_P (XEXP (x, 1)))
10482 rtx temp = gen_reg_rtx (Pmode);
10483 rtx val = force_operand (XEXP (x, 0), temp);
10485 emit_move_insn (temp, val);
10487 XEXP (x, 0) = temp;
10495 /* Print an integer constant expression in assembler syntax. Addition
10496 and subtraction are the only arithmetic that may appear in these
10497 expressions. FILE is the stdio stream to write to, X is the rtx, and
10498 CODE is the operand print code from the output string. */
10501 output_pic_addr_const (FILE *file, rtx x, int code)
10505 switch (GET_CODE (x))
10508 gcc_assert (flag_pic);
10513 if (! TARGET_MACHO || TARGET_64BIT)
10514 output_addr_const (file, x);
10517 const char *name = XSTR (x, 0);
10519 /* Mark the decl as referenced so that cgraph will
10520 output the function. */
10521 if (SYMBOL_REF_DECL (x))
10522 mark_decl_referenced (SYMBOL_REF_DECL (x));
10525 if (MACHOPIC_INDIRECT
10526 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10527 name = machopic_indirection_name (x, /*stub_p=*/true);
10529 assemble_name (file, name);
10531 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10532 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10533 fputs ("@PLT", file);
10540 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10541 assemble_name (asm_out_file, buf);
10545 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10549 /* This used to output parentheses around the expression,
10550 but that does not work on the 386 (either ATT or BSD assembler). */
10551 output_pic_addr_const (file, XEXP (x, 0), code);
10555 if (GET_MODE (x) == VOIDmode)
10557 /* We can use %d if the number is <32 bits and positive. */
10558 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10559 fprintf (file, "0x%lx%08lx",
10560 (unsigned long) CONST_DOUBLE_HIGH (x),
10561 (unsigned long) CONST_DOUBLE_LOW (x));
10563 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10566 /* We can't handle floating point constants;
10567 PRINT_OPERAND must handle them. */
10568 output_operand_lossage ("floating constant misused");
10572 /* Some assemblers need integer constants to appear first. */
10573 if (CONST_INT_P (XEXP (x, 0)))
10575 output_pic_addr_const (file, XEXP (x, 0), code);
10577 output_pic_addr_const (file, XEXP (x, 1), code);
10581 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10582 output_pic_addr_const (file, XEXP (x, 1), code);
10584 output_pic_addr_const (file, XEXP (x, 0), code);
10590 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10591 output_pic_addr_const (file, XEXP (x, 0), code);
10593 output_pic_addr_const (file, XEXP (x, 1), code);
10595 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
10599 gcc_assert (XVECLEN (x, 0) == 1);
10600 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10601 switch (XINT (x, 1))
10604 fputs ("@GOT", file);
10606 case UNSPEC_GOTOFF:
10607 fputs ("@GOTOFF", file);
10609 case UNSPEC_PLTOFF:
10610 fputs ("@PLTOFF", file);
10612 case UNSPEC_GOTPCREL:
10613 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10614 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10616 case UNSPEC_GOTTPOFF:
10617 /* FIXME: This might be @TPOFF in Sun ld too. */
10618 fputs ("@GOTTPOFF", file);
10621 fputs ("@TPOFF", file);
10623 case UNSPEC_NTPOFF:
10625 fputs ("@TPOFF", file);
10627 fputs ("@NTPOFF", file);
10629 case UNSPEC_DTPOFF:
10630 fputs ("@DTPOFF", file);
10632 case UNSPEC_GOTNTPOFF:
10634 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10635 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10637 fputs ("@GOTNTPOFF", file);
10639 case UNSPEC_INDNTPOFF:
10640 fputs ("@INDNTPOFF", file);
10643 case UNSPEC_MACHOPIC_OFFSET:
10645 machopic_output_function_base_name (file);
10649 output_operand_lossage ("invalid UNSPEC as operand");
10655 output_operand_lossage ("invalid expression as operand");
10659 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10660 We need to emit DTP-relative relocations. */
10662 static void ATTRIBUTE_UNUSED
10663 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10665 fputs (ASM_LONG, file);
10666 output_addr_const (file, x);
10667 fputs ("@DTPOFF", file);
10673 fputs (", 0", file);
10676 gcc_unreachable ();
10680 /* Return true if X is a representation of the PIC register. This copes
10681 with calls from ix86_find_base_term, where the register might have
10682 been replaced by a cselib value. */
10685 ix86_pic_register_p (rtx x)
10687 if (GET_CODE (x) == VALUE)
10688 return (pic_offset_table_rtx
10689 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10691 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10694 /* In the name of slightly smaller debug output, and to cater to
10695 general assembler lossage, recognize PIC+GOTOFF and turn it back
10696 into a direct symbol reference.
10698 On Darwin, this is necessary to avoid a crash, because Darwin
10699 has a different PIC label for each routine but the DWARF debugging
10700 information is not associated with any particular routine, so it's
10701 necessary to remove references to the PIC label from RTL stored by
10702 the DWARF output code. */
10705 ix86_delegitimize_address (rtx orig_x)
10708 /* reg_addend is NULL or a multiple of some register. */
10709 rtx reg_addend = NULL_RTX;
10710 /* const_addend is NULL or a const_int. */
10711 rtx const_addend = NULL_RTX;
10712 /* This is the result, or NULL. */
10713 rtx result = NULL_RTX;
10720 if (GET_CODE (x) != CONST
10721 || GET_CODE (XEXP (x, 0)) != UNSPEC
10722 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10723 || !MEM_P (orig_x))
10725 return XVECEXP (XEXP (x, 0), 0, 0);
10728 if (GET_CODE (x) != PLUS
10729 || GET_CODE (XEXP (x, 1)) != CONST)
10732 if (ix86_pic_register_p (XEXP (x, 0)))
10733 /* %ebx + GOT/GOTOFF */
10735 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10737 /* %ebx + %reg * scale + GOT/GOTOFF */
10738 reg_addend = XEXP (x, 0);
10739 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10740 reg_addend = XEXP (reg_addend, 1);
10741 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10742 reg_addend = XEXP (reg_addend, 0);
10745 if (!REG_P (reg_addend)
10746 && GET_CODE (reg_addend) != MULT
10747 && GET_CODE (reg_addend) != ASHIFT)
10753 x = XEXP (XEXP (x, 1), 0);
10754 if (GET_CODE (x) == PLUS
10755 && CONST_INT_P (XEXP (x, 1)))
10757 const_addend = XEXP (x, 1);
10761 if (GET_CODE (x) == UNSPEC
10762 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10763 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10764 result = XVECEXP (x, 0, 0);
10766 if (TARGET_MACHO && darwin_local_data_pic (x)
10767 && !MEM_P (orig_x))
10768 result = XVECEXP (x, 0, 0);
10774 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10776 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10780 /* If X is a machine specific address (i.e. a symbol or label being
10781 referenced as a displacement from the GOT implemented using an
10782 UNSPEC), then return the base term. Otherwise return X. */
10785 ix86_find_base_term (rtx x)
10791 if (GET_CODE (x) != CONST)
10793 term = XEXP (x, 0);
10794 if (GET_CODE (term) == PLUS
10795 && (CONST_INT_P (XEXP (term, 1))
10796 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10797 term = XEXP (term, 0);
10798 if (GET_CODE (term) != UNSPEC
10799 || XINT (term, 1) != UNSPEC_GOTPCREL)
10802 return XVECEXP (term, 0, 0);
10805 return ix86_delegitimize_address (x);
10809 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10810 int fp, FILE *file)
10812 const char *suffix;
10814 if (mode == CCFPmode || mode == CCFPUmode)
10816 enum rtx_code second_code, bypass_code;
10817 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10818 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10819 code = ix86_fp_compare_code_to_integer (code);
10823 code = reverse_condition (code);
10874 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10878 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10879 Those same assemblers have the same but opposite lossage on cmov. */
10880 if (mode == CCmode)
10881 suffix = fp ? "nbe" : "a";
10882 else if (mode == CCCmode)
10885 gcc_unreachable ();
10901 gcc_unreachable ();
10905 gcc_assert (mode == CCmode || mode == CCCmode);
10922 gcc_unreachable ();
10926 /* ??? As above. */
10927 gcc_assert (mode == CCmode || mode == CCCmode);
10928 suffix = fp ? "nb" : "ae";
10931 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10935 /* ??? As above. */
10936 if (mode == CCmode)
10938 else if (mode == CCCmode)
10939 suffix = fp ? "nb" : "ae";
10941 gcc_unreachable ();
10944 suffix = fp ? "u" : "p";
10947 suffix = fp ? "nu" : "np";
10950 gcc_unreachable ();
10952 fputs (suffix, file);
10955 /* Print the name of register X to FILE based on its machine mode and number.
10956 If CODE is 'w', pretend the mode is HImode.
10957 If CODE is 'b', pretend the mode is QImode.
10958 If CODE is 'k', pretend the mode is SImode.
10959 If CODE is 'q', pretend the mode is DImode.
10960 If CODE is 'x', pretend the mode is V4SFmode.
10961 If CODE is 't', pretend the mode is V8SFmode.
10962 If CODE is 'h', pretend the reg is the 'high' byte register.
10963 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10964 If CODE is 'd', duplicate the operand for AVX instruction.
10968 print_reg (rtx x, int code, FILE *file)
10971 bool duplicated = code == 'd' && TARGET_AVX;
10973 gcc_assert (x == pc_rtx
10974 || (REGNO (x) != ARG_POINTER_REGNUM
10975 && REGNO (x) != FRAME_POINTER_REGNUM
10976 && REGNO (x) != FLAGS_REG
10977 && REGNO (x) != FPSR_REG
10978 && REGNO (x) != FPCR_REG));
10980 if (ASSEMBLER_DIALECT == ASM_ATT)
10985 gcc_assert (TARGET_64BIT);
10986 fputs ("rip", file);
10990 if (code == 'w' || MMX_REG_P (x))
10992 else if (code == 'b')
10994 else if (code == 'k')
10996 else if (code == 'q')
10998 else if (code == 'y')
11000 else if (code == 'h')
11002 else if (code == 'x')
11004 else if (code == 't')
11007 code = GET_MODE_SIZE (GET_MODE (x));
11009 /* Irritatingly, AMD extended registers use different naming convention
11010 from the normal registers. */
11011 if (REX_INT_REG_P (x))
11013 gcc_assert (TARGET_64BIT);
11017 error ("extended registers have no high halves");
11020 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
11023 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
11026 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
11029 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
11032 error ("unsupported operand size for extended register");
11042 if (STACK_TOP_P (x))
11051 if (! ANY_FP_REG_P (x))
11052 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
11057 reg = hi_reg_name[REGNO (x)];
11060 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
11062 reg = qi_reg_name[REGNO (x)];
11065 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
11067 reg = qi_high_reg_name[REGNO (x)];
11072 gcc_assert (!duplicated);
11074 fputs (hi_reg_name[REGNO (x)] + 1, file);
11079 gcc_unreachable ();
11085 if (ASSEMBLER_DIALECT == ASM_ATT)
11086 fprintf (file, ", %%%s", reg);
11088 fprintf (file, ", %s", reg);
11092 /* Locate some local-dynamic symbol still in use by this function
11093 so that we can print its name in some tls_local_dynamic_base
11097 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11101 if (GET_CODE (x) == SYMBOL_REF
11102 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11104 cfun->machine->some_ld_name = XSTR (x, 0);
11111 static const char *
11112 get_some_local_dynamic_name (void)
11116 if (cfun->machine->some_ld_name)
11117 return cfun->machine->some_ld_name;
11119 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11121 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11122 return cfun->machine->some_ld_name;
11124 gcc_unreachable ();
11127 /* Meaning of CODE:
11128 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
11129 C -- print opcode suffix for set/cmov insn.
11130 c -- like C, but print reversed condition
11131 E,e -- likewise, but for compare-and-branch fused insn.
11132 F,f -- likewise, but for floating-point.
11133 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
11135 R -- print the prefix for register names.
11136 z -- print the opcode suffix for the size of the current operand.
11137 Z -- likewise, with special suffixes for x87 instructions.
11138 * -- print a star (in certain assembler syntax)
11139 A -- print an absolute memory reference.
11140 w -- print the operand as if it's a "word" (HImode) even if it isn't.
11141 s -- print a shift double count, followed by the assemblers argument
11143 b -- print the QImode name of the register for the indicated operand.
11144 %b0 would print %al if operands[0] is reg 0.
11145 w -- likewise, print the HImode name of the register.
11146 k -- likewise, print the SImode name of the register.
11147 q -- likewise, print the DImode name of the register.
11148 x -- likewise, print the V4SFmode name of the register.
11149 t -- likewise, print the V8SFmode name of the register.
11150 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
11151 y -- print "st(0)" instead of "st" as a register.
11152 d -- print duplicated register operand for AVX instruction.
11153 D -- print condition for SSE cmp instruction.
11154 P -- if PIC, print an @PLT suffix.
11155 X -- don't print any sort of PIC '@' suffix for a symbol.
11156 & -- print some in-use local-dynamic symbol name.
11157 H -- print a memory address offset by 8; used for sse high-parts
11158 Y -- print condition for SSE5 com* instruction.
11159 + -- print a branch hint as 'cs' or 'ds' prefix
11160 ; -- print a semicolon (after prefixes due to bug in older gas).
11164 print_operand (FILE *file, rtx x, int code)
11171 if (ASSEMBLER_DIALECT == ASM_ATT)
11176 assemble_name (file, get_some_local_dynamic_name ());
11180 switch (ASSEMBLER_DIALECT)
11187 /* Intel syntax. For absolute addresses, registers should not
11188 be surrounded by braces. */
11192 PRINT_OPERAND (file, x, 0);
11199 gcc_unreachable ();
11202 PRINT_OPERAND (file, x, 0);
11207 if (ASSEMBLER_DIALECT == ASM_ATT)
11212 if (ASSEMBLER_DIALECT == ASM_ATT)
11217 if (ASSEMBLER_DIALECT == ASM_ATT)
11222 if (ASSEMBLER_DIALECT == ASM_ATT)
11227 if (ASSEMBLER_DIALECT == ASM_ATT)
11232 if (ASSEMBLER_DIALECT == ASM_ATT)
11237 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11239 /* Opcodes don't get size suffixes if using Intel opcodes. */
11240 if (ASSEMBLER_DIALECT == ASM_INTEL)
11243 switch (GET_MODE_SIZE (GET_MODE (x)))
11262 output_operand_lossage
11263 ("invalid operand size for operand code '%c'", code);
11268 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11270 (0, "non-integer operand used with operand code '%c'", code);
11274 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
11275 if (ASSEMBLER_DIALECT == ASM_INTEL)
11278 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11280 switch (GET_MODE_SIZE (GET_MODE (x)))
11283 #ifdef HAVE_AS_IX86_FILDS
11293 #ifdef HAVE_AS_IX86_FILDQ
11296 fputs ("ll", file);
11304 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11306 /* 387 opcodes don't get size suffixes
11307 if the operands are registers. */
11308 if (STACK_REG_P (x))
11311 switch (GET_MODE_SIZE (GET_MODE (x)))
11332 output_operand_lossage
11333 ("invalid operand type used with operand code '%c'", code);
11337 output_operand_lossage
11338 ("invalid operand size for operand code '%c'", code);
11355 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11357 PRINT_OPERAND (file, x, 0);
11358 fputs (", ", file);
11363 /* Little bit of braindamage here. The SSE compare instructions
11364 does use completely different names for the comparisons that the
11365 fp conditional moves. */
11368 switch (GET_CODE (x))
11371 fputs ("eq", file);
11374 fputs ("eq_us", file);
11377 fputs ("lt", file);
11380 fputs ("nge", file);
11383 fputs ("le", file);
11386 fputs ("ngt", file);
11389 fputs ("unord", file);
11392 fputs ("neq", file);
11395 fputs ("neq_oq", file);
11398 fputs ("ge", file);
11401 fputs ("nlt", file);
11404 fputs ("gt", file);
11407 fputs ("nle", file);
11410 fputs ("ord", file);
11413 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11419 switch (GET_CODE (x))
11423 fputs ("eq", file);
11427 fputs ("lt", file);
11431 fputs ("le", file);
11434 fputs ("unord", file);
11438 fputs ("neq", file);
11442 fputs ("nlt", file);
11446 fputs ("nle", file);
11449 fputs ("ord", file);
11452 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11458 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11459 if (ASSEMBLER_DIALECT == ASM_ATT)
11461 switch (GET_MODE (x))
11463 case HImode: putc ('w', file); break;
11465 case SFmode: putc ('l', file); break;
11467 case DFmode: putc ('q', file); break;
11468 default: gcc_unreachable ();
11475 if (!COMPARISON_P (x))
11477 output_operand_lossage ("operand is neither a constant nor a "
11478 "condition code, invalid operand code "
11482 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11485 if (!COMPARISON_P (x))
11487 output_operand_lossage ("operand is neither a constant nor a "
11488 "condition code, invalid operand code "
11492 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11493 if (ASSEMBLER_DIALECT == ASM_ATT)
11496 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11499 /* Like above, but reverse condition */
11501 /* Check to see if argument to %c is really a constant
11502 and not a condition code which needs to be reversed. */
11503 if (!COMPARISON_P (x))
11505 output_operand_lossage ("operand is neither a constant nor a "
11506 "condition code, invalid operand "
11510 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11513 if (!COMPARISON_P (x))
11515 output_operand_lossage ("operand is neither a constant nor a "
11516 "condition code, invalid operand "
11520 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11521 if (ASSEMBLER_DIALECT == ASM_ATT)
11524 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
11528 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11532 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
11536 /* It doesn't actually matter what mode we use here, as we're
11537 only going to use this for printing. */
11538 x = adjust_address_nv (x, DImode, 8);
11546 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11549 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11552 int pred_val = INTVAL (XEXP (x, 0));
11554 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11555 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11557 int taken = pred_val > REG_BR_PROB_BASE / 2;
11558 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11560 /* Emit hints only in the case default branch prediction
11561 heuristics would fail. */
11562 if (taken != cputaken)
11564 /* We use 3e (DS) prefix for taken branches and
11565 2e (CS) prefix for not taken branches. */
11567 fputs ("ds ; ", file);
11569 fputs ("cs ; ", file);
11577 switch (GET_CODE (x))
11580 fputs ("neq", file);
11583 fputs ("eq", file);
11587 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11591 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11595 fputs ("le", file);
11599 fputs ("lt", file);
11602 fputs ("unord", file);
11605 fputs ("ord", file);
11608 fputs ("ueq", file);
11611 fputs ("nlt", file);
11614 fputs ("nle", file);
11617 fputs ("ule", file);
11620 fputs ("ult", file);
11623 fputs ("une", file);
11626 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11633 fputs (" ; ", file);
11640 output_operand_lossage ("invalid operand code '%c'", code);
11645 print_reg (x, code, file);
11647 else if (MEM_P (x))
11649 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11650 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11651 && GET_MODE (x) != BLKmode)
11654 switch (GET_MODE_SIZE (GET_MODE (x)))
11656 case 1: size = "BYTE"; break;
11657 case 2: size = "WORD"; break;
11658 case 4: size = "DWORD"; break;
11659 case 8: size = "QWORD"; break;
11660 case 12: size = "XWORD"; break;
11662 if (GET_MODE (x) == XFmode)
11668 gcc_unreachable ();
11671 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11674 else if (code == 'w')
11676 else if (code == 'k')
11679 fputs (size, file);
11680 fputs (" PTR ", file);
11684 /* Avoid (%rip) for call operands. */
11685 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11686 && !CONST_INT_P (x))
11687 output_addr_const (file, x);
11688 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11689 output_operand_lossage ("invalid constraints for operand");
11691 output_address (x);
11694 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11699 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11700 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11702 if (ASSEMBLER_DIALECT == ASM_ATT)
11704 fprintf (file, "0x%08lx", (long unsigned int) l);
11707 /* These float cases don't actually occur as immediate operands. */
11708 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11712 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11713 fprintf (file, "%s", dstr);
11716 else if (GET_CODE (x) == CONST_DOUBLE
11717 && GET_MODE (x) == XFmode)
11721 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11722 fprintf (file, "%s", dstr);
11727 /* We have patterns that allow zero sets of memory, for instance.
11728 In 64-bit mode, we should probably support all 8-byte vectors,
11729 since we can in fact encode that into an immediate. */
11730 if (GET_CODE (x) == CONST_VECTOR)
11732 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11738 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11740 if (ASSEMBLER_DIALECT == ASM_ATT)
11743 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11744 || GET_CODE (x) == LABEL_REF)
11746 if (ASSEMBLER_DIALECT == ASM_ATT)
11749 fputs ("OFFSET FLAT:", file);
11752 if (CONST_INT_P (x))
11753 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11755 output_pic_addr_const (file, x, code);
11757 output_addr_const (file, x);
11761 /* Print a memory operand whose address is ADDR. */
11764 print_operand_address (FILE *file, rtx addr)
11766 struct ix86_address parts;
11767 rtx base, index, disp;
11769 int ok = ix86_decompose_address (addr, &parts);
11774 index = parts.index;
11776 scale = parts.scale;
11784 if (ASSEMBLER_DIALECT == ASM_ATT)
11786 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11789 gcc_unreachable ();
11792 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11793 if (TARGET_64BIT && !base && !index)
11797 if (GET_CODE (disp) == CONST
11798 && GET_CODE (XEXP (disp, 0)) == PLUS
11799 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11800 symbol = XEXP (XEXP (disp, 0), 0);
11802 if (GET_CODE (symbol) == LABEL_REF
11803 || (GET_CODE (symbol) == SYMBOL_REF
11804 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11807 if (!base && !index)
11809 /* Displacement only requires special attention. */
11811 if (CONST_INT_P (disp))
11813 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11814 fputs ("ds:", file);
11815 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11818 output_pic_addr_const (file, disp, 0);
11820 output_addr_const (file, disp);
11824 if (ASSEMBLER_DIALECT == ASM_ATT)
11829 output_pic_addr_const (file, disp, 0);
11830 else if (GET_CODE (disp) == LABEL_REF)
11831 output_asm_label (disp);
11833 output_addr_const (file, disp);
11838 print_reg (base, 0, file);
11842 print_reg (index, 0, file);
11844 fprintf (file, ",%d", scale);
11850 rtx offset = NULL_RTX;
11854 /* Pull out the offset of a symbol; print any symbol itself. */
11855 if (GET_CODE (disp) == CONST
11856 && GET_CODE (XEXP (disp, 0)) == PLUS
11857 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11859 offset = XEXP (XEXP (disp, 0), 1);
11860 disp = gen_rtx_CONST (VOIDmode,
11861 XEXP (XEXP (disp, 0), 0));
11865 output_pic_addr_const (file, disp, 0);
11866 else if (GET_CODE (disp) == LABEL_REF)
11867 output_asm_label (disp);
11868 else if (CONST_INT_P (disp))
11871 output_addr_const (file, disp);
11877 print_reg (base, 0, file);
11880 if (INTVAL (offset) >= 0)
11882 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11886 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11893 print_reg (index, 0, file);
11895 fprintf (file, "*%d", scale);
11903 output_addr_const_extra (FILE *file, rtx x)
11907 if (GET_CODE (x) != UNSPEC)
11910 op = XVECEXP (x, 0, 0);
11911 switch (XINT (x, 1))
11913 case UNSPEC_GOTTPOFF:
11914 output_addr_const (file, op);
11915 /* FIXME: This might be @TPOFF in Sun ld. */
11916 fputs ("@GOTTPOFF", file);
11919 output_addr_const (file, op);
11920 fputs ("@TPOFF", file);
11922 case UNSPEC_NTPOFF:
11923 output_addr_const (file, op);
11925 fputs ("@TPOFF", file);
11927 fputs ("@NTPOFF", file);
11929 case UNSPEC_DTPOFF:
11930 output_addr_const (file, op);
11931 fputs ("@DTPOFF", file);
11933 case UNSPEC_GOTNTPOFF:
11934 output_addr_const (file, op);
11936 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11937 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11939 fputs ("@GOTNTPOFF", file);
11941 case UNSPEC_INDNTPOFF:
11942 output_addr_const (file, op);
11943 fputs ("@INDNTPOFF", file);
11946 case UNSPEC_MACHOPIC_OFFSET:
11947 output_addr_const (file, op);
11949 machopic_output_function_base_name (file);
11960 /* Split one or more DImode RTL references into pairs of SImode
11961 references. The RTL can be REG, offsettable MEM, integer constant, or
11962 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11963 split and "num" is its length. lo_half and hi_half are output arrays
11964 that parallel "operands". */
11967 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11971 rtx op = operands[num];
11973 /* simplify_subreg refuse to split volatile memory addresses,
11974 but we still have to handle it. */
11977 lo_half[num] = adjust_address (op, SImode, 0);
11978 hi_half[num] = adjust_address (op, SImode, 4);
11982 lo_half[num] = simplify_gen_subreg (SImode, op,
11983 GET_MODE (op) == VOIDmode
11984 ? DImode : GET_MODE (op), 0);
11985 hi_half[num] = simplify_gen_subreg (SImode, op,
11986 GET_MODE (op) == VOIDmode
11987 ? DImode : GET_MODE (op), 4);
11991 /* Split one or more TImode RTL references into pairs of DImode
11992 references. The RTL can be REG, offsettable MEM, integer constant, or
11993 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11994 split and "num" is its length. lo_half and hi_half are output arrays
11995 that parallel "operands". */
11998 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
12002 rtx op = operands[num];
12004 /* simplify_subreg refuse to split volatile memory addresses, but we
12005 still have to handle it. */
12008 lo_half[num] = adjust_address (op, DImode, 0);
12009 hi_half[num] = adjust_address (op, DImode, 8);
12013 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
12014 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
12019 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
12020 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
12021 is the expression of the binary operation. The output may either be
12022 emitted here, or returned to the caller, like all output_* functions.
12024 There is no guarantee that the operands are the same mode, as they
12025 might be within FLOAT or FLOAT_EXTEND expressions. */
12027 #ifndef SYSV386_COMPAT
12028 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
12029 wants to fix the assemblers because that causes incompatibility
12030 with gcc. No-one wants to fix gcc because that causes
12031 incompatibility with assemblers... You can use the option of
12032 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
12033 #define SYSV386_COMPAT 1
12037 output_387_binary_op (rtx insn, rtx *operands)
12039 static char buf[40];
12042 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
12044 #ifdef ENABLE_CHECKING
12045 /* Even if we do not want to check the inputs, this documents input
12046 constraints. Which helps in understanding the following code. */
12047 if (STACK_REG_P (operands[0])
12048 && ((REG_P (operands[1])
12049 && REGNO (operands[0]) == REGNO (operands[1])
12050 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
12051 || (REG_P (operands[2])
12052 && REGNO (operands[0]) == REGNO (operands[2])
12053 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
12054 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
12057 gcc_assert (is_sse);
12060 switch (GET_CODE (operands[3]))
12063 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12064 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12072 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12073 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12081 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12082 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12090 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12091 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12099 gcc_unreachable ();
12106 strcpy (buf, ssep);
12107 if (GET_MODE (operands[0]) == SFmode)
12108 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
12110 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
12114 strcpy (buf, ssep + 1);
12115 if (GET_MODE (operands[0]) == SFmode)
12116 strcat (buf, "ss\t{%2, %0|%0, %2}");
12118 strcat (buf, "sd\t{%2, %0|%0, %2}");
12124 switch (GET_CODE (operands[3]))
12128 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
12130 rtx temp = operands[2];
12131 operands[2] = operands[1];
12132 operands[1] = temp;
12135 /* know operands[0] == operands[1]. */
12137 if (MEM_P (operands[2]))
12143 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12145 if (STACK_TOP_P (operands[0]))
12146 /* How is it that we are storing to a dead operand[2]?
12147 Well, presumably operands[1] is dead too. We can't
12148 store the result to st(0) as st(0) gets popped on this
12149 instruction. Instead store to operands[2] (which I
12150 think has to be st(1)). st(1) will be popped later.
12151 gcc <= 2.8.1 didn't have this check and generated
12152 assembly code that the Unixware assembler rejected. */
12153 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
12155 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
12159 if (STACK_TOP_P (operands[0]))
12160 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
12162 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
12167 if (MEM_P (operands[1]))
12173 if (MEM_P (operands[2]))
12179 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12182 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
12183 derived assemblers, confusingly reverse the direction of
12184 the operation for fsub{r} and fdiv{r} when the
12185 destination register is not st(0). The Intel assembler
12186 doesn't have this brain damage. Read !SYSV386_COMPAT to
12187 figure out what the hardware really does. */
12188 if (STACK_TOP_P (operands[0]))
12189 p = "{p\t%0, %2|rp\t%2, %0}";
12191 p = "{rp\t%2, %0|p\t%0, %2}";
12193 if (STACK_TOP_P (operands[0]))
12194 /* As above for fmul/fadd, we can't store to st(0). */
12195 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
12197 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
12202 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
12205 if (STACK_TOP_P (operands[0]))
12206 p = "{rp\t%0, %1|p\t%1, %0}";
12208 p = "{p\t%1, %0|rp\t%0, %1}";
12210 if (STACK_TOP_P (operands[0]))
12211 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
12213 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
12218 if (STACK_TOP_P (operands[0]))
12220 if (STACK_TOP_P (operands[1]))
12221 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
12223 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
12226 else if (STACK_TOP_P (operands[1]))
12229 p = "{\t%1, %0|r\t%0, %1}";
12231 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
12237 p = "{r\t%2, %0|\t%0, %2}";
12239 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
12245 gcc_unreachable ();
12252 /* Return needed mode for entity in optimize_mode_switching pass. */
12255 ix86_mode_needed (int entity, rtx insn)
12257 enum attr_i387_cw mode;
12259 /* The mode UNINITIALIZED is used to store control word after a
12260 function call or ASM pattern. The mode ANY specify that function
12261 has no requirements on the control word and make no changes in the
12262 bits we are interested in. */
12265 || (NONJUMP_INSN_P (insn)
12266 && (asm_noperands (PATTERN (insn)) >= 0
12267 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12268 return I387_CW_UNINITIALIZED;
12270 if (recog_memoized (insn) < 0)
12271 return I387_CW_ANY;
12273 mode = get_attr_i387_cw (insn);
12278 if (mode == I387_CW_TRUNC)
12283 if (mode == I387_CW_FLOOR)
12288 if (mode == I387_CW_CEIL)
12293 if (mode == I387_CW_MASK_PM)
12298 gcc_unreachable ();
12301 return I387_CW_ANY;
12304 /* Output code to initialize control word copies used by trunc?f?i and
12305 rounding patterns.  CURRENT_MODE is set to current control word,
12306 while NEW_MODE is set to new control word.  */
12309 emit_i387_cw_initialization (int mode)
/* Stash the live control word in a stack slot, then derive the modified
   copy in a scratch HImode register.  */
12311 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12314 enum ix86_stack_slot slot;
12316 rtx reg = gen_reg_rtx (HImode);
12318 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12319 emit_move_insn (reg, copy_rtx (stored_mode));
/* First strategy: plain and/or on the HImode copy.  Chosen for 64-bit,
   partial-register-stall targets, or when optimizing for size.
   The mask 0x0c00 covers the two x87 rounding-control bits.  */
12321 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12322 || optimize_function_for_size_p (cfun))
12326 case I387_CW_TRUNC:
12327 /* round toward zero (truncate) */
12328 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12329 slot = SLOT_CW_TRUNC;
12332 case I387_CW_FLOOR:
12333 /* round down toward -oo */
12334 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12335 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12336 slot = SLOT_CW_FLOOR;
12340 /* round up toward +oo */
12341 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12342 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12343 slot = SLOT_CW_CEIL;
12346 case I387_CW_MASK_PM:
12347 /* mask precision exception for nearbyint() */
12348 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12349 slot = SLOT_CW_MASK_PM;
12353 gcc_unreachable ();
/* Second strategy: insert the 4-bit rounding field directly with a
   movsi insv pattern, avoiding the and/or pair.  */
12360 case I387_CW_TRUNC:
12361 /* round toward zero (truncate) */
12362 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)))
12363 slot = SLOT_CW_TRUNC;
12366 case I387_CW_FLOOR:
12367 /* round down toward -oo */
12368 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12369 slot = SLOT_CW_FLOOR;
12373 /* round up toward +oo */
12374 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12375 slot = SLOT_CW_CEIL;
12378 case I387_CW_MASK_PM:
12379 /* mask precision exception for nearbyint() */
12380 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12381 slot = SLOT_CW_MASK_PM;
12385 gcc_unreachable ();
/* Spill the adjusted control word to its dedicated stack slot so the
   rounding patterns can fldcw it later.  */
12389 gcc_assert (slot < MAX_386_STACK_LOCALS);
12391 new_mode = assign_386_stack_local (HImode, slot);
12392 emit_move_insn (new_mode, reg);
12395 /* Output code for INSN to convert a float to a signed int.  OPERANDS
12396 are the insn operands.  The output may be [HSD]Imode and the input
12397 operand may be [SDX]Fmode.  FISTTP nonzero selects the SSE3 fisttp
12398 instruction, which truncates without needing a control-word swap.  */
12400 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12402 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12403 int dimode_p = GET_MODE (operands[0]) == DImode;
12404 int round_mode = get_attr_i387_cw (insn);
12406 /* Jump through a hoop or two for DImode, since the hardware has no
12407 non-popping instruction.  We used to do this a different way, but
12408 that was somewhat fragile and broke with post-reload splitters.  */
/* Duplicate st(0) so the mandatory pop leaves the value live.  */
12409 if ((dimode_p || fisttp) && !stack_top_dies)
12410 output_asm_insn ("fld\t%y1", operands);
12412 gcc_assert (STACK_TOP_P (operands[1]));
12413 gcc_assert (MEM_P (operands[0]));
12414 gcc_assert (GET_MODE (operands[1]) != TFmode);
12417 output_asm_insn ("fisttp%Z0\t%0", operands);
/* Classic path: swap in the truncating control word (%3), store,
   then restore the original control word (%2).  */
12420 if (round_mode != I387_CW_ANY)
12421 output_asm_insn ("fldcw\t%3", operands);
12422 if (stack_top_dies || dimode_p)
12423 output_asm_insn ("fistp%Z0\t%0", operands);
12425 output_asm_insn ("fist%Z0\t%0", operands);
12426 if (round_mode != I387_CW_ANY)
12427 output_asm_insn ("fldcw\t%2", operands);
12433 /* Output code for x87 ffreep insn.  The OPNO argument, which may only
12434 have the values zero or one, indicates the ffreep insn's operand
12435 from the OPERANDS array.  */
12437 static const char *
12438 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12440 if (TARGET_USE_FFREEP)
12441 #if HAVE_AS_IX86_FFREEP
12442 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler has no ffreep mnemonic: emit the raw opcode word.  The
   '_' placeholder at index 9 is patched with the stack-reg digit.  */
12445 static char retval[] = ".word\t0xc_df";
12446 int regno = REGNO (operands[opno]);
12448 gcc_assert (FP_REGNO_P (regno));
12450 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fall back to fstp when ffreep is not profitable on this target.  */
12455 return opno ? "fstp\t%y1" : "fstp\t%y0";
12459 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
12460 should be used.  UNORDERED_P is true when fucom should be used.  */
12463 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12465 int stack_top_dies;
12466 rtx cmp_op0, cmp_op1;
12467 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Pick the pair actually being compared; operand layout differs
   between the SSE and x87 patterns.  */
12471 cmp_op0 = operands[0];
12472 cmp_op1 = operands[1];
12476 cmp_op0 = operands[1];
12477 cmp_op1 = operands[2];
/* SSE comparisons: the leading 'v' is skipped (&str[1]) unless AVX
   encoding is wanted.  */
12482 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12483 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12484 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12485 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12487 if (GET_MODE (operands[0]) == SFmode)
12489 return &ucomiss[TARGET_AVX ? 0 : 1];
12491 return &comiss[TARGET_AVX ? 0 : 1];
12494 return &ucomisd[TARGET_AVX ? 0 : 1];
12496 return &comisd[TARGET_AVX ? 0 : 1];
/* x87 path from here on; op0 must already be st(0).  */
12499 gcc_assert (STACK_TOP_P (cmp_op0));
12501 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst only examines st(0).  */
12503 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12505 if (stack_top_dies)
12507 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12508 return output_387_ffreep (operands, 1);
12511 return "ftst\n\tfnstsw\t%0";
12514 if (STACK_REG_P (cmp_op1)
12516 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12517 && REGNO (cmp_op1) != FIRST_STACK_REG)
12519 /* If both the top of the 387 stack dies, and the other operand
12520 is also a stack register that dies, then this must be a
12521 `fcompp' float compare */
12525 /* There is no double popping fcomi variant.  Fortunately,
12526 eflags is immune from the fstp's cc clobbering.  */
12528 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12530 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12531 return output_387_ffreep (operands, 0);
12536 return "fucompp\n\tfnstsw\t%0";
12538 return "fcompp\n\tfnstsw\t%0";
12543 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
12545 static const char * const alt[16] =
12547 "fcom%Z2\t%y2\n\tfnstsw\t%0",
12548 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12549 "fucom%Z2\t%y2\n\tfnstsw\t%0",
12550 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12552 "ficom%Z2\t%y2\n\tfnstsw\t%0",
12553 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12557 "fcomi\t{%y1, %0|%0, %y1}",
12558 "fcomip\t{%y1, %0|%0, %y1}",
12559 "fucomi\t{%y1, %0|%0, %y1}",
12560 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into ALT per the encoding comment above.  */
12571 mask  = eflags_p << 3;
12572 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12573 mask |= unordered_p << 1;
12574 mask |= stack_top_dies;
12576 gcc_assert (mask < 16);
/* Output one absolute jump-table entry (a label reference) to FILE,
   using ASM_QUAD in the case guarded above; plain 64-bit absolute
   vectors are rejected by the assert.  */
12585 ix86_output_addr_vec_elt (FILE *file, int value)
12587 const char *directive = ASM_LONG;
12591 directive = ASM_QUAD;
12593 gcc_assert (!TARGET_64BIT);
12596 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Output one relative jump-table entry: the difference between label
   VALUE and the vector base REL, in the flavor the target/PIC model
   requires (plain difference, @GOTOFF, Mach-O picbase, or GOT-relative).  */
12600 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12602 const char *directive = ASM_LONG;
12605 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12606 directive = ASM_QUAD;
12608 gcc_assert (!TARGET_64BIT);
12610 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
12611 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12612 fprintf (file, "%s%s%d-%s%d\n",
12613 directive, LPREFIX, value, LPREFIX, rel);
12614 else if (HAVE_AS_GOTOFF_IN_DATA)
12615 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12617 else if (TARGET_MACHO)
/* Mach-O: emit "Ln-<picbase>" using the function's picbase symbol.  */
12619 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12620 machopic_output_function_base_name (file);
12621 fprintf(file, "\n");
12625 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12626 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12629 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12630 for the target and optimization goal.  DEST is the register to clear.  */
12633 ix86_expand_clear (rtx dest)
12637 /* We play register width games, which are only valid after reload.  */
12638 gcc_assert (reload_completed);
12640 /* Avoid HImode and its attendant prefix byte.  */
12641 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12642 dest = gen_rtx_REG (SImode, REGNO (dest));
12643 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12645 /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
12646 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
/* The xor form clobbers the flags register, so wrap the set in a
   PARALLEL with an explicit CLOBBER of FLAGS_REG.  */
12648 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12649 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12655 /* X is an unchanging MEM.  If it is a constant pool reference, return
12656 the constant pool rtx, else NULL.  */
12659 maybe_get_pool_constant (rtx x)
/* Strip any PIC/GOT wrapping from the address before testing it.  */
12661 x = ix86_delegitimize_address (XEXP (x, 0));
12663 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12664 return get_pool_constant (x);
/* Expand a scalar move of MODE between OPERANDS[0] (dest) and
   OPERANDS[1] (src), legitimizing TLS, dllimport and PIC references
   and forcing operands into registers where the machine requires it.  */
12670 ix86_expand_move (enum machine_mode mode, rtx operands[])
12673 enum tls_model model;
/* Bare symbol: may need TLS or dllimport legitimization.  */
12678 if (GET_CODE (op1) == SYMBOL_REF)
12680 model = SYMBOL_REF_TLS_MODEL (op1);
12683 op1 = legitimize_tls_address (op1, model, true);
12684 op1 = force_operand (op1, op0);
12688 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12689 && SYMBOL_REF_DLLIMPORT_P (op1))
12690 op1 = legitimize_dllimport_symbol (op1, false);
/* (const (plus symbol addend)): legitimize the symbol, then re-add
   the addend.  */
12692 else if (GET_CODE (op1) == CONST
12693 && GET_CODE (XEXP (op1, 0)) == PLUS
12694 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12696 rtx addend = XEXP (XEXP (op1, 0), 1);
12697 rtx symbol = XEXP (XEXP (op1, 0), 0);
12700 model = SYMBOL_REF_TLS_MODEL (symbol);
12702 tmp = legitimize_tls_address (symbol, model, true);
12703 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12704 && SYMBOL_REF_DLLIMPORT_P (symbol))
12705 tmp = legitimize_dllimport_symbol (symbol, true);
12709 tmp = force_operand (tmp, NULL);
12710 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12711 op0, 1, OPTAB_DIRECT);
/* PIC: symbolic operands must be routed through the PIC machinery.  */
12717 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12719 if (TARGET_MACHO && !TARGET_64BIT)
12724 rtx temp = ((reload_in_progress
12725 || ((op0 && REG_P (op0))
12727 ? op0 : gen_reg_rtx (Pmode));
12728 op1 = machopic_indirect_data_reference (op1, temp);
12729 op1 = machopic_legitimize_pic_address (op1, mode,
12730 temp == op1 ? 0 : temp);
12732 else if (MACHOPIC_INDIRECT)
12733 op1 = machopic_indirect_data_reference (op1, 0);
12741 op1 = force_reg (Pmode, op1);
12742 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12744 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12745 op1 = legitimize_pic_address (op1, reg);
/* mem-to-mem and unpushable operands: go through a register.  */
12754 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12755 || !push_operand (op0, mode))
12757 op1 = force_reg (mode, op1);
12759 if (push_operand (op0, mode)
12760 && ! general_no_elim_operand (op1, mode))
12761 op1 = copy_to_mode_reg (mode, op1);
12763 /* Force large constants in 64bit compilation into register
12764 to get them CSEed.  */
12765 if (can_create_pseudo_p ()
12766 && (mode == DImode) && TARGET_64BIT
12767 && immediate_operand (op1, mode)
12768 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12769 && !register_operand (op0, mode)
12771 op1 = copy_to_mode_reg (mode, op1);
12773 if (can_create_pseudo_p ()
12774 && FLOAT_MODE_P (mode)
12775 && GET_CODE (op1) == CONST_DOUBLE)
12777 /* If we are loading a floating point constant to a register,
12778 force the value to memory now, since we'll get better code
12779 out the back end.  */
12781 op1 = validize_mem (force_const_mem (mode, op1));
12782 if (!register_operand (op0, mode))
12784 rtx temp = gen_reg_rtx (mode);
12785 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12786 emit_move_insn (op0, temp);
12792 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move of MODE.  Forces awkward constants to the
   constant pool and diverts under-aligned SSE memory operands to the
   misaligned-move expander.  */
12796 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12798 rtx op0 = operands[0], op1 = operands[1];
12799 unsigned int align = GET_MODE_ALIGNMENT (mode);
12801 /* Force constants other than zero into memory.  We do not know how
12802 the instructions used to build constants modify the upper 64 bits
12803 of the register, once we have that information we may be able
12804 to handle some of them more efficiently.  */
12805 if (can_create_pseudo_p ()
12806 && register_operand (op0, mode)
12807 && (CONSTANT_P (op1)
12808 || (GET_CODE (op1) == SUBREG
12809 && CONSTANT_P (SUBREG_REG (op1))))
12810 && standard_sse_constant_p (op1) <= 0)
12811 op1 = validize_mem (force_const_mem (mode, op1));
12813 /* We need to check memory alignment for SSE mode since attribute
12814 can make operands unaligned.  */
12815 if (can_create_pseudo_p ()
12816 && SSE_REG_MODE_P (mode)
12817 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12818 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12822 /* ix86_expand_vector_move_misalign() does not like constants ... */
12823 if (CONSTANT_P (op1)
12824 || (GET_CODE (op1) == SUBREG
12825 && CONSTANT_P (SUBREG_REG (op1))))
12826 op1 = validize_mem (force_const_mem (mode, op1));
12828 /* ... nor both arguments in memory.  */
12829 if (!register_operand (op0, mode)
12830 && !register_operand (op1, mode))
12831 op1 = force_reg (mode, op1);
12833 tmp[0] = op0; tmp[1] = op1;
12834 ix86_expand_vector_move_misalign (mode, tmp);
12838 /* Make operand1 a register if it isn't already.  */
12839 if (can_create_pseudo_p ()
12840 && !register_operand (op0, mode)
12841 && !register_operand (op1, mode))
12843 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12847 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12850 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
12851 straight to ix86_expand_vector_move.  */
12852 /* Code generation for scalar reg-reg moves of single and double precision data:
12853 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12857 if (x86_sse_partial_reg_dependency == true)
12862 Code generation for scalar loads of double precision data:
12863 if (x86_sse_split_regs == true)
12864 movlpd mem, reg      (gas syntax)
12868 Code generation for unaligned packed loads of single precision data
12869 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12870 if (x86_sse_unaligned_move_optimal)
12873 if (x86_sse_partial_reg_dependency == true)
12885 Code generation for unaligned packed loads of double precision data
12886 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12887 if (x86_sse_unaligned_move_optimal)
12890 if (x86_sse_split_regs == true)
12903 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* AVX path: vmovdqu / vmovup[sd] handle unaligned accesses directly,
   selected on vector class and width.  */
12912 switch (GET_MODE_CLASS (mode))
12914 case MODE_VECTOR_INT:
12916 switch (GET_MODE_SIZE (mode))
12919 op0 = gen_lowpart (V16QImode, op0);
12920 op1 = gen_lowpart (V16QImode, op1);
12921 emit_insn (gen_avx_movdqu (op0, op1));
12924 op0 = gen_lowpart (V32QImode, op0);
12925 op1 = gen_lowpart (V32QImode, op1);
12926 emit_insn (gen_avx_movdqu256 (op0, op1));
12929 gcc_unreachable ();
12932 case MODE_VECTOR_FLOAT:
12933 op0 = gen_lowpart (mode, op0);
12934 op1 = gen_lowpart (mode, op1);
12939 emit_insn (gen_avx_movups (op0, op1));
12942 emit_insn (gen_avx_movups256 (op0, op1));
12945 emit_insn (gen_avx_movupd (op0, op1));
12948 emit_insn (gen_avx_movupd256 (op0, op1));
12951 gcc_unreachable ();
12956 gcc_unreachable ();
/* Non-AVX load path (op1 in memory).  */
12964 /* If we're optimizing for size, movups is the smallest.  */
12965 if (optimize_insn_for_size_p ())
12967 op0 = gen_lowpart (V4SFmode, op0);
12968 op1 = gen_lowpart (V4SFmode, op1);
12969 emit_insn (gen_sse_movups (op0, op1));
12973 /* ??? If we have typed data, then it would appear that using
12974 movdqu is the only way to get unaligned data loaded with
12976 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12978 op0 = gen_lowpart (V16QImode, op0);
12979 op1 = gen_lowpart (V16QImode, op1);
12980 emit_insn (gen_sse2_movdqu (op0, op1));
12984 if (TARGET_SSE2 && mode == V2DFmode)
12988 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12990 op0 = gen_lowpart (V2DFmode, op0);
12991 op1 = gen_lowpart (V2DFmode, op1);
12992 emit_insn (gen_sse2_movupd (op0, op1));
12996 /* When SSE registers are split into halves, we can avoid
12997 writing to the top half twice.  */
12998 if (TARGET_SSE_SPLIT_REGS)
13000 emit_clobber (op0);
13005 /* ??? Not sure about the best option for the Intel chips.
13006 The following would seem to satisfy; the register is
13007 entirely cleared, breaking the dependency chain.  We
13008 then store to the upper half, with a dependency depth
13009 of one.  A rumor has it that Intel recommends two movsd
13010 followed by an unpacklpd, but this is unconfirmed.  And
13011 given that the dependency depth of the unpacklpd would
13012 still be one, I'm not sure why this would be better.  */
13013 zero = CONST0_RTX (V2DFmode);
/* Load the two doubles separately into the low/high halves.  */
13016 m = adjust_address (op1, DFmode, 0);
13017 emit_insn (gen_sse2_loadlpd (op0, zero, m));
13018 m = adjust_address (op1, DFmode, 8);
13019 emit_insn (gen_sse2_loadhpd (op0, op0, m));
13023 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13025 op0 = gen_lowpart (V4SFmode, op0);
13026 op1 = gen_lowpart (V4SFmode, op1);
13027 emit_insn (gen_sse_movups (op0, op1));
/* Break the false dependency on op0's previous contents before the
   split low/high loads.  */
13031 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
13032 emit_move_insn (op0, CONST0_RTX (mode));
13034 emit_clobber (op0);
13036 if (mode != V4SFmode)
13037 op0 = gen_lowpart (V4SFmode, op0);
13038 m = adjust_address (op1, V2SFmode, 0);
13039 emit_insn (gen_sse_loadlps (op0, op0, m));
13040 m = adjust_address (op1, V2SFmode, 8);
13041 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Non-AVX store path (op0 in memory).  */
13044 else if (MEM_P (op0))
13046 /* If we're optimizing for size, movups is the smallest.  */
13047 if (optimize_insn_for_size_p ())
13049 op0 = gen_lowpart (V4SFmode, op0);
13050 op1 = gen_lowpart (V4SFmode, op1);
13051 emit_insn (gen_sse_movups (op0, op1));
13055 /* ??? Similar to above, only less clear because of quote
13056 typeless stores unquote.  */
13057 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
13058 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13060 op0 = gen_lowpart (V16QImode, op0);
13061 op1 = gen_lowpart (V16QImode, op1);
13062 emit_insn (gen_sse2_movdqu (op0, op1));
13066 if (TARGET_SSE2 && mode == V2DFmode)
13068 m = adjust_address (op0, DFmode, 0);
13069 emit_insn (gen_sse2_storelpd (m, op1));
13070 m = adjust_address (op0, DFmode, 8);
13071 emit_insn (gen_sse2_storehpd (m, op1));
13075 if (mode != V4SFmode)
13076 op1 = gen_lowpart (V4SFmode, op1);
13077 m = adjust_address (op0, V2SFmode, 0);
13078 emit_insn (gen_sse_storelps (m, op1));
13079 m = adjust_address (op0, V2SFmode, 8);
13080 emit_insn (gen_sse_storehps (m, op1));
13084 gcc_unreachable ();
13087 /* Expand a push in MODE.  This is some mode for which we do not support
13088 proper push instructions, at least from the registers that we expect
13089 the value to live in.  */
13092 ix86_expand_push (enum machine_mode mode, rtx x)
/* Manually adjust the stack pointer down by the operand size, then
   store X through it -- the open-coded equivalent of a push.  */
13096 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
13097 GEN_INT (-GET_MODE_SIZE (mode)),
13098 stack_pointer_rtx, 1, OPTAB_DIRECT);
13099 if (tmp != stack_pointer_rtx)
13100 emit_move_insn (stack_pointer_rtx, tmp);
13102 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
13104 /* When we push an operand onto stack, it has to be aligned at least
13105 at the function argument boundary.  However since we don't have
13106 the argument type, we can't determine the actual argument
13107 boundary.  */
13108 emit_move_insn (tmp, x);
13111 /* Helper function of ix86_fixup_binary_operands to canonicalize
13112 operand order.  Returns true if the operands should be swapped.  */
13115 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
13118 rtx dst = operands[0];
13119 rtx src1 = operands[1];
13120 rtx src2 = operands[2];
13122 /* If the operation is not commutative, we can't do anything.  */
13123 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
13126 /* Highest priority is that src1 should match dst.  */
13127 if (rtx_equal_p (dst, src1))
13129 if (rtx_equal_p (dst, src2))
13132 /* Next highest priority is that immediate constants come second.  */
13133 if (immediate_operand (src2, mode))
13135 if (immediate_operand (src1, mode))
13138 /* Lowest priority is that memory references should come second.  */
13148 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
13149 destination to use for the operation.  If different from the true
13150 destination in operands[0], a copy operation will be required.  */
13153 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
13156 rtx dst = operands[0];
13157 rtx src1 = operands[1];
13158 rtx src2 = operands[2];
13160 /* Canonicalize operand order.  */
13161 if (ix86_swap_binary_operands_p (code, mode, operands))
13165 /* It is invalid to swap operands of different modes.  */
13166 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
13173 /* Both source operands cannot be in memory.  */
13174 if (MEM_P (src1) && MEM_P (src2))
13176 /* Optimization: Only read from memory once.  */
13177 if (rtx_equal_p (src1, src2))
13179 src2 = force_reg (mode, src2);
13183 src2 = force_reg (mode, src2);
13186 /* If the destination is memory, and we do not have matching source
13187 operands, do things in registers.  */
13188 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13189 dst = gen_reg_rtx (mode);
13191 /* Source 1 cannot be a constant.  */
13192 if (CONSTANT_P (src1))
13193 src1 = force_reg (mode, src1);
13195 /* Source 1 cannot be a non-matching memory.  */
13196 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13197 src1 = force_reg (mode, src1);
/* Write the (possibly replaced) sources back for the caller.  */
13199 operands[1] = src1;
13200 operands[2] = src2;
13204 /* Similarly, but assume that the destination has already been
13205 set up properly.  */
13208 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
13209 enum machine_mode mode, rtx operands[])
/* The assert guarantees no extra copy into operands[0] is needed.  */
13211 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
13212 gcc_assert (dst == operands[0]);
13215 /* Attempt to expand a binary operator.  Make the expansion closer to the
13216 actual machine, then just general_operand, which will allow 3 separate
13217 memory references (one output, two input) in a single insn.  */
13220 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
13223 rtx src1, src2, dst, op, clob;
13225 dst = ix86_fixup_binary_operands (code, mode, operands);
13226 src1 = operands[1];
13227 src2 = operands[2];
13229 /* Emit the instruction.  */
13231 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
13232 if (reload_in_progress)
13234 /* Reload doesn't know about the flags register, and doesn't know that
13235 it doesn't want to clobber it.  We can only do this with PLUS.  */
13236 gcc_assert (code == PLUS);
/* Normal case: attach the flags-register clobber explicitly.  */
13241 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13242 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13245 /* Fix up the destination if needed.  */
13246 if (dst != operands[0])
13247 emit_move_insn (operands[0], dst);
13250 /* Return TRUE or FALSE depending on whether the binary operator meets the
13251 appropriate constraints.  */
13254 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
13257 rtx dst = operands[0];
13258 rtx src1 = operands[1];
13259 rtx src2 = operands[2];
13261 /* Both source operands cannot be in memory.  */
13262 if (MEM_P (src1) && MEM_P (src2))
13265 /* Canonicalize operand order for commutative operators.  */
13266 if (ix86_swap_binary_operands_p (code, mode, operands))
13273 /* If the destination is memory, we must have a matching source operand.  */
13274 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13277 /* Source 1 cannot be a constant.  */
13278 if (CONSTANT_P (src1))
13281 /* Source 1 cannot be a non-matching memory.  */
13282 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13288 /* Attempt to expand a unary operator.  Make the expansion closer to the
13289 actual machine, then just general_operand, which will allow 2 separate
13290 memory references (one output, one input) in a single insn.  */
13293 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13296 int matching_memory;
13297 rtx src, dst, op, clob;
13302 /* If the destination is memory, and we do not have matching source
13303 operands, do things in registers.  */
13304 matching_memory = 0;
13307 if (rtx_equal_p (dst, src))
13308 matching_memory = 1;
13310 dst = gen_reg_rtx (mode);
13313 /* When source operand is memory, destination must match.  */
13314 if (MEM_P (src) && !matching_memory)
13315 src = force_reg (mode, src);
13317 /* Emit the instruction.  */
13319 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13320 if (reload_in_progress || code == NOT)
13322 /* Reload doesn't know about the flags register, and doesn't know that
13323 it doesn't want to clobber it.  */
13324 gcc_assert (code == NOT);
/* Other unary ops clobber flags, so attach the CLOBBER explicitly.  */
13329 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13330 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13333 /* Fix up the destination if needed.  */
13334 if (dst != operands[0])
13335 emit_move_insn (operands[0], dst);
/* Upper bound on how many insns the AGU/ALU distance scans will walk.  */
13338 #define LEA_SEARCH_THRESHOLD 12
13340 /* Search backward for non-agu definition of register number REGNO1
13341 or register number REGNO2 in INSN's basic block until
13342 1. Pass LEA_SEARCH_THRESHOLD instructions, or
13343 2. Reach BB boundary, or
13344 3. Reach agu definition.
13345 Returns the distance between the non-agu definition point and INSN.
13346 If no definition point, returns -1.  */
13349 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13352 basic_block bb = BLOCK_FOR_INSN (insn);
13355 enum attr_type insn_type;
/* Phase 1: walk backward inside INSN's own basic block.  */
13357 if (insn != BB_HEAD (bb))
13359 rtx prev = PREV_INSN (insn);
13360 while (prev && distance < LEA_SEARCH_THRESHOLD)
/* Check every DF def of PREV for a write to REGNO1 or REGNO2.  */
13365 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13366 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13367 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13368 && (regno1 == DF_REF_REGNO (*def_rec)
13369 || regno2 == DF_REF_REGNO (*def_rec)))
13371 insn_type = get_attr_type (prev);
13372 if (insn_type != TYPE_LEA)
13376 if (prev == BB_HEAD (bb))
13378 prev = PREV_INSN (prev);
/* Phase 2: if the budget is not exhausted and BB is a single-block
   loop (a predecessor edge from itself), continue from BB's end.  */
13382 if (distance < LEA_SEARCH_THRESHOLD)
13386 bool simple_loop = false;
13388 FOR_EACH_EDGE (e, ei, bb->preds)
13391 simple_loop = true;
13397 rtx prev = BB_END (bb);
13400 && distance < LEA_SEARCH_THRESHOLD)
13405 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13406 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13407 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13408 && (regno1 == DF_REF_REGNO (*def_rec)
13409 || regno2 == DF_REF_REGNO (*def_rec)))
13411 insn_type = get_attr_type (prev);
13412 if (insn_type != TYPE_LEA)
13416 prev = PREV_INSN (prev);
13424 /* get_attr_type may modify recog data.  We want to make sure
13425 that recog data is valid for instruction INSN, on which
13426 distance_non_agu_define is called.  INSN is unchanged here.  */
13427 extract_insn_cached (insn);
13431 /* Return the distance between INSN and the next insn that uses
13432 register number REGNO0 in memory address.  Return -1 if no such
13433 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
13436 distance_agu_use (unsigned int regno0, rtx insn)
13438 basic_block bb = BLOCK_FOR_INSN (insn);
/* Phase 1: walk forward inside INSN's own basic block.  */
13443 if (insn != BB_END (bb))
13445 rtx next = NEXT_INSN (insn);
13446 while (next && distance < LEA_SEARCH_THRESHOLD)
/* A memory-address use of REGNO0 ends the search successfully.  */
13452 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13453 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13454 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13455 && regno0 == DF_REF_REGNO (*use_rec))
13457 /* Return DISTANCE if OP0 is used in memory
13458 address in NEXT.  */
/* A redefinition of REGNO0 kills the search.  */
13462 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13463 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13464 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13465 && regno0 == DF_REF_REGNO (*def_rec))
13467 /* Return -1 if OP0 is set in NEXT.  */
13471 if (next == BB_END (bb))
13473 next = NEXT_INSN (next);
/* Phase 2: for a single-block loop (a successor edge back to BB),
   continue the scan from the block head.  */
13477 if (distance < LEA_SEARCH_THRESHOLD)
13481 bool simple_loop = false;
13483 FOR_EACH_EDGE (e, ei, bb->succs)
13486 simple_loop = true;
13492 rtx next = BB_HEAD (bb);
13495 && distance < LEA_SEARCH_THRESHOLD)
13501 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13502 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13503 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13504 && regno0 == DF_REF_REGNO (*use_rec))
13506 /* Return DISTANCE if OP0 is used in memory
13507 address in NEXT.  */
13511 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13512 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13513 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13514 && regno0 == DF_REF_REGNO (*def_rec))
13516 /* Return -1 if OP0 is set in NEXT.  */
13521 next = NEXT_INSN (next);
13529 /* Define this macro to tune LEA priority vs ADD; it takes effect when
13530 there is a dilemma of choosing LEA or ADD
13531 Negative value: ADD is more preferred than LEA
13533 Positive value: LEA is more preferred than ADD  */
13534 #define IX86_LEA_PRIORITY 2
13536 /* Return true if it is ok to optimize an ADD operation to LEA
13537 operation to avoid flag register consumption.  For the processors
13538 like ATOM, if the destination register of LEA holds an actual
13539 address which will be used soon, LEA is better and otherwise ADD
13540 is better.  */
13543 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13544 rtx insn, rtx operands[])
13546 unsigned int regno0 = true_regnum (operands[0]);
13547 unsigned int regno1 = true_regnum (operands[1]);
13548 unsigned int regno2;
/* Without AGU tuning, only prefer LEA when it is the 3-operand form
   (dest differs from the first source).  */
13550 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13551 return regno0 != regno1;
13553 regno2 = true_regnum (operands[2]);
13555 /* If a = b + c, (a!=b && a!=c), must use lea form.  */
13556 if (regno0 != regno1 && regno0 != regno2)
/* Weigh the backward non-AGU def distance against the forward AGU use
   distance, biased by IX86_LEA_PRIORITY.  */
13560 int dist_define, dist_use;
13561 dist_define = distance_non_agu_define (regno1, regno2, insn);
13562 if (dist_define <= 0)
13565 /* If this insn has both backward non-agu dependence and forward
13566 agu dependence, the one with short distance take effect.  */
13567 dist_use = distance_agu_use (regno0, insn);
13569 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13576 /* Return true if destination reg of SET_BODY is shift count of
13577 USE_BODY.  Recurses into PARALLEL bodies on either side.  */
13580 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13586 /* Retrieve destination of SET_BODY.  */
13587 switch (GET_CODE (set_body))
13590 set_dest = SET_DEST (set_body);
13591 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: test each element of SET_BODY against USE_BODY.  */
13595 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13596 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13604 /* Retrieve shift count of USE_BODY.  */
13605 switch (GET_CODE (use_body))
13608 shift_rtx = XEXP (use_body, 1);
/* PARALLEL: test SET_BODY against each element of USE_BODY.  */
13611 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13612 if (ix86_dep_by_shift_count_body (set_body,
13613 XVECEXP (use_body, 0, i)))
/* Only shift/rotate codes have a count operand to match against.  */
13621 && (GET_CODE (shift_rtx) == ASHIFT
13622 || GET_CODE (shift_rtx) == LSHIFTRT
13623 || GET_CODE (shift_rtx) == ASHIFTRT
13624 || GET_CODE (shift_rtx) == ROTATE
13625 || GET_CODE (shift_rtx) == ROTATERT))
13627 rtx shift_count = XEXP (shift_rtx, 1);
13629 /* Return true if shift count is dest of SET_BODY.  */
13630 if (REG_P (shift_count)
13631 && true_regnum (set_dest) == true_regnum (shift_count))
13638 /* Return true if destination reg of SET_INSN is shift count of
13639 USE_INSN.  Thin wrapper over ix86_dep_by_shift_count_body that
13640 extracts the insn patterns.  */
13642 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13644 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13645 PATTERN (use_insn));
13648 /* Return TRUE or FALSE depending on whether the unary operator meets the
13649 appropriate constraints.  */
13652 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13653 enum machine_mode mode ATTRIBUTE_UNUSED,
13654 rtx operands[2] ATTRIBUTE_UNUSED)
13656 /* If one of operands is memory, source and destination must match.  */
13657 if ((MEM_P (operands[0])
13658 || MEM_P (operands[1]))
13659 && ! rtx_equal_p (operands[0], operands[1]))
13664 /* Post-reload splitter for converting an SF or DFmode value in an
13665 SSE register into an unsigned SImode.  */
13668 ix86_split_convert_uns_si_sse (rtx operands[])
13670 enum machine_mode vecmode;
13671 rtx value, large, zero_or_two31, input, two31, x;
13673 large = operands[1];
13674 zero_or_two31 = operands[2];
13675 input = operands[3];
13676 two31 = operands[4];
13677 vecmode = GET_MODE (large);
13678 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13680 /* Load up the value into the low element.  We must ensure that the other
13681 elements are valid floats -- zero is the easiest such value.  */
13684 if (vecmode == V4SFmode)
13685 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13687 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Input already in an SSE register: clear VALUE and merge the scalar.  */
13691 input = gen_rtx_REG (vecmode, REGNO (input));
13692 emit_move_insn (value, CONST0_RTX (vecmode));
13693 if (vecmode == V4SFmode)
13694 emit_insn (gen_sse_movss (value, value, input));
13696 emit_insn (gen_sse2_movsd (value, value, input));
/* LARGE becomes a mask: all-ones where 2**31 <= value.  */
13699 emit_move_insn (large, two31);
13700 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
13702 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13703 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* Subtract 2**31 only in the large case, so the signed cvttps/cvttpd
   conversion below stays in range.  */
13705 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13706 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13708 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13709 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the mask into the 0x80000000 correction bit.  */
13711 large = gen_rtx_REG (V4SImode, REGNO (large));
13712 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13714 x = gen_rtx_REG (V4SImode, REGNO (value));
13715 if (vecmode == V4SFmode)
13716 emit_insn (gen_sse2_cvttps2dq (x, value));
13718 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* XOR the sign bit back in for values that had 2**31 subtracted.  */
13721 emit_insn (gen_xorv4si3 (value, value, large));
/* NOTE(review): partial listing (interior lines elided; leading integers
   are original line numbers).  Expands unsigned DImode -> DFmode using
   the classic exponent-trick: juxtapose the 32-bit halves with the
   0x1.0p52 / 0x1.0p84 exponent words, subtract the biases, then add the
   two partial doubles (haddpd when SSE3 is available, else unpack+add).  */
13724 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13725 Expects the 64-bit DImode to be supplied in a pair of integral
13726 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13727 -mfpmath=sse, !optimize_size only. */
13730 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13732 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13733 rtx int_xmm, fp_xmm;
13734 rtx biases, exponents;
13737 int_xmm = gen_reg_rtx (V4SImode);
13738 if (TARGET_INTER_UNIT_MOVES)
13739 emit_insn (gen_movdi_to_sse (int_xmm, input));
13740 else if (TARGET_SSE_SPLIT_REGS)
13742 emit_clobber (int_xmm);
13743 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13747 x = gen_reg_rtx (V2DImode);
13748 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13749 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
13752 x = gen_rtx_CONST_VECTOR (V4SImode,
13753 gen_rtvec (4, GEN_INT (0x43300000UL),
13754 GEN_INT (0x45300000UL),
13755 const0_rtx, const0_rtx));
13756 exponents = validize_mem (force_const_mem (V4SImode, x));
13758 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13759 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13761 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13762 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13763 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13764 (0x1.0p84 + double(fp_value_hi_xmm)).
13765 Note these exponents differ by 32. */
13767 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13769 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13770 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13771 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13772 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13773 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13774 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13775 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13776 biases = validize_mem (force_const_mem (V2DFmode, biases));
13777 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13779 /* Add the upper and lower DFmode values together. */
13781 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13784 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13785 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13786 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13789 ix86_expand_vector_extract (false, target, fp_xmm, 0);
/* NOTE(review): partial listing (leading integers are original line
   numbers).  Placeholder that exists only so machine-description macros
   can name a uns_sixf variant; must never be reached at run time.  */
13792 /* Not used, but eases macroization of patterns. */
13794 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13795 rtx input ATTRIBUTE_UNUSED)
13797 gcc_unreachable ();
/* NOTE(review): partial listing (interior lines elided; leading integers
   are original line numbers).  Unsigned SImode -> DFmode: flip the sign
   bit by adding INT_MIN (written as -2147483647 - 1 to avoid overflow
   in the literal), do a signed int->double convert, then add 2**31 back
   as a DF constant.  */
13800 /* Convert an unsigned SImode value into a DFmode. Only currently used
13801 for SSE, but applicable anywhere. */
13804 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13806 REAL_VALUE_TYPE TWO31r;
13809 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13810 NULL, 1, OPTAB_DIRECT);
13812 fp = gen_reg_rtx (DFmode);
13813 emit_insn (gen_floatsidf2 (fp, x));
13815 real_ldexp (&TWO31r, &dconst1, 31);
13816 x = const_double_from_real_value (TWO31r, DFmode);
13818 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13820 emit_move_insn (target, x);
/* NOTE(review): partial listing (interior lines elided; leading integers
   are original line numbers).  Signed DImode -> DFmode for 32-bit SSE:
   convert the signed high word, scale it by 2**32, convert the low word
   as unsigned via ix86_expand_convert_uns_sidf_sse, and sum the parts.  */
13823 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13824 32-bit mode; otherwise we have a direct convert instruction. */
13827 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13829 REAL_VALUE_TYPE TWO32r;
13830 rtx fp_lo, fp_hi, x;
13832 fp_lo = gen_reg_rtx (DFmode);
13833 fp_hi = gen_reg_rtx (DFmode);
13835 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13837 real_ldexp (&TWO32r, &dconst1, 32);
13838 x = const_double_from_real_value (TWO32r, DFmode);
13839 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
13841 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13843 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13846 emit_move_insn (target, x);
/* NOTE(review): partial listing (interior lines elided; leading integers
   are original line numbers).  Unsigned SImode -> SFmode: split input
   into 16-bit halves, convert each as signed (both fit exactly), scale
   the high half by 2**16 and add.  Avoids double-rounding issues of a
   single inexact conversion.  */
13849 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13850 For x86_32, -mfpmath=sse, !optimize_size only. */
13852 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13854 REAL_VALUE_TYPE ONE16r;
13855 rtx fp_hi, fp_lo, int_hi, int_lo, x;
13857 real_ldexp (&ONE16r, &dconst1, 16);
13858 x = const_double_from_real_value (ONE16r, SFmode);
13859 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13860 NULL, 0, OPTAB_DIRECT);
13861 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13862 NULL, 0, OPTAB_DIRECT);
13863 fp_hi = gen_reg_rtx (SFmode);
13864 fp_lo = gen_reg_rtx (SFmode);
13865 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13866 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13867 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13869 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13871 if (!rtx_equal_p (target, fp_hi))
13872 emit_move_insn (target, fp_hi);
/* NOTE(review): partial listing (interior lines elided, including the
   mode switch labels; leading integers are original line numbers).
   Builds a CONST_VECTOR for MODE containing VALUE: replicated into
   every element when VECT is true, otherwise VALUE in element 0 and
   zeros elsewhere (for FP modes).  Unhandled modes hit
   gcc_unreachable ().  */
13875 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
13876 then replicate the value for all elements of the vector
13880 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13887 v = gen_rtvec (4, value, value, value, value);
13888 return gen_rtx_CONST_VECTOR (V4SImode, v);
13892 v = gen_rtvec (2, value, value);
13893 return gen_rtx_CONST_VECTOR (V2DImode, v);
13897 v = gen_rtvec (4, value, value, value, value);
13899 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13900 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13901 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13905 v = gen_rtvec (2, value, value);
13907 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13908 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13911 gcc_unreachable ();
/* NOTE(review): partial listing (interior lines elided, including the
   per-mode case labels; leading integers are original line numbers).
   Builds the sign-bit mask constant for MODE in an SSE register: lo/hi
   form a 2*HOST_WIDE_INT sign-extended bit pattern, inverted when
   INVERT (mask that clears the sign bit instead of isolating it);
   replicated across the vector when VECT.  The HOST_BITS_PER_WIDE_INT
   checks keep this correct on 32-bit hosts.  */
13915 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13916 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13917 for an SSE register. If VECT is true, then replicate the mask for
13918 all elements of the vector register. If INVERT is true, then create
13919 a mask excluding the sign bit. */
13922 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13924 enum machine_mode vec_mode, imode;
13925 HOST_WIDE_INT hi, lo;
13930 /* Find the sign bit, sign extended to 2*HWI. */
13936 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13937 lo = 0x80000000, hi = lo < 0;
13943 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13944 if (HOST_BITS_PER_WIDE_INT >= 64)
13945 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13947 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13952 vec_mode = VOIDmode;
13953 if (HOST_BITS_PER_WIDE_INT >= 64)
13956 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13963 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13967 lo = ~lo, hi = ~hi;
13973 mask = immed_double_const (lo, hi, imode);
13975 vec = gen_rtvec (2, v, mask);
13976 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13977 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13984 gcc_unreachable ();
13988 lo = ~lo, hi = ~hi;
13990 /* Force this value into the low part of a fp vector constant. */
13991 mask = immed_double_const (lo, hi, imode);
13992 mask = gen_lowpart (mode, mask);
13994 if (vec_mode == VOIDmode)
13995 return force_reg (mode, mask);
13997 v = ix86_build_const_vector (mode, vect, mask);
13998 return force_reg (vec_mode, v);
/* NOTE(review): partial listing (interior lines elided; leading integers
   are original line numbers).  Emits FP ABS/NEG.  With SSE (or vector
   modes / TFmode) the operation becomes a bitwise AND (abs) or XOR
   (neg) against a sign-bit mask from ix86_build_signbit_mask; the
   x87 path emits the plain unary rtx.  The PARALLEL with USE + CLOBBER
   feeds the post-reload splitter -- presumably the scalar-SSE case;
   TODO confirm which path reaches it in the elided control flow.  */
14001 /* Generate code for floating point ABS or NEG. */
14004 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
14007 rtx mask, set, use, clob, dst, src;
14008 bool use_sse = false;
14009 bool vector_mode = VECTOR_MODE_P (mode);
14010 enum machine_mode elt_mode = mode;
14014 elt_mode = GET_MODE_INNER (mode);
14017 else if (mode == TFmode)
14019 else if (TARGET_SSE_MATH)
14020 use_sse = SSE_FLOAT_MODE_P (mode);
14022 /* NEG and ABS performed with SSE use bitwise mask operations.
14023 Create the appropriate mask now. */
14025 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
14034 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
14035 set = gen_rtx_SET (VOIDmode, dst, set);
14040 set = gen_rtx_fmt_e (code, mode, src);
14041 set = gen_rtx_SET (VOIDmode, dst, set);
14044 use = gen_rtx_USE (VOIDmode, mask);
14045 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14046 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14047 gen_rtvec (3, set, use, clob)));
/* NOTE(review): partial listing (interior lines elided; leading integers
   are original line numbers).  Expands copysign(op0, op1).  When op0 is
   a CONST_DOUBLE it is first made non-negative, embedded in a vector
   constant for SF/DF, and the *_const pattern is used with a single
   sign mask; otherwise the *_var pattern gets both the mask and its
   inverse (nmask) for the two-mask splitter below.  */
14054 /* Expand a copysign operation. Special case operand 0 being a constant. */
14057 ix86_expand_copysign (rtx operands[])
14059 enum machine_mode mode;
14060 rtx dest, op0, op1, mask, nmask;
14062 dest = operands[0];
14066 mode = GET_MODE (dest);
14068 if (GET_CODE (op0) == CONST_DOUBLE)
14070 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
14072 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
14073 op0 = simplify_unary_operation (ABS, mode, op0, mode);
14075 if (mode == SFmode || mode == DFmode)
14077 enum machine_mode vmode;
14079 vmode = mode == SFmode ? V4SFmode : V2DFmode;
14081 if (op0 == CONST0_RTX (mode))
14082 op0 = CONST0_RTX (vmode);
14087 if (mode == SFmode)
14088 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
14089 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
14091 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
14093 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
14096 else if (op0 != CONST0_RTX (mode))
14097 op0 = force_reg (mode, op0);
14099 mask = ix86_build_signbit_mask (mode, 0, 0);
14101 if (mode == SFmode)
14102 copysign_insn = gen_copysignsf3_const;
14103 else if (mode == DFmode)
14104 copysign_insn = gen_copysigndf3_const;
14106 copysign_insn = gen_copysigntf3_const;
14108 emit_insn (copysign_insn (dest, op0, op1, mask));
14112 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
14114 nmask = ix86_build_signbit_mask (mode, 0, 1);
14115 mask = ix86_build_signbit_mask (mode, 0, 0);
14117 if (mode == SFmode)
14118 copysign_insn = gen_copysignsf3_var;
14119 else if (mode == DFmode)
14120 copysign_insn = gen_copysigndf3_var;
14122 copysign_insn = gen_copysigntf3_var;
14124 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
/* NOTE(review): partial listing (interior lines elided; leading integers
   are original line numbers).  Splitter for the copysign*_const pattern:
   dest = (op1 & signbit_mask) | op0, where op0 has already been turned
   into a vector constant with a cleared sign bit.  The IOR is skipped
   when op0 is the zero vector.  */
14128 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
14129 be a constant, and so has already been expanded into a vector constant. */
14132 ix86_split_copysign_const (rtx operands[])
14134 enum machine_mode mode, vmode;
14135 rtx dest, op0, op1, mask, x;
14137 dest = operands[0];
14140 mask = operands[3];
14142 mode = GET_MODE (dest);
14143 vmode = GET_MODE (mask);
14145 dest = simplify_gen_subreg (vmode, dest, mode, 0);
14146 x = gen_rtx_AND (vmode, dest, mask);
14147 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14149 if (op0 != CONST0_RTX (vmode))
14151 x = gen_rtx_IOR (vmode, dest, op0);
14152 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* NOTE(review): partial listing (interior lines elided; leading integers
   are original line numbers).  Splitter for copysign*_var: computes
   dest = (op0 & ~signmask) | (op1 & signmask) using scratch, with
   register-allocation alternatives distinguished by which hard regs
   coincide (the REGNO equality checks).  The op0 == op1 shortcut avoids
   the aliasing case where the masking sequence would be wrong.  */
14156 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
14157 so we have to do two masks. */
14160 ix86_split_copysign_var (rtx operands[])
14162 enum machine_mode mode, vmode;
14163 rtx dest, scratch, op0, op1, mask, nmask, x;
14165 dest = operands[0];
14166 scratch = operands[1];
14169 nmask = operands[4];
14170 mask = operands[5];
14172 mode = GET_MODE (dest);
14173 vmode = GET_MODE (mask);
14175 if (rtx_equal_p (op0, op1))
14177 /* Shouldn't happen often (it's useless, obviously), but when it does
14178 we'd generate incorrect code if we continue below. */
14179 emit_move_insn (dest, op0);
14183 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
14185 gcc_assert (REGNO (op1) == REGNO (scratch));
14187 x = gen_rtx_AND (vmode, scratch, mask);
14188 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14191 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14192 x = gen_rtx_NOT (vmode, dest);
14193 x = gen_rtx_AND (vmode, x, op0);
14194 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14198 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
14200 x = gen_rtx_AND (vmode, scratch, mask);
14202 else /* alternative 2,4 */
14204 gcc_assert (REGNO (mask) == REGNO (scratch));
14205 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
14206 x = gen_rtx_AND (vmode, scratch, op1);
14208 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14210 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
14212 dest = simplify_gen_subreg (vmode, op0, mode, 0);
14213 x = gen_rtx_AND (vmode, dest, nmask);
14215 else /* alternative 3,4 */
14217 gcc_assert (REGNO (nmask) == REGNO (dest));
14219 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14220 x = gen_rtx_AND (vmode, dest, op0);
14222 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14225 x = gen_rtx_IOR (vmode, dest, scratch);
14226 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* NOTE(review): partial listing (interior lines elided, including most
   case labels; leading integers are original line numbers).  Checks
   that the first SET in INSN is a COMPARE whose CC destination mode is
   compatible with (at least as constrained as) REQ_MODE; the final line
   requires the compare source's mode to equal the computed set_mode.  */
14229 /* Return TRUE or FALSE depending on whether the first SET in INSN
14230 has source and destination with matching CC modes, and that the
14231 CC mode is at least as constrained as REQ_MODE. */
14234 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
14237 enum machine_mode set_mode;
14239 set = PATTERN (insn);
14240 if (GET_CODE (set) == PARALLEL)
14241 set = XVECEXP (set, 0, 0);
14242 gcc_assert (GET_CODE (set) == SET);
14243 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
14245 set_mode = GET_MODE (SET_DEST (set));
14249 if (req_mode != CCNOmode
14250 && (req_mode != CCmode
14251 || XEXP (SET_SRC (set), 1) != const0_rtx))
14255 if (req_mode == CCGCmode)
14259 if (req_mode == CCGOCmode || req_mode == CCNOmode)
14263 if (req_mode == CCZmode)
14274 gcc_unreachable ();
14277 return (GET_MODE (SET_SRC (set)) == set_mode);
/* NOTE(review): partial listing (interior lines elided; leading integers
   are original line numbers).  Emits flags = COMPARE (op0, op1) in the
   CC mode chosen by SELECT_CC_MODE and returns the comparison rtx
   (code flags 0) for the eventual bcc/scc/cmov consumer.  */
14280 /* Generate insn patterns to do an integer compare of OPERANDS. */
14283 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14285 enum machine_mode cmpmode;
14288 cmpmode = SELECT_CC_MODE (code, op0, op1);
14289 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14291 /* This is very simple, but making the interface the same as in the
14292 FP case makes the rest of the code easier. */
14293 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14294 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14296 /* Return the test that should be put into the flags user, i.e.
14297 the bcc, scc, or cmov instruction. */
14298 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
/* NOTE(review): partial listing (leading integers are original line
   numbers).  Chooses the CC mode for FP compares: the non-trapping
   CCFPUmode under -mieee-fp so every comparison stays reversible,
   plain CCFPmode otherwise.  The CODE argument is currently unused.  */
14301 /* Figure out whether to use ordered or unordered fp comparisons.
14302 Return the appropriate mode to use. */
14305 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14307 /* ??? In order to make all comparisons reversible, we do all comparisons
14308 non-trapping when compiling for IEEE. Once gcc is able to distinguish
14309 all forms trapping and nontrapping comparisons, we can make inequality
14310 comparisons trapping again, since it results in better code when using
14311 FCOM based compares. */
14312 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* NOTE(review): partial listing (interior lines elided, including the
   returned mode names for most cases; leading integers are original
   line numbers).  Selects the least-constrained CC mode that still
   captures CODE's flag requirements: FP modes defer to
   ix86_fp_compare_mode; integer codes pick among CCZ/CC/CCGOC/CCGC
   etc. based on which of ZF/CF/SF/OF the branch needs, with special
   recognition of overflow-check idioms (a OP b compared to a).  */
14316 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14318 enum machine_mode mode = GET_MODE (op0);
14320 if (SCALAR_FLOAT_MODE_P (mode))
14322 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14323 return ix86_fp_compare_mode (code);
14328 /* Only zero flag is needed. */
14329 case EQ: /* ZF=0 */
14330 case NE: /* ZF!=0 */
14332 /* Codes needing carry flag. */
14333 case GEU: /* CF=0 */
14334 case LTU: /* CF=1 */
14335 /* Detect overflow checks. They need just the carry flag. */
14336 if (GET_CODE (op0) == PLUS
14337 && rtx_equal_p (op1, XEXP (op0, 0)))
14341 case GTU: /* CF=0 & ZF=0 */
14342 case LEU: /* CF=1 | ZF=1 */
14343 /* Detect overflow checks. They need just the carry flag. */
14344 if (GET_CODE (op0) == MINUS
14345 && rtx_equal_p (op1, XEXP (op0, 0)))
14349 /* Codes possibly doable only with sign flag when
14350 comparing against zero. */
14351 case GE: /* SF=OF or SF=0 */
14352 case LT: /* SF<>OF or SF=1 */
14353 if (op1 == const0_rtx)
14356 /* For other cases Carry flag is not required. */
14358 /* Codes doable only with sign flag when comparing
14359 against zero, but we miss jump instruction for it
14360 so we need to use relational tests against overflow
14361 that thus needs to be zero. */
14362 case GT: /* ZF=0 & SF=OF */
14363 case LE: /* ZF=1 | SF<>OF */
14364 if (op1 == const0_rtx)
14368 /* strcmp pattern do (use flags) and combine may ask us for proper
14373 gcc_unreachable ();
/* NOTE(review): partial listing -- the function body (the assignments
   to *p1/*p2 and the return) is elided; leading integers are original
   line numbers.  Target hook reporting the fixed condition-code
   registers; on x86 p1 is presumably FLAGS_REG -- TODO confirm against
   the full source.  */
14377 /* Return the fixed registers used for condition codes. */
14380 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
/* NOTE(review): partial listing (interior lines elided, including most
   return statements; leading integers are original line numbers).
   Target hook: returns a CC mode compatible with both M1 and M2, e.g.
   merging CCGCmode with CCGOCmode; non-CC modes and self-compatible
   modes are handled in the elided branches.  */
14387 /* If two condition code modes are compatible, return a condition code
14388 mode which is compatible with both. Otherwise, return
14391 static enum machine_mode
14392 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
14397 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
14400 if ((m1 == CCGCmode && m2 == CCGOCmode)
14401 || (m1 == CCGOCmode && m2 == CCGCmode))
14407 gcc_unreachable ();
14437 /* These are only compatible with themselves, which we already
/* NOTE(review): partial listing (interior lines elided; leading integers
   are original line numbers).  Splits FP comparison CODE into at most
   two branchable codes (*first_code, *second_code) plus an optional
   *bypass_code branch taken around them; UNKNOWN marks an unused slot.
   Codes that misbehave on unordered operands get UNORDERED bypass or
   second branches, which are then dropped when !TARGET_IEEE_FP.  */
14443 /* Split comparison code CODE into comparisons we can do using branch
14444 instructions. BYPASS_CODE is comparison code for branch that will
14445 branch around FIRST_CODE and SECOND_CODE. If some of branches
14446 is not required, set value to UNKNOWN.
14447 We never require more than two branches. */
14450 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
14451 enum rtx_code *first_code,
14452 enum rtx_code *second_code)
14454 *first_code = code;
14455 *bypass_code = UNKNOWN;
14456 *second_code = UNKNOWN;
14458 /* The fcomi comparison sets flags as follows:
14468 case GT: /* GTU - CF=0 & ZF=0 */
14469 case GE: /* GEU - CF=0 */
14470 case ORDERED: /* PF=0 */
14471 case UNORDERED: /* PF=1 */
14472 case UNEQ: /* EQ - ZF=1 */
14473 case UNLT: /* LTU - CF=1 */
14474 case UNLE: /* LEU - CF=1 | ZF=1 */
14475 case LTGT: /* EQ - ZF=0 */
14477 case LT: /* LTU - CF=1 - fails on unordered */
14478 *first_code = UNLT;
14479 *bypass_code = UNORDERED;
14481 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
14482 *first_code = UNLE;
14483 *bypass_code = UNORDERED;
14485 case EQ: /* EQ - ZF=1 - fails on unordered */
14486 *first_code = UNEQ;
14487 *bypass_code = UNORDERED;
14489 case NE: /* NE - ZF=0 - fails on unordered */
14490 *first_code = LTGT;
14491 *second_code = UNORDERED;
14493 case UNGE: /* GEU - CF=0 - fails on unordered */
14495 *second_code = UNORDERED;
14497 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
14499 *second_code = UNORDERED;
14502 gcc_unreachable ();
14504 if (!TARGET_IEEE_FP)
14506 *second_code = UNKNOWN;
14507 *bypass_code = UNKNOWN;
/* NOTE(review): partial listing -- the per-code cost switch is almost
   entirely elided; leading integers are original line numbers.  Returns
   the instruction-count cost of the fnstsw+arithmetic compare sequence
   emitted by ix86_expand_fp_compare; cheaper when !TARGET_IEEE_FP.  */
14511 /* Return cost of comparison done fcom + arithmetics operations on AX.
14512 All following functions do use number of instructions as a cost metrics.
14513 In future this should be tweaked to compute bytes for optimize_size and
14514 take into account performance of various instructions on various CPUs. */
14516 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
14518 if (!TARGET_IEEE_FP)
14520 /* The cost of code output by ix86_expand_fp_compare. */
14544 gcc_unreachable ();
/* NOTE(review): partial listing (interior lines elided; leading integers
   are original line numbers).  Cost of an fcomi-based compare: base 2
   plus 1 if a bypass or second branch is needed; the elided guard
   presumably returns a huge cost when TARGET_CMOVE is unavailable --
   TODO confirm against the full source.  */
14548 /* Return cost of comparison done using fcomi operation.
14549 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14551 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
14553 enum rtx_code bypass_code, first_code, second_code;
14554 /* Return arbitrarily high cost when instruction is not supported - this
14555 prevents gcc from using it. */
14558 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14559 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
/* NOTE(review): partial listing (interior lines elided; leading integers
   are original line numbers).  Cost of an fnstsw+sahf compare: base 3
   plus 1 for an extra bypass/second branch; returns an arbitrarily
   high value (elided) when sahf is unavailable or not preferred.  */
14562 /* Return cost of comparison done using sahf operation.
14563 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14565 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14567 enum rtx_code bypass_code, first_code, second_code;
14568 /* Return arbitrarily high cost when instruction is not preferred - this
14569 avoids gcc from using it. */
14570 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
14572 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14573 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
/* NOTE(review): partial listing (interior lines elided, including the
   min updates and return; leading integers are original line numbers).
   Returns the minimum of the arithmetics, sahf, and fcomi compare
   costs for CODE.  */
14576 /* Compute cost of the comparison done using any method.
14577 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14579 ix86_fp_comparison_cost (enum rtx_code code)
14581 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
14584 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14585 sahf_cost = ix86_fp_comparison_sahf_cost (code);
14587 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14588 if (min > sahf_cost)
14590 if (min > fcomi_cost)
/* NOTE(review): partial listing (leading integers are original line
   numbers).  True when fcomi is the cheapest strategy for CODE or its
   swapped form -- i.e. when the overall comparison cost equals the
   fcomi cost.  */
14595 /* Return true if we should use an FCOMI instruction for this
14599 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14601 enum rtx_code swapped_code = swap_condition (code);
14603 return ((ix86_fp_comparison_cost (code)
14604 == ix86_fp_comparison_fcomi_cost (code))
14605 || (ix86_fp_comparison_cost (swapped_code)
14606 == ix86_fp_comparison_fcomi_cost (swapped_code)));
/* NOTE(review): partial listing (interior lines elided; leading integers
   are original line numbers).  Massages FP compare operands in place:
   forces both into registers where the compare insn demands it
   (unordered compares, fcomi, most XFmode cases), otherwise swaps the
   operands -- updating CODE with swap_condition -- when that lets op1
   sit in memory or when the swapped comparison is cheaper, and loads
   constants either from the constant pool or into registers depending
   on standard_80387_constant_p.  Returns the possibly-swapped code.  */
14609 /* Swap, force into registers, or otherwise massage the two operands
14610 to a fp comparison. The operands are updated in place; the new
14611 comparison code is returned. */
14613 static enum rtx_code
14614 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14616 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14617 rtx op0 = *pop0, op1 = *pop1;
14618 enum machine_mode op_mode = GET_MODE (op0);
14619 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14621 /* All of the unordered compare instructions only work on registers.
14622 The same is true of the fcomi compare instructions. The XFmode
14623 compare instructions require registers except when comparing
14624 against zero or when converting operand 1 from fixed point to
14628 && (fpcmp_mode == CCFPUmode
14629 || (op_mode == XFmode
14630 && ! (standard_80387_constant_p (op0) == 1
14631 || standard_80387_constant_p (op1) == 1)
14632 && GET_CODE (op1) != FLOAT)
14633 || ix86_use_fcomi_compare (code)))
14635 op0 = force_reg (op_mode, op0);
14636 op1 = force_reg (op_mode, op1);
14640 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14641 things around if they appear profitable, otherwise force op0
14642 into a register. */
14644 if (standard_80387_constant_p (op0) == 0
14646 && ! (standard_80387_constant_p (op1) == 0
14650 tmp = op0, op0 = op1, op1 = tmp;
14651 code = swap_condition (code);
14655 op0 = force_reg (op_mode, op0);
14657 if (CONSTANT_P (op1))
14659 int tmp = standard_80387_constant_p (op1);
14661 op1 = validize_mem (force_const_mem (op_mode, op1));
14665 op1 = force_reg (op_mode, op1);
14668 op1 = force_reg (op_mode, op1);
14672 /* Try to rearrange the comparison to make it cheaper. */
14673 if (ix86_fp_comparison_cost (code)
14674 > ix86_fp_comparison_cost (swap_condition (code))
14675 && (REG_P (op1) || can_create_pseudo_p ()))
14678 tmp = op0, op0 = op1, op1 = tmp;
14679 code = swap_condition (code);
14681 op0 = force_reg (op_mode, op0);
/* NOTE(review): partial listing -- the entire mapping switch is elided;
   leading integers are original line numbers.  Maps an FP comparison
   code to the integer-condition code that produces the right branch,
   returning UNKNOWN when no such code exists.  */
14689 /* Convert comparison codes we use to represent FP comparison to integer
14690 code that will result in proper branch. Return UNKNOWN if no such code
14694 ix86_fp_compare_code_to_integer (enum rtx_code code)
/* NOTE(review): partial listing (interior lines elided, including the
   case labels of the big switch; leading integers are original line
   numbers).  Emits an FP compare of OP0/OP1.  Fast path: fcomi or
   fnstsw+sahf when cheaper than arithmetic and the caller supplied
   slots for any needed bypass/second tests.  Slow path: fnstsw into a
   scratch HImode reg, then per-code bit tests on AH (the 0x45/0x44/
   0x01/0x40/0x04 masks select C0/C2/C3 status bits) to synthesize the
   condition, choosing CCNOmode or CCmode accordingly.  Returns the
   comparison rtx for the flags user.  */
14723 /* Generate insn patterns to do a floating point compare of OPERANDS. */
14726 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14727 rtx *second_test, rtx *bypass_test)
14729 enum machine_mode fpcmp_mode, intcmp_mode;
14731 int cost = ix86_fp_comparison_cost (code);
14732 enum rtx_code bypass_code, first_code, second_code;
14734 fpcmp_mode = ix86_fp_compare_mode (code);
14735 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14738 *second_test = NULL_RTX;
14740 *bypass_test = NULL_RTX;
14742 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14744 /* Do fcomi/sahf based test when profitable. */
14745 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14746 && (bypass_code == UNKNOWN || bypass_test)
14747 && (second_code == UNKNOWN || second_test))
14749 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14750 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14756 gcc_assert (TARGET_SAHF);
14759 scratch = gen_reg_rtx (HImode);
14760 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14762 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14765 /* The FP codes work out to act like unsigned. */
14766 intcmp_mode = fpcmp_mode;
14768 if (bypass_code != UNKNOWN)
14769 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14770 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14772 if (second_code != UNKNOWN)
14773 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14774 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14779 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14780 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14781 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14783 scratch = gen_reg_rtx (HImode);
14784 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14786 /* In the unordered case, we have to check C2 for NaN's, which
14787 doesn't happen to work out to anything nice combination-wise.
14788 So do some bit twiddling on the value we've got in AH to come
14789 up with an appropriate set of condition codes. */
14791 intcmp_mode = CCNOmode;
14796 if (code == GT || !TARGET_IEEE_FP)
14798 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14803 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14804 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14805 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14806 intcmp_mode = CCmode;
14812 if (code == LT && TARGET_IEEE_FP)
14814 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14815 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14816 intcmp_mode = CCmode;
14821 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14827 if (code == GE || !TARGET_IEEE_FP)
14829 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14834 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14835 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14842 if (code == LE && TARGET_IEEE_FP)
14844 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14845 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14846 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14847 intcmp_mode = CCmode;
14852 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14858 if (code == EQ && TARGET_IEEE_FP)
14860 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14861 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14862 intcmp_mode = CCmode;
14867 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14874 if (code == NE && TARGET_IEEE_FP)
14876 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14877 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14883 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14889 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14893 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14898 gcc_unreachable ();
14902 /* Return the test that should be put into the flags user, i.e.
14903 the bcc, scc, or cmov instruction. */
14904 return gen_rtx_fmt_ee (code, VOIDmode,
14905 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* NOTE(review): partial listing (interior lines elided; leading integers
   are original line numbers).  Dispatches a compare of the global
   ix86_compare_op0/op1: CC-mode operands pass through unchanged,
   scalar FP goes to ix86_expand_fp_compare, everything else to
   ix86_expand_int_compare.  Clears *second_test/*bypass_test first so
   callers see NULL when unused.  */
14910 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14913 op0 = ix86_compare_op0;
14914 op1 = ix86_compare_op1;
14917 *second_test = NULL_RTX;
14919 *bypass_test = NULL_RTX;
14921 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14922 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14924 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14926 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14927 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14928 second_test, bypass_test);
14931 ret = ix86_expand_int_compare (code, op0, op1);
/* NOTE(review): partial listing (leading integers are original line
   numbers).  True when branching on FP comparison CODE needs more than
   one jump, i.e. ix86_fp_comparison_codes reports a bypass or second
   branch.  */
14936 /* Return true if the CODE will result in nontrivial jump sequence. */
14938 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14940 enum rtx_code bypass_code, first_code, second_code;
14943 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14944 return bypass_code != UNKNOWN || second_code != UNKNOWN;
14948 ix86_expand_branch (enum rtx_code code, rtx label)
14952 switch (GET_MODE (ix86_compare_op0))
14958 tmp = ix86_expand_compare (code, NULL, NULL);
14959 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14960 gen_rtx_LABEL_REF (VOIDmode, label),
14962 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14971 enum rtx_code bypass_code, first_code, second_code;
14973 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14974 &ix86_compare_op1);
14976 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14978 /* Check whether we will use the natural sequence with one jump. If
14979 so, we can expand jump early. Otherwise delay expansion by
14980 creating compound insn to not confuse optimizers. */
14981 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14983 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14984 gen_rtx_LABEL_REF (VOIDmode, label),
14985 pc_rtx, NULL_RTX, NULL_RTX);
14989 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14990 ix86_compare_op0, ix86_compare_op1);
14991 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14992 gen_rtx_LABEL_REF (VOIDmode, label),
14994 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14996 use_fcomi = ix86_use_fcomi_compare (code);
14997 vec = rtvec_alloc (3 + !use_fcomi);
14998 RTVEC_ELT (vec, 0) = tmp;
15000 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
15002 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
15005 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
15007 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
15016 /* Expand DImode branch into multiple compare+branch. */
15018 rtx lo[2], hi[2], label2;
15019 enum rtx_code code1, code2, code3;
15020 enum machine_mode submode;
15022 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
15024 tmp = ix86_compare_op0;
15025 ix86_compare_op0 = ix86_compare_op1;
15026 ix86_compare_op1 = tmp;
15027 code = swap_condition (code);
15029 if (GET_MODE (ix86_compare_op0) == DImode)
15031 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
15032 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
15037 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
15038 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
15042 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
15043 avoid two branches. This costs one extra insn, so disable when
15044 optimizing for size. */
15046 if ((code == EQ || code == NE)
15047 && (!optimize_insn_for_size_p ()
15048 || hi[1] == const0_rtx || lo[1] == const0_rtx))
15053 if (hi[1] != const0_rtx)
15054 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
15055 NULL_RTX, 0, OPTAB_WIDEN);
15058 if (lo[1] != const0_rtx)
15059 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
15060 NULL_RTX, 0, OPTAB_WIDEN);
15062 tmp = expand_binop (submode, ior_optab, xor1, xor0,
15063 NULL_RTX, 0, OPTAB_WIDEN);
15065 ix86_compare_op0 = tmp;
15066 ix86_compare_op1 = const0_rtx;
15067 ix86_expand_branch (code, label);
15071 /* Otherwise, if we are doing less-than or greater-or-equal-than,
15072 op1 is a constant and the low word is zero, then we can just
15073 examine the high word. Similarly for low word -1 and
15074 less-or-equal-than or greater-than. */
15076 if (CONST_INT_P (hi[1]))
15079 case LT: case LTU: case GE: case GEU:
15080 if (lo[1] == const0_rtx)
15082 ix86_compare_op0 = hi[0];
15083 ix86_compare_op1 = hi[1];
15084 ix86_expand_branch (code, label);
15088 case LE: case LEU: case GT: case GTU:
15089 if (lo[1] == constm1_rtx)
15091 ix86_compare_op0 = hi[0];
15092 ix86_compare_op1 = hi[1];
15093 ix86_expand_branch (code, label);
15101 /* Otherwise, we need two or three jumps. */
15103 label2 = gen_label_rtx ();
15106 code2 = swap_condition (code);
15107 code3 = unsigned_condition (code);
15111 case LT: case GT: case LTU: case GTU:
15114 case LE: code1 = LT; code2 = GT; break;
15115 case GE: code1 = GT; code2 = LT; break;
15116 case LEU: code1 = LTU; code2 = GTU; break;
15117 case GEU: code1 = GTU; code2 = LTU; break;
15119 case EQ: code1 = UNKNOWN; code2 = NE; break;
15120 case NE: code2 = UNKNOWN; break;
15123 gcc_unreachable ();
15128 * if (hi(a) < hi(b)) goto true;
15129 * if (hi(a) > hi(b)) goto false;
15130 * if (lo(a) < lo(b)) goto true;
15134 ix86_compare_op0 = hi[0];
15135 ix86_compare_op1 = hi[1];
15137 if (code1 != UNKNOWN)
15138 ix86_expand_branch (code1, label);
15139 if (code2 != UNKNOWN)
15140 ix86_expand_branch (code2, label2);
15142 ix86_compare_op0 = lo[0];
15143 ix86_compare_op1 = lo[1];
15144 ix86_expand_branch (code3, label);
15146 if (code2 != UNKNOWN)
15147 emit_label (label2);
15152 /* If we have already emitted a compare insn, go straight to simple.
15153 ix86_expand_compare won't emit anything if ix86_compare_emitted
15155 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
15160 /* Split branch based on floating point condition. */
/* Emit the jump insn sequence for a floating-point branch.  CODE compares
   OP1 with OP2; TARGET1 and TARGET2 are the two destinations (one of them
   pc_rtx for fall-through).  TMP is a scratch operand forwarded to
   ix86_expand_fp_compare; PUSHED, when non-null, names an operand that was
   spilled to the stack and must be released after the compare.
   NOTE(review): this excerpt elides some lines of the function; the
   comments below describe only the statements that are visible.  */
15162 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
15163 rtx target1, rtx target2, rtx tmp, rtx pushed)
15165 rtx second, bypass;
15166 rtx label = NULL_RTX;
15168 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so TARGET2 is the fall-through arm; the reversal uses the
   _maybe_unordered variant so NaN (unordered) semantics are preserved.  */
15171 if (target2 != pc_rtx)
15174 code = reverse_condition_maybe_unordered (code);
15179 condition = ix86_expand_fp_compare (code, op1, op2,
15180 tmp, &second, &bypass);
15182 /* Remove pushed operand from stack. */
15184 ix86_free_from_memory (GET_MODE (pushed));
15186 if (split_branch_probability >= 0)
15188 /* Distribute the probabilities across the jumps.
15189 Assume the BYPASS and SECOND to be always test
15191 probability = split_branch_probability;
15193 /* Value of 1 is low enough to make no need for probability
15194 to be updated. Later we may run some experiments and see
15195 if unordered values are more frequent in practice. */
15197 bypass_probability = 1;
15199 second_probability = 1;
/* A BYPASS test routes around the primary jump through a local label.  */
15201 if (bypass != NULL_RTX)
15203 label = gen_label_rtx ();
15204 i = emit_jump_insn (gen_rtx_SET
15206 gen_rtx_IF_THEN_ELSE (VOIDmode,
15208 gen_rtx_LABEL_REF (VOIDmode,
15211 if (bypass_probability >= 0)
15212 add_reg_note (i, REG_BR_PROB, GEN_INT (bypass_probability));
/* The primary conditional jump, annotated with its branch probability.  */
15214 i = emit_jump_insn (gen_rtx_SET
15216 gen_rtx_IF_THEN_ELSE (VOIDmode,
15217 condition, target1, target2)));
15218 if (probability >= 0)
15219 add_reg_note (i, REG_BR_PROB, GEN_INT (probability));
/* SECOND is an auxiliary test that also jumps to TARGET1 when true.  */
15220 if (second != NULL_RTX)
15222 i = emit_jump_insn (gen_rtx_SET
15224 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
15226 if (second_probability >= 0)
15227 add_reg_note (i, REG_BR_PROB, GEN_INT (second_probability));
15229 if (label != NULL_RTX)
15230 emit_label (label);
/* Expand a setcc: store the result of comparison CODE (using the globals
   ix86_compare_op0/op1 set by the caller) into the QImode register DEST.
   When the FP comparison needs a second or bypass test, the partial
   results are combined with AND/OR of two QImode flags.
   NOTE(review): this excerpt elides some lines; comments cover only the
   visible statements.  */
15234 ix86_expand_setcc (enum rtx_code code, rtx dest)
15236 rtx ret, tmp, tmpreg, equiv;
15237 rtx second_test, bypass_test;
15239 gcc_assert (GET_MODE (dest) == QImode);
15241 ret = ix86_expand_compare (code, &second_test, &bypass_test);
15242 PUT_MODE (ret, QImode);
15247 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
15248 if (bypass_test || second_test)
15250 rtx test = second_test;
15252 rtx tmp2 = gen_reg_rtx (QImode);
/* A bypass test is reversed (preserving unordered semantics) so the
   two partial flags can be combined below.  */
15255 gcc_assert (!second_test);
15256 test = bypass_test;
15258 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
15260 PUT_MODE (test, QImode);
15261 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
/* Combine: AND for the (reversed) bypass case, IOR for the second test.  */
15264 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
15266 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
15269 /* Attach a REG_EQUAL note describing the comparison result. */
15270 if (ix86_compare_op0 && ix86_compare_op1)
15272 equiv = simplify_gen_relational (code, QImode,
15273 GET_MODE (ix86_compare_op0),
15274 ix86_compare_op0, ix86_compare_op1);
15275 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
15279 /* Expand comparison setting or clearing carry flag. Return true when
15280 successful and set pop for the operation. */
/* On success *POP receives an LTU/GEU rtx testing the carry flag; integer
   comparisons are rewritten into an unsigned form that maps onto carry.
   NOTE(review): this excerpt elides some lines (including the switch
   skeleton around the conversion cases); comments cover only visible
   statements.  */
15282 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
15284 enum machine_mode mode =
15285 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15287 /* Do not handle DImode compares that go through special path. */
15288 if (mode == (TARGET_64BIT ? TImode : DImode))
15291 if (SCALAR_FLOAT_MODE_P (mode))
15293 rtx second_test = NULL, bypass_test = NULL;
15294 rtx compare_op, compare_seq;
15296 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15298 /* Shortcut: following common codes never translate
15299 into carry flag compares. */
15300 if (code == EQ || code == NE || code == UNEQ || code == LTGT
15301 || code == ORDERED || code == UNORDERED)
15304 /* These comparisons require zero flag; swap operands so they won't. */
15305 if ((code == GT || code == UNLE || code == LE || code == UNGT)
15306 && !TARGET_IEEE_FP)
15311 code = swap_condition (code);
15314 /* Try to expand the comparison and verify that we end up with
15315 carry flag based comparison. This fails to be true only when
15316 we decide to expand comparison using arithmetic that is not
15317 too common scenario. */
15319 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
15320 &second_test, &bypass_test);
15321 compare_seq = get_insns ();
/* Only a plain single-test compare ending as LTU/GEU is usable.  */
15324 if (second_test || bypass_test)
15327 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15328 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15329 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15331 code = GET_CODE (compare_op);
15333 if (code != LTU && code != GEU)
15336 emit_insn (compare_seq);
15341 if (!INTEGRAL_MODE_P (mode))
15350 /* Convert a==0 into (unsigned)a<1. */
15353 if (op1 != const0_rtx)
15356 code = (code == EQ ? LTU : GEU);
15359 /* Convert a>b into b<a or a>=b-1. */
15362 if (CONST_INT_P (op1))
15364 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15365 /* Bail out on overflow. We still can swap operands but that
15366 would force loading of the constant into register. */
15367 if (op1 == const0_rtx
15368 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15370 code = (code == GTU ? GEU : LTU);
15377 code = (code == GTU ? LTU : GEU);
15381 /* Convert a>=0 into (unsigned)a<0x80000000. */
15384 if (mode == DImode || op1 != const0_rtx)
15386 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15387 code = (code == LT ? GEU : LTU);
15391 if (mode == DImode || op1 != constm1_rtx)
15393 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15394 code = (code == LE ? GEU : LTU);
15400 /* Swapping operands may cause constant to appear as first operand. */
15401 if (!nonimmediate_operand (op0, VOIDmode))
15403 if (!can_create_pseudo_p ())
15405 op0 = force_reg (mode, op0);
/* Publish the massaged operands through the module-level globals and
   emit the final carry-flag test into *POP.  */
15407 ix86_compare_op0 = op0;
15408 ix86_compare_op1 = op1;
15409 *pop = ix86_expand_compare (code, NULL, NULL);
15410 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move: operands[0] = operands[1](compare)
   ? operands[2] : operands[3].  Returns 1 on success ("DONE") and 0 on
   failure ("FAIL"), in which case the caller must fall back to a branch
   sequence.  Tries several branchless strategies (sbb/setcc arithmetic,
   lea scaling, and/or masking) before emitting a real cmove.
   NOTE(review): this excerpt elides many lines of the function; the
   comments added below annotate only the visible statements.  */
15415 ix86_expand_int_movcc (rtx operands[])
15417 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15418 rtx compare_seq, compare_op;
15419 rtx second_test, bypass_test;
15420 enum machine_mode mode = GET_MODE (operands[0]);
15421 bool sign_bit_compare_p = false;;
15424 ix86_compare_op0 = XEXP (operands[1], 0);
15425 ix86_compare_op1 = XEXP (operands[1], 1);
15426 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15427 compare_seq = get_insns ();
15430 compare_code = GET_CODE (compare_op);
/* x < 0 / x >= 0 (and the -1 variants) test only the sign bit and can be
   done with shifts/sbb instead of a full compare.  */
15432 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15433 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15434 sign_bit_compare_p = true;
15436 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15437 HImode insns, we'd be swallowed in word prefix ops. */
15439 if ((mode != HImode || TARGET_FAST_PREFIX)
15440 && (mode != (TARGET_64BIT ? TImode : DImode))
15441 && CONST_INT_P (operands[2])
15442 && CONST_INT_P (operands[3]))
15444 rtx out = operands[0];
15445 HOST_WIDE_INT ct = INTVAL (operands[2]);
15446 HOST_WIDE_INT cf = INTVAL (operands[3]);
15447 HOST_WIDE_INT diff;
15450 /* Sign bit compares are better done using shifts than we do by using
15452 if (sign_bit_compare_p
15453 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15454 ix86_compare_op1, &compare_op))
15456 /* Detect overlap between destination and compare sources. */
15459 if (!sign_bit_compare_p)
15461 bool fpcmp = false;
15463 compare_code = GET_CODE (compare_op);
15465 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15466 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15469 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15472 /* To simplify rest of code, restrict to the GEU case. */
15473 if (compare_code == LTU)
15475 HOST_WIDE_INT tmp = ct;
15478 compare_code = reverse_condition (compare_code);
15479 code = reverse_condition (code);
15484 PUT_CODE (compare_op,
15485 reverse_condition_maybe_unordered
15486 (GET_CODE (compare_op)));
15488 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15492 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15493 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15494 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb reg,reg pattern).  */
15496 if (mode == DImode)
15497 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15499 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15503 if (code == GT || code == GE)
15504 code = reverse_condition (code);
15507 HOST_WIDE_INT tmp = ct;
15512 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15513 ix86_compare_op1, VOIDmode, 0, -1);
/* Adjust the 0/-1 mask towards ct/cf with cheap arithmetic.  */
15526 tmp = expand_simple_binop (mode, PLUS,
15528 copy_rtx (tmp), 1, OPTAB_DIRECT);
15539 tmp = expand_simple_binop (mode, IOR,
15541 copy_rtx (tmp), 1, OPTAB_DIRECT);
15543 else if (diff == -1 && ct)
15553 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15555 tmp = expand_simple_binop (mode, PLUS,
15556 copy_rtx (tmp), GEN_INT (cf),
15557 copy_rtx (tmp), 1, OPTAB_DIRECT);
15565 * andl cf - ct, dest
15575 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15578 tmp = expand_simple_binop (mode, AND,
15580 gen_int_mode (cf - ct, mode),
15581 copy_rtx (tmp), 1, OPTAB_DIRECT);
15583 tmp = expand_simple_binop (mode, PLUS,
15584 copy_rtx (tmp), GEN_INT (ct),
15585 copy_rtx (tmp), 1, OPTAB_DIRECT);
15588 if (!rtx_equal_p (tmp, out))
15589 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15591 return 1; /* DONE */
15596 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15599 tmp = ct, ct = cf, cf = tmp;
15602 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15604 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15606 /* We may be reversing unordered compare to normal compare, that
15607 is not valid in general (we may convert non-trapping condition
15608 to trapping one), however on i386 we currently emit all
15609 comparisons unordered. */
15610 compare_code = reverse_condition_maybe_unordered (compare_code);
15611 code = reverse_condition_maybe_unordered (code);
15615 compare_code = reverse_condition (compare_code);
15616 code = reverse_condition (code);
15620 compare_code = UNKNOWN;
15621 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15622 && CONST_INT_P (ix86_compare_op1))
15624 if (ix86_compare_op1 == const0_rtx
15625 && (code == LT || code == GE))
15626 compare_code = code;
15627 else if (ix86_compare_op1 == constm1_rtx)
15631 else if (code == GT)
15636 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15637 if (compare_code != UNKNOWN
15638 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15639 && (cf == -1 || ct == -1))
15641 /* If lea code below could be used, only optimize
15642 if it results in a 2 insn sequence. */
15644 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15645 || diff == 3 || diff == 5 || diff == 9)
15646 || (compare_code == LT && ct == -1)
15647 || (compare_code == GE && cf == -1))
15650 * notl op1 (if necessary)
15658 code = reverse_condition (code);
15661 out = emit_store_flag (out, code, ix86_compare_op0,
15662 ix86_compare_op1, VOIDmode, 0, -1);
15664 out = expand_simple_binop (mode, IOR,
15666 out, 1, OPTAB_DIRECT);
15667 if (out != operands[0])
15668 emit_move_insn (operands[0], out);
15670 return 1; /* DONE */
/* diff in {1,2,3,4,5,8,9}: compute setcc then scale with lea.  */
15675 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15676 || diff == 3 || diff == 5 || diff == 9)
15677 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15679 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15685 * lea cf(dest*(ct-cf)),dest
15689 * This also catches the degenerate setcc-only case.
15695 out = emit_store_flag (out, code, ix86_compare_op0,
15696 ix86_compare_op1, VOIDmode, 0, 1);
15699 /* On x86_64 the lea instruction operates on Pmode, so we need
15700 to get arithmetics done in proper mode to match. */
15702 tmp = copy_rtx (out);
15706 out1 = copy_rtx (out);
15707 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15711 tmp = gen_rtx_PLUS (mode, tmp, out1);
15717 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15720 if (!rtx_equal_p (tmp, out))
15723 out = force_operand (tmp, copy_rtx (out));
15725 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15727 if (!rtx_equal_p (out, operands[0]))
15728 emit_move_insn (operands[0], copy_rtx (out));
15730 return 1; /* DONE */
15734 * General case: Jumpful:
15735 * xorl dest,dest cmpl op1, op2
15736 * cmpl op1, op2 movl ct, dest
15737 * setcc dest jcc 1f
15738 * decl dest movl cf, dest
15739 * andl (cf-ct),dest 1:
15742 * Size 20. Size 14.
15744 * This is reasonably steep, but branch mispredict costs are
15745 * high on modern cpus, so consider failing only if optimizing
15749 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15750 && BRANCH_COST (optimize_insn_for_speed_p (),
15755 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15760 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15762 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15764 /* We may be reversing unordered compare to normal compare,
15765 that is not valid in general (we may convert non-trapping
15766 condition to trapping one), however on i386 we currently
15767 emit all comparisons unordered. */
15768 code = reverse_condition_maybe_unordered (code);
15772 code = reverse_condition (code);
15773 if (compare_code != UNKNOWN)
15774 compare_code = reverse_condition (compare_code);
15778 if (compare_code != UNKNOWN)
15780 /* notl op1 (if needed)
15785 For x < 0 (resp. x <= -1) there will be no notl,
15786 so if possible swap the constants to get rid of the
15788 True/false will be -1/0 while code below (store flag
15789 followed by decrement) is 0/-1, so the constants need
15790 to be exchanged once more. */
15792 if (compare_code == GE || !cf)
15794 code = reverse_condition (code);
15799 HOST_WIDE_INT tmp = cf;
15804 out = emit_store_flag (out, code, ix86_compare_op0,
15805 ix86_compare_op1, VOIDmode, 0, -1);
15809 out = emit_store_flag (out, code, ix86_compare_op0,
15810 ix86_compare_op1, VOIDmode, 0, 1);
/* setcc produced 0/1; decrement to 0/-1, mask by (cf-ct), add ct.  */
15812 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15813 copy_rtx (out), 1, OPTAB_DIRECT);
15816 out = expand_simple_binop (mode, AND, copy_rtx (out),
15817 gen_int_mode (cf - ct, mode),
15818 copy_rtx (out), 1, OPTAB_DIRECT);
15820 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15821 copy_rtx (out), 1, OPTAB_DIRECT);
15822 if (!rtx_equal_p (out, operands[0]))
15823 emit_move_insn (operands[0], copy_rtx (out));
15825 return 1; /* DONE */
15829 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15831 /* Try a few things more with specific constants and a variable. */
15834 rtx var, orig_out, out, tmp;
15836 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15837 return 0; /* FAIL */
15839 /* If one of the two operands is an interesting constant, load a
15840 constant with the above and mask it in with a logical operation. */
15842 if (CONST_INT_P (operands[2]))
15845 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15846 operands[3] = constm1_rtx, op = and_optab;
15847 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15848 operands[3] = const0_rtx, op = ior_optab;
15850 return 0; /* FAIL */
15852 else if (CONST_INT_P (operands[3]))
15855 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15856 operands[2] = constm1_rtx, op = and_optab;
15857 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
15858 operands[2] = const0_rtx, op = ior_optab;
15860 return 0; /* FAIL */
15863 return 0; /* FAIL */
15865 orig_out = operands[0];
15866 tmp = gen_reg_rtx (mode);
15869 /* Recurse to get the constant loaded. */
15870 if (ix86_expand_int_movcc (operands) == 0)
15871 return 0; /* FAIL */
15873 /* Mask in the interesting variable. */
15874 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15876 if (!rtx_equal_p (out, orig_out))
15877 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15879 return 1; /* DONE */
15883 * For comparison with above,
/* Final strategy: emit a real conditional-move insn.  Operands are
   forced into registers as the cmov patterns require, and fresh temps
   break overlaps between the destination and the test operands.  */
15893 if (! nonimmediate_operand (operands[2], mode))
15894 operands[2] = force_reg (mode, operands[2]);
15895 if (! nonimmediate_operand (operands[3], mode))
15896 operands[3] = force_reg (mode, operands[3]);
15898 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15900 rtx tmp = gen_reg_rtx (mode);
15901 emit_move_insn (tmp, operands[3]);
15904 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15906 rtx tmp = gen_reg_rtx (mode);
15907 emit_move_insn (tmp, operands[2]);
15911 if (! register_operand (operands[2], VOIDmode)
15913 || ! register_operand (operands[3], VOIDmode)))
15914 operands[2] = force_reg (mode, operands[2]);
15917 && ! register_operand (operands[3], VOIDmode))
15918 operands[3] = force_reg (mode, operands[3]);
15920 emit_insn (compare_seq);
15921 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15922 gen_rtx_IF_THEN_ELSE (mode,
15923 compare_op, operands[2],
/* Extra cmoves fold in the bypass/second FP tests when present.  */
15926 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15927 gen_rtx_IF_THEN_ELSE (mode,
15929 copy_rtx (operands[3]),
15930 copy_rtx (operands[0]))));
15932 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15933 gen_rtx_IF_THEN_ELSE (mode,
15935 copy_rtx (operands[2]),
15936 copy_rtx (operands[0]))));
15938 return 1; /* DONE */
15941 /* Swap, force into registers, or otherwise massage the two operands
15942 to an sse comparison with a mask result. Thus we differ a bit from
15943 ix86_prepare_fp_compare_args which expects to produce a flags result.
15945 The DEST operand exists to help determine whether to commute commutative
15946 operators. The POP0/POP1 operands are updated in place. The new
15947 comparison code is returned, or UNKNOWN if not implementable. */
15949 static enum rtx_code
15950 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15951 rtx *pop0, rtx *pop1)
/* NOTE(review): the body's switch skeleton is partly elided in this
   excerpt; comments cover only the visible statements.  */
15959 /* We have no LTGT as an operator. We could implement it with
15960 NE & ORDERED, but this requires an extra temporary. It's
15961 not clear that it's worth it. */
15968 /* These are supported directly. */
15975 /* For commutative operators, try to canonicalize the destination
15976 operand to be first in the comparison - this helps reload to
15977 avoid extra moves. */
15978 if (!dest || !rtx_equal_p (dest, *pop1))
15986 /* These are not supported directly. Swap the comparison operands
15987 to transform into something that is supported. */
15991 code = swap_condition (code);
15995 gcc_unreachable ();
16001 /* Detect conditional moves that exactly match min/max operational
16002 semantics. Note that this is IEEE safe, as long as we don't
16003 interchange the operands.
16005 Returns FALSE if this conditional move doesn't match a MIN/MAX,
16006 and TRUE if the operation is successful and instructions are emitted. */
/* NOTE(review): some lines are elided in this excerpt; comments cover
   only the visible statements.  */
16009 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
16010 rtx cmp_op1, rtx if_true, rtx if_false)
16012 enum machine_mode mode;
16018 else if (code == UNGE)
16021 if_true = if_false;
/* Match the operand pattern: (a op b) ? a : b selects min or max
   depending on which compare operand feeds which arm.  */
16027 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
16029 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
16034 mode = GET_MODE (dest);
16036 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
16037 but MODE may be a vector mode and thus not appropriate. */
16038 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
16040 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
16043 if_true = force_reg (mode, if_true);
16044 v = gen_rtvec (2, if_true, if_false);
16045 tmp = gen_rtx_UNSPEC (mode, v, u);
/* With fast-math, a plain SMIN/SMAX rtx suffices.  */
16049 code = is_min ? SMIN : SMAX;
16050 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
16053 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
16057 /* Expand an sse vector comparison. Return the register with the result. */
/* DEST is reused when it does not overlap OP_TRUE/OP_FALSE; otherwise a
   fresh register is allocated.  The emitted SET stores the comparison
   mask (code CMP_OP0 vs CMP_OP1) into that register.  */
16060 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
16061 rtx op_true, rtx op_false)
16063 enum machine_mode mode = GET_MODE (dest);
16066 cmp_op0 = force_reg (mode, cmp_op0);
16067 if (!nonimmediate_operand (cmp_op1, mode))
16068 cmp_op1 = force_reg (mode, cmp_op1);
16071 || reg_overlap_mentioned_p (dest, op_true)
16072 || reg_overlap_mentioned_p (dest, op_false))
16073 dest = gen_reg_rtx (mode);
16075 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
16076 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16081 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
16082 operations. This is used for both scalar and vector conditional moves. */
/* CMP is a mask (all-ones / all-zeros per lane).  Special-cases a zero
   arm with a single AND/ANDN; uses SSE5's native pcmov when available;
   otherwise computes (OP_TRUE & CMP) | (OP_FALSE & ~CMP).  */
16085 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
16087 enum machine_mode mode = GET_MODE (dest);
/* false arm is zero: DEST = CMP & OP_TRUE.  */
16090 if (op_false == CONST0_RTX (mode))
16092 op_true = force_reg (mode, op_true);
16093 x = gen_rtx_AND (mode, cmp, op_true);
16094 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* true arm is zero: DEST = ~CMP & OP_FALSE.  */
16096 else if (op_true == CONST0_RTX (mode))
16098 op_false = force_reg (mode, op_false);
16099 x = gen_rtx_NOT (mode, cmp);
16100 x = gen_rtx_AND (mode, x, op_false);
16101 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16103 else if (TARGET_SSE5)
16105 rtx pcmov = gen_rtx_SET (mode, dest,
16106 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* Generic path: blend both arms through two temporaries and an IOR.  */
16113 op_true = force_reg (mode, op_true);
16114 op_false = force_reg (mode, op_false);
16116 t2 = gen_reg_rtx (mode);
16118 t3 = gen_reg_rtx (mode);
16122 x = gen_rtx_AND (mode, op_true, cmp);
16123 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
16125 x = gen_rtx_NOT (mode, cmp);
16126 x = gen_rtx_AND (mode, x, op_false);
16127 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
16129 x = gen_rtx_IOR (mode, t3, t2);
16130 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16134 /* Expand a floating-point conditional move. Return true if successful. */
/* operands[0] = operands[1](compare) ? operands[2] : operands[3].
   For SSE math uses mask-based sse_cmp/sse_movcc (with a min/max fast
   path); otherwise emits fcmov-style IF_THEN_ELSE insns.
   NOTE(review): some lines are elided in this excerpt; comments cover
   only the visible statements.  */
16137 ix86_expand_fp_movcc (rtx operands[])
16139 enum machine_mode mode = GET_MODE (operands[0]);
16140 enum rtx_code code = GET_CODE (operands[1]);
16141 rtx tmp, compare_op, second_test, bypass_test;
16143 ix86_compare_op0 = XEXP (operands[1], 0);
16144 ix86_compare_op1 = XEXP (operands[1], 1);
16145 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16147 enum machine_mode cmode;
16149 /* Since we've no cmove for sse registers, don't force bad register
16150 allocation just to gain access to it. Deny movcc when the
16151 comparison mode doesn't match the move mode. */
16152 cmode = GET_MODE (ix86_compare_op0);
16153 if (cmode == VOIDmode)
16154 cmode = GET_MODE (ix86_compare_op1);
16158 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16160 &ix86_compare_op1);
16161 if (code == UNKNOWN)
/* Try the min/max shortcut first; it emits everything itself.  */
16164 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
16165 ix86_compare_op1, operands[2],
16169 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
16170 ix86_compare_op1, operands[2], operands[3]);
16171 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
16175 /* The floating point conditional move instructions don't directly
16176 support conditions resulting from a signed integer comparison. */
16178 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
16180 /* The floating point conditional move instructions don't directly
16181 support signed integer comparisons. */
16183 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Reduce an unsupported condition to a setcc result compared to 0.  */
16185 gcc_assert (!second_test && !bypass_test);
16186 tmp = gen_reg_rtx (QImode);
16187 ix86_expand_setcc (code, tmp);
16189 ix86_compare_op0 = tmp;
16190 ix86_compare_op1 = const0_rtx;
16191 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy arms that overlap the destination into fresh temporaries.  */
16193 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
16195 tmp = gen_reg_rtx (mode);
16196 emit_move_insn (tmp, operands[3]);
16199 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
16201 tmp = gen_reg_rtx (mode);
16202 emit_move_insn (tmp, operands[2]);
16206 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16207 gen_rtx_IF_THEN_ELSE (mode, compare_op,
16208 operands[2], operands[3])));
16210 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16211 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
16212 operands[3], operands[0])));
16214 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16215 gen_rtx_IF_THEN_ELSE (mode, second_test,
16216 operands[2], operands[0])));
16221 /* Expand a floating-point vector conditional move; a vcond operation
16222 rather than a movcc operation. */
/* operands: [0] dest, [1]/[2] the two value arms, [3] the comparison
   rtx, [4]/[5] the compare operands.  Mirrors ix86_expand_fp_movcc's
   SSE path: prepare args, try min/max, else mask + blend.  */
16225 ix86_expand_fp_vcond (rtx operands[])
16227 enum rtx_code code = GET_CODE (operands[3]);
16230 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16231 &operands[4], &operands[5]);
16232 if (code == UNKNOWN)
16235 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
16236 operands[5], operands[1], operands[2]))
16239 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
16240 operands[1], operands[2]);
16241 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
16245 /* Expand a signed/unsigned integral vector conditional move. */
/* operands: [0] dest, [1]/[2] value arms, [3] comparison, [4]/[5]
   compare operands.  Canonicalizes to EQ/GT/GTU (possibly negating the
   result by swapping the arms), since SSE only compares for equality
   and signed greater-than.
   NOTE(review): some lines (switch skeleton, negate assignments) are
   elided in this excerpt; comments cover only visible statements.  */
16248 ix86_expand_int_vcond (rtx operands[])
16250 enum machine_mode mode = GET_MODE (operands[0]);
16251 enum rtx_code code = GET_CODE (operands[3]);
16252 bool negate = false;
16255 cop0 = operands[4];
16256 cop1 = operands[5];
16258 /* SSE5 supports all of the comparisons on all vector int types. */
16261 /* Canonicalize the comparison to EQ, GT, GTU. */
16272 code = reverse_condition (code);
16278 code = reverse_condition (code);
16284 code = swap_condition (code);
16285 x = cop0, cop0 = cop1, cop1 = x;
16289 gcc_unreachable ();
16292 /* Only SSE4.1/SSE4.2 supports V2DImode. */
16293 if (mode == V2DImode)
16298 /* SSE4.1 supports EQ. */
16299 if (!TARGET_SSE4_1)
16305 /* SSE4.2 supports GT/GTU. */
16306 if (!TARGET_SSE4_2)
16311 gcc_unreachable ();
16315 /* Unsigned parallel compare is not supported by the hardware. Play some
16316 tricks to turn this into a signed comparison against 0. */
16319 cop0 = force_reg (mode, cop0);
16328 /* Perform a parallel modulo subtraction. */
16329 t1 = gen_reg_rtx (mode);
16330 emit_insn ((mode == V4SImode
16332 : gen_subv2di3) (t1, cop0, cop1));
16334 /* Extract the original sign bit of op0. */
16335 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16337 t2 = gen_reg_rtx (mode);
16338 emit_insn ((mode == V4SImode
16340 : gen_andv2di3) (t2, cop0, mask));
16342 /* XOR it back into the result of the subtraction. This results
16343 in the sign bit set iff we saw unsigned underflow. */
16344 x = gen_reg_rtx (mode);
16345 emit_insn ((mode == V4SImode
16347 : gen_xorv2di3) (x, t1, t2));
16355 /* Perform a parallel unsigned saturating subtraction. */
16356 x = gen_reg_rtx (mode);
16357 emit_insn (gen_rtx_SET (VOIDmode, x,
16358 gen_rtx_US_MINUS (mode, cop0, cop1)));
16365 gcc_unreachable ();
16369 cop1 = CONST0_RTX (mode);
/* NEGATE swaps the arms via the 1+negate / 2-negate indexing.  */
16373 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16374 operands[1+negate], operands[2-negate]);
16376 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16377 operands[2-negate]);
16381 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16382 true if we should do zero extension, else sign extension. HIGH_P is
16383 true if we want the N/2 high elements, else the low elements. */
/* Implemented via SSE2 interleave: pair each element of operands[1] with
   either zeros (unsigned) or a computed sign mask (signed).  */
16386 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16388 enum machine_mode imode = GET_MODE (operands[1]);
16389 rtx (*unpack)(rtx, rtx, rtx);
16396 unpack = gen_vec_interleave_highv16qi;
16398 unpack = gen_vec_interleave_lowv16qi;
16402 unpack = gen_vec_interleave_highv8hi;
16404 unpack = gen_vec_interleave_lowv8hi;
16408 unpack = gen_vec_interleave_highv4si;
16410 unpack = gen_vec_interleave_lowv4si;
16413 gcc_unreachable ();
16416 dest = gen_lowpart (imode, operands[0]);
/* SE is the "extension" half: zeros for zero-extend, otherwise a
   per-element mask of the sign bits (0 > x) for sign-extend.  */
16419 se = force_reg (imode, CONST0_RTX (imode));
16421 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16422 operands[1], pc_rtx, pc_rtx);
16424 emit_insn (unpack (dest, operands[1], se));
16427 /* This function performs the same task as ix86_expand_sse_unpack,
16428 but with SSE4.1 instructions. */
/* Uses the pmovzx/pmovsx family; for the high half, the source is first
   shifted right by 64 bits so the extend insns (which read the low
   half) see the right elements.  */
16431 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16433 enum machine_mode imode = GET_MODE (operands[1]);
16434 rtx (*unpack)(rtx, rtx);
16441 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16443 unpack = gen_sse4_1_extendv8qiv8hi2;
16447 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16449 unpack = gen_sse4_1_extendv4hiv4si2;
16453 unpack = gen_sse4_1_zero_extendv2siv2di2;
16455 unpack = gen_sse4_1_extendv2siv2di2;
16458 gcc_unreachable ();
16461 dest = operands[0];
16464 /* Shift higher 8 bytes to lower 8 bytes. */
16465 src = gen_reg_rtx (imode);
16466 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16467 gen_lowpart (TImode, operands[1]),
16473 emit_insn (unpack (dest, src));
16476 /* This function performs the same task as ix86_expand_sse_unpack,
16477 but with sse5 instructions. */
/* Builds a 16-byte PPERM selector vector: for each destination element
   the low source bytes are copied and the upper bytes are filled with
   either zeros (unsigned) or replicated sign bytes (signed).  The three
   visible branches handle the V16QI->V8HI, V8HI->V4SI and V4SI->V2DI
   widenings respectively.
   NOTE(review): the switch skeleton is partly elided in this excerpt;
   comments cover only the visible statements.  */
16480 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16482 enum machine_mode imode = GET_MODE (operands[1]);
16483 int pperm_bytes[16];
16485 int h = (high_p) ? 8 : 0;
16488 rtvec v = rtvec_alloc (16);
16491 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: each byte followed by a zero/sign byte.  */
16496 vs = rtvec_alloc (8);
16497 h2 = (high_p) ? 8 : 0;
16498 for (i = 0; i < 8; i++)
16500 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16501 pperm_bytes[2*i+1] = ((unsigned_p)
16503 : PPERM_SIGN | PPERM_SRC2 | i | h);
16506 for (i = 0; i < 16; i++)
16507 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16509 for (i = 0; i < 8; i++)
16510 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16512 p = gen_rtx_PARALLEL (VOIDmode, vs);
16513 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16515 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16517 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two data bytes then two zero/sign bytes per element.  */
16521 vs = rtvec_alloc (4);
16522 h2 = (high_p) ? 4 : 0;
16523 for (i = 0; i < 4; i++)
16525 sign_extend = ((unsigned_p)
16527 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16528 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16529 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16530 pperm_bytes[4*i+2] = sign_extend;
16531 pperm_bytes[4*i+3] = sign_extend;
16534 for (i = 0; i < 16; i++)
16535 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16537 for (i = 0; i < 4; i++)
16538 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16540 p = gen_rtx_PARALLEL (VOIDmode, vs);
16541 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16543 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16545 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four data bytes then four zero/sign bytes per element.  */
16549 vs = rtvec_alloc (2);
16550 h2 = (high_p) ? 2 : 0;
16551 for (i = 0; i < 2; i++)
16553 sign_extend = ((unsigned_p)
16555 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16556 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16557 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16558 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16559 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16560 pperm_bytes[8*i+4] = sign_extend;
16561 pperm_bytes[8*i+5] = sign_extend;
16562 pperm_bytes[8*i+6] = sign_extend;
16563 pperm_bytes[8*i+7] = sign_extend;
16566 for (i = 0; i < 16; i++)
16567 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16569 for (i = 0; i < 2; i++)
16570 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16572 p = gen_rtx_PARALLEL (VOIDmode, vs);
16573 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16575 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16577 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16581 gcc_unreachable ();
16587 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16588 next narrower integer vector type. */
/* NOTE(review): this excerpt elides braces, switch/case labels and the
   declarations of i and x (gaps in the embedded numbering).  Each case builds
   a 16-byte PPERM selector taking the low sub-element of every element: the
   low half of the result comes from SRC1 (operands[1]), the high half from
   SRC2 (operands[2]).  */
16590 ix86_expand_sse5_pack (rtx operands[3])
16592 enum machine_mode imode = GET_MODE (operands[0]);
16593 int pperm_bytes[16];
16595 rtvec v = rtvec_alloc (16);
16597 rtx op0 = operands[0];
16598 rtx op1 = operands[1];
16599 rtx op2 = operands[2];
/* V16QImode result: take the low byte of each HImode element.  */
16604 for (i = 0; i < 8; i++)
16606 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16607 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16610 for (i = 0; i < 16; i++)
16611 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16613 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16614 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V8HImode result: take the low two bytes of each SImode element.  */
16618 for (i = 0; i < 4; i++)
16620 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16621 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16622 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16623 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16626 for (i = 0; i < 16; i++)
16627 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16629 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16630 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V4SImode result: take the low four bytes of each DImode element.  */
16634 for (i = 0; i < 2; i++)
16636 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16637 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16638 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16639 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16640 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16641 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16642 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16643 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16646 for (i = 0; i < 16; i++)
16647 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16649 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16650 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
/* Any other result mode is a caller bug.  */
16654 gcc_unreachable ();
16660 /* Expand conditional increment or decrement using adc/sbb instructions.
16661 The default case using setcc followed by the conditional move can be
16662 done by generic code. */
/* NOTE(review): gaps in the embedded numbering mean braces, early-return
   paths and the declaration of compare_op are elided from this excerpt.
   operands[1] is the comparison rtx; operands[3] is presumably the
   increment (+1/-1) — TODO confirm against the i386.md expander.  */
16664 ix86_expand_int_addcc (rtx operands[])
16666 enum rtx_code code = GET_CODE (operands[1]);
16668 rtx val = const0_rtx;
16669 bool fpcmp = false;
16670 enum machine_mode mode = GET_MODE (operands[0]);
16672 ix86_compare_op0 = XEXP (operands[1], 0);
16673 ix86_compare_op1 = XEXP (operands[1], 1);
/* Only +1 / -1 adjustments are handled here.  */
16674 if (operands[3] != const1_rtx
16675 && operands[3] != constm1_rtx)
16677 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16678 ix86_compare_op1, &compare_op))
16680 code = GET_CODE (compare_op);
16682 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16683 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16686 code = ix86_fp_compare_code_to_integer (code);
/* Reverse the condition; FP compares must preserve unordered semantics.  */
16693 PUT_CODE (compare_op,
16694 reverse_condition_maybe_unordered
16695 (GET_CODE (compare_op)));
16697 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16699 PUT_MODE (compare_op, mode);
16701 /* Construct either adc or sbb insn. */
16702 if ((code == LTU) == (operands[3] == constm1_rtx))
16704 switch (GET_MODE (operands[0]))
16707 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16710 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op))/* sic */;
16713 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16716 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16719 gcc_unreachable ();
16724 switch (GET_MODE (operands[0]))
16727 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16730 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16733 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16736 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16739 gcc_unreachable ();
16742 return 1; /* DONE */
16746 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16747 works for floating point parameters and nonoffsetable memories.
16748 For pushes, it returns just stack offsets; the values will be saved
16749 in the right order. Maximally three parts are generated
16749b (the assert below actually permits up to four). */
/* NOTE(review): gaps in the embedded numbering mean braces, if/else
   keywords, returns of SIZE and several declarations (i, size, r, l[])
   are elided from this excerpt.  */
16752 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* 32-bit path: number of SImode parts (XFmode is 12 bytes -> 3 parts).  */
16757 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
/* 64-bit path: number of DImode parts.  */
16759 size = (GET_MODE_SIZE (mode) + 4) / 8;
16761 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16762 gcc_assert (size >= 2 && size <= 4);
16764 /* Optimize constant pool reference to immediates. This is used by fp
16765 moves, that force all constants to memory to allow combining. */
16766 if (MEM_P (operand) && MEM_READONLY_P (operand))
16768 rtx tmp = maybe_get_pool_constant (operand);
16773 if (MEM_P (operand) && !offsettable_memref_p (operand))
16775 /* The only non-offsetable memories we handle are pushes. */
16776 int ok = push_operand (operand, VOIDmode);
16780 operand = copy_rtx (operand);
16781 PUT_MODE (operand, Pmode);
16782 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16786 if (GET_CODE (operand) == CONST_VECTOR)
16788 enum machine_mode imode = int_mode_for_mode (mode);
16789 /* Caution: if we looked through a constant pool memory above,
16790 the operand may actually have a different mode now. That's
16791 ok, since we want to pun this all the way back to an integer. */
16792 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16793 gcc_assert (operand != NULL);
/* 32-bit splitting.  */
16799 if (mode == DImode)
16800 split_di (&operand, 1, &parts[0], &parts[1]);
16805 if (REG_P (operand))
16807 gcc_assert (reload_completed);
16808 for (i = 0; i < size; i++)
16809 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16811 else if (offsettable_memref_p (operand))
16813 operand = adjust_address (operand, SImode, 0);
16814 parts[0] = operand;
16815 for (i = 1; i < size; i++)
16816 parts[i] = adjust_address (operand, SImode, 4 * i);
16818 else if (GET_CODE (operand) == CONST_DOUBLE)
16823 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16827 real_to_target (l, &r, mode);
16828 parts[3] = gen_int_mode (l[3], SImode);
16829 parts[2] = gen_int_mode (l[2], SImode);
16832 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16833 parts[2] = gen_int_mode (l[2], SImode);
16836 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16839 gcc_unreachable ();
16841 parts[1] = gen_int_mode (l[1], SImode);
16842 parts[0] = gen_int_mode (l[0], SImode);
16845 gcc_unreachable ();
/* 64-bit splitting.  */
16850 if (mode == TImode)
16851 split_ti (&operand, 1, &parts[0], &parts[1]);
16852 if (mode == XFmode || mode == TFmode)
16854 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16855 if (REG_P (operand))
16857 gcc_assert (reload_completed);
16858 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16859 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16861 else if (offsettable_memref_p (operand))
16863 operand = adjust_address (operand, DImode, 0);
16864 parts[0] = operand;
16865 parts[1] = adjust_address (operand, upper_mode, 8);
16867 else if (GET_CODE (operand) == CONST_DOUBLE)
16872 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16873 real_to_target (l, &r, mode);
16875 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16876 if (HOST_BITS_PER_WIDE_INT >= 64)
16879 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16880 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16883 parts[0] = immed_double_const (l[0], l[1], DImode);
16885 if (upper_mode == SImode)
16886 parts[1] = gen_int_mode (l[2], SImode);
16887 else if (HOST_BITS_PER_WIDE_INT >= 64)
16890 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16891 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16894 parts[1] = immed_double_const (l[2], l[3], DImode);
16897 gcc_unreachable ();
16904 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16905 Return false when normal moves are needed; true when all required
16906 insns have been emitted. Operands 2-4 contain the input values
16907 in the correct order; operands 5-7 contain the output values. */
/* NOTE(review): gaps in the embedded numbering mean braces, returns and
   the declarations of nparts, part[][], push, i, j, tmp, base are elided
   from this excerpt.  */
16910 ix86_split_long_move (rtx operands[])
16915 int collisions = 0;
16916 enum machine_mode mode = GET_MODE (operands[0]);
16917 bool collisionparts[4];
16919 /* The DFmode expanders may ask us to move double.
16920 For 64bit target this is single move. By hiding the fact
16921 here we simplify i386.md splitters. */
16922 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16924 /* Optimize constant pool reference to immediates. This is used by
16925 fp moves, that force all constants to memory to allow combining. */
16927 if (MEM_P (operands[1])
16928 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16929 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16930 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16931 if (push_operand (operands[0], VOIDmode))
16933 operands[0] = copy_rtx (operands[0]);
16934 PUT_MODE (operands[0], Pmode);
16937 operands[0] = gen_lowpart (DImode, operands[0]);
16938 operands[1] = gen_lowpart (DImode, operands[1]);
16939 emit_move_insn (operands[0], operands[1]);
16943 /* The only non-offsettable memory we handle is push. */
16944 if (push_operand (operands[0], VOIDmode))
16947 gcc_assert (!MEM_P (operands[0])
16948 || offsettable_memref_p (operands[0]));
16950 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16951 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16953 /* When emitting push, take care for source operands on the stack. */
16954 if (push && MEM_P (operands[1])
16955 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16956 for (i = 0; i < nparts - 1; i++)
16957 part[1][i] = change_address (part[1][i],
16958 GET_MODE (part[1][i]),
16959 XEXP (part[1][i + 1], 0));
16961 /* We need to do copy in the right order in case an address register
16962 of the source overlaps the destination. */
16963 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16967 for (i = 0; i < nparts; i++)
16970 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16971 if (collisionparts[i])
16975 /* Collision in the middle part can be handled by reordering. */
16976 if (collisions == 1 && nparts == 3 && collisionparts [1])
16978 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16979 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16981 else if (collisions == 1
16983 && (collisionparts [1] || collisionparts [2]))
16985 if (collisionparts [1])
16987 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16988 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16992 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16993 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16997 /* If there are more collisions, we can't handle it by reordering.
16998 Do an lea to the last part and use only one colliding move. */
16999 else if (collisions > 1)
17005 base = part[0][nparts - 1];
17007 /* Handle the case when the last part isn't valid for lea.
17008 Happens in 64-bit mode storing the 12-byte XFmode. */
17009 if (GET_MODE (base) != Pmode)
17010 base = gen_rtx_REG (Pmode, REGNO (base));
17012 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
17013 part[1][0] = replace_equiv_address (part[1][0], base);
17014 for (i = 1; i < nparts; i++)
17016 tmp = plus_constant (base, UNITS_PER_WORD * i);
17017 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path: XFmode on 32-bit needs an extra 4-byte stack adjustment
   so the 12-byte value occupies 16 bytes.  */
17028 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
17029 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
17030 emit_move_insn (part[0][2], part[1][2]);
17032 else if (nparts == 4)
17034 emit_move_insn (part[0][3], part[1][3]);
17035 emit_move_insn (part[0][2], part[1][2]);
17040 /* In 64bit mode we don't have 32bit push available. In case this is
17041 register, it is OK - we will just use larger counterpart. We also
17042 retype memory - these comes from attempt to avoid REX prefix on
17043 moving of second half of TFmode value. */
17044 if (GET_MODE (part[1][1]) == SImode)
17046 switch (GET_CODE (part[1][1]))
17049 part[1][1] = adjust_address (part[1][1], DImode, 0);
17053 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
17057 gcc_unreachable ();
17060 if (GET_MODE (part[1][0]) == SImode)
17061 part[1][0] = part[1][1];
17064 emit_move_insn (part[0][1], part[1][1]);
17065 emit_move_insn (part[0][0], part[1][0]);
17069 /* Choose correct order to not overwrite the source before it is copied. */
17070 if ((REG_P (part[0][0])
17071 && REG_P (part[1][1])
17072 && (REGNO (part[0][0]) == REGNO (part[1][1])
17074 && REGNO (part[0][0]) == REGNO (part[1][2]))
17076 && REGNO (part[0][0]) == REGNO (part[1][3]))))
17078 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Copy highest part first (reversed order).  */
17080 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
17082 operands[2 + i] = part[0][j];
17083 operands[6 + i] = part[1][j];
17088 for (i = 0; i < nparts; i++)
17090 operands[2 + i] = part[0][i];
17091 operands[6 + i] = part[1][i];
17095 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
17096 if (optimize_insn_for_size_p ())
17098 for (j = 0; j < nparts - 1; j++)
17099 if (CONST_INT_P (operands[6 + j])
17100 && operands[6 + j] != const0_rtx
17101 && REG_P (operands[2 + j]))
17102 for (i = j; i < nparts - 1; i++)
17103 if (CONST_INT_P (operands[7 + i])
17104 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
17105 operands[7 + i] = operands[2 + j];
17108 for (i = 0; i < nparts; i++)
17109 emit_move_insn (operands[2 + i], operands[6 + i]);
17114 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
17115 left shift by a constant, either using a single shift or
17116 a sequence of add instructions. */
/* NOTE(review): the '?' branches of the mode selections, braces and the
   declaration of i are elided from this excerpt (gaps in the embedded
   numbering).  For a double-word DImode split the per-word mode is SImode,
   hence the gen_add*3/gen_ashl*3 pairs selected on MODE.  */
17119 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* count == 1: a single self-add doubles the operand.  */
17123 emit_insn ((mode == DImode
17125 : gen_adddi3) (operand, operand, operand));
/* Small counts: repeated adds when cheaper than one constant shift.  */
17127 else if (!optimize_insn_for_size_p ()
17128 && count * ix86_cost->add <= ix86_cost->shift_const)
17131 for (i=0; i<count; i++)
17133 emit_insn ((mode == DImode
17135 : gen_adddi3) (operand, operand, operand));
/* Otherwise a single shift-by-constant.  */
17139 emit_insn ((mode == DImode
17141 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into single-word operations.  SCRATCH, if non-NULL and CMOV is
   available, is used for the variable-count adjustment.
   NOTE(review): gaps in the embedded numbering mean braces, else
   keywords and '?' branches of the mode selections are elided from this
   excerpt.  */
17145 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
17147 rtx low[2], high[2];
17149 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count.  */
17151 if (CONST_INT_P (operands[2]))
17153 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17154 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= one word: low word of source becomes high word of result.  */
17156 if (count >= single_width)
17158 emit_move_insn (high[0], low[1]);
17159 emit_move_insn (low[0], const0_rtx);
17161 if (count > single_width)
17162 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Count < one word: shld + shift of the low word.  */
17166 if (!rtx_equal_p (operands[0], operands[1]))
17167 emit_move_insn (operands[0], operands[1]);
17168 emit_insn ((mode == DImode
17170 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
17171 ix86_expand_ashl_const (low[0], count, mode);
/* Variable shift count from here on.  */
17176 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17178 if (operands[1] == const1_rtx)
17180 /* Assuming we've chosen a QImode capable registers, then 1 << N
17181 can be done with two 32/64-bit shifts, no branches, no cmoves. */
17182 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
17184 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
17186 ix86_expand_clear (low[0]);
17187 ix86_expand_clear (high[0]);
17188 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
17190 d = gen_lowpart (QImode, low[0]);
17191 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17192 s = gen_rtx_EQ (QImode, flags, const0_rtx);
17193 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17195 d = gen_lowpart (QImode, high[0]);
17196 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17197 s = gen_rtx_NE (QImode, flags, const0_rtx);
17198 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17201 /* Otherwise, we can get the same results by manually performing
17202 a bit extract operation on bit 5/6, and then performing the two
17203 shifts. The two methods of getting 0/1 into low/high are exactly
17204 the same size. Avoiding the shift in the bit extract case helps
17205 pentium4 a bit; no one else seems to care much either way. */
17210 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
17211 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
17213 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
17214 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) or 6 (TImode) of the count into high[0].  */
17216 emit_insn ((mode == DImode
17218 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
17219 emit_insn ((mode == DImode
17221 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
17222 emit_move_insn (low[0], high[0]);
17223 emit_insn ((mode == DImode
17225 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
/* Shift the 0/1 words into place by the (masked) count.  */
17228 emit_insn ((mode == DImode
17230 : gen_ashldi3) (low[0], low[0], operands[2]));
17231 emit_insn ((mode == DImode
17233 : gen_ashldi3) (high[0], high[0], operands[2]));
17237 if (operands[1] == constm1_rtx)
17239 /* For -1 << N, we can avoid the shld instruction, because we
17240 know that we're shifting 0...31/63 ones into a -1. */
17241 emit_move_insn (low[0], constm1_rtx)/* sic */;
17242 if (optimize_insn_for_size_p ())
17243 emit_move_insn (high[0], low[0]);
17245 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld then shift, then fix up if the
   count crossed the word boundary.  */
17249 if (!rtx_equal_p (operands[0], operands[1]))
17250 emit_move_insn (operands[0], operands[1]);
17252 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17253 emit_insn ((mode == DImode
17255 : gen_x86_64_shld) (high[0], low[0], operands[2]));
17258 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
17260 if (TARGET_CMOVE && scratch)
17262 ix86_expand_clear (scratch);
17263 emit_insn ((mode == DImode
17264 ? gen_x86_shift_adj_1
17265 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
17269 emit_insn ((mode == DImode
17270 ? gen_x86_shift_adj_2
17271 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into single-word operations.
   NOTE(review): gaps in the embedded numbering mean braces, else keywords
   and '?' branches of the mode selections are elided from this excerpt.  */
17275 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
17277 rtx low[2], high[2];
17279 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count.  */
17281 if (CONST_INT_P (operands[2]))
17283 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17284 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximum count: both result words become the sign mask.  */
17286 if (count == single_width * 2 - 1)
17288 emit_move_insn (high[0], high[1]);
17289 emit_insn ((mode == DImode
17291 : gen_ashrdi3) (high[0], high[0],
17292 GEN_INT (single_width - 1)));
17293 emit_move_insn (low[0], high[0]);
/* Count >= one word: low result is high source shifted; high result
   is the sign mask.  */
17296 else if (count >= single_width)
17298 emit_move_insn (low[0], high[1]);
17299 emit_move_insn (high[0], low[0]);
17300 emit_insn ((mode == DImode
17302 : gen_ashrdi3) (high[0], high[0],
17303 GEN_INT (single_width - 1)));
17304 if (count > single_width)
17305 emit_insn ((mode == DImode
17307 : gen_ashrdi3) (low[0], low[0],
17308 GEN_INT (count - single_width)));
/* Count < one word: shrd + arithmetic shift of the high word.  */
17312 if (!rtx_equal_p (operands[0], operands[1]))
17313 emit_move_insn (operands[0], operands[1]);
17314 emit_insn ((mode == DImode
17316 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17317 emit_insn ((mode == DImode
17319 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable shift count: shrd/sar then boundary fix-up.  */
17324 if (!rtx_equal_p (operands[0], operands[1]))
17325 emit_move_insn (operands[0], operands[1]);
17327 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17329 emit_insn ((mode == DImode
17331 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17332 emit_insn ((mode == DImode
17334 : gen_ashrdi3) (high[0], high[0], operands[2]));
17336 if (TARGET_CMOVE && scratch)
/* SCRATCH holds the sign mask for the cmov-based adjustment.  */
17338 emit_move_insn (scratch, high[0]);
17339 emit_insn ((mode == DImode
17341 : gen_ashrdi3) (scratch, scratch,
17342 GEN_INT (single_width - 1)));
17343 emit_insn ((mode == DImode
17344 ? gen_x86_shift_adj_1
17345 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17349 emit_insn ((mode == DImode
17350 ? gen_x86_shift_adj_3
17351 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into single-word operations.
   NOTE(review): gaps in the embedded numbering mean braces, else keywords
   and '?' branches of the mode selections are elided from this excerpt.  */
17356 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17358 rtx low[2], high[2];
17360 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count.  */
17362 if (CONST_INT_P (operands[2]))
17364 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17365 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= one word: low result is shifted high source; high result 0.  */
17367 if (count >= single_width)
17369 emit_move_insn (low[0], high[1]);
17370 ix86_expand_clear (high[0]);
17372 if (count > single_width)
17373 emit_insn ((mode == DImode
17375 : gen_lshrdi3) (low[0], low[0],
17376 GEN_INT (count - single_width)));
/* Count < one word: shrd + logical shift of the high word.  */
17380 if (!rtx_equal_p (operands[0], operands[1]))
17381 emit_move_insn (operands[0], operands[1]);
17382 emit_insn ((mode == DImode
17384 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17385 emit_insn ((mode == DImode
17387 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable shift count: shrd/shr then boundary fix-up.  */
17392 if (!rtx_equal_p (operands[0], operands[1]))
17393 emit_move_insn (operands[0], operands[1]);
17395 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17397 emit_insn ((mode == DImode
17399 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17400 emit_insn ((mode == DImode
17402 : gen_lshrdi3) (high[0], high[0], operands[2]));
17404 /* Heh. By reversing the arguments, we can reuse this pattern. */
17405 if (TARGET_CMOVE && scratch)
17407 ix86_expand_clear (scratch);
17408 emit_insn ((mode == DImode
17409 ? gen_x86_shift_adj_1
17410 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17414 emit_insn ((mode == DImode
17415 ? gen_x86_shift_adj_2
17416 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17420 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* NOTE(review): return type and braces are elided from this excerpt.
   PROB is on the REG_BR_PROB_BASE scale used by the callers below.  */
17422 predict_jump (int prob)
17424 rtx insn = get_last_insn ();
/* The caller must have just emitted a jump.  */
17425 gcc_assert (JUMP_P (insn));
17426 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17429 /* Helper function for the string operations below. Tests whether VARIABLE
17430 is aligned to VALUE bytes. If true, jump to the label. */
/* NOTE(review): the return of LABEL and braces are elided from this
   excerpt; EPILOGUE apparently only tunes the branch probability.  */
17432 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17434 rtx label = gen_label_rtx ();
17435 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* tmpcount = variable & value; jump to LABEL when the masked bits are 0.  */
17436 if (GET_MODE (variable) == DImode)
17437 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17439 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17440 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17443 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17445 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17449 /* Adjust COUNTER by the VALUE. */
/* Subtracts VALUE from COUNTREG in its own mode (add of -VALUE).
   NOTE(review): return type, braces and the else keyword are elided
   from this excerpt.  */
17451 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17453 if (GET_MODE (countreg) == DImode)
17454 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17456 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17459 /* Zero extend possibly SImode EXP to Pmode register. */
/* NOTE(review): return type, braces and the final return of R are
   elided from this excerpt.  VOIDmode inputs (constants) are simply
   forced into a Pmode register; Pmode inputs are copied; anything
   else goes through zero_extendsidi2.  */
17461 ix86_zero_extend_to_Pmode (rtx exp)
17464 if (GET_MODE (exp) == VOIDmode)
17465 return force_reg (Pmode, exp);
17466 if (GET_MODE (exp) == Pmode)
17467 return copy_to_mode_reg (Pmode, exp);
17468 r = gen_reg_rtx (Pmode);
17469 emit_insn (gen_zero_extendsidi2 (r, exp));
17473 /* Divide COUNTREG by SCALE. */
/* NOTE(review): braces, a fast-path return for SCALE == 1 (presumably —
   confirm against the full source) and the final return are elided from
   this excerpt.  SCALE must be a power of two (exact_log2 below).  */
17475 scale_counter (rtx countreg, int scale)
17478 rtx piece_size_mask;
/* Constant counts fold at compile time.  */
17482 if (CONST_INT_P (countreg))
17483 return GEN_INT (INTVAL (countreg) / scale);
17484 gcc_assert (REG_P (countreg));
17486 piece_size_mask = GEN_INT (scale - 1);
/* Runtime divide: logical shift right by log2(scale).  */
17487 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17488 GEN_INT (exact_log2 (scale)),
17489 NULL, 1, OPTAB_DIRECT);
17493 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17494 DImode for constant loop counts. */
/* NOTE(review): braces and the return statements for the two constant
   cases (DImode when the value needs > 32 bits, otherwise SImode) are
   elided from this excerpt.  */
17496 static enum machine_mode
17497 counter_mode (rtx count_exp)
17499 if (GET_MODE (count_exp) != VOIDmode)
17500 return GET_MODE (count_exp);
17501 if (!CONST_INT_P (count_exp))
/* Constant count that does not fit in 32 bits forces DImode.  */
17503 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17508 /* When SRCPTR is non-NULL, output simple loop to move memory
17509 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
17510 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
17511 equivalent loop to set memory by VALUE (supposed to be in MODE).
17513 The size is rounded down to whole number of chunk size moved at once.
17514 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* NOTE(review): gaps in the embedded numbering mean braces, several
   conditions and declarations (x_addr, y_addr, size, tmpreg[], i,
   expected_size parameter) are elided from this excerpt.  */
17518 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17519 rtx destptr, rtx srcptr, rtx value,
17520 rtx count, enum machine_mode mode, int unroll,
17523 rtx out_label, top_label, iter, tmp;
17524 enum machine_mode iter_mode = counter_mode (count);
17525 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17526 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17532 top_label = gen_label_rtx ();
17533 out_label = gen_label_rtx ();
17534 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the unrolled chunk.  */
17536 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17537 NULL, 1, OPTAB_DIRECT);
17538 /* Those two should combine. */
17539 if (piece_size == const1_rtx)
17541 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17543 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17545 emit_move_insn (iter, const0_rtx);
17547 emit_label (top_label);
/* Address of the current chunk: ptr + iter.  */
17549 tmp = convert_modes (Pmode, iter_mode, iter, true);
17550 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17551 destmem = change_address (destmem, mode, x_addr);
17555 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17556 srcmem = change_address (srcmem, mode, y_addr);
17558 /* When unrolling for chips that reorder memory reads and writes,
17559 we can save registers by using single temporary.
17560 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the "&& 0" disables this branch unconditionally —
   it reads as deliberately parked code, not an accident.  */
17561 if (!TARGET_64BIT && 0)
17563 for (i = 0; i < unroll; i++)
17568 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17570 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17572 emit_move_insn (destmem, srcmem);
/* Load all UNROLL chunks into temporaries, then store them.  */
17578 gcc_assert (unroll <= 4);
17579 for (i = 0; i < unroll; i++)
17581 tmpreg[i] = gen_reg_rtx (mode);
17585 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17587 emit_move_insn (tmpreg[i], srcmem);
17589 for (i = 0; i < unroll; i++)
17594 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17596 emit_move_insn (destmem, tmpreg[i]);
/* Memset flavor: store VALUE into each chunk.  */
17601 for (i = 0; i < unroll; i++)
17605 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17606 emit_move_insn (destmem, value);
/* iter += piece_size; loop while iter < size.  */
17609 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17610 true, OPTAB_LIB_WIDEN);
17612 emit_move_insn (iter, tmp);
17614 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive the back-edge probability from the expected trip count.  */
17616 if (expected_size != -1)
17618 expected_size /= GET_MODE_SIZE (mode) * unroll;
17619 if (expected_size == 0)
17621 else if (expected_size > REG_BR_PROB_BASE)
17622 predict_jump (REG_BR_PROB_BASE - 1);
17624 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17627 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the pointers past the copied region.  */
17628 iter = ix86_zero_extend_to_Pmode (iter);
17629 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17630 true, OPTAB_LIB_WIDEN);
17631 if (tmp != destptr)
17632 emit_move_insn (destptr, tmp);
17635 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17636 true, OPTAB_LIB_WIDEN);
17638 emit_move_insn (srcptr, tmp);
17640 emit_label (out_label);
17643 /* Output "rep; mov" instruction.
17644 Arguments have same meaning as for previous function */
/* NOTE(review): gaps in the embedded numbering mean braces, the
   declarations of countreg/destexp/srcexp and a mode-widening statement
   after the early QImode test are elided from this excerpt.  */
17646 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17647 rtx destptr, rtx srcptr,
17649 enum machine_mode mode)
17655 /* If the size is known, it is shorter to use rep movs. */
17656 if (mode == QImode && CONST_INT_P (count)
17657 && !(INTVAL (count) & 3))
/* Rewrap the MEMs as BLKmode at the pointer registers.  */
17660 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17661 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17662 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17663 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17664 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Final pointer values: ptr + countreg * element size.  */
17665 if (mode != QImode)
17667 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17668 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17669 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17670 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17671 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17672 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17676 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17677 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
/* Known count: record the exact byte size on both MEMs for aliasing.  */
17679 if (CONST_INT_P (count))
17681 count = GEN_INT (INTVAL (count)
17682 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17683 destmem = shallow_copy_rtx (destmem);
17684 srcmem = shallow_copy_rtx (srcmem);
17685 set_mem_size (destmem, count);
17686 set_mem_size (srcmem, count);
/* Unknown count: clear any stale size info.  */
17690 if (MEM_SIZE (destmem))
17691 set_mem_size (destmem, NULL_RTX);
17692 if (MEM_SIZE (srcmem))
17693 set_mem_size (srcmem, NULL_RTX);
17695 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17699 /* Output "rep; stos" instruction.
17700 Arguments have same meaning as for previous function */
/* NOTE(review): gaps in the embedded numbering mean braces, the
   declarations of countreg/destexp and the orig_value parameter line
   are elided from this excerpt.  */
17702 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17703 rtx count, enum machine_mode mode,
17709 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17710 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* Fill value must live in a register of the store mode.  */
17711 value = force_reg (mode, gen_lowpart (mode, value));
17712 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Final destination pointer: destptr + countreg * element size.  */
17713 if (mode != QImode)
17715 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17716 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17717 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17720 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Zero-fill with a known count: record the exact byte size for aliasing.  */
17721 if (orig_value == const0_rtx && CONST_INT_P (count))
17723 count = GEN_INT (INTVAL (count)
17724 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17725 destmem = shallow_copy_rtx (destmem);
17726 set_mem_size (destmem, count);
17728 else if (MEM_SIZE (destmem))
17729 set_mem_size (destmem, NULL_RTX);
17730 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single MODE-sized string move from SRCMEM+OFFSET to
   DESTMEM+OFFSET, advancing SRCPTR and DESTPTR via the strmov pattern.
   NOTE(review): return type and braces are elided from this excerpt.  */
17734 emit_strmov (rtx destmem, rtx srcmem,
17735 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17737 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17738 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17739 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17742 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
/* NOTE(review): gaps in this listing dropped braces/else/offset updates;
   comments below cover only the visible statements.  */
17744 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17745 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant count: emit a straight-line sequence of moves, testing each
   bit of the residual count from high to low.  */
17748 if (CONST_INT_P (count))
17750 HOST_WIDE_INT countval = INTVAL (count);
17753 if ((countval & 0x10) && max_size > 16)
17757 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17758 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17761 gcc_unreachable ();
17764 if ((countval & 0x08) && max_size > 8)
17767 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
/* 32-bit targets: an 8-byte residue is done as two SImode moves.  */
17770 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17771 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17775 if ((countval & 0x04) && max_size > 4)
17777 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17780 if ((countval & 0x02) && max_size > 2)
17782 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17785 if ((countval & 0x01) && max_size > 1)
17787 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large variable residue: mask COUNT down to the epilogue range and let
   a QImode copy loop finish the job.  */
17794 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17795 count, 1, OPTAB_DIRECT);
17796 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17797 count, QImode, 1, 4);
17801 /* When there are stringops, we can cheaply increase dest and src pointers.
17802 Otherwise we save code size by maintaining offset (zero is readily
17803 available from preceding rep operation) and using x86 addressing modes.
17805 if (TARGET_SINGLE_STRINGOP)
/* Stringop variant: each aligntest conditionally performs one strmov,
   which advances both pointers itself.  */
17809 rtx label = ix86_expand_aligntest (count, 4, true);
17810 src = change_address (srcmem, SImode, srcptr);
17811 dest = change_address (destmem, SImode, destptr);
17812 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17813 emit_label (label);
17814 LABEL_NUSES (label) = 1;
17818 rtx label = ix86_expand_aligntest (count, 2, true);
17819 src = change_address (srcmem, HImode, srcptr);
17820 dest = change_address (destmem, HImode, destptr);
17821 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17822 emit_label (label);
17823 LABEL_NUSES (label) = 1;
17827 rtx label = ix86_expand_aligntest (count, 1, true);
17828 src = change_address (srcmem, QImode, srcptr);
17829 dest = change_address (destmem, QImode, destptr);
17830 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17831 emit_label (label);
17832 LABEL_NUSES (label) = 1;
/* Non-stringop variant: keep a running OFFSET register (starts at zero)
   and use base+offset addressing, bumping OFFSET after each move.  */
17837 rtx offset = force_reg (Pmode, const0_rtx);
17842 rtx label = ix86_expand_aligntest (count, 4, true);
17843 src = change_address (srcmem, SImode, srcptr);
17844 dest = change_address (destmem, SImode, destptr);
17845 emit_move_insn (dest, src);
17846 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17847 true, OPTAB_LIB_WIDEN);
17849 emit_move_insn (offset, tmp);
17850 emit_label (label);
17851 LABEL_NUSES (label) = 1;
17855 rtx label = ix86_expand_aligntest (count, 2, true);
17856 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17857 src = change_address (srcmem, HImode, tmp);
17858 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17859 dest = change_address (destmem, HImode, tmp);
17860 emit_move_insn (dest, src);
17861 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17862 true, OPTAB_LIB_WIDEN);
17864 emit_move_insn (offset, tmp);
17865 emit_label (label);
17866 LABEL_NUSES (label) = 1;
/* Last possible byte: no offset update needed afterwards.  */
17870 rtx label = ix86_expand_aligntest (count, 1, true);
17871 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17872 src = change_address (srcmem, QImode, tmp);
17873 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17874 dest = change_address (destmem, QImode, tmp);
17875 emit_move_insn (dest, src);
17876 emit_label (label);
17877 LABEL_NUSES (label) = 1;
17882 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Variable-count fallback for the setmem epilogue: mask COUNT down to
   the residual range, then store it byte-by-byte with the generic
   set/move loop expander.  */
17884 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17885 rtx count, int max_size)
17888 expand_simple_binop (counter_mode (count), AND, count,
17889 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17890 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17891 gen_lowpart (QImode, value), count, QImode,
17895 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* NOTE(review): gaps in this listing dropped braces/else lines; comments
   below cover only the visible statements.  VALUE is assumed already
   promoted (byte replicated) by promote_duplicated_reg_to_size — TODO
   confirm against callers.  */
17897 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
/* Constant count: straight-line stores, one per set bit of the residual
   count, largest first.  */
17901 if (CONST_INT_P (count))
17903 HOST_WIDE_INT countval = INTVAL (count);
17906 if ((countval & 0x10) && max_size > 16)
17910 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17911 emit_insn (gen_strset (destptr, dest, value));
17912 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17913 emit_insn (gen_strset (destptr, dest, value));
17916 gcc_unreachable ();
17919 if ((countval & 0x08) && max_size > 8)
17923 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17924 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit targets: 8-byte residue becomes two SImode stores.  */
17928 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17929 emit_insn (gen_strset (destptr, dest, value));
17930 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17931 emit_insn (gen_strset (destptr, dest, value));
17935 if ((countval & 0x04) && max_size > 4)
17937 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17938 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17941 if ((countval & 0x02) && max_size > 2)
17943 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17944 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17947 if ((countval & 0x01) && max_size > 1)
17949 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17950 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Variable count that is too large for the jump tree below: fall back to
   a byte loop.  */
17957 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable count: binary jump tree, testing one count bit per label and
   storing the corresponding chunk when set.  */
17962 rtx label = ix86_expand_aligntest (count, 16, true);
17965 dest = change_address (destmem, DImode, destptr);
17966 emit_insn (gen_strset (destptr, dest, value));
17967 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit variant of the 16-byte store: four SImode strsets.  */
17971 dest = change_address (destmem, SImode, destptr);
17972 emit_insn (gen_strset (destptr, dest, value));
17973 emit_insn (gen_strset (destptr, dest, value));
17974 emit_insn (gen_strset (destptr, dest, value));
17975 emit_insn (gen_strset (destptr, dest, value));
17977 emit_label (label);
17978 LABEL_NUSES (label) = 1;
17982 rtx label = ix86_expand_aligntest (count, 8, true);
17985 dest = change_address (destmem, DImode, destptr);
17986 emit_insn (gen_strset (destptr, dest, value));
17990 dest = change_address (destmem, SImode, destptr);
17991 emit_insn (gen_strset (destptr, dest, value));
17992 emit_insn (gen_strset (destptr, dest, value));
17994 emit_label (label);
17995 LABEL_NUSES (label) = 1;
17999 rtx label = ix86_expand_aligntest (count, 4, true);
18000 dest = change_address (destmem, SImode, destptr);
18001 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18002 emit_label (label);
18003 LABEL_NUSES (label) = 1;
18007 rtx label = ix86_expand_aligntest (count, 2, true);
18008 dest = change_address (destmem, HImode, destptr);
18009 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18010 emit_label (label);
18011 LABEL_NUSES (label) = 1;
18015 rtx label = ix86_expand_aligntest (count, 1, true);
18016 dest = change_address (destmem, QImode, destptr);
18017 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18018 emit_label (label);
18019 LABEL_NUSES (label) = 1;
18023 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
18024 DESIRED_ALIGNMENT. */
/* Runtime alignment prologue: for each power of two between ALIGN and
   DESIRED_ALIGNMENT, test DESTPTR's low bit at runtime and copy one
   chunk when misaligned, decrementing COUNT accordingly.  Only the
   destination is aligned; the source just follows along.  */
18026 expand_movmem_prologue (rtx destmem, rtx srcmem,
18027 rtx destptr, rtx srcptr, rtx count,
18028 int align, int desired_alignment)
18030 if (align <= 1 && desired_alignment > 1)
18032 rtx label = ix86_expand_aligntest (destptr, 1, false);
18033 srcmem = change_address (srcmem, QImode, srcptr);
18034 destmem = change_address (destmem, QImode, destptr);
18035 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18036 ix86_adjust_counter (count, 1);
18037 emit_label (label);
18038 LABEL_NUSES (label) = 1;
18040 if (align <= 2 && desired_alignment > 2)
18042 rtx label = ix86_expand_aligntest (destptr, 2, false);
18043 srcmem = change_address (srcmem, HImode, srcptr);
18044 destmem = change_address (destmem, HImode, destptr);
18045 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18046 ix86_adjust_counter (count, 2);
18047 emit_label (label);
18048 LABEL_NUSES (label) = 1;
18050 if (align <= 4 && desired_alignment > 4)
18052 rtx label = ix86_expand_aligntest (destptr, 4, false);
18053 srcmem = change_address (srcmem, SImode, srcptr);
18054 destmem = change_address (destmem, SImode, destptr);
18055 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18056 ix86_adjust_counter (count, 4);
18057 emit_label (label);
18058 LABEL_NUSES (label) = 1;
/* Only alignments up to 8 are handled by the chain above.  */
18060 gcc_assert (desired_alignment <= 8);
18063 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
18064 ALIGN_BYTES is how many bytes need to be copied. */
18066 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
18067 int desired_align, int align_bytes)
18070 rtx src_size, dst_size;
18072 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
18073 if (src_align_bytes >= 0)
18074 src_align_bytes = desired_align - src_align_bytes;
18075 src_size = MEM_SIZE (src);
18076 dst_size = MEM_SIZE (dst);
18077 if (align_bytes & 1)
18079 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18080 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
18082 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18084 if (align_bytes & 2)
18086 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18087 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
18088 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18089 set_mem_align (dst, 2 * BITS_PER_UNIT);
18090 if (src_align_bytes >= 0
18091 && (src_align_bytes & 1) == (align_bytes & 1)
18092 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
18093 set_mem_align (src, 2 * BITS_PER_UNIT);
18095 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18097 if (align_bytes & 4)
18099 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18100 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
18101 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18102 set_mem_align (dst, 4 * BITS_PER_UNIT);
18103 if (src_align_bytes >= 0)
18105 unsigned int src_align = 0;
18106 if ((src_align_bytes & 3) == (align_bytes & 3))
18108 else if ((src_align_bytes & 1) == (align_bytes & 1))
18110 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18111 set_mem_align (src, src_align * BITS_PER_UNIT);
18114 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18116 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18117 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
18118 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18119 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18120 if (src_align_bytes >= 0)
18122 unsigned int src_align = 0;
18123 if ((src_align_bytes & 7) == (align_bytes & 7))
18125 else if ((src_align_bytes & 3) == (align_bytes & 3))
18127 else if ((src_align_bytes & 1) == (align_bytes & 1))
18129 if (src_align > (unsigned int) desired_align)
18130 src_align = desired_align;
18131 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18132 set_mem_align (src, src_align * BITS_PER_UNIT);
18135 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18137 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
18142 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
18143 DESIRED_ALIGNMENT. */
/* Runtime alignment prologue for memset: mirror of
   expand_movmem_prologue, but stores (gen_strset) the pre-promoted VALUE
   instead of copying from a source.  */
18145 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
18146 int align, int desired_alignment)
18148 if (align <= 1 && desired_alignment > 1)
18150 rtx label = ix86_expand_aligntest (destptr, 1, false);
18151 destmem = change_address (destmem, QImode, destptr);
18152 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
18153 ix86_adjust_counter (count, 1);
18154 emit_label (label);
18155 LABEL_NUSES (label) = 1;
18157 if (align <= 2 && desired_alignment > 2)
18159 rtx label = ix86_expand_aligntest (destptr, 2, false);
18160 destmem = change_address (destmem, HImode, destptr);
18161 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
18162 ix86_adjust_counter (count, 2);
18163 emit_label (label);
18164 LABEL_NUSES (label) = 1;
18166 if (align <= 4 && desired_alignment > 4)
18168 rtx label = ix86_expand_aligntest (destptr, 4, false);
18169 destmem = change_address (destmem, SImode, destptr);
18170 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
18171 ix86_adjust_counter (count, 4);
18172 emit_label (label);
18173 LABEL_NUSES (label) = 1;
/* Only alignments up to 8 are handled by the chain above.  */
18175 gcc_assert (desired_alignment <= 8);
18178 /* Set enough from DST to align DST known to by aligned by ALIGN to
18179 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
/* Compile-time-known variant: store the chunks dictated by the bits of
   ALIGN_BYTES, re-base DST as BLKmode past them, and keep MEM alignment
   and size info accurate.  Returns the adjusted destination MEM.  */
18181 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
18182 int desired_align, int align_bytes)
18185 rtx dst_size = MEM_SIZE (dst);
18186 if (align_bytes & 1)
18188 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18190 emit_insn (gen_strset (destreg, dst,
18191 gen_lowpart (QImode, value)));
18193 if (align_bytes & 2)
18195 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18196 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18197 set_mem_align (dst, 2 * BITS_PER_UNIT);
18199 emit_insn (gen_strset (destreg, dst,
18200 gen_lowpart (HImode, value)));
18202 if (align_bytes & 4)
18204 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18205 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18206 set_mem_align (dst, 4 * BITS_PER_UNIT);
18208 emit_insn (gen_strset (destreg, dst,
18209 gen_lowpart (SImode, value)));
/* Remainder of the block is handled by the main algorithm.  */
18211 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18212 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18213 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18215 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18219 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
18220 static enum stringop_alg
18221 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
18222 int *dynamic_check)
18224 const struct stringop_algs * algs;
18225 bool optimize_for_speed;
18226 /* Algorithms using the rep prefix want at least edi and ecx;
18227 additionally, memset wants eax and memcpy wants esi. Don't
18228 consider such algorithms if the user has appropriated those
18229 registers for their own purposes. */
18230 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
18232 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
18234 #define ALG_USABLE_P(alg) (rep_prefix_usable \
18235 || (alg != rep_prefix_1_byte \
18236 && alg != rep_prefix_4_byte \
18237 && alg != rep_prefix_8_byte))
18238 const struct processor_costs *cost;
18240 /* Even if the string operation call is cold, we still might spend a lot
18241 of time processing large blocks. */
18242 if (optimize_function_for_size_p (cfun)
18243 || (optimize_insn_for_size_p ()
18244 && expected_size != -1 && expected_size < 256))
18245 optimize_for_speed = false;
18247 optimize_for_speed = true;
18249 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
18251 *dynamic_check = -1;
18253 algs = &cost->memset[TARGET_64BIT != 0];
18255 algs = &cost->memcpy[TARGET_64BIT != 0];
18256 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
18257 return stringop_alg;
18258 /* rep; movq or rep; movl is the smallest variant. */
18259 else if (!optimize_for_speed)
18261 if (!count || (count & 3))
18262 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
18264 return rep_prefix_usable ? rep_prefix_4_byte : loop;
18266 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
18268 else if (expected_size != -1 && expected_size < 4)
18269 return loop_1_byte;
18270 else if (expected_size != -1)
18273 enum stringop_alg alg = libcall;
18274 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
18276 /* We get here if the algorithms that were not libcall-based
18277 were rep-prefix based and we are unable to use rep prefixes
18278 based on global register usage. Break out of the loop and
18279 use the heuristic below. */
18280 if (algs->size[i].max == 0)
18282 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18284 enum stringop_alg candidate = algs->size[i].alg;
18286 if (candidate != libcall && ALG_USABLE_P (candidate))
18288 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18289 last non-libcall inline algorithm. */
18290 if (TARGET_INLINE_ALL_STRINGOPS)
18292 /* When the current size is best to be copied by a libcall,
18293 but we are still forced to inline, run the heuristic below
18294 that will pick code for medium sized blocks. */
18295 if (alg != libcall)
18299 else if (ALG_USABLE_P (candidate))
18303 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18305 /* When asked to inline the call anyway, try to pick meaningful choice.
18306 We look for maximal size of block that is faster to copy by hand and
18307 take blocks of at most of that size guessing that average size will
18308 be roughly half of the block.
18310 If this turns out to be bad, we might simply specify the preferred
18311 choice in ix86_costs. */
18312 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18313 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18316 enum stringop_alg alg;
18318 bool any_alg_usable_p = true;
18320 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
18322 enum stringop_alg candidate = algs->size[i].alg;
18323 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18325 if (candidate != libcall && candidate
18326 && ALG_USABLE_P (candidate))
18327 max = algs->size[i].max;
18329 /* If there aren't any usable algorithms, then recursing on
18330 smaller sizes isn't going to find anything. Just return the
18331 simple byte-at-a-time copy loop. */
18332 if (!any_alg_usable_p)
18334 /* Pick something reasonable. */
18335 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18336 *dynamic_check = 128;
18337 return loop_1_byte;
18341 alg = decide_alg (count, max / 2, memset, dynamic_check);
18342 gcc_assert (*dynamic_check == -1);
18343 gcc_assert (alg != libcall);
18344 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18345 *dynamic_check = max;
18348 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18349 #undef ALG_USABLE_P
18352 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18353 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* NOTE(review): the switch's case labels for the loop/byte algorithms
   fell in listing gaps; visible cases are annotated below.  Returns the
   destination alignment the prologue should establish for ALG.  */
18355 decide_alignment (int align,
18356 enum stringop_alg alg,
18359 int desired_align = 0;
18363 gcc_unreachable ();
18365 case unrolled_loop:
/* Word-sized chunks want word alignment.  */
18366 desired_align = GET_MODE_SIZE (Pmode);
18368 case rep_prefix_8_byte:
18371 case rep_prefix_4_byte:
18372 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18373 copying whole cacheline at once. */
18374 if (TARGET_PENTIUMPRO)
18379 case rep_prefix_1_byte:
18380 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18381 copying whole cacheline at once. */
18382 if (TARGET_PENTIUMPRO)
/* Never ask for less than what the operand already has, and do not
   bother aligning blocks known to be tiny.  */
18396 if (desired_align < align)
18397 desired_align = align;
18398 if (expected_size != -1 && expected_size < 4)
18399 desired_align = align;
18400 return desired_align;
18403 /* Return the smallest power of 2 greater than VAL. */
18405 smallest_pow2_greater_than (int val)
18413 /* Expand string move (memcpy) operation. Use i386 string operations when
18414 profitable. expand_setmem contains similar code. The code depends upon
18415 architecture, block size and alignment, but always has the same
18418 1) Prologue guard: Conditional that jumps up to epilogues for small
18419 blocks that can be handled by epilogue alone. This is faster but
18420 also needed for correctness, since prologue assume the block is larger
18421 than the desired alignment.
18423 Optional dynamic check for size and libcall for large
18424 blocks is emitted here too, with -minline-stringops-dynamically.
18426 2) Prologue: copy first few bytes in order to get destination aligned
18427 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18428 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18429 We emit either a jump tree on power of two sized blocks, or a byte loop.
18431 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18432 with specified algorithm.
18434 4) Epilogue: code copying tail of the block that is too small to be
18435 handled by main body (or up to size guarded by prologue guard). */
/* NOTE(review): this listing has gaps (braces, case labels, break/return
   lines dropped); comments annotate only the visible statements.  */
18438 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18439 rtx expected_align_exp, rtx expected_size_exp)
18445 rtx jump_around_label = NULL;
18446 HOST_WIDE_INT align = 1;
18447 unsigned HOST_WIDE_INT count = 0;
18448 HOST_WIDE_INT expected_size = -1;
18449 int size_needed = 0, epilogue_size_needed;
18450 int desired_align = 0, align_bytes = 0;
18451 enum stringop_alg alg;
18453 bool need_zero_guard = false;
/* Gather compile-time alignment/size knowledge from the expander args
   and the MEM itself.  */
18455 if (CONST_INT_P (align_exp))
18456 align = INTVAL (align_exp);
18457 /* i386 can do misaligned access on reasonably increased cost. */
18458 if (CONST_INT_P (expected_align_exp)
18459 && INTVAL (expected_align_exp) > align)
18460 align = INTVAL (expected_align_exp);
18461 /* ALIGN is the minimum of destination and source alignment, but we care here
18462 just about destination alignment. */
18463 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18464 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18466 if (CONST_INT_P (count_exp))
18467 count = expected_size = INTVAL (count_exp);
18468 if (CONST_INT_P (expected_size_exp) && count == 0)
18469 expected_size = INTVAL (expected_size_exp);
18471 /* Make sure we don't need to care about overflow later on. */
18472 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18475 /* Step 0: Decide on preferred algorithm, desired alignment and
18476 size of chunks to be copied by main loop. */
18478 alg = decide_alg (count, expected_size, false, &dynamic_check);
18479 desired_align = decide_alignment (align, alg, expected_size);
18481 if (!TARGET_ALIGN_STRINGOPS)
18482 align = desired_align;
18484 if (alg == libcall)
18486 gcc_assert (alg != no_stringop);
18488 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18489 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18490 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Per-algorithm chunk size; zero-guard is needed for loops that would
   misbehave when the residual count reaches zero early.  */
18495 gcc_unreachable ();
18497 need_zero_guard = true;
18498 size_needed = GET_MODE_SIZE (Pmode);
18500 case unrolled_loop:
18501 need_zero_guard = true;
18502 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18504 case rep_prefix_8_byte:
18507 case rep_prefix_4_byte:
18510 case rep_prefix_1_byte:
18514 need_zero_guard = true;
18519 epilogue_size_needed = size_needed;
18521 /* Step 1: Prologue guard. */
18523 /* Alignment code needs count to be in register. */
18524 if (CONST_INT_P (count_exp) && desired_align > align)
18526 if (INTVAL (count_exp) > desired_align
18527 && INTVAL (count_exp) > size_needed)
18530 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18531 if (align_bytes <= 0)
18534 align_bytes = desired_align - align_bytes;
18536 if (align_bytes == 0)
18537 count_exp = force_reg (counter_mode (count_exp), count_exp);
18539 gcc_assert (desired_align >= 1 && align >= 1);
18541 /* Ensure that alignment prologue won't copy past end of block. */
18542 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18544 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18545 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
18546 Make sure it is power of 2. */
18547 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18551 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18553 /* If main algorithm works on QImode, no epilogue is needed.
18554 For small sizes just don't align anything. */
18555 if (size_needed == 1)
18556 desired_align = align;
/* Runtime guard: small blocks jump straight to the epilogue.  */
18563 label = gen_label_rtx ();
18564 emit_cmp_and_jump_insns (count_exp,
18565 GEN_INT (epilogue_size_needed),
18566 LTU, 0, counter_mode (count_exp), 1, label);
18567 if (expected_size == -1 || expected_size < epilogue_size_needed)
18568 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18570 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18574 /* Emit code to decide on runtime whether library call or inline should be
18576 if (dynamic_check != -1)
18578 if (CONST_INT_P (count_exp))
18580 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18582 emit_block_move_via_libcall (dst, src, count_exp, false);
18583 count_exp = const0_rtx;
/* Variable count: branch to a libcall for big blocks, around it
   otherwise.  */
18589 rtx hot_label = gen_label_rtx ();
18590 jump_around_label = gen_label_rtx ();
18591 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18592 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18593 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18594 emit_block_move_via_libcall (dst, src, count_exp, false);
18595 emit_jump (jump_around_label);
18596 emit_label (hot_label);
18600 /* Step 2: Alignment prologue. */
18602 if (desired_align > align)
18604 if (align_bytes == 0)
18606 /* Except for the first move in epilogue, we no longer know
18607 constant offset in aliasing info. It don't seems to worth
18608 the pain to maintain it for the first move, so throw away
18610 src = change_address (src, BLKmode, srcreg);
18611 dst = change_address (dst, BLKmode, destreg);
18612 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18617 /* If we know how many bytes need to be stored before dst is
18618 sufficiently aligned, maintain aliasing info accurately. */
18619 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18620 desired_align, align_bytes);
18621 count_exp = plus_constant (count_exp, -align_bytes);
18622 count -= align_bytes;
/* After the prologue the residue may be smaller than one main-loop
   chunk; add the zero-guard test in that case.  */
18624 if (need_zero_guard
18625 && (count < (unsigned HOST_WIDE_INT) size_needed
18626 || (align_bytes == 0
18627 && count < ((unsigned HOST_WIDE_INT) size_needed
18628 + desired_align - align))))
18630 /* It is possible that we copied enough so the main loop will not
18632 gcc_assert (size_needed > 1);
18633 if (label == NULL_RTX)
18634 label = gen_label_rtx ();
18635 emit_cmp_and_jump_insns (count_exp,
18636 GEN_INT (size_needed),
18637 LTU, 0, counter_mode (count_exp), 1, label);
18638 if (expected_size == -1
18639 || expected_size < (desired_align - align) / 2 + size_needed)
18640 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18642 predict_jump (REG_BR_PROB_BASE * 60 / 100);
/* QImode main loop needs no separate epilogue: fold the guard label
   in here.  */
18645 if (label && size_needed == 1)
18647 emit_label (label);
18648 LABEL_NUSES (label) = 1;
18650 epilogue_size_needed = 1;
18652 else if (label == NULL_RTX)
18653 epilogue_size_needed = size_needed;
18655 /* Step 3: Main loop. */
/* Dispatch on ALG (case labels fell in listing gaps).  */
18661 gcc_unreachable ();
18663 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18664 count_exp, QImode, 1, expected_size);
18667 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18668 count_exp, Pmode, 1, expected_size);
18670 case unrolled_loop:
18671 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18672 registers for 4 temporaries anyway. */
18673 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18674 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18677 case rep_prefix_8_byte:
18678 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18681 case rep_prefix_4_byte:
18682 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18685 case rep_prefix_1_byte:
18686 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18690 /* Adjust properly the offset of src and dest memory for aliasing. */
18691 if (CONST_INT_P (count_exp))
18693 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18694 (count / size_needed) * size_needed);
18695 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18696 (count / size_needed) * size_needed);
18700 src = change_address (src, BLKmode, srcreg);
18701 dst = change_address (dst, BLKmode, destreg);
18704 /* Step 4: Epilogue to copy the remaining bytes. */
18708 /* When the main loop is done, COUNT_EXP might hold original count,
18709 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18710 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18711 bytes. Compensate if needed. */
18713 if (size_needed < epilogue_size_needed)
18716 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18717 GEN_INT (size_needed - 1), count_exp, 1,
18719 if (tmp != count_exp)
18720 emit_move_insn (count_exp, tmp);
18722 emit_label (label);
18723 LABEL_NUSES (label) = 1;
18726 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18727 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18728 epilogue_size_needed);
18729 if (jump_around_label)
18730 emit_label (jump_around_label);
18734 /* Helper function for memcpy. For QImode value 0xXY produce
18735 0xXYXYXYXY of wide specified by MODE. This is essentially
18736 a * 0x10101010, but we can do slightly better than
18737 synth_mult by unwinding the sequence by hand on CPUs with
18740 promote_duplicated_reg (enum machine_mode mode, rtx val)
18742 enum machine_mode valmode = GET_MODE (val);
18744 int nops = mode == DImode ? 3 : 2;
18746 gcc_assert (mode == SImode || mode == DImode);
18747 if (val == const0_rtx)
18748 return copy_to_mode_reg (mode, const0_rtx);
18749 if (CONST_INT_P (val))
18751 HOST_WIDE_INT v = INTVAL (val) & 255;
18755 if (mode == DImode)
18756 v |= (v << 16) << 16;
18757 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18760 if (valmode == VOIDmode)
18762 if (valmode != QImode)
18763 val = gen_lowpart (QImode, val);
18764 if (mode == QImode)
18766 if (!TARGET_PARTIAL_REG_STALL)
18768 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18769 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18770 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18771 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18773 rtx reg = convert_modes (mode, QImode, val, true);
18774 tmp = promote_duplicated_reg (mode, const1_rtx);
18775 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18780 rtx reg = convert_modes (mode, QImode, val, true);
18782 if (!TARGET_PARTIAL_REG_STALL)
18783 if (mode == SImode)
18784 emit_insn (gen_movsi_insv_1 (reg, reg));
18786 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18789 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18790 NULL, 1, OPTAB_DIRECT);
18792 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18794 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18795 NULL, 1, OPTAB_DIRECT);
18796 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18797 if (mode == SImode)
18799 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18800 NULL, 1, OPTAB_DIRECT);
18801 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18806 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18807 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18808 alignment from ALIGN to DESIRED_ALIGN. */
/* Picks the widest mode any part of the expansion will store in and
   replicates VAL's low byte across it; VAL is returned unchanged when
   only byte stores are needed.  The DImode branch's guard (a TARGET_64BIT
   test, presumably) fell in a listing gap — confirm against the full
   source.  */
18810 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18815 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18816 promoted_val = promote_duplicated_reg (DImode, val);
18817 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18818 promoted_val = promote_duplicated_reg (SImode, val);
18819 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18820 promoted_val = promote_duplicated_reg (HImode, val);
18822 promoted_val = val;
18824 return promoted_val;
18827 /* Expand string clear operation (bzero).  Use i386 string operations when
18828    profitable.  See expand_movmem comment for explanation of individual
18829    steps performed. */
/* Returns nonzero on success (the caller falls back to a library call
   otherwise).  DST is the destination BLKmode MEM; COUNT_EXP the byte
   count; VAL_EXP the fill value; ALIGN_EXP / EXPECTED_ALIGN_EXP /
   EXPECTED_SIZE_EXP are compile-time hints.  */
18831 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18832 		    rtx expected_align_exp, rtx expected_size_exp)
18837   rtx jump_around_label = NULL;
18838   HOST_WIDE_INT align = 1;
18839   unsigned HOST_WIDE_INT count = 0;
18840   HOST_WIDE_INT expected_size = -1;
18841   int size_needed = 0, epilogue_size_needed;
18842   int desired_align = 0, align_bytes = 0;
18843   enum stringop_alg alg;
18844   rtx promoted_val = NULL;
18845   bool force_loopy_epilogue = false;
18847   bool need_zero_guard = false;
18849   if (CONST_INT_P (align_exp))
18850     align = INTVAL (align_exp);
18851   /* i386 can do misaligned access on reasonably increased cost. */
18852   if (CONST_INT_P (expected_align_exp)
18853       && INTVAL (expected_align_exp) > align)
18854     align = INTVAL (expected_align_exp);
18855   if (CONST_INT_P (count_exp))
18856     count = expected_size = INTVAL (count_exp);
18857   if (CONST_INT_P (expected_size_exp) && count == 0)
18858     expected_size = INTVAL (expected_size_exp);
18860   /* Make sure we don't need to care about overflow later on. */
18861   if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18864   /* Step 0: Decide on preferred algorithm, desired alignment and
18865      size of chunks to be copied by main loop. */
18867   alg = decide_alg (count, expected_size, true, &dynamic_check);
18868   desired_align = decide_alignment (align, alg, expected_size);
18870   if (!TARGET_ALIGN_STRINGOPS)
18871     align = desired_align;
18873   if (alg == libcall)
18875   gcc_assert (alg != no_stringop);
18877   count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18878   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* SIZE_NEEDED is the chunk size the chosen main-loop algorithm stores
   per iteration; NEED_ZERO_GUARD marks algorithms that must not run
   with a zero residual count.  */
18883       gcc_unreachable ();
18885       need_zero_guard = true;
18886       size_needed = GET_MODE_SIZE (Pmode);
18888     case unrolled_loop:
18889       need_zero_guard = true;
18890       size_needed = GET_MODE_SIZE (Pmode) * 4;
18892     case rep_prefix_8_byte:
18895     case rep_prefix_4_byte:
18898     case rep_prefix_1_byte:
18902       need_zero_guard = true;
18906   epilogue_size_needed = size_needed;
18908   /* Step 1: Prologue guard. */
18910   /* Alignment code needs count to be in register. */
18911   if (CONST_INT_P (count_exp) && desired_align > align)
18913       if (INTVAL (count_exp) > desired_align
18914 	  && INTVAL (count_exp) > size_needed)
18917 	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18918 	  if (align_bytes <= 0)
18921 	    align_bytes = desired_align - align_bytes;
18923       if (align_bytes == 0)
18925 	  enum machine_mode mode = SImode;
18926 	  if (TARGET_64BIT && (count & ~0xffffffff))
18928 	  count_exp = force_reg (mode, count_exp);
18931   /* Do the cheap promotion to allow better CSE across the
18932      main loop and epilogue (ie one load of the big constant in the
18933      front of all code. */
18934   if (CONST_INT_P (val_exp))
18935     promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18936 						   desired_align, align);
18937   /* Ensure that alignment prologue won't copy past end of block. */
18938   if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18940       epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18941       /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18942 	 Make sure it is power of 2. */
18943       epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18945       /* To improve performance of small blocks, we jump around the VAL
18946 	 promoting mode.  This mean that if the promoted VAL is not constant,
18947 	 we might not use it in the epilogue and have to use byte
18949       if (epilogue_size_needed > 2 && !promoted_val)
18950 	force_loopy_epilogue = true;
18953 	  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18955 	      /* If main algorithm works on QImode, no epilogue is needed.
18956 		 For small sizes just don't align anything. */
18957 	      if (size_needed == 1)
18958 		desired_align = align;
/* Run-time guard: skip the main loop (jump to LABEL) when the count is
   smaller than what one main-loop iteration would store.  */
18965 	  label = gen_label_rtx ();
18966 	  emit_cmp_and_jump_insns (count_exp,
18967 				   GEN_INT (epilogue_size_needed),
18968 				   LTU, 0, counter_mode (count_exp), 1, label);
18969 	  if (expected_size == -1 || expected_size <= epilogue_size_needed)
18970 	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
18972 	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* DYNAMIC_CHECK != -1 requests a run-time size test that dispatches
   small blocks to the inline code and large ones to the libcall.  */
18975   if (dynamic_check != -1)
18977       rtx hot_label = gen_label_rtx ();
18978       jump_around_label = gen_label_rtx ();
18979       emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18980 			       LEU, 0, counter_mode (count_exp), 1, hot_label);
18981       predict_jump (REG_BR_PROB_BASE * 90 / 100);
18982       set_storage_via_libcall (dst, count_exp, val_exp, false);
18983       emit_jump (jump_around_label);
18984       emit_label (hot_label);
18987   /* Step 2: Alignment prologue. */
18989   /* Do the expensive promotion once we branched off the small blocks. */
18991     promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18992 						   desired_align, align);
18993   gcc_assert (desired_align >= 1 && align >= 1);
18995   if (desired_align > align)
18997       if (align_bytes == 0)
18999 	  /* Except for the first move in epilogue, we no longer know
19000 	     constant offset in aliasing info.  It don't seems to worth
19001 	     the pain to maintain it for the first move, so throw away
19003 	  dst = change_address (dst, BLKmode, destreg);
19004 	  expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
19009 	  /* If we know how many bytes need to be stored before dst is
19010 	     sufficiently aligned, maintain aliasing info accurately. */
19011 	  dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
19012 						 desired_align, align_bytes);
19013 	  count_exp = plus_constant (count_exp, -align_bytes);
19014 	  count -= align_bytes;
19016       if (need_zero_guard
19017 	  && (count < (unsigned HOST_WIDE_INT) size_needed
19018 	      || (align_bytes == 0
19019 		  && count < ((unsigned HOST_WIDE_INT) size_needed
19020 			      + desired_align - align))))
19022 	  /* It is possible that we copied enough so the main loop will not
19024 	  gcc_assert (size_needed > 1);
19025 	  if (label == NULL_RTX)
19026 	    label = gen_label_rtx ();
19027 	  emit_cmp_and_jump_insns (count_exp,
19028 				   GEN_INT (size_needed),
19029 				   LTU, 0, counter_mode (count_exp), 1, label);
19030 	  if (expected_size == -1
19031 	      || expected_size < (desired_align - align) / 2 + size_needed)
19032 	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
19034 	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
19037   if (label && size_needed == 1)
19039       emit_label (label);
19040       LABEL_NUSES (label) = 1;
/* With a byte-granular main loop the epilogue is unnecessary; fall back
   to the unpromoted value.  */
19042       promoted_val = val_exp;
19043       epilogue_size_needed = 1;
19045   else if (label == NULL_RTX)
19046     epilogue_size_needed = size_needed;
19048   /* Step 3: Main loop. */
19054       gcc_unreachable ();
19056       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19057 				     count_exp, QImode, 1, expected_size);
19060       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19061 				     count_exp, Pmode, 1, expected_size);
19063     case unrolled_loop:
19064       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19065 				     count_exp, Pmode, 4, expected_size);
19067     case rep_prefix_8_byte:
19068       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19071     case rep_prefix_4_byte:
19072       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19075     case rep_prefix_1_byte:
19076       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19080   /* Adjust properly the offset of src and dest memory for aliasing. */
19081   if (CONST_INT_P (count_exp))
19082     dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19083 					(count / size_needed) * size_needed);
19085     dst = change_address (dst, BLKmode, destreg);
19087   /* Step 4: Epilogue to copy the remaining bytes. */
19091       /* When the main loop is done, COUNT_EXP might hold original count,
19092 	 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
19093 	 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
19094 	 bytes. Compensate if needed. */
19096       if (size_needed < epilogue_size_needed)
19099 	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19100 				 GEN_INT (size_needed - 1), count_exp, 1,
19102 	  if (tmp != count_exp)
19103 	    emit_move_insn (count_exp, tmp);
19105       emit_label (label);
19106       LABEL_NUSES (label) = 1;
19109   if (count_exp != const0_rtx && epilogue_size_needed > 1)
19111       if (force_loopy_epilogue)
19112 	expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
19113 					 epilogue_size_needed);
19115 	expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
19116 				epilogue_size_needed);
19118   if (jump_around_label)
19119     emit_label (jump_around_label);
19123 /* Expand the appropriate insns for doing strlen if not just doing
19126    out = result, initialized with the start address
19127    align_rtx = alignment of the address.
19128    scratch = scratch register, initialized with the startaddress when
19129 	not aligned, otherwise undefined
19131    This is just the body.  It needs the initializations mentioned above and
19132    some address computing at the end.  These things are done in i386.md.  */
/* On return OUT holds the address of the terminating zero byte;
   i386.md subtracts the start address to form the length.  */
19135 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
19139   rtx align_2_label = NULL_RTX;
19140   rtx align_3_label = NULL_RTX;
19141   rtx align_4_label = gen_label_rtx ();
19142   rtx end_0_label = gen_label_rtx ();
19144   rtx tmpreg = gen_reg_rtx (SImode);
19145   rtx scratch = gen_reg_rtx (SImode);
19149   if (CONST_INT_P (align_rtx))
19150     align = INTVAL (align_rtx);
19152   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
19154   /* Is there a known alignment and is it less than 4?  */
19157       rtx scratch1 = gen_reg_rtx (Pmode);
19158       emit_move_insn (scratch1, out);
19159       /* Is there a known alignment and is it not 2? */
19162 	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
19163 	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
19165 	  /* Leave just the 3 lower bits.  */
19166 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
19167 				    NULL_RTX, 0, OPTAB_WIDEN);
19169 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19170 				   Pmode, 1, align_4_label);
19171 	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
19172 				   Pmode, 1, align_2_label);
19173 	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
19174 				   Pmode, 1, align_3_label);
19178 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
19179 	     check if is aligned to 4 - byte.  */
19181 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
19182 				    NULL_RTX, 0, OPTAB_WIDEN);
19184 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19185 				   Pmode, 1, align_4_label);
19188       mem = change_address (src, QImode, out);
19190       /* Now compare the bytes.  */
19192       /* Compare the first n unaligned byte on a byte per byte basis.  */
19193       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
19194 			       QImode, 1, end_0_label);
19196       /* Increment the address.  */
19197       emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19199       /* Not needed with an alignment of 2 */
19202 	  emit_label (align_2_label);
19204 	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19207 	  emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19209 	  emit_label (align_3_label);
19212       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19215       emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19218   /* Generate loop to check 4 bytes at a time.  It is not a good idea to
19219      align this loop.  It gives only huge programs, but does not help to
19221   emit_label (align_4_label);
19223   mem = change_address (src, SImode, out);
19224   emit_move_insn (scratch, mem);
19225   emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
19227   /* This formula yields a nonzero result iff one of the bytes is zero.
19228      This saves three branches inside loop and many cycles.  */
/* Classic "has zero byte" bit trick: (x - 0x01010101) & ~x & 0x80808080
   is nonzero exactly when some byte of X is zero.  */
19230   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
19231   emit_insn (gen_one_cmplsi2 (scratch, scratch));
19232   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
19233   emit_insn (gen_andsi3 (tmpreg, tmpreg,
19234 			 gen_int_mode (0x80808080, SImode)));
19235   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Branch-free variant: use conditional moves to step OUT past the two
   non-zero bytes when the zero is in the upper half of the word.  */
19240       rtx reg = gen_reg_rtx (SImode);
19241       rtx reg2 = gen_reg_rtx (Pmode);
19242       emit_move_insn (reg, tmpreg);
19243       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
19245       /* If zero is not in the first two bytes, move two bytes forward.  */
19246       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19247       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19248       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19249       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
19250 			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
19253       /* Emit lea manually to avoid clobbering of flags.  */
19254       emit_insn (gen_rtx_SET (SImode, reg2,
19255 			      gen_rtx_PLUS (Pmode, out, const2_rtx)));
19257       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19258       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19259       emit_insn (gen_rtx_SET (VOIDmode, out,
19260 			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Branchy variant for targets without cheap cmov.  */
19267       rtx end_2_label = gen_label_rtx ();
19268       /* Is zero in the first two bytes? */
19270       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19271       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19272       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19273       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19274 				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
19276       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19277       JUMP_LABEL (tmp) = end_2_label;
19279       /* Not in the first two.  Move two bytes forward.  */
19280       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19281       emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
19283       emit_label (end_2_label);
19287   /* Avoid branch in fixing the byte.  */
19288   tmpreg = gen_lowpart (QImode, tmpreg);
19289   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19290   cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
19291   emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
19293   emit_label (end_0_label);
19296 /* Expand strlen.  */
/* OUT receives the length; SRC is the string MEM; EOSCHAR is the
   terminator (the unrolled path requires it to be zero); ALIGN is the
   known alignment.  Returns nonzero on success.  Uses either the
   unrolled SImode scanner above or a repne-scasb sequence.  */
19299 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19301   rtx addr, scratch1, scratch2, scratch3, scratch4;
19303   /* The generic case of strlen expander is long.  Avoid it's
19304      expanding unless TARGET_INLINE_ALL_STRINGOPS.  */
19306   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19307       && !TARGET_INLINE_ALL_STRINGOPS
19308       && !optimize_insn_for_size_p ()
19309       && (!CONST_INT_P (align) || INTVAL (align) < 4))
19312   addr = force_reg (Pmode, XEXP (src, 0));
19313   scratch1 = gen_reg_rtx (Pmode);
19315   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19316       && !optimize_insn_for_size_p ())
19318       /* Well it seems that some optimizer does not combine a call like
19319 	 foo(strlen(bar), strlen(bar));
19320 	 when the move and the subtraction is done here.  It does calculate
19321 	 the length just once when these instructions are done inside of
19322 	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
19323 	 often used and I use one fewer register for the lifetime of
19324 	 output_strlen_unroll() this is better.  */
19326       emit_move_insn (out, addr);
19328       ix86_expand_strlensi_unroll_1 (out, src, align);
19330       /* strlensi_unroll_1 returns the address of the zero at the end of
19331 	 the string, like memchr(), so compute the length by subtracting
19332 	 the start address.  */
19333       emit_insn ((*ix86_gen_sub3) (out, out, addr));
19339       /* Can't use this if the user has appropriated eax, ecx, or edi.  */
19340       if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19343       scratch2 = gen_reg_rtx (Pmode);
19344       scratch3 = gen_reg_rtx (Pmode);
/* scas counts down from -1; the final not/add turns the remaining
   count back into the length.  */
19345       scratch4 = force_reg (Pmode, constm1_rtx);
19347       emit_move_insn (scratch3, addr);
19348       eoschar = force_reg (QImode, eoschar);
19350       src = replace_equiv_address_nv (src, scratch3);
19352       /* If .md starts supporting :P, this can be done in .md.  */
19353       unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19354 						 scratch4), UNSPEC_SCAS);
19355       emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19356       emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19357       emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19362 /* For given symbol (function) construct code to compute address of it's PLT
19363    entry in large x86-64 PIC model.  */
/* Emits TMP = PLTOFF(symbol) + PIC register and returns the pseudo
   holding the PLT entry address.  Only valid in CM_LARGE_PIC.  */
19365 construct_plt_address (rtx symbol)
19367   rtx tmp = gen_reg_rtx (Pmode);
19368   rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19370   gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19371   gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19373   emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19374   emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Emit a call (or sibcall when SIBCALL) to FNADDR with argument
   register note CALLARG1, storing the result in RETVAL if non-NULL.
   POP, when nonzero, is the number of bytes the callee pops (32-bit
   only).  CALLARG2 carries the AL vector-register count for 64-bit
   varargs and the MS->SYSV marker.  */
19379 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19381 		  rtx pop, int sibcall)
19383   rtx use = NULL, call;
19385   if (pop == const0_rtx)
19387   gcc_assert (!TARGET_64BIT || !pop);
19389   if (TARGET_MACHO && !TARGET_64BIT)
19392       if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19393 	fnaddr = machopic_indirect_call_target (fnaddr);
19398       /* Static functions and indirect calls don't need the pic register.  */
19399       if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19400 	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19401 	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19402 	use_reg (&use, pic_offset_table_rtx);
/* 64-bit varargs ABI: AL holds the number of SSE registers used.  */
19405   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19407       rtx al = gen_rtx_REG (QImode, AX_REG);
19408       emit_move_insn (al, callarg2);
19409       use_reg (&use, al);
19412   if (ix86_cmodel == CM_LARGE_PIC
19414       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19415       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19416     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19417   else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
19419       fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19420       fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls must go through a call-clobbered register that is
   not used for argument passing; R11 is reserved for this.  */
19422   if (sibcall && TARGET_64BIT
19423       && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
19426       addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19427       fnaddr = gen_rtx_REG (Pmode, R11_REG);
19428       emit_move_insn (fnaddr, addr);
19429       fnaddr = gen_rtx_MEM (QImode, fnaddr);
19432   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19434     call = gen_rtx_SET (VOIDmode, retval, call);
19437       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19438       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19439       call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19442       && ix86_cfun_abi () == MS_ABI
19443       && (!callarg2 || INTVAL (callarg2) != -2))
19445       /* We need to represent that SI and DI registers are clobbered
19447       static int clobbered_registers[] = {
19448 	XMM6_REG, XMM7_REG, XMM8_REG,
19449 	XMM9_REG, XMM10_REG, XMM11_REG,
19450 	XMM12_REG, XMM13_REG, XMM14_REG,
19451 	XMM15_REG, SI_REG, DI_REG
19454       rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19455       rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19456 				   UNSPEC_MS_TO_SYSV_CALL);
19460       for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19461 	vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19464 				      (SSE_REGNO_P (clobbered_registers[i])
19466 				       clobbered_registers[i]));
19468       call = gen_rtx_PARALLEL (VOIDmode,
19469 			       gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19473   call = emit_call_insn (call);
19475     CALL_INSN_FUNCTION_USAGE (call) = use;
19479 /* Clear stack slot assignments remembered from previous functions.
19480    This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocates and returns a zeroed per-function machine_function record
   with the x86-specific defaults filled in.  */
19483 static struct machine_function *
19484 ix86_init_machine_status (void)
19486   struct machine_function *f;
19488   f = GGC_CNEW (struct machine_function);
19489   f->use_fast_prologue_epilogue_nregs = -1;
19490   f->tls_descriptor_call_expanded_p = 0;
19491   f->call_abi = ix86_abi;
19496 /* Return a MEM corresponding to a stack slot with mode MODE.
19497    Allocate a new slot if necessary.
19499    The RTL for a function can have several slots available: N is
19500    which slot to use.  */
19503 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19505   struct stack_local_entry *s;
19507   gcc_assert (n < MAX_386_STACK_LOCALS);
19509   /* Virtual slot is valid only before vregs are instantiated.  */
19510   gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse a previously assigned slot with matching mode and index;
   copy_rtx so callers may modify the returned MEM freely.  */
19512   for (s = ix86_stack_locals; s; s = s->next)
19513     if (s->mode == mode && s->n == n)
19514       return copy_rtx (s->rtl);
19516   s = (struct stack_local_entry *)
19517     ggc_alloc (sizeof (struct stack_local_entry));
19520   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
/* Prepend the new slot to the per-function list.  */
19522   s->next = ix86_stack_locals;
19523   ix86_stack_locals = s;
19527 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
19529 static GTY(()) rtx ix86_tls_symbol;
/* Lazily creates and caches the SYMBOL_REF; GNU TLS uses the
   triple-underscore entry point.  */
19531 ix86_tls_get_addr (void)
19534   if (!ix86_tls_symbol)
19536       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19537 					    (TARGET_ANY_GNU_TLS
19539 					     ? "___tls_get_addr"
19540 					     : "__tls_get_addr");
19543   return ix86_tls_symbol;
19546 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
19548 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily creates and caches the symbol, marking it global-dynamic so
   the TLS machinery treats it correctly.  */
19550 ix86_tls_module_base (void)
19553   if (!ix86_tls_module_base_symbol)
19555       ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19556 							"_TLS_MODULE_BASE_");
19557       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19558 	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19561   return ix86_tls_module_base_symbol;
19564 /* Calculate the length of the memory address in the instruction
19565    encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
/* Returns the number of extra bytes (SIB + displacement) needed to
   encode ADDR beyond the base modrm byte.  */
19568 memory_address_length (rtx addr)
19570   struct ix86_address parts;
19571   rtx base, index, disp;
/* Auto-modified addresses encode as plain (%reg), needing no extras.  */
19575   if (GET_CODE (addr) == PRE_DEC
19576       || GET_CODE (addr) == POST_INC
19577       || GET_CODE (addr) == PRE_MODIFY
19578       || GET_CODE (addr) == POST_MODIFY)
19581   ok = ix86_decompose_address (addr, &parts);
19584   if (parts.base && GET_CODE (parts.base) == SUBREG)
19585     parts.base = SUBREG_REG (parts.base);
19586   if (parts.index && GET_CODE (parts.index) == SUBREG)
19587     parts.index = SUBREG_REG (parts.index);
19590   index = parts.index;
19595      - esp as the base always wants an index,
19596      - ebp as the base always wants a displacement,
19597      - r12 as the base always wants an index,
19598      - r13 as the base always wants a displacement.  */
19600   /* Register Indirect.  */
19601   if (base && !index && !disp)
19603       /* esp (for its index) and ebp (for its displacement) need
19604 	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
19607 	  && (addr == arg_pointer_rtx
19608 	      || addr == frame_pointer_rtx
19609 	      || REGNO (addr) == SP_REG
19610 	      || REGNO (addr) == BP_REG
19611 	      || REGNO (addr) == R12_REG
19612 	      || REGNO (addr) == R13_REG))
19616   /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
19617      is not disp32, but disp32(%rip), so for disp32
19618      SIB byte is needed, unless print_operand_address
19619      optimizes it into disp32(%rip) or (%rip) is implied
19621   else if (disp && !base && !index)
19628 	  if (GET_CODE (disp) == CONST)
19629 	    symbol = XEXP (disp, 0);
19630 	  if (GET_CODE (symbol) == PLUS
19631 	      && CONST_INT_P (XEXP (symbol, 1)))
19632 	    symbol = XEXP (symbol, 0);
19634 	  if (GET_CODE (symbol) != LABEL_REF
19635 	      && (GET_CODE (symbol) != SYMBOL_REF
19636 		  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
19637 	      && (GET_CODE (symbol) != UNSPEC
19638 		  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
19639 		      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
19646       /* Find the length of the displacement constant.  */
/* A disp that fits in a signed byte (constraint K) uses disp8,
   otherwise disp32.  */
19649 	  if (base && satisfies_constraint_K (disp))
19654       /* ebp always wants a displacement.  Similarly r13.  */
19655       else if (REG_P (base)
19656 	       && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
19659       /* An index requires the two-byte modrm form....  */
19661 	  /* ...like esp (or r12), which always wants an index.  */
19662 	  || base == arg_pointer_rtx
19663 	  || base == frame_pointer_rtx
19665 	      && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
19682 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
19683    is set, expect that insn have 8bit immediate alternative.  */
/* Scans INSN's operands for the (at most one) constant operand and
   returns its encoded size in bytes for the insn's mode.  */
19685 ix86_attr_length_immediate_default (rtx insn, int shortform)
19689   extract_insn_cached (insn);
19690   for (i = recog_data.n_operands - 1; i >= 0; --i)
19691     if (CONSTANT_P (recog_data.operand[i]))
19693 	enum attr_mode mode = get_attr_mode (insn);
/* With a short-form alternative, an immediate that fits in a signed
   byte after truncation to the operand mode encodes as one byte.  */
19696 	if (shortform && CONST_INT_P (recog_data.operand[i]))
19698 	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
19705 		ival = trunc_int_for_mode (ival, HImode);
19708 		ival = trunc_int_for_mode (ival, SImode);
19713 	    if (IN_RANGE (ival, -128, 127))
19730 	    /* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
19735 	    fatal_insn ("unknown insn mode", insn);
19740 /* Compute default value for "length_address" attribute.  */
/* Returns the encoded address length of INSN's memory operand (or the
   LEA source address), zero when there is none.  */
19742 ix86_attr_length_address_default (rtx insn)
19746   if (get_attr_type (insn) == TYPE_LEA)
19748       rtx set = PATTERN (insn), addr;
19750       if (GET_CODE (set) == PARALLEL)
19751 	set = XVECEXP (set, 0, 0);
19753       gcc_assert (GET_CODE (set) == SET);
19755       addr = SET_SRC (set);
/* 32-bit LEA on x86-64 wraps the address in zero_extend/subreg;
   strip those to reach the address proper.  */
19756       if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
19758 	  if (GET_CODE (addr) == ZERO_EXTEND)
19759 	    addr = XEXP (addr, 0);
19760 	  if (GET_CODE (addr) == SUBREG)
19761 	    addr = SUBREG_REG (addr);
19764       return memory_address_length (addr);
19767   extract_insn_cached (insn);
19768   for (i = recog_data.n_operands - 1; i >= 0; --i)
19769     if (MEM_P (recog_data.operand[i]))
19771 	constrain_operands_cached (reload_completed);
19772 	if (which_alternative != -1)
19774 	    const char *constraints = recog_data.constraints[i];
19775 	    int alt = which_alternative;
19777 	    while (*constraints == '=' || *constraints == '+')
/* Advance to the constraint string of the matched alternative.  */
19780 	      while (*constraints++ != ',')
19782 	    /* Skip ignored operands.  */
19783 	    if (*constraints == 'X')
19786 	return memory_address_length (XEXP (recog_data.operand[i], 0));
19791 /* Compute default value for "length_vex" attribute.  It includes
19792    2 or 3 byte VEX prefix and 1 opcode byte.  */
/* Returns the combined prefix+opcode byte count (3 or 4) for INSN.
   The 2-byte VEX form is only usable with the 0F opcode map, without
   VEX.W, and without any REX.W/R/X/B-requiring operand.  */
19795 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19800   /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
19801      byte VEX prefix.  */
19802   if (!has_0f_opcode || has_vex_w)
19805   /* We can always use 2 byte VEX prefix in 32bit.  */
19809   extract_insn_cached (insn);
19811   for (i = recog_data.n_operands - 1; i >= 0; --i)
19812     if (REG_P (recog_data.operand[i]))
19814 	/* REX.W bit uses 3 byte VEX prefix.  */
19815 	if (GET_MODE (recog_data.operand[i]) == DImode
19816 	    && GENERAL_REG_P (recog_data.operand[i]))
19821 	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
19822 	if (MEM_P (recog_data.operand[i])
19823 	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19830 /* Return the maximum number of instructions a cpu can issue.  */
/* Used by the scheduler (TARGET_SCHED_ISSUE_RATE); grouped by
   micro-architecture family.  */
19833 ix86_issue_rate (void)
19837     case PROCESSOR_PENTIUM:
19838     case PROCESSOR_ATOM:
19842     case PROCESSOR_PENTIUMPRO:
19843     case PROCESSOR_PENTIUM4:
19844     case PROCESSOR_ATHLON:
19846     case PROCESSOR_AMDFAM10:
19847     case PROCESSOR_NOCONA:
19848     case PROCESSOR_GENERIC32:
19849     case PROCESSOR_GENERIC64:
19852     case PROCESSOR_CORE2:
19860 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19861    by DEP_INSN and nothing set by DEP_INSN.  */
19864 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19868   /* Simplify the test for uninteresting insns.  */
19869   if (insn_type != TYPE_SETCC
19870       && insn_type != TYPE_ICMOV
19871       && insn_type != TYPE_FCMOV
19872       && insn_type != TYPE_IBR)
19875   if ((set = single_set (dep_insn)) != 0)
19877       set = SET_DEST (set);
/* Two-SET parallel (e.g. an arithmetic insn also setting flags):
   collect both destinations.  */
19880   else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19881 	   && XVECLEN (PATTERN (dep_insn), 0) == 2
19882 	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19883 	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19885       set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
/* NOTE(review): SET and SET2 both read vector element 0 here although
   the guard above validated elements 0 AND 1; SET2 presumably should
   read element 1 -- verify against upstream i386.c before relying on
   this path.  */
19886       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19891   if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19894   /* This test is true if the dependent insn reads the flags but
19895      not any other potentially set register.  */
19896   if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19899   if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19905 /* Return true iff USE_INSN has a memory address with operands set by
/* SET_INSN.  Used to model address-generation interlocks: only the
   first memory operand found is examined.  */
19909 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19912   extract_insn_cached (use_insn);
19913   for (i = recog_data.n_operands - 1; i >= 0; --i)
19914     if (MEM_P (recog_data.operand[i]))
19916 	rtx addr = XEXP (recog_data.operand[i], 0);
19917 	return modified_in_p (addr, set_insn) != 0;
/* TARGET_SCHED_ADJUST_COST hook: tweak the latency COST of the
   dependence LINK from DEP_INSN to INSN for the current tuning.  */
19923 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19925   enum attr_type insn_type, dep_insn_type;
19926   enum attr_memory memory;
19928   int dep_insn_code_number;
19930   /* Anti and output dependencies have zero cost on all CPUs.  */
19931   if (REG_NOTE_KIND (link) != 0)
19934   dep_insn_code_number = recog_memoized (dep_insn);
19936   /* If we can't recognize the insns, we can't really do anything.  */
19937   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19940   insn_type = get_attr_type (insn);
19941   dep_insn_type = get_attr_type (dep_insn);
19945     case PROCESSOR_PENTIUM:
19946       /* Address Generation Interlock adds a cycle of latency.  */
19947       if (insn_type == TYPE_LEA)
19949 	  rtx addr = PATTERN (insn);
19951 	  if (GET_CODE (addr) == PARALLEL)
19952 	    addr = XVECEXP (addr, 0, 0);
19954 	  gcc_assert (GET_CODE (addr) == SET);
19956 	  addr = SET_SRC (addr);
19957 	  if (modified_in_p (addr, dep_insn))
19960       else if (ix86_agi_dependent (dep_insn, insn))
19963       /* ??? Compares pair with jump/setcc.  */
19964       if (ix86_flags_dependent (insn, dep_insn, insn_type))
19967       /* Floating point stores require value to be ready one cycle earlier.  */
19968       if (insn_type == TYPE_FMOV
19969 	  && get_attr_memory (insn) == MEMORY_STORE
19970 	  && !ix86_agi_dependent (dep_insn, insn))
19974     case PROCESSOR_PENTIUMPRO:
19975       memory = get_attr_memory (insn);
19977       /* INT->FP conversion is expensive.  */
19978       if (get_attr_fp_int_src (dep_insn))
19981       /* There is one cycle extra latency between an FP op and a store.  */
19982       if (insn_type == TYPE_FMOV
19983 	  && (set = single_set (dep_insn)) != NULL_RTX
19984 	  && (set2 = single_set (insn)) != NULL_RTX
19985 	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19986 	  && MEM_P (SET_DEST (set2)))
19989       /* Show ability of reorder buffer to hide latency of load by executing
19990 	 in parallel with previous instruction in case
19991 	 previous instruction is not needed to compute the address.  */
19992       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19993 	  && !ix86_agi_dependent (dep_insn, insn))
19995 	  /* Claim moves to take one cycle, as core can issue one load
19996 	     at time and the next load can start cycle later.  */
19997 	  if (dep_insn_type == TYPE_IMOV
19998 	      || dep_insn_type == TYPE_FMOV)
20006       memory = get_attr_memory (insn);
20008       /* The esp dependency is resolved before the instruction is really
20010       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
20011 	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
20014       /* INT->FP conversion is expensive.  */
20015       if (get_attr_fp_int_src (dep_insn))
20018       /* Show ability of reorder buffer to hide latency of load by executing
20019 	 in parallel with previous instruction in case
20020 	 previous instruction is not needed to compute the address.  */
20021       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20022 	  && !ix86_agi_dependent (dep_insn, insn))
20024 	  /* Claim moves to take one cycle, as core can issue one load
20025 	     at time and the next load can start cycle later.  */
20026 	  if (dep_insn_type == TYPE_IMOV
20027 	      || dep_insn_type == TYPE_FMOV)
20036     case PROCESSOR_ATHLON:
20038     case PROCESSOR_AMDFAM10:
20039     case PROCESSOR_ATOM:
20040     case PROCESSOR_GENERIC32:
20041     case PROCESSOR_GENERIC64:
20042       memory = get_attr_memory (insn);
20044       /* Show ability of reorder buffer to hide latency of load by executing
20045 	 in parallel with previous instruction in case
20046 	 previous instruction is not needed to compute the address.  */
20047       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20048 	  && !ix86_agi_dependent (dep_insn, insn))
20050 	  enum attr_unit unit = get_attr_unit (insn);
20053 	  /* Because of the difference between the length of integer and
20054 	     floating unit pipeline preparation stages, the memory operands
20055 	     for floating point are cheaper.
20057 	     ??? For Athlon it the difference is most probably 2.  */
20058 	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
20061 	    loadcost = TARGET_ATHLON ? 2 : 0;
20063 	  if (cost >= loadcost)
20076 /* How many alternative schedules to try. This should be as wide as the
20077    scheduling freedom in the DFA, but no wider. Making this value too
20078    large results extra work for the scheduler. */
/* NOTE(review): this listing is missing lines here (the `static int`
   return-type line, the opening brace, the switch on ix86_tune and the
   per-case `return` statements were dropped by the extraction). Only the
   case labels survive below -- confirm bodies against the full source. */
20081 ia32_multipass_dfa_lookahead (void)
/* Pentium and PentiumPro get an explicit (nonzero) lookahead; the
   returned values are on dropped lines -- TODO confirm. All other CPUs
   presumably fall through to a default of 0. */
20085     case PROCESSOR_PENTIUM:
20088     case PROCESSOR_PENTIUMPRO:
20098 /* Compute the alignment given to a constant that is being placed in memory.
20099    EXP is the constant and ALIGN is the alignment that the object would
20101    The value of this function is used instead of that alignment to align
   ordinarily have (the CONSTANT_ALIGNMENT target hook contract): return
   the possibly-increased alignment, in bits, never less than ALIGN. */
/* NOTE(review): the `int` return-type line, braces and the `return`
   statements inside the branches (e.g. `return 64;` / `return 128;`)
   are on lines dropped from this listing. */
20105 ix86_constant_alignment (tree exp, int align)
/* Numeric/vector constants: widen DFmode constants to 64-bit alignment
   and 128-bit-mode constants (per ALIGN_MODE_128) to 128 bits. */
20107   if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
20108       || TREE_CODE (exp) == INTEGER_CST)
20110       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
20112       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants (>= 31 chars): word-align them, but only when
   not optimizing for size -- the padding costs data-section bytes. */
20115   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
20116 	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
20117     return BITS_PER_WORD;
20122 /* Compute the alignment for a static variable.
20123    TYPE is the data type, and ALIGN is the alignment that
20124    the object would ordinarily have. The value of this function is used
20125    instead of that alignment to align the object. */
/* NOTE(review): the `int` return-type line, braces and most `return`
   statements were dropped from this listing; only the conditions remain. */
20128 ix86_data_alignment (tree type, int align)
/* Cap the boost at 256 bits (or the object-file maximum, if smaller);
   when optimizing for size, never boost past the natural word size. */
20130   int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* Large aggregates (size >= max_align bits, or with a high word in the
   size constant, i.e. huge) are bumped to max_align. */
20132   if (AGGREGATE_TYPE_P (type)
20133       && TYPE_SIZE (type)
20134       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20135       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
20136 	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
20137       && align < max_align)
20140   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
20141      to 16byte boundary. */
/* (The TARGET_64BIT guard for the ABI rule below is on a dropped line --
   TODO confirm.) 128 here is bits, i.e. the 16-byte ABI boundary. */
20144       if (AGGREGATE_TYPE_P (type)
20145 	  && TYPE_SIZE (type)
20146 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20147 	  && (TREE_INT_CST_LOW (TYPE_SIZE (type))) >= 128
20148 	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Element-based boosts: arrays of doubles -> 64, arrays whose element
   mode is a 128-bit mode -> 128. */
20152   if (TREE_CODE (type) == ARRAY_TYPE)
20154       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20156       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
/* Complex types: double complex -> 64; XFmode/TFmode complex -> 128. */
20159   else if (TREE_CODE (type) == COMPLEX_TYPE)
20162       if (TYPE_MODE (type) == DCmode && align < 64)
20164       if ((TYPE_MODE (type) == XCmode
20165 	   || TYPE_MODE (type) == TCmode) && align < 128)
/* Records/unions: key the boost off the mode of the first field. */
20168   else if ((TREE_CODE (type) == RECORD_TYPE
20169 	    || TREE_CODE (type) == UNION_TYPE
20170 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
20171 	   && TYPE_FIELDS (type))
20173       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20175       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
/* Scalars and vectors: same DFmode / 128-bit-mode boosts as above. */
20178   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20179 	   || TREE_CODE (type) == INTEGER_TYPE)
20181       if (TYPE_MODE (type) == DFmode && align < 64)
20183       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20190 /* Compute the alignment for a local variable or a stack slot. EXP is
20191    the data type or decl itself, MODE is the widest mode available and
20192    ALIGN is the alignment that the object would ordinarily have. The
20193    value of this macro is used instead of that alignment to align the
   object (the LOCAL_ALIGNMENT / STACK_SLOT_ALIGNMENT hook contract). */
/* NOTE(review): return-type line, braces, a `decl` local and several
   `return` statements were dropped from this listing. */
20197 ix86_local_alignment (tree exp, enum machine_mode mode,
20198 		      unsigned int align)
/* EXP may be a decl or a bare type; pick out the type for a decl.
   (The else-branch assigning `type = exp` is on a dropped line.) */
20202   if (exp && DECL_P (exp))
20204       type = TREE_TYPE (exp);
20213   /* Don't do dynamic stack realignment for long long objects with
20214      -mpreferred-stack-boundary=2. */
/* Presumably guarded by !TARGET_64BIT on a dropped line -- TODO confirm.
   Returning only 32-bit alignment here avoids forcing realignment. */
20217       && ix86_preferred_stack_boundary < 64
20218       && (mode == DImode || (type && TYPE_MODE (type) == DImode))
20219       && (!type || !TYPE_USER_ALIGN (type))
20220       && (!decl || !DECL_USER_ALIGN (decl)))
20223   /* If TYPE is NULL, we are allocating a stack slot for caller-save
20224      register in MODE. We will return the largest alignment of XF
   and DF to avoid wasting stack space for misaligned spills. */
20228       if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
20229 	align = GET_MODE_ALIGNMENT (DFmode);
20233   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
20234      to 16byte boundary. */
/* Note the threshold is 16 *bits of low word* here vs. 128 in
   ix86_data_alignment -- looks inconsistent; confirm against upstream. */
20237       if (AGGREGATE_TYPE_P (type)
20238 	  && TYPE_SIZE (type)
20239 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20240 	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
20241 	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* The per-type boosts below mirror ix86_data_alignment: arrays,
   complex types, records (first field), then scalars/vectors. */
20244   if (TREE_CODE (type) == ARRAY_TYPE)
20246       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20248       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20251   else if (TREE_CODE (type) == COMPLEX_TYPE)
20253       if (TYPE_MODE (type) == DCmode && align < 64)
20255       if ((TYPE_MODE (type) == XCmode
20256 	   || TYPE_MODE (type) == TCmode) && align < 128)
20259   else if ((TREE_CODE (type) == RECORD_TYPE
20260 	    || TREE_CODE (type) == UNION_TYPE
20261 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
20262 	   && TYPE_FIELDS (type))
20264       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20266       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20269   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20270 	   || TREE_CODE (type) == INTEGER_TYPE)
20273       if (TYPE_MODE (type) == DFmode && align < 64)
20275       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20281 /* Emit RTL insns to initialize the variable parts of a trampoline.
20282    FNADDR is an RTX for the address of the function's pure code.
20283    CXT is an RTX for the static chain value for the function. */
/* NOTE(review): the `void` return-type line, braces, the TARGET_64BIT
   branch structure and the `offset` bookkeeping lines were dropped from
   this listing; only the emit calls survive. */
20285 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
/* --- 32-bit trampoline (10 bytes): mov ecx, CXT; jmp FNADDR --- */
20289       /* Compute offset from the end of the jmp to the target function. */
20290       rtx disp = expand_binop (SImode, sub_optab, fnaddr,
20291 			       plus_constant (tramp, 10),
20292 			       NULL_RTX, 1, OPTAB_DIRECT);
/* 0xb9 is the x86 opcode for `mov ecx, imm32` (static chain in %ecx). */
20293       emit_move_insn (gen_rtx_MEM (QImode, tramp),
20294 		      gen_int_mode (0xb9, QImode));
20295       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
/* 0xe9 is `jmp rel32`; DISP is relative to the end of the instruction. */
20296       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
20297 		      gen_int_mode (0xe9, QImode));
20298       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
/* --- 64-bit trampoline: load FNADDR into r11, CXT into r10, jmp r11. --- */
20303       /* Try to load address using shorter movl instead of movabs.
20304          We may want to support movq for kernel mode, but kernel does not use
20305          trampolines at the moment. */
20306       if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
/* 0xbb41 little-endian = 41 bb: REX.B + `mov r11d, imm32` (zero-extends). */
20308 	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
20309 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20310 			  gen_int_mode (0xbb41, HImode));
20311 	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
20312 			  gen_lowpart (SImode, fnaddr));
/* Else: 0xbb49 = 49 bb: REX.W+B `movabs r11, imm64` (operand on a dropped line). */
20317 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20318 			  gen_int_mode (0xbb49, HImode));
20319 	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20323       /* Load static chain using movabs to r10. */
/* 0xba49 = 49 ba: REX.W+B `movabs r10, imm64`. */
20324       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20325 		      gen_int_mode (0xba49, HImode));
20326       emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20329       /* Jump to the r11 */
/* 0xff49 = 49 ff, then 0xe3: REX.B `jmp r11` (ff /4, modrm e3). */
20330       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20331 		      gen_int_mode (0xff49, HImode));
20332       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
20333 		      gen_int_mode (0xe3, QImode));
/* Sanity check: the emitted code must fit the declared TRAMPOLINE_SIZE. */
20335   gcc_assert (offset <= TRAMPOLINE_SIZE);
/* Some platforms require making the stack executable before running
   trampolines from it; done via a libcall at runtime. */
20338 #ifdef ENABLE_EXECUTE_STACK
20339   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
20340 		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
20344 /* Codes for all the SSE/MMX builtins. */
20347 IX86_BUILTIN_ADDPS,
20348 IX86_BUILTIN_ADDSS,
20349 IX86_BUILTIN_DIVPS,
20350 IX86_BUILTIN_DIVSS,
20351 IX86_BUILTIN_MULPS,
20352 IX86_BUILTIN_MULSS,
20353 IX86_BUILTIN_SUBPS,
20354 IX86_BUILTIN_SUBSS,
20356 IX86_BUILTIN_CMPEQPS,
20357 IX86_BUILTIN_CMPLTPS,
20358 IX86_BUILTIN_CMPLEPS,
20359 IX86_BUILTIN_CMPGTPS,
20360 IX86_BUILTIN_CMPGEPS,
20361 IX86_BUILTIN_CMPNEQPS,
20362 IX86_BUILTIN_CMPNLTPS,
20363 IX86_BUILTIN_CMPNLEPS,
20364 IX86_BUILTIN_CMPNGTPS,
20365 IX86_BUILTIN_CMPNGEPS,
20366 IX86_BUILTIN_CMPORDPS,
20367 IX86_BUILTIN_CMPUNORDPS,
20368 IX86_BUILTIN_CMPEQSS,
20369 IX86_BUILTIN_CMPLTSS,
20370 IX86_BUILTIN_CMPLESS,
20371 IX86_BUILTIN_CMPNEQSS,
20372 IX86_BUILTIN_CMPNLTSS,
20373 IX86_BUILTIN_CMPNLESS,
20374 IX86_BUILTIN_CMPNGTSS,
20375 IX86_BUILTIN_CMPNGESS,
20376 IX86_BUILTIN_CMPORDSS,
20377 IX86_BUILTIN_CMPUNORDSS,
20379 IX86_BUILTIN_COMIEQSS,
20380 IX86_BUILTIN_COMILTSS,
20381 IX86_BUILTIN_COMILESS,
20382 IX86_BUILTIN_COMIGTSS,
20383 IX86_BUILTIN_COMIGESS,
20384 IX86_BUILTIN_COMINEQSS,
20385 IX86_BUILTIN_UCOMIEQSS,
20386 IX86_BUILTIN_UCOMILTSS,
20387 IX86_BUILTIN_UCOMILESS,
20388 IX86_BUILTIN_UCOMIGTSS,
20389 IX86_BUILTIN_UCOMIGESS,
20390 IX86_BUILTIN_UCOMINEQSS,
20392 IX86_BUILTIN_CVTPI2PS,
20393 IX86_BUILTIN_CVTPS2PI,
20394 IX86_BUILTIN_CVTSI2SS,
20395 IX86_BUILTIN_CVTSI642SS,
20396 IX86_BUILTIN_CVTSS2SI,
20397 IX86_BUILTIN_CVTSS2SI64,
20398 IX86_BUILTIN_CVTTPS2PI,
20399 IX86_BUILTIN_CVTTSS2SI,
20400 IX86_BUILTIN_CVTTSS2SI64,
20402 IX86_BUILTIN_MAXPS,
20403 IX86_BUILTIN_MAXSS,
20404 IX86_BUILTIN_MINPS,
20405 IX86_BUILTIN_MINSS,
20407 IX86_BUILTIN_LOADUPS,
20408 IX86_BUILTIN_STOREUPS,
20409 IX86_BUILTIN_MOVSS,
20411 IX86_BUILTIN_MOVHLPS,
20412 IX86_BUILTIN_MOVLHPS,
20413 IX86_BUILTIN_LOADHPS,
20414 IX86_BUILTIN_LOADLPS,
20415 IX86_BUILTIN_STOREHPS,
20416 IX86_BUILTIN_STORELPS,
20418 IX86_BUILTIN_MASKMOVQ,
20419 IX86_BUILTIN_MOVMSKPS,
20420 IX86_BUILTIN_PMOVMSKB,
20422 IX86_BUILTIN_MOVNTPS,
20423 IX86_BUILTIN_MOVNTQ,
20425 IX86_BUILTIN_LOADDQU,
20426 IX86_BUILTIN_STOREDQU,
20428 IX86_BUILTIN_PACKSSWB,
20429 IX86_BUILTIN_PACKSSDW,
20430 IX86_BUILTIN_PACKUSWB,
20432 IX86_BUILTIN_PADDB,
20433 IX86_BUILTIN_PADDW,
20434 IX86_BUILTIN_PADDD,
20435 IX86_BUILTIN_PADDQ,
20436 IX86_BUILTIN_PADDSB,
20437 IX86_BUILTIN_PADDSW,
20438 IX86_BUILTIN_PADDUSB,
20439 IX86_BUILTIN_PADDUSW,
20440 IX86_BUILTIN_PSUBB,
20441 IX86_BUILTIN_PSUBW,
20442 IX86_BUILTIN_PSUBD,
20443 IX86_BUILTIN_PSUBQ,
20444 IX86_BUILTIN_PSUBSB,
20445 IX86_BUILTIN_PSUBSW,
20446 IX86_BUILTIN_PSUBUSB,
20447 IX86_BUILTIN_PSUBUSW,
20450 IX86_BUILTIN_PANDN,
20454 IX86_BUILTIN_PAVGB,
20455 IX86_BUILTIN_PAVGW,
20457 IX86_BUILTIN_PCMPEQB,
20458 IX86_BUILTIN_PCMPEQW,
20459 IX86_BUILTIN_PCMPEQD,
20460 IX86_BUILTIN_PCMPGTB,
20461 IX86_BUILTIN_PCMPGTW,
20462 IX86_BUILTIN_PCMPGTD,
20464 IX86_BUILTIN_PMADDWD,
20466 IX86_BUILTIN_PMAXSW,
20467 IX86_BUILTIN_PMAXUB,
20468 IX86_BUILTIN_PMINSW,
20469 IX86_BUILTIN_PMINUB,
20471 IX86_BUILTIN_PMULHUW,
20472 IX86_BUILTIN_PMULHW,
20473 IX86_BUILTIN_PMULLW,
20475 IX86_BUILTIN_PSADBW,
20476 IX86_BUILTIN_PSHUFW,
20478 IX86_BUILTIN_PSLLW,
20479 IX86_BUILTIN_PSLLD,
20480 IX86_BUILTIN_PSLLQ,
20481 IX86_BUILTIN_PSRAW,
20482 IX86_BUILTIN_PSRAD,
20483 IX86_BUILTIN_PSRLW,
20484 IX86_BUILTIN_PSRLD,
20485 IX86_BUILTIN_PSRLQ,
20486 IX86_BUILTIN_PSLLWI,
20487 IX86_BUILTIN_PSLLDI,
20488 IX86_BUILTIN_PSLLQI,
20489 IX86_BUILTIN_PSRAWI,
20490 IX86_BUILTIN_PSRADI,
20491 IX86_BUILTIN_PSRLWI,
20492 IX86_BUILTIN_PSRLDI,
20493 IX86_BUILTIN_PSRLQI,
20495 IX86_BUILTIN_PUNPCKHBW,
20496 IX86_BUILTIN_PUNPCKHWD,
20497 IX86_BUILTIN_PUNPCKHDQ,
20498 IX86_BUILTIN_PUNPCKLBW,
20499 IX86_BUILTIN_PUNPCKLWD,
20500 IX86_BUILTIN_PUNPCKLDQ,
20502 IX86_BUILTIN_SHUFPS,
20504 IX86_BUILTIN_RCPPS,
20505 IX86_BUILTIN_RCPSS,
20506 IX86_BUILTIN_RSQRTPS,
20507 IX86_BUILTIN_RSQRTPS_NR,
20508 IX86_BUILTIN_RSQRTSS,
20509 IX86_BUILTIN_RSQRTF,
20510 IX86_BUILTIN_SQRTPS,
20511 IX86_BUILTIN_SQRTPS_NR,
20512 IX86_BUILTIN_SQRTSS,
20514 IX86_BUILTIN_UNPCKHPS,
20515 IX86_BUILTIN_UNPCKLPS,
20517 IX86_BUILTIN_ANDPS,
20518 IX86_BUILTIN_ANDNPS,
20520 IX86_BUILTIN_XORPS,
20523 IX86_BUILTIN_LDMXCSR,
20524 IX86_BUILTIN_STMXCSR,
20525 IX86_BUILTIN_SFENCE,
20527 /* 3DNow! Original */
20528 IX86_BUILTIN_FEMMS,
20529 IX86_BUILTIN_PAVGUSB,
20530 IX86_BUILTIN_PF2ID,
20531 IX86_BUILTIN_PFACC,
20532 IX86_BUILTIN_PFADD,
20533 IX86_BUILTIN_PFCMPEQ,
20534 IX86_BUILTIN_PFCMPGE,
20535 IX86_BUILTIN_PFCMPGT,
20536 IX86_BUILTIN_PFMAX,
20537 IX86_BUILTIN_PFMIN,
20538 IX86_BUILTIN_PFMUL,
20539 IX86_BUILTIN_PFRCP,
20540 IX86_BUILTIN_PFRCPIT1,
20541 IX86_BUILTIN_PFRCPIT2,
20542 IX86_BUILTIN_PFRSQIT1,
20543 IX86_BUILTIN_PFRSQRT,
20544 IX86_BUILTIN_PFSUB,
20545 IX86_BUILTIN_PFSUBR,
20546 IX86_BUILTIN_PI2FD,
20547 IX86_BUILTIN_PMULHRW,
20549 /* 3DNow! Athlon Extensions */
20550 IX86_BUILTIN_PF2IW,
20551 IX86_BUILTIN_PFNACC,
20552 IX86_BUILTIN_PFPNACC,
20553 IX86_BUILTIN_PI2FW,
20554 IX86_BUILTIN_PSWAPDSI,
20555 IX86_BUILTIN_PSWAPDSF,
20558 IX86_BUILTIN_ADDPD,
20559 IX86_BUILTIN_ADDSD,
20560 IX86_BUILTIN_DIVPD,
20561 IX86_BUILTIN_DIVSD,
20562 IX86_BUILTIN_MULPD,
20563 IX86_BUILTIN_MULSD,
20564 IX86_BUILTIN_SUBPD,
20565 IX86_BUILTIN_SUBSD,
20567 IX86_BUILTIN_CMPEQPD,
20568 IX86_BUILTIN_CMPLTPD,
20569 IX86_BUILTIN_CMPLEPD,
20570 IX86_BUILTIN_CMPGTPD,
20571 IX86_BUILTIN_CMPGEPD,
20572 IX86_BUILTIN_CMPNEQPD,
20573 IX86_BUILTIN_CMPNLTPD,
20574 IX86_BUILTIN_CMPNLEPD,
20575 IX86_BUILTIN_CMPNGTPD,
20576 IX86_BUILTIN_CMPNGEPD,
20577 IX86_BUILTIN_CMPORDPD,
20578 IX86_BUILTIN_CMPUNORDPD,
20579 IX86_BUILTIN_CMPEQSD,
20580 IX86_BUILTIN_CMPLTSD,
20581 IX86_BUILTIN_CMPLESD,
20582 IX86_BUILTIN_CMPNEQSD,
20583 IX86_BUILTIN_CMPNLTSD,
20584 IX86_BUILTIN_CMPNLESD,
20585 IX86_BUILTIN_CMPORDSD,
20586 IX86_BUILTIN_CMPUNORDSD,
20588 IX86_BUILTIN_COMIEQSD,
20589 IX86_BUILTIN_COMILTSD,
20590 IX86_BUILTIN_COMILESD,
20591 IX86_BUILTIN_COMIGTSD,
20592 IX86_BUILTIN_COMIGESD,
20593 IX86_BUILTIN_COMINEQSD,
20594 IX86_BUILTIN_UCOMIEQSD,
20595 IX86_BUILTIN_UCOMILTSD,
20596 IX86_BUILTIN_UCOMILESD,
20597 IX86_BUILTIN_UCOMIGTSD,
20598 IX86_BUILTIN_UCOMIGESD,
20599 IX86_BUILTIN_UCOMINEQSD,
20601 IX86_BUILTIN_MAXPD,
20602 IX86_BUILTIN_MAXSD,
20603 IX86_BUILTIN_MINPD,
20604 IX86_BUILTIN_MINSD,
20606 IX86_BUILTIN_ANDPD,
20607 IX86_BUILTIN_ANDNPD,
20609 IX86_BUILTIN_XORPD,
20611 IX86_BUILTIN_SQRTPD,
20612 IX86_BUILTIN_SQRTSD,
20614 IX86_BUILTIN_UNPCKHPD,
20615 IX86_BUILTIN_UNPCKLPD,
20617 IX86_BUILTIN_SHUFPD,
20619 IX86_BUILTIN_LOADUPD,
20620 IX86_BUILTIN_STOREUPD,
20621 IX86_BUILTIN_MOVSD,
20623 IX86_BUILTIN_LOADHPD,
20624 IX86_BUILTIN_LOADLPD,
20626 IX86_BUILTIN_CVTDQ2PD,
20627 IX86_BUILTIN_CVTDQ2PS,
20629 IX86_BUILTIN_CVTPD2DQ,
20630 IX86_BUILTIN_CVTPD2PI,
20631 IX86_BUILTIN_CVTPD2PS,
20632 IX86_BUILTIN_CVTTPD2DQ,
20633 IX86_BUILTIN_CVTTPD2PI,
20635 IX86_BUILTIN_CVTPI2PD,
20636 IX86_BUILTIN_CVTSI2SD,
20637 IX86_BUILTIN_CVTSI642SD,
20639 IX86_BUILTIN_CVTSD2SI,
20640 IX86_BUILTIN_CVTSD2SI64,
20641 IX86_BUILTIN_CVTSD2SS,
20642 IX86_BUILTIN_CVTSS2SD,
20643 IX86_BUILTIN_CVTTSD2SI,
20644 IX86_BUILTIN_CVTTSD2SI64,
20646 IX86_BUILTIN_CVTPS2DQ,
20647 IX86_BUILTIN_CVTPS2PD,
20648 IX86_BUILTIN_CVTTPS2DQ,
20650 IX86_BUILTIN_MOVNTI,
20651 IX86_BUILTIN_MOVNTPD,
20652 IX86_BUILTIN_MOVNTDQ,
20654 IX86_BUILTIN_MOVQ128,
20657 IX86_BUILTIN_MASKMOVDQU,
20658 IX86_BUILTIN_MOVMSKPD,
20659 IX86_BUILTIN_PMOVMSKB128,
20661 IX86_BUILTIN_PACKSSWB128,
20662 IX86_BUILTIN_PACKSSDW128,
20663 IX86_BUILTIN_PACKUSWB128,
20665 IX86_BUILTIN_PADDB128,
20666 IX86_BUILTIN_PADDW128,
20667 IX86_BUILTIN_PADDD128,
20668 IX86_BUILTIN_PADDQ128,
20669 IX86_BUILTIN_PADDSB128,
20670 IX86_BUILTIN_PADDSW128,
20671 IX86_BUILTIN_PADDUSB128,
20672 IX86_BUILTIN_PADDUSW128,
20673 IX86_BUILTIN_PSUBB128,
20674 IX86_BUILTIN_PSUBW128,
20675 IX86_BUILTIN_PSUBD128,
20676 IX86_BUILTIN_PSUBQ128,
20677 IX86_BUILTIN_PSUBSB128,
20678 IX86_BUILTIN_PSUBSW128,
20679 IX86_BUILTIN_PSUBUSB128,
20680 IX86_BUILTIN_PSUBUSW128,
20682 IX86_BUILTIN_PAND128,
20683 IX86_BUILTIN_PANDN128,
20684 IX86_BUILTIN_POR128,
20685 IX86_BUILTIN_PXOR128,
20687 IX86_BUILTIN_PAVGB128,
20688 IX86_BUILTIN_PAVGW128,
20690 IX86_BUILTIN_PCMPEQB128,
20691 IX86_BUILTIN_PCMPEQW128,
20692 IX86_BUILTIN_PCMPEQD128,
20693 IX86_BUILTIN_PCMPGTB128,
20694 IX86_BUILTIN_PCMPGTW128,
20695 IX86_BUILTIN_PCMPGTD128,
20697 IX86_BUILTIN_PMADDWD128,
20699 IX86_BUILTIN_PMAXSW128,
20700 IX86_BUILTIN_PMAXUB128,
20701 IX86_BUILTIN_PMINSW128,
20702 IX86_BUILTIN_PMINUB128,
20704 IX86_BUILTIN_PMULUDQ,
20705 IX86_BUILTIN_PMULUDQ128,
20706 IX86_BUILTIN_PMULHUW128,
20707 IX86_BUILTIN_PMULHW128,
20708 IX86_BUILTIN_PMULLW128,
20710 IX86_BUILTIN_PSADBW128,
20711 IX86_BUILTIN_PSHUFHW,
20712 IX86_BUILTIN_PSHUFLW,
20713 IX86_BUILTIN_PSHUFD,
20715 IX86_BUILTIN_PSLLDQI128,
20716 IX86_BUILTIN_PSLLWI128,
20717 IX86_BUILTIN_PSLLDI128,
20718 IX86_BUILTIN_PSLLQI128,
20719 IX86_BUILTIN_PSRAWI128,
20720 IX86_BUILTIN_PSRADI128,
20721 IX86_BUILTIN_PSRLDQI128,
20722 IX86_BUILTIN_PSRLWI128,
20723 IX86_BUILTIN_PSRLDI128,
20724 IX86_BUILTIN_PSRLQI128,
20726 IX86_BUILTIN_PSLLDQ128,
20727 IX86_BUILTIN_PSLLW128,
20728 IX86_BUILTIN_PSLLD128,
20729 IX86_BUILTIN_PSLLQ128,
20730 IX86_BUILTIN_PSRAW128,
20731 IX86_BUILTIN_PSRAD128,
20732 IX86_BUILTIN_PSRLW128,
20733 IX86_BUILTIN_PSRLD128,
20734 IX86_BUILTIN_PSRLQ128,
20736 IX86_BUILTIN_PUNPCKHBW128,
20737 IX86_BUILTIN_PUNPCKHWD128,
20738 IX86_BUILTIN_PUNPCKHDQ128,
20739 IX86_BUILTIN_PUNPCKHQDQ128,
20740 IX86_BUILTIN_PUNPCKLBW128,
20741 IX86_BUILTIN_PUNPCKLWD128,
20742 IX86_BUILTIN_PUNPCKLDQ128,
20743 IX86_BUILTIN_PUNPCKLQDQ128,
20745 IX86_BUILTIN_CLFLUSH,
20746 IX86_BUILTIN_MFENCE,
20747 IX86_BUILTIN_LFENCE,
20750 IX86_BUILTIN_ADDSUBPS,
20751 IX86_BUILTIN_HADDPS,
20752 IX86_BUILTIN_HSUBPS,
20753 IX86_BUILTIN_MOVSHDUP,
20754 IX86_BUILTIN_MOVSLDUP,
20755 IX86_BUILTIN_ADDSUBPD,
20756 IX86_BUILTIN_HADDPD,
20757 IX86_BUILTIN_HSUBPD,
20758 IX86_BUILTIN_LDDQU,
20760 IX86_BUILTIN_MONITOR,
20761 IX86_BUILTIN_MWAIT,
20764 IX86_BUILTIN_PHADDW,
20765 IX86_BUILTIN_PHADDD,
20766 IX86_BUILTIN_PHADDSW,
20767 IX86_BUILTIN_PHSUBW,
20768 IX86_BUILTIN_PHSUBD,
20769 IX86_BUILTIN_PHSUBSW,
20770 IX86_BUILTIN_PMADDUBSW,
20771 IX86_BUILTIN_PMULHRSW,
20772 IX86_BUILTIN_PSHUFB,
20773 IX86_BUILTIN_PSIGNB,
20774 IX86_BUILTIN_PSIGNW,
20775 IX86_BUILTIN_PSIGND,
20776 IX86_BUILTIN_PALIGNR,
20777 IX86_BUILTIN_PABSB,
20778 IX86_BUILTIN_PABSW,
20779 IX86_BUILTIN_PABSD,
20781 IX86_BUILTIN_PHADDW128,
20782 IX86_BUILTIN_PHADDD128,
20783 IX86_BUILTIN_PHADDSW128,
20784 IX86_BUILTIN_PHSUBW128,
20785 IX86_BUILTIN_PHSUBD128,
20786 IX86_BUILTIN_PHSUBSW128,
20787 IX86_BUILTIN_PMADDUBSW128,
20788 IX86_BUILTIN_PMULHRSW128,
20789 IX86_BUILTIN_PSHUFB128,
20790 IX86_BUILTIN_PSIGNB128,
20791 IX86_BUILTIN_PSIGNW128,
20792 IX86_BUILTIN_PSIGND128,
20793 IX86_BUILTIN_PALIGNR128,
20794 IX86_BUILTIN_PABSB128,
20795 IX86_BUILTIN_PABSW128,
20796 IX86_BUILTIN_PABSD128,
20798 /* AMDFAM10 - SSE4A New Instructions. */
20799 IX86_BUILTIN_MOVNTSD,
20800 IX86_BUILTIN_MOVNTSS,
20801 IX86_BUILTIN_EXTRQI,
20802 IX86_BUILTIN_EXTRQ,
20803 IX86_BUILTIN_INSERTQI,
20804 IX86_BUILTIN_INSERTQ,
20807 IX86_BUILTIN_BLENDPD,
20808 IX86_BUILTIN_BLENDPS,
20809 IX86_BUILTIN_BLENDVPD,
20810 IX86_BUILTIN_BLENDVPS,
20811 IX86_BUILTIN_PBLENDVB128,
20812 IX86_BUILTIN_PBLENDW128,
20817 IX86_BUILTIN_INSERTPS128,
20819 IX86_BUILTIN_MOVNTDQA,
20820 IX86_BUILTIN_MPSADBW128,
20821 IX86_BUILTIN_PACKUSDW128,
20822 IX86_BUILTIN_PCMPEQQ,
20823 IX86_BUILTIN_PHMINPOSUW128,
20825 IX86_BUILTIN_PMAXSB128,
20826 IX86_BUILTIN_PMAXSD128,
20827 IX86_BUILTIN_PMAXUD128,
20828 IX86_BUILTIN_PMAXUW128,
20830 IX86_BUILTIN_PMINSB128,
20831 IX86_BUILTIN_PMINSD128,
20832 IX86_BUILTIN_PMINUD128,
20833 IX86_BUILTIN_PMINUW128,
20835 IX86_BUILTIN_PMOVSXBW128,
20836 IX86_BUILTIN_PMOVSXBD128,
20837 IX86_BUILTIN_PMOVSXBQ128,
20838 IX86_BUILTIN_PMOVSXWD128,
20839 IX86_BUILTIN_PMOVSXWQ128,
20840 IX86_BUILTIN_PMOVSXDQ128,
20842 IX86_BUILTIN_PMOVZXBW128,
20843 IX86_BUILTIN_PMOVZXBD128,
20844 IX86_BUILTIN_PMOVZXBQ128,
20845 IX86_BUILTIN_PMOVZXWD128,
20846 IX86_BUILTIN_PMOVZXWQ128,
20847 IX86_BUILTIN_PMOVZXDQ128,
20849 IX86_BUILTIN_PMULDQ128,
20850 IX86_BUILTIN_PMULLD128,
20852 IX86_BUILTIN_ROUNDPD,
20853 IX86_BUILTIN_ROUNDPS,
20854 IX86_BUILTIN_ROUNDSD,
20855 IX86_BUILTIN_ROUNDSS,
20857 IX86_BUILTIN_PTESTZ,
20858 IX86_BUILTIN_PTESTC,
20859 IX86_BUILTIN_PTESTNZC,
20861 IX86_BUILTIN_VEC_INIT_V2SI,
20862 IX86_BUILTIN_VEC_INIT_V4HI,
20863 IX86_BUILTIN_VEC_INIT_V8QI,
20864 IX86_BUILTIN_VEC_EXT_V2DF,
20865 IX86_BUILTIN_VEC_EXT_V2DI,
20866 IX86_BUILTIN_VEC_EXT_V4SF,
20867 IX86_BUILTIN_VEC_EXT_V4SI,
20868 IX86_BUILTIN_VEC_EXT_V8HI,
20869 IX86_BUILTIN_VEC_EXT_V2SI,
20870 IX86_BUILTIN_VEC_EXT_V4HI,
20871 IX86_BUILTIN_VEC_EXT_V16QI,
20872 IX86_BUILTIN_VEC_SET_V2DI,
20873 IX86_BUILTIN_VEC_SET_V4SF,
20874 IX86_BUILTIN_VEC_SET_V4SI,
20875 IX86_BUILTIN_VEC_SET_V8HI,
20876 IX86_BUILTIN_VEC_SET_V4HI,
20877 IX86_BUILTIN_VEC_SET_V16QI,
20879 IX86_BUILTIN_VEC_PACK_SFIX,
20882 IX86_BUILTIN_CRC32QI,
20883 IX86_BUILTIN_CRC32HI,
20884 IX86_BUILTIN_CRC32SI,
20885 IX86_BUILTIN_CRC32DI,
20887 IX86_BUILTIN_PCMPESTRI128,
20888 IX86_BUILTIN_PCMPESTRM128,
20889 IX86_BUILTIN_PCMPESTRA128,
20890 IX86_BUILTIN_PCMPESTRC128,
20891 IX86_BUILTIN_PCMPESTRO128,
20892 IX86_BUILTIN_PCMPESTRS128,
20893 IX86_BUILTIN_PCMPESTRZ128,
20894 IX86_BUILTIN_PCMPISTRI128,
20895 IX86_BUILTIN_PCMPISTRM128,
20896 IX86_BUILTIN_PCMPISTRA128,
20897 IX86_BUILTIN_PCMPISTRC128,
20898 IX86_BUILTIN_PCMPISTRO128,
20899 IX86_BUILTIN_PCMPISTRS128,
20900 IX86_BUILTIN_PCMPISTRZ128,
20902 IX86_BUILTIN_PCMPGTQ,
20904 /* AES instructions */
20905 IX86_BUILTIN_AESENC128,
20906 IX86_BUILTIN_AESENCLAST128,
20907 IX86_BUILTIN_AESDEC128,
20908 IX86_BUILTIN_AESDECLAST128,
20909 IX86_BUILTIN_AESIMC128,
20910 IX86_BUILTIN_AESKEYGENASSIST128,
20912 /* PCLMUL instruction */
20913 IX86_BUILTIN_PCLMULQDQ128,
20916 IX86_BUILTIN_ADDPD256,
20917 IX86_BUILTIN_ADDPS256,
20918 IX86_BUILTIN_ADDSUBPD256,
20919 IX86_BUILTIN_ADDSUBPS256,
20920 IX86_BUILTIN_ANDPD256,
20921 IX86_BUILTIN_ANDPS256,
20922 IX86_BUILTIN_ANDNPD256,
20923 IX86_BUILTIN_ANDNPS256,
20924 IX86_BUILTIN_BLENDPD256,
20925 IX86_BUILTIN_BLENDPS256,
20926 IX86_BUILTIN_BLENDVPD256,
20927 IX86_BUILTIN_BLENDVPS256,
20928 IX86_BUILTIN_DIVPD256,
20929 IX86_BUILTIN_DIVPS256,
20930 IX86_BUILTIN_DPPS256,
20931 IX86_BUILTIN_HADDPD256,
20932 IX86_BUILTIN_HADDPS256,
20933 IX86_BUILTIN_HSUBPD256,
20934 IX86_BUILTIN_HSUBPS256,
20935 IX86_BUILTIN_MAXPD256,
20936 IX86_BUILTIN_MAXPS256,
20937 IX86_BUILTIN_MINPD256,
20938 IX86_BUILTIN_MINPS256,
20939 IX86_BUILTIN_MULPD256,
20940 IX86_BUILTIN_MULPS256,
20941 IX86_BUILTIN_ORPD256,
20942 IX86_BUILTIN_ORPS256,
20943 IX86_BUILTIN_SHUFPD256,
20944 IX86_BUILTIN_SHUFPS256,
20945 IX86_BUILTIN_SUBPD256,
20946 IX86_BUILTIN_SUBPS256,
20947 IX86_BUILTIN_XORPD256,
20948 IX86_BUILTIN_XORPS256,
20949 IX86_BUILTIN_CMPSD,
20950 IX86_BUILTIN_CMPSS,
20951 IX86_BUILTIN_CMPPD,
20952 IX86_BUILTIN_CMPPS,
20953 IX86_BUILTIN_CMPPD256,
20954 IX86_BUILTIN_CMPPS256,
20955 IX86_BUILTIN_CVTDQ2PD256,
20956 IX86_BUILTIN_CVTDQ2PS256,
20957 IX86_BUILTIN_CVTPD2PS256,
20958 IX86_BUILTIN_CVTPS2DQ256,
20959 IX86_BUILTIN_CVTPS2PD256,
20960 IX86_BUILTIN_CVTTPD2DQ256,
20961 IX86_BUILTIN_CVTPD2DQ256,
20962 IX86_BUILTIN_CVTTPS2DQ256,
20963 IX86_BUILTIN_EXTRACTF128PD256,
20964 IX86_BUILTIN_EXTRACTF128PS256,
20965 IX86_BUILTIN_EXTRACTF128SI256,
20966 IX86_BUILTIN_VZEROALL,
20967 IX86_BUILTIN_VZEROUPPER,
20968 IX86_BUILTIN_VZEROUPPER_REX64,
20969 IX86_BUILTIN_VPERMILVARPD,
20970 IX86_BUILTIN_VPERMILVARPS,
20971 IX86_BUILTIN_VPERMILVARPD256,
20972 IX86_BUILTIN_VPERMILVARPS256,
20973 IX86_BUILTIN_VPERMILPD,
20974 IX86_BUILTIN_VPERMILPS,
20975 IX86_BUILTIN_VPERMILPD256,
20976 IX86_BUILTIN_VPERMILPS256,
20977 IX86_BUILTIN_VPERM2F128PD256,
20978 IX86_BUILTIN_VPERM2F128PS256,
20979 IX86_BUILTIN_VPERM2F128SI256,
20980 IX86_BUILTIN_VBROADCASTSS,
20981 IX86_BUILTIN_VBROADCASTSD256,
20982 IX86_BUILTIN_VBROADCASTSS256,
20983 IX86_BUILTIN_VBROADCASTPD256,
20984 IX86_BUILTIN_VBROADCASTPS256,
20985 IX86_BUILTIN_VINSERTF128PD256,
20986 IX86_BUILTIN_VINSERTF128PS256,
20987 IX86_BUILTIN_VINSERTF128SI256,
20988 IX86_BUILTIN_LOADUPD256,
20989 IX86_BUILTIN_LOADUPS256,
20990 IX86_BUILTIN_STOREUPD256,
20991 IX86_BUILTIN_STOREUPS256,
20992 IX86_BUILTIN_LDDQU256,
20993 IX86_BUILTIN_MOVNTDQ256,
20994 IX86_BUILTIN_MOVNTPD256,
20995 IX86_BUILTIN_MOVNTPS256,
20996 IX86_BUILTIN_LOADDQU256,
20997 IX86_BUILTIN_STOREDQU256,
20998 IX86_BUILTIN_MASKLOADPD,
20999 IX86_BUILTIN_MASKLOADPS,
21000 IX86_BUILTIN_MASKSTOREPD,
21001 IX86_BUILTIN_MASKSTOREPS,
21002 IX86_BUILTIN_MASKLOADPD256,
21003 IX86_BUILTIN_MASKLOADPS256,
21004 IX86_BUILTIN_MASKSTOREPD256,
21005 IX86_BUILTIN_MASKSTOREPS256,
21006 IX86_BUILTIN_MOVSHDUP256,
21007 IX86_BUILTIN_MOVSLDUP256,
21008 IX86_BUILTIN_MOVDDUP256,
21010 IX86_BUILTIN_SQRTPD256,
21011 IX86_BUILTIN_SQRTPS256,
21012 IX86_BUILTIN_SQRTPS_NR256,
21013 IX86_BUILTIN_RSQRTPS256,
21014 IX86_BUILTIN_RSQRTPS_NR256,
21016 IX86_BUILTIN_RCPPS256,
21018 IX86_BUILTIN_ROUNDPD256,
21019 IX86_BUILTIN_ROUNDPS256,
21021 IX86_BUILTIN_UNPCKHPD256,
21022 IX86_BUILTIN_UNPCKLPD256,
21023 IX86_BUILTIN_UNPCKHPS256,
21024 IX86_BUILTIN_UNPCKLPS256,
21026 IX86_BUILTIN_SI256_SI,
21027 IX86_BUILTIN_PS256_PS,
21028 IX86_BUILTIN_PD256_PD,
21029 IX86_BUILTIN_SI_SI256,
21030 IX86_BUILTIN_PS_PS256,
21031 IX86_BUILTIN_PD_PD256,
21033 IX86_BUILTIN_VTESTZPD,
21034 IX86_BUILTIN_VTESTCPD,
21035 IX86_BUILTIN_VTESTNZCPD,
21036 IX86_BUILTIN_VTESTZPS,
21037 IX86_BUILTIN_VTESTCPS,
21038 IX86_BUILTIN_VTESTNZCPS,
21039 IX86_BUILTIN_VTESTZPD256,
21040 IX86_BUILTIN_VTESTCPD256,
21041 IX86_BUILTIN_VTESTNZCPD256,
21042 IX86_BUILTIN_VTESTZPS256,
21043 IX86_BUILTIN_VTESTCPS256,
21044 IX86_BUILTIN_VTESTNZCPS256,
21045 IX86_BUILTIN_PTESTZ256,
21046 IX86_BUILTIN_PTESTC256,
21047 IX86_BUILTIN_PTESTNZC256,
21049 IX86_BUILTIN_MOVMSKPD256,
21050 IX86_BUILTIN_MOVMSKPS256,
21052 /* TFmode support builtins. */
21054 IX86_BUILTIN_HUGE_VALQ,
21055 IX86_BUILTIN_FABSQ,
21056 IX86_BUILTIN_COPYSIGNQ,
21058 /* SSE5 instructions */
21059 IX86_BUILTIN_FMADDSS,
21060 IX86_BUILTIN_FMADDSD,
21061 IX86_BUILTIN_FMADDPS,
21062 IX86_BUILTIN_FMADDPD,
21063 IX86_BUILTIN_FMSUBSS,
21064 IX86_BUILTIN_FMSUBSD,
21065 IX86_BUILTIN_FMSUBPS,
21066 IX86_BUILTIN_FMSUBPD,
21067 IX86_BUILTIN_FNMADDSS,
21068 IX86_BUILTIN_FNMADDSD,
21069 IX86_BUILTIN_FNMADDPS,
21070 IX86_BUILTIN_FNMADDPD,
21071 IX86_BUILTIN_FNMSUBSS,
21072 IX86_BUILTIN_FNMSUBSD,
21073 IX86_BUILTIN_FNMSUBPS,
21074 IX86_BUILTIN_FNMSUBPD,
21075 IX86_BUILTIN_PCMOV,
21076 IX86_BUILTIN_PCMOV_V2DI,
21077 IX86_BUILTIN_PCMOV_V4SI,
21078 IX86_BUILTIN_PCMOV_V8HI,
21079 IX86_BUILTIN_PCMOV_V16QI,
21080 IX86_BUILTIN_PCMOV_V4SF,
21081 IX86_BUILTIN_PCMOV_V2DF,
21082 IX86_BUILTIN_PPERM,
21083 IX86_BUILTIN_PERMPS,
21084 IX86_BUILTIN_PERMPD,
21085 IX86_BUILTIN_PMACSSWW,
21086 IX86_BUILTIN_PMACSWW,
21087 IX86_BUILTIN_PMACSSWD,
21088 IX86_BUILTIN_PMACSWD,
21089 IX86_BUILTIN_PMACSSDD,
21090 IX86_BUILTIN_PMACSDD,
21091 IX86_BUILTIN_PMACSSDQL,
21092 IX86_BUILTIN_PMACSSDQH,
21093 IX86_BUILTIN_PMACSDQL,
21094 IX86_BUILTIN_PMACSDQH,
21095 IX86_BUILTIN_PMADCSSWD,
21096 IX86_BUILTIN_PMADCSWD,
21097 IX86_BUILTIN_PHADDBW,
21098 IX86_BUILTIN_PHADDBD,
21099 IX86_BUILTIN_PHADDBQ,
21100 IX86_BUILTIN_PHADDWD,
21101 IX86_BUILTIN_PHADDWQ,
21102 IX86_BUILTIN_PHADDDQ,
21103 IX86_BUILTIN_PHADDUBW,
21104 IX86_BUILTIN_PHADDUBD,
21105 IX86_BUILTIN_PHADDUBQ,
21106 IX86_BUILTIN_PHADDUWD,
21107 IX86_BUILTIN_PHADDUWQ,
21108 IX86_BUILTIN_PHADDUDQ,
21109 IX86_BUILTIN_PHSUBBW,
21110 IX86_BUILTIN_PHSUBWD,
21111 IX86_BUILTIN_PHSUBDQ,
21112 IX86_BUILTIN_PROTB,
21113 IX86_BUILTIN_PROTW,
21114 IX86_BUILTIN_PROTD,
21115 IX86_BUILTIN_PROTQ,
21116 IX86_BUILTIN_PROTB_IMM,
21117 IX86_BUILTIN_PROTW_IMM,
21118 IX86_BUILTIN_PROTD_IMM,
21119 IX86_BUILTIN_PROTQ_IMM,
21120 IX86_BUILTIN_PSHLB,
21121 IX86_BUILTIN_PSHLW,
21122 IX86_BUILTIN_PSHLD,
21123 IX86_BUILTIN_PSHLQ,
21124 IX86_BUILTIN_PSHAB,
21125 IX86_BUILTIN_PSHAW,
21126 IX86_BUILTIN_PSHAD,
21127 IX86_BUILTIN_PSHAQ,
21128 IX86_BUILTIN_FRCZSS,
21129 IX86_BUILTIN_FRCZSD,
21130 IX86_BUILTIN_FRCZPS,
21131 IX86_BUILTIN_FRCZPD,
21132 IX86_BUILTIN_CVTPH2PS,
21133 IX86_BUILTIN_CVTPS2PH,
21135 IX86_BUILTIN_COMEQSS,
21136 IX86_BUILTIN_COMNESS,
21137 IX86_BUILTIN_COMLTSS,
21138 IX86_BUILTIN_COMLESS,
21139 IX86_BUILTIN_COMGTSS,
21140 IX86_BUILTIN_COMGESS,
21141 IX86_BUILTIN_COMUEQSS,
21142 IX86_BUILTIN_COMUNESS,
21143 IX86_BUILTIN_COMULTSS,
21144 IX86_BUILTIN_COMULESS,
21145 IX86_BUILTIN_COMUGTSS,
21146 IX86_BUILTIN_COMUGESS,
21147 IX86_BUILTIN_COMORDSS,
21148 IX86_BUILTIN_COMUNORDSS,
21149 IX86_BUILTIN_COMFALSESS,
21150 IX86_BUILTIN_COMTRUESS,
21152 IX86_BUILTIN_COMEQSD,
21153 IX86_BUILTIN_COMNESD,
21154 IX86_BUILTIN_COMLTSD,
21155 IX86_BUILTIN_COMLESD,
21156 IX86_BUILTIN_COMGTSD,
21157 IX86_BUILTIN_COMGESD,
21158 IX86_BUILTIN_COMUEQSD,
21159 IX86_BUILTIN_COMUNESD,
21160 IX86_BUILTIN_COMULTSD,
21161 IX86_BUILTIN_COMULESD,
21162 IX86_BUILTIN_COMUGTSD,
21163 IX86_BUILTIN_COMUGESD,
21164 IX86_BUILTIN_COMORDSD,
21165 IX86_BUILTIN_COMUNORDSD,
21166 IX86_BUILTIN_COMFALSESD,
21167 IX86_BUILTIN_COMTRUESD,
21169 IX86_BUILTIN_COMEQPS,
21170 IX86_BUILTIN_COMNEPS,
21171 IX86_BUILTIN_COMLTPS,
21172 IX86_BUILTIN_COMLEPS,
21173 IX86_BUILTIN_COMGTPS,
21174 IX86_BUILTIN_COMGEPS,
21175 IX86_BUILTIN_COMUEQPS,
21176 IX86_BUILTIN_COMUNEPS,
21177 IX86_BUILTIN_COMULTPS,
21178 IX86_BUILTIN_COMULEPS,
21179 IX86_BUILTIN_COMUGTPS,
21180 IX86_BUILTIN_COMUGEPS,
21181 IX86_BUILTIN_COMORDPS,
21182 IX86_BUILTIN_COMUNORDPS,
21183 IX86_BUILTIN_COMFALSEPS,
21184 IX86_BUILTIN_COMTRUEPS,
21186 IX86_BUILTIN_COMEQPD,
21187 IX86_BUILTIN_COMNEPD,
21188 IX86_BUILTIN_COMLTPD,
21189 IX86_BUILTIN_COMLEPD,
21190 IX86_BUILTIN_COMGTPD,
21191 IX86_BUILTIN_COMGEPD,
21192 IX86_BUILTIN_COMUEQPD,
21193 IX86_BUILTIN_COMUNEPD,
21194 IX86_BUILTIN_COMULTPD,
21195 IX86_BUILTIN_COMULEPD,
21196 IX86_BUILTIN_COMUGTPD,
21197 IX86_BUILTIN_COMUGEPD,
21198 IX86_BUILTIN_COMORDPD,
21199 IX86_BUILTIN_COMUNORDPD,
21200 IX86_BUILTIN_COMFALSEPD,
21201 IX86_BUILTIN_COMTRUEPD,
21203 IX86_BUILTIN_PCOMEQUB,
21204 IX86_BUILTIN_PCOMNEUB,
21205 IX86_BUILTIN_PCOMLTUB,
21206 IX86_BUILTIN_PCOMLEUB,
21207 IX86_BUILTIN_PCOMGTUB,
21208 IX86_BUILTIN_PCOMGEUB,
21209 IX86_BUILTIN_PCOMFALSEUB,
21210 IX86_BUILTIN_PCOMTRUEUB,
21211 IX86_BUILTIN_PCOMEQUW,
21212 IX86_BUILTIN_PCOMNEUW,
21213 IX86_BUILTIN_PCOMLTUW,
21214 IX86_BUILTIN_PCOMLEUW,
21215 IX86_BUILTIN_PCOMGTUW,
21216 IX86_BUILTIN_PCOMGEUW,
21217 IX86_BUILTIN_PCOMFALSEUW,
21218 IX86_BUILTIN_PCOMTRUEUW,
21219 IX86_BUILTIN_PCOMEQUD,
21220 IX86_BUILTIN_PCOMNEUD,
21221 IX86_BUILTIN_PCOMLTUD,
21222 IX86_BUILTIN_PCOMLEUD,
21223 IX86_BUILTIN_PCOMGTUD,
21224 IX86_BUILTIN_PCOMGEUD,
21225 IX86_BUILTIN_PCOMFALSEUD,
21226 IX86_BUILTIN_PCOMTRUEUD,
21227 IX86_BUILTIN_PCOMEQUQ,
21228 IX86_BUILTIN_PCOMNEUQ,
21229 IX86_BUILTIN_PCOMLTUQ,
21230 IX86_BUILTIN_PCOMLEUQ,
21231 IX86_BUILTIN_PCOMGTUQ,
21232 IX86_BUILTIN_PCOMGEUQ,
21233 IX86_BUILTIN_PCOMFALSEUQ,
21234 IX86_BUILTIN_PCOMTRUEUQ,
21236 IX86_BUILTIN_PCOMEQB,
21237 IX86_BUILTIN_PCOMNEB,
21238 IX86_BUILTIN_PCOMLTB,
21239 IX86_BUILTIN_PCOMLEB,
21240 IX86_BUILTIN_PCOMGTB,
21241 IX86_BUILTIN_PCOMGEB,
21242 IX86_BUILTIN_PCOMFALSEB,
21243 IX86_BUILTIN_PCOMTRUEB,
21244 IX86_BUILTIN_PCOMEQW,
21245 IX86_BUILTIN_PCOMNEW,
21246 IX86_BUILTIN_PCOMLTW,
21247 IX86_BUILTIN_PCOMLEW,
21248 IX86_BUILTIN_PCOMGTW,
21249 IX86_BUILTIN_PCOMGEW,
21250 IX86_BUILTIN_PCOMFALSEW,
21251 IX86_BUILTIN_PCOMTRUEW,
21252 IX86_BUILTIN_PCOMEQD,
21253 IX86_BUILTIN_PCOMNED,
21254 IX86_BUILTIN_PCOMLTD,
21255 IX86_BUILTIN_PCOMLED,
21256 IX86_BUILTIN_PCOMGTD,
21257 IX86_BUILTIN_PCOMGED,
21258 IX86_BUILTIN_PCOMFALSED,
21259 IX86_BUILTIN_PCOMTRUED,
21260 IX86_BUILTIN_PCOMEQQ,
21261 IX86_BUILTIN_PCOMNEQ,
21262 IX86_BUILTIN_PCOMLTQ,
21263 IX86_BUILTIN_PCOMLEQ,
21264 IX86_BUILTIN_PCOMGTQ,
21265 IX86_BUILTIN_PCOMGEQ,
21266 IX86_BUILTIN_PCOMFALSEQ,
21267 IX86_BUILTIN_PCOMTRUEQ,
21272 /* Table for the ix86 builtin decls. */
/* Indexed by enum ix86_builtins; NULL_TREE for builtins not (yet) declared.
   GTY(()) registers the array with the garbage collector. */
21273 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
21275 /* Table of all of the builtin functions that are possible with different ISA's
21276    but are waiting to be built until a function is declared to use that
   ISA (deferred declaration -- see def_builtin). */
21278 struct GTY(()) builtin_isa {
21279   tree type;			/* builtin type to use in the declaration */
21280   const char *name;		/* function name */
21281   int isa;			/* isa_flags this builtin is defined for */
21282   bool const_p;			/* true if the declaration is constant */
/* (closing `};` of the struct is on a dropped line) */
/* Deferred-builtin records, parallel to ix86_builtins, same index space. */
21285 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
21288 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
21289 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
21290 * function decl in the ix86_builtins array. Returns the function decl or
21291 * NULL_TREE, if the builtin was not added.
21293 * If the front end has a special hook for builtin functions, delay adding
21294 * builtin functions that aren't in the current ISA until the ISA is changed
21295 * with function specific optimization. Doing so, can save about 300K for the
21296 * default compiler. When the builtin is expanded, check at that time whether
21299 * If the front end doesn't have a special hook, record all builtins, even if
21300 * it isn't an instruction set in the current ISA in case the user uses
21301 * function specific options for a different ISA, so that we don't get scope
21302 * errors if a builtin is added in the middle of a function scope. */
21305 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
21307 tree decl = NULL_TREE;
/* Builtins gated on OPTION_MASK_ISA_64BIT are skipped entirely when not
   compiling for a 64-bit target; DECL stays NULL_TREE in that case.  */
21309 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
/* Record the ISA requirement regardless of whether we declare now.  */
21311 ix86_builtins_isa[(int) code].isa = mask;
/* Declare immediately when the builtin is in the current ISA, or when the
   front end lacks an ext-scope hook (so deferral is not possible).  */
21313 if ((mask & ix86_isa_flags) != 0
21314 || (lang_hooks.builtin_function
21315 == lang_hooks.builtin_function_ext_scope))
21318 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
/* Built now: publish the decl and clear the pending `type' marker.  */
21320 ix86_builtins[(int) code] = decl;
21321 ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Deferred: stash NAME/TYPE so ix86_add_new_builtins can declare it
   later if the ISA is extended via function-specific options.
   NOTE(review): the branch structure between these arms is elided in this
   view — confirm against the full source before relying on it.  */
21325 ix86_builtins[(int) code] = NULL_TREE;
21326 ix86_builtins_isa[(int) code].const_p = false;
21327 ix86_builtins_isa[(int) code].type = type;
21328 ix86_builtins_isa[(int) code].name = name;
21335 /* Like def_builtin, but also marks the function decl "const". */
21338 def_builtin_const (int mask, const char *name, tree type,
21339 enum ix86_builtins code)
21341 tree decl = def_builtin (mask, name, type, code);
/* If the decl was built, mark it pure/const via TREE_READONLY ...  */
21343 TREE_READONLY (decl) = 1;
/* ... otherwise (declaration deferred) remember const-ness so
   ix86_add_new_builtins can apply it when the decl is finally created.
   NOTE(review): the guarding if/else lines are elided here — verify.  */
21345 ix86_builtins_isa[(int) code].const_p = true;
21350 /* Add any new builtin functions for a given ISA that may not have been
21351    declared. This saves a bit of space compared to adding all of the
21352    declarations to the tree, even if we didn't use them. */
21355 ix86_add_new_builtins (int isa)
21360 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
/* A non-NULL `type' marks a builtin whose declaration was deferred by
   def_builtin; declare it now if ISA covers its requirements.  */
21362 if ((ix86_builtins_isa[i].isa & isa) != 0
21363 && ix86_builtins_isa[i].type != NULL_TREE)
21365 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
21366 ix86_builtins_isa[i].type,
21367 i, BUILT_IN_MD, NULL,
/* Publish the decl and clear `type' so we never declare it twice.  */
21370 ix86_builtins[i] = decl;
21371 ix86_builtins_isa[i].type = NULL_TREE;
/* Apply const-ness recorded by def_builtin_const at deferral time.  */
21372 if (ix86_builtins_isa[i].const_p)
21373 TREE_READONLY (decl) = 1;
21378 /* Bits for builtin_description.flag. */
21380 /* Set when we don't support the comparison natively, and should
21381    swap_comparison in order to support it. */
21382 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of the builtin tables below (bdesc_comi, bdesc_args, ...):
   ISA mask, insn pattern, user-visible name, builtin code, and the rtx
   comparison code (UNKNOWN when the entry is not a comparison).  */
21384 struct builtin_description
21386 const unsigned int mask;
21387 const enum insn_code icode;
21388 const char *const name;
21389 const enum ix86_builtins code;
21390 const enum rtx_code comparison;
/* Scalar FP compare builtins mapping to comis/ucomis patterns; the
   rtx_code field selects the condition tested on the result flags.  */
21394 static const struct builtin_description bdesc_comi[] =
/* SSE: single-precision comis/ucomis.  */
21396 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21397 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21398 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21399 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21400 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21401 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21402 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21403 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21404 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21405 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21406 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21407 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
/* SSE2: double-precision comisd/ucomisd.  */
21408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 pcmpestr* builtins (explicit-length string compare).  The last
   field is the CC mode whose flag the *a/*c/*o/*s/*z variants read.  */
21422 static const struct builtin_description bdesc_pcmpestr[] =
21425 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21426 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21427 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21428 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21429 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21430 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21431 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 pcmpistr* builtins (implicit-length string compare); same
   layout as bdesc_pcmpestr above.  */
21434 static const struct builtin_description bdesc_pcmpistr[] =
21437 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21438 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21439 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21440 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21441 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21442 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21443 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21446 /* Special builtin types */
/* Function signatures for the "special" builtins (bdesc_special_args):
   the names encode return type, then argument types (PC* = pointer to
   const, P* = pointer destination).  */
21447 enum ix86_special_builtin_type
21449 SPECIAL_FTYPE_UNKNOWN,
/* Loads: vector result from a (const) memory operand.  */
21451 V32QI_FTYPE_PCCHAR,
21452 V16QI_FTYPE_PCCHAR,
21454 V8SF_FTYPE_PCFLOAT,
21456 V4DF_FTYPE_PCDOUBLE,
21457 V4SF_FTYPE_PCFLOAT,
21458 V2DF_FTYPE_PCDOUBLE,
21459 V8SF_FTYPE_PCV8SF_V8SF,
21460 V4DF_FTYPE_PCV4DF_V4DF,
21461 V4SF_FTYPE_V4SF_PCV2SF,
21462 V4SF_FTYPE_PCV4SF_V4SF,
21463 V2DF_FTYPE_V2DF_PCDOUBLE,
21464 V2DF_FTYPE_PCV2DF_V2DF,
/* Stores: VOID result, first argument is the memory destination.  */
21466 VOID_FTYPE_PV2SF_V4SF,
21467 VOID_FTYPE_PV4DI_V4DI,
21468 VOID_FTYPE_PV2DI_V2DI,
21469 VOID_FTYPE_PCHAR_V32QI,
21470 VOID_FTYPE_PCHAR_V16QI,
21471 VOID_FTYPE_PFLOAT_V8SF,
21472 VOID_FTYPE_PFLOAT_V4SF,
21473 VOID_FTYPE_PDOUBLE_V4DF,
21474 VOID_FTYPE_PDOUBLE_V2DF,
21476 VOID_FTYPE_PINT_INT,
/* Masked stores: destination, mask, value.  */
21477 VOID_FTYPE_PV8SF_V8SF_V8SF,
21478 VOID_FTYPE_PV4DF_V4DF_V4DF,
21479 VOID_FTYPE_PV4SF_V4SF_V4SF,
21480 VOID_FTYPE_PV2DF_V2DF_V2DF
21483 /* Builtin types */
/* Function signatures for the ordinary builtins (bdesc_args).  Suffix
   conventions used by the expander: _PTEST (flags-setting test insns),
   _VEC_MERGE (scalar op merged into low element), _COUNT (shift-count
   operand), _SWAP (operands swapped before expansion).  */
21484 enum ix86_builtin_type
21487 FLOAT128_FTYPE_FLOAT128,
21489 FLOAT128_FTYPE_FLOAT128_FLOAT128,
/* ptest-style: int flag result from two vector operands.  */
21490 INT_FTYPE_V8SF_V8SF_PTEST,
21491 INT_FTYPE_V4DI_V4DI_PTEST,
21492 INT_FTYPE_V4DF_V4DF_PTEST,
21493 INT_FTYPE_V4SF_V4SF_PTEST,
21494 INT_FTYPE_V2DI_V2DI_PTEST,
21495 INT_FTYPE_V2DF_V2DF_PTEST,
21527 V4SF_FTYPE_V4SF_VEC_MERGE,
21536 V2DF_FTYPE_V2DF_VEC_MERGE,
/* Two-operand vector signatures.  */
21547 V16QI_FTYPE_V16QI_V16QI,
21548 V16QI_FTYPE_V8HI_V8HI,
21549 V8QI_FTYPE_V8QI_V8QI,
21550 V8QI_FTYPE_V4HI_V4HI,
21551 V8HI_FTYPE_V8HI_V8HI,
21552 V8HI_FTYPE_V8HI_V8HI_COUNT,
21553 V8HI_FTYPE_V16QI_V16QI,
21554 V8HI_FTYPE_V4SI_V4SI,
21555 V8HI_FTYPE_V8HI_SI_COUNT,
21556 V8SF_FTYPE_V8SF_V8SF,
21557 V8SF_FTYPE_V8SF_V8SI,
21558 V4SI_FTYPE_V4SI_V4SI,
21559 V4SI_FTYPE_V4SI_V4SI_COUNT,
21560 V4SI_FTYPE_V8HI_V8HI,
21561 V4SI_FTYPE_V4SF_V4SF,
21562 V4SI_FTYPE_V2DF_V2DF,
21563 V4SI_FTYPE_V4SI_SI_COUNT,
21564 V4HI_FTYPE_V4HI_V4HI,
21565 V4HI_FTYPE_V4HI_V4HI_COUNT,
21566 V4HI_FTYPE_V8QI_V8QI,
21567 V4HI_FTYPE_V2SI_V2SI,
21568 V4HI_FTYPE_V4HI_SI_COUNT,
21569 V4DF_FTYPE_V4DF_V4DF,
21570 V4DF_FTYPE_V4DF_V4DI,
21571 V4SF_FTYPE_V4SF_V4SF,
21572 V4SF_FTYPE_V4SF_V4SF_SWAP,
21573 V4SF_FTYPE_V4SF_V4SI,
21574 V4SF_FTYPE_V4SF_V2SI,
21575 V4SF_FTYPE_V4SF_V2DF,
21576 V4SF_FTYPE_V4SF_DI,
21577 V4SF_FTYPE_V4SF_SI,
21578 V2DI_FTYPE_V2DI_V2DI,
21579 V2DI_FTYPE_V2DI_V2DI_COUNT,
21580 V2DI_FTYPE_V16QI_V16QI,
21581 V2DI_FTYPE_V4SI_V4SI,
21582 V2DI_FTYPE_V2DI_V16QI,
21583 V2DI_FTYPE_V2DF_V2DF,
21584 V2DI_FTYPE_V2DI_SI_COUNT,
21585 V2SI_FTYPE_V2SI_V2SI,
21586 V2SI_FTYPE_V2SI_V2SI_COUNT,
21587 V2SI_FTYPE_V4HI_V4HI,
21588 V2SI_FTYPE_V2SF_V2SF,
21589 V2SI_FTYPE_V2SI_SI_COUNT,
21590 V2DF_FTYPE_V2DF_V2DF,
21591 V2DF_FTYPE_V2DF_V2DF_SWAP,
21592 V2DF_FTYPE_V2DF_V4SF,
21593 V2DF_FTYPE_V2DF_V2DI,
21594 V2DF_FTYPE_V2DF_DI,
21595 V2DF_FTYPE_V2DF_SI,
21596 V2SF_FTYPE_V2SF_V2SF,
21597 V1DI_FTYPE_V1DI_V1DI,
21598 V1DI_FTYPE_V1DI_V1DI_COUNT,
21599 V1DI_FTYPE_V8QI_V8QI,
21600 V1DI_FTYPE_V2SI_V2SI,
21601 V1DI_FTYPE_V1DI_SI_COUNT,
/* Scalar integer signatures (crc32 and friends).  */
21602 UINT64_FTYPE_UINT64_UINT64,
21603 UINT_FTYPE_UINT_UINT,
21604 UINT_FTYPE_UINT_USHORT,
21605 UINT_FTYPE_UINT_UCHAR,
/* Vector-with-immediate signatures.  */
21606 V8HI_FTYPE_V8HI_INT,
21607 V4SI_FTYPE_V4SI_INT,
21608 V4HI_FTYPE_V4HI_INT,
21609 V8SF_FTYPE_V8SF_INT,
21610 V4SI_FTYPE_V8SI_INT,
21611 V4SF_FTYPE_V8SF_INT,
21612 V2DF_FTYPE_V4DF_INT,
21613 V4DF_FTYPE_V4DF_INT,
21614 V4SF_FTYPE_V4SF_INT,
21615 V2DI_FTYPE_V2DI_INT,
21616 V2DI2TI_FTYPE_V2DI_INT,
21617 V2DF_FTYPE_V2DF_INT,
/* Three-operand vector signatures (blends, fused selects).  */
21618 V16QI_FTYPE_V16QI_V16QI_V16QI,
21619 V8SF_FTYPE_V8SF_V8SF_V8SF,
21620 V4DF_FTYPE_V4DF_V4DF_V4DF,
21621 V4SF_FTYPE_V4SF_V4SF_V4SF,
21622 V2DF_FTYPE_V2DF_V2DF_V2DF,
/* Two vectors plus immediate.  */
21623 V16QI_FTYPE_V16QI_V16QI_INT,
21624 V8SI_FTYPE_V8SI_V8SI_INT,
21625 V8SI_FTYPE_V8SI_V4SI_INT,
21626 V8HI_FTYPE_V8HI_V8HI_INT,
21627 V8SF_FTYPE_V8SF_V8SF_INT,
21628 V8SF_FTYPE_V8SF_V4SF_INT,
21629 V4SI_FTYPE_V4SI_V4SI_INT,
21630 V4DF_FTYPE_V4DF_V4DF_INT,
21631 V4DF_FTYPE_V4DF_V2DF_INT,
21632 V4SF_FTYPE_V4SF_V4SF_INT,
21633 V2DI_FTYPE_V2DI_V2DI_INT,
21634 V2DI2TI_FTYPE_V2DI_V2DI_INT,
21635 V1DI2DI_FTYPE_V1DI_V1DI_INT,
21636 V2DF_FTYPE_V2DF_V2DF_INT,
21637 V2DI_FTYPE_V2DI_UINT_UINT,
21638 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21641 /* Special builtins with variable number of arguments. */
/* Each entry: ISA mask, insn pattern, builtin name, builtin code, rtx
   comparison (unused here), and an ix86_special_builtin_type cast to int.
   These are the memory-touching / state-changing builtins (loads, stores,
   non-temporal moves, fences).  */
21642 static const struct builtin_description bdesc_special_args[] =
/* MMX */
21645 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* 3DNow! */
21648 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE */
21651 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21652 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21653 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21655 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21656 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21657 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21658 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21660 /* SSE or 3DNow!A */
21661 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21662 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
/* SSE2 */
21665 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21666 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21667 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21668 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21669 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21670 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21675 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21676 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
/* SSE3 */
21679 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
/* SSE4.1 */
21682 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
/* SSE4A */
21685 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21686 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
/* AVX */
21689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21691 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21721 /* Builtins with variable number of arguments. */
21722 static const struct builtin_description bdesc_args[] =
21725 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21726 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21727 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21728 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21729 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21730 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21732 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21733 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21734 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21735 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21736 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21737 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21738 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21739 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21741 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21742 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21744 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21745 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21746 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21747 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21749 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21750 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21751 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21752 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21753 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21754 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21756 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21757 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21758 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21759 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21760 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21761 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21763 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21764 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21765 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21767 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21769 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21770 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21771 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21772 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21773 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21774 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21776 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21777 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21778 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21779 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21780 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21781 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21783 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21784 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21785 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21786 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21789 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21790 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21791 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21792 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21794 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21795 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21796 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21797 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21798 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21799 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21800 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21801 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21802 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21803 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21804 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21805 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21806 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21807 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21808 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21811 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21812 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21813 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21814 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21815 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21816 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21819 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21820 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21821 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21822 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21823 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21824 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21825 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21826 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21827 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21828 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21829 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21830 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21832 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21834 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21835 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21836 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21837 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21838 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21839 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21840 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21841 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21843 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21844 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21845 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21846 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21847 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21848 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21849 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21850 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21851 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21852 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21853 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21854 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21855 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21856 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21857 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21858 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21859 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21860 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21861 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21862 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21863 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21864 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21866 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21867 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21868 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21869 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21871 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21872 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21873 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21874 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21876 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21877 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21878 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21879 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21880 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21882 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21883 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21884 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
21886 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21888 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21889 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21890 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21892 /* SSE MMX or 3DNow!A */
21893 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21894 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21895 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21897 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21898 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21899 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21900 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21902 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21903 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21905 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21908 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21910 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21911 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21912 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21913 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21914 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21916 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21917 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21918 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21919 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21920 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21922 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21924 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21925 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21926 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21927 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21929 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21930 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21931 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21933 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21934 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21935 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21936 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21937 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21938 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21939 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21940 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21942 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21943 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21945 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21946 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21948 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21951 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21952 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21954 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21956 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21957 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21958 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21959 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21960 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21961 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21963 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21964 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21965 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21966 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21968 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21970 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21971 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21977 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21979 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21980 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21981 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21982 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21983 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21984 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21985 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21986 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21988 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21989 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21990 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21991 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21993 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21995 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21997 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21998 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
22000 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22002 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22003 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22006 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22013 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22015 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22016 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22017 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22023 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
22030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
22031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
22033 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
22036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
22037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
22039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
22041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
22042 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
22043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
22044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
22046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
22047 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22048 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22049 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
22050 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22051 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22052 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
22054 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
22055 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22056 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22057 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
22058 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22059 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22060 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
22062 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22063 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22064 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22065 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22067 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
22068 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
22069 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
22071 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
22073 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
22074 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
22076 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
22079 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
22080 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
22083 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
22084 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22086 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22087 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22088 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22089 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22090 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22091 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22094 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
22095 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
22096 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
22097 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
22098 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
22099 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
22101 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22102 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22103 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22104 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22105 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22106 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22107 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22108 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22109 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22110 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22111 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22112 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22113 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
22114 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
22115 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22116 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22117 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22118 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22119 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22120 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22121 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22122 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22123 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22124 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22127 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
22128 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
22131 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22132 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22133 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
22134 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
22135 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22136 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22137 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22138 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
22139 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
22140 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
22142 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
22143 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
22144 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
22145 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
22146 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
22147 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
22148 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
22149 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
22150 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
22151 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
22152 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
22153 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
22154 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
22156 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
22157 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22158 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22159 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22160 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22161 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22162 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22163 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22164 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22165 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22166 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
22167 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22169 /* SSE4.1 and SSE5 */
22170 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22171 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22172 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22173 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22175 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22176 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22177 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22180 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22181 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
22182 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
22183 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
22184 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
22187 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
22188 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
22189 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
22190 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22193 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
22194 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
22196 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22197 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22198 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22199 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22202 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
22205 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22206 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22207 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22208 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22209 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22210 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22211 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22212 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22213 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22214 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22215 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22216 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22217 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22218 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22219 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22220 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22221 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22222 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22223 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22224 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22225 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22226 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22227 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22228 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22229 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22230 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22232 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
22233 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
22234 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
22235 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
22237 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22238 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22239 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
22240 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
22241 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22242 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22243 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22244 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22245 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22246 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22247 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22248 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22249 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22250 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
22251 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
22252 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
22253 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
22254 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
22255 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
22256 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22257 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
22258 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22259 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22260 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22261 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22262 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22263 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
22264 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22265 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22266 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22267 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22268 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
22269 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
22270 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
22272 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22273 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22274 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22276 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22277 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22278 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22279 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22280 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22282 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22284 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22285 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22287 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22288 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22289 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22290 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22292 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
22293 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
22294 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
22295 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
22296 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
22297 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
22299 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22300 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22301 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22302 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22303 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22304 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22305 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22306 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22307 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22308 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22309 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22310 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22311 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22312 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22313 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22315 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
22316 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
22320 enum multi_arg_type {
22330 MULTI_ARG_3_PERMPS,
22331 MULTI_ARG_3_PERMPD,
22338 MULTI_ARG_2_DI_IMM,
22339 MULTI_ARG_2_SI_IMM,
22340 MULTI_ARG_2_HI_IMM,
22341 MULTI_ARG_2_QI_IMM,
22342 MULTI_ARG_2_SF_CMP,
22343 MULTI_ARG_2_DF_CMP,
22344 MULTI_ARG_2_DI_CMP,
22345 MULTI_ARG_2_SI_CMP,
22346 MULTI_ARG_2_HI_CMP,
22347 MULTI_ARG_2_QI_CMP,
22370 static const struct builtin_description bdesc_multi_arg[] =
22372 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22373 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22374 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22375 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22376 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22377 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22378 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22379 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22380 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22381 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22382 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22383 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22384 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22385 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22386 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22387 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22388 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
22389 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
22390 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
22391 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
22392 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
22393 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
22394 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
22395 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
22396 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, UNKNOWN, (int)MULTI_ARG_3_PERMPS },
22397 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, UNKNOWN, (int)MULTI_ARG_3_PERMPD },
22398 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22399 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22400 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22401 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22402 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22403 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22404 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22405 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22406 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22407 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22408 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22409 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22410 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22411 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
22412 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
22413 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
22414 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
22415 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
22416 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
22417 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
22418 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22419 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
22420 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
22421 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
22422 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22423 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
22424 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
22425 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
22426 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
22427 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
22428 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
22429 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
22430 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)MULTI_ARG_1_PH2PS },
22431 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int)MULTI_ARG_1_PS2PH },
22432 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22433 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22434 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22435 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22436 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22437 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22438 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22439 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22440 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22441 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22442 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22443 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22444 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22445 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22446 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22448 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22449 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22450 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22451 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22452 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22453 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22454 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22455 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22456 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22457 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22458 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22459 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22460 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22461 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22462 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22463 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22465 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22466 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22467 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22468 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22469 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22470 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22471 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22472 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22473 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22474 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22475 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22476 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22477 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22478 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22479 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22480 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22482 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22483 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22484 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22485 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22486 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22487 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22488 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22489 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22490 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22491 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22492 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22493 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22494 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22495 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22496 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22497 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22499 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22500 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22501 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22502 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22503 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22504 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22505 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22506 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22507 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22508 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22509 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22510 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22511 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22512 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22513 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22514 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22516 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22517 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22518 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22519 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22520 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22521 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22522 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22524 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22525 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22526 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22527 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22528 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22529 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22530 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22532 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22533 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22534 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22535 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22536 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22537 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22538 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22540 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22541 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22542 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22543 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22544 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22545 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22546 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22548 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22549 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22550 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22551 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22552 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22553 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22554 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22556 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22557 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22558 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22559 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22560 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22561 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22562 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22564 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22565 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22566 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22567 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22568 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22569 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22570 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22572 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22573 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22574 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22575 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22576 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22577 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22578 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22580 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22581 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22582 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22583 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22584 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22585 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22586 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22587 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22589 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22590 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22591 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22592 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22593 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22594 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22595 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22596 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22598 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22599 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22600 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22601 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22602 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22603 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22604 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22605 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22608 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
22609 in the current target ISA to allow the user to compile particular modules
22610 with different target specific options that differ from the command line
22613 ix86_init_mmx_sse_builtins (void)
22615 const struct builtin_description * d;
22618 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22619 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22620 tree V1DI_type_node
22621 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22622 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22623 tree V2DI_type_node
22624 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22625 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22626 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22627 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22628 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22629 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22630 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22632 tree pchar_type_node = build_pointer_type (char_type_node);
22633 tree pcchar_type_node
22634 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22635 tree pfloat_type_node = build_pointer_type (float_type_node);
22636 tree pcfloat_type_node
22637 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22638 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22639 tree pcv2sf_type_node
22640 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22641 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22642 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22645 tree int_ftype_v4sf_v4sf
22646 = build_function_type_list (integer_type_node,
22647 V4SF_type_node, V4SF_type_node, NULL_TREE);
22648 tree v4si_ftype_v4sf_v4sf
22649 = build_function_type_list (V4SI_type_node,
22650 V4SF_type_node, V4SF_type_node, NULL_TREE);
22651 /* MMX/SSE/integer conversions. */
22652 tree int_ftype_v4sf
22653 = build_function_type_list (integer_type_node,
22654 V4SF_type_node, NULL_TREE);
22655 tree int64_ftype_v4sf
22656 = build_function_type_list (long_long_integer_type_node,
22657 V4SF_type_node, NULL_TREE);
22658 tree int_ftype_v8qi
22659 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22660 tree v4sf_ftype_v4sf_int
22661 = build_function_type_list (V4SF_type_node,
22662 V4SF_type_node, integer_type_node, NULL_TREE);
22663 tree v4sf_ftype_v4sf_int64
22664 = build_function_type_list (V4SF_type_node,
22665 V4SF_type_node, long_long_integer_type_node,
22667 tree v4sf_ftype_v4sf_v2si
22668 = build_function_type_list (V4SF_type_node,
22669 V4SF_type_node, V2SI_type_node, NULL_TREE);
22671 /* Miscellaneous. */
22672 tree v8qi_ftype_v4hi_v4hi
22673 = build_function_type_list (V8QI_type_node,
22674 V4HI_type_node, V4HI_type_node, NULL_TREE);
22675 tree v4hi_ftype_v2si_v2si
22676 = build_function_type_list (V4HI_type_node,
22677 V2SI_type_node, V2SI_type_node, NULL_TREE);
22678 tree v4sf_ftype_v4sf_v4sf_int
22679 = build_function_type_list (V4SF_type_node,
22680 V4SF_type_node, V4SF_type_node,
22681 integer_type_node, NULL_TREE);
22682 tree v2si_ftype_v4hi_v4hi
22683 = build_function_type_list (V2SI_type_node,
22684 V4HI_type_node, V4HI_type_node, NULL_TREE);
22685 tree v4hi_ftype_v4hi_int
22686 = build_function_type_list (V4HI_type_node,
22687 V4HI_type_node, integer_type_node, NULL_TREE);
22688 tree v2si_ftype_v2si_int
22689 = build_function_type_list (V2SI_type_node,
22690 V2SI_type_node, integer_type_node, NULL_TREE);
22691 tree v1di_ftype_v1di_int
22692 = build_function_type_list (V1DI_type_node,
22693 V1DI_type_node, integer_type_node, NULL_TREE);
22695 tree void_ftype_void
22696 = build_function_type (void_type_node, void_list_node);
22697 tree void_ftype_unsigned
22698 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22699 tree void_ftype_unsigned_unsigned
22700 = build_function_type_list (void_type_node, unsigned_type_node,
22701 unsigned_type_node, NULL_TREE);
22702 tree void_ftype_pcvoid_unsigned_unsigned
22703 = build_function_type_list (void_type_node, const_ptr_type_node,
22704 unsigned_type_node, unsigned_type_node,
22706 tree unsigned_ftype_void
22707 = build_function_type (unsigned_type_node, void_list_node);
22708 tree v2si_ftype_v4sf
22709 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22710 /* Loads/stores. */
22711 tree void_ftype_v8qi_v8qi_pchar
22712 = build_function_type_list (void_type_node,
22713 V8QI_type_node, V8QI_type_node,
22714 pchar_type_node, NULL_TREE);
22715 tree v4sf_ftype_pcfloat
22716 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22717 tree v4sf_ftype_v4sf_pcv2sf
22718 = build_function_type_list (V4SF_type_node,
22719 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22720 tree void_ftype_pv2sf_v4sf
22721 = build_function_type_list (void_type_node,
22722 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22723 tree void_ftype_pfloat_v4sf
22724 = build_function_type_list (void_type_node,
22725 pfloat_type_node, V4SF_type_node, NULL_TREE);
22726 tree void_ftype_pdi_di
22727 = build_function_type_list (void_type_node,
22728 pdi_type_node, long_long_unsigned_type_node,
22730 tree void_ftype_pv2di_v2di
22731 = build_function_type_list (void_type_node,
22732 pv2di_type_node, V2DI_type_node, NULL_TREE);
22733 /* Normal vector unops. */
22734 tree v4sf_ftype_v4sf
22735 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22736 tree v16qi_ftype_v16qi
22737 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22738 tree v8hi_ftype_v8hi
22739 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22740 tree v4si_ftype_v4si
22741 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22742 tree v8qi_ftype_v8qi
22743 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22744 tree v4hi_ftype_v4hi
22745 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22747 /* Normal vector binops. */
22748 tree v4sf_ftype_v4sf_v4sf
22749 = build_function_type_list (V4SF_type_node,
22750 V4SF_type_node, V4SF_type_node, NULL_TREE);
22751 tree v8qi_ftype_v8qi_v8qi
22752 = build_function_type_list (V8QI_type_node,
22753 V8QI_type_node, V8QI_type_node, NULL_TREE);
22754 tree v4hi_ftype_v4hi_v4hi
22755 = build_function_type_list (V4HI_type_node,
22756 V4HI_type_node, V4HI_type_node, NULL_TREE);
22757 tree v2si_ftype_v2si_v2si
22758 = build_function_type_list (V2SI_type_node,
22759 V2SI_type_node, V2SI_type_node, NULL_TREE);
22760 tree v1di_ftype_v1di_v1di
22761 = build_function_type_list (V1DI_type_node,
22762 V1DI_type_node, V1DI_type_node, NULL_TREE);
22763 tree v1di_ftype_v1di_v1di_int
22764 = build_function_type_list (V1DI_type_node,
22765 V1DI_type_node, V1DI_type_node,
22766 integer_type_node, NULL_TREE);
22767 tree v2si_ftype_v2sf
22768 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22769 tree v2sf_ftype_v2si
22770 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22771 tree v2si_ftype_v2si
22772 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22773 tree v2sf_ftype_v2sf
22774 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22775 tree v2sf_ftype_v2sf_v2sf
22776 = build_function_type_list (V2SF_type_node,
22777 V2SF_type_node, V2SF_type_node, NULL_TREE);
22778 tree v2si_ftype_v2sf_v2sf
22779 = build_function_type_list (V2SI_type_node,
22780 V2SF_type_node, V2SF_type_node, NULL_TREE);
22781 tree pint_type_node = build_pointer_type (integer_type_node);
22782 tree pdouble_type_node = build_pointer_type (double_type_node);
22783 tree pcdouble_type_node = build_pointer_type (
22784 build_type_variant (double_type_node, 1, 0));
22785 tree int_ftype_v2df_v2df
22786 = build_function_type_list (integer_type_node,
22787 V2DF_type_node, V2DF_type_node, NULL_TREE);
22789 tree void_ftype_pcvoid
22790 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22791 tree v4sf_ftype_v4si
22792 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22793 tree v4si_ftype_v4sf
22794 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22795 tree v2df_ftype_v4si
22796 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22797 tree v4si_ftype_v2df
22798 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22799 tree v4si_ftype_v2df_v2df
22800 = build_function_type_list (V4SI_type_node,
22801 V2DF_type_node, V2DF_type_node, NULL_TREE);
22802 tree v2si_ftype_v2df
22803 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22804 tree v4sf_ftype_v2df
22805 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22806 tree v2df_ftype_v2si
22807 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22808 tree v2df_ftype_v4sf
22809 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22810 tree int_ftype_v2df
22811 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22812 tree int64_ftype_v2df
22813 = build_function_type_list (long_long_integer_type_node,
22814 V2DF_type_node, NULL_TREE);
22815 tree v2df_ftype_v2df_int
22816 = build_function_type_list (V2DF_type_node,
22817 V2DF_type_node, integer_type_node, NULL_TREE);
22818 tree v2df_ftype_v2df_int64
22819 = build_function_type_list (V2DF_type_node,
22820 V2DF_type_node, long_long_integer_type_node,
22822 tree v4sf_ftype_v4sf_v2df
22823 = build_function_type_list (V4SF_type_node,
22824 V4SF_type_node, V2DF_type_node, NULL_TREE);
22825 tree v2df_ftype_v2df_v4sf
22826 = build_function_type_list (V2DF_type_node,
22827 V2DF_type_node, V4SF_type_node, NULL_TREE);
22828 tree v2df_ftype_v2df_v2df_int
22829 = build_function_type_list (V2DF_type_node,
22830 V2DF_type_node, V2DF_type_node,
22833 tree v2df_ftype_v2df_pcdouble
22834 = build_function_type_list (V2DF_type_node,
22835 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22836 tree void_ftype_pdouble_v2df
22837 = build_function_type_list (void_type_node,
22838 pdouble_type_node, V2DF_type_node, NULL_TREE);
22839 tree void_ftype_pint_int
22840 = build_function_type_list (void_type_node,
22841 pint_type_node, integer_type_node, NULL_TREE);
22842 tree void_ftype_v16qi_v16qi_pchar
22843 = build_function_type_list (void_type_node,
22844 V16QI_type_node, V16QI_type_node,
22845 pchar_type_node, NULL_TREE);
22846 tree v2df_ftype_pcdouble
22847 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22848 tree v2df_ftype_v2df_v2df
22849 = build_function_type_list (V2DF_type_node,
22850 V2DF_type_node, V2DF_type_node, NULL_TREE);
22851 tree v16qi_ftype_v16qi_v16qi
22852 = build_function_type_list (V16QI_type_node,
22853 V16QI_type_node, V16QI_type_node, NULL_TREE);
22854 tree v8hi_ftype_v8hi_v8hi
22855 = build_function_type_list (V8HI_type_node,
22856 V8HI_type_node, V8HI_type_node, NULL_TREE);
22857 tree v4si_ftype_v4si_v4si
22858 = build_function_type_list (V4SI_type_node,
22859 V4SI_type_node, V4SI_type_node, NULL_TREE);
22860 tree v2di_ftype_v2di_v2di
22861 = build_function_type_list (V2DI_type_node,
22862 V2DI_type_node, V2DI_type_node, NULL_TREE);
22863 tree v2di_ftype_v2df_v2df
22864 = build_function_type_list (V2DI_type_node,
22865 V2DF_type_node, V2DF_type_node, NULL_TREE);
22866 tree v2df_ftype_v2df
22867 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22868 tree v2di_ftype_v2di_int
22869 = build_function_type_list (V2DI_type_node,
22870 V2DI_type_node, integer_type_node, NULL_TREE);
22871 tree v2di_ftype_v2di_v2di_int
22872 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22873 V2DI_type_node, integer_type_node, NULL_TREE);
22874 tree v4si_ftype_v4si_int
22875 = build_function_type_list (V4SI_type_node,
22876 V4SI_type_node, integer_type_node, NULL_TREE);
22877 tree v8hi_ftype_v8hi_int
22878 = build_function_type_list (V8HI_type_node,
22879 V8HI_type_node, integer_type_node, NULL_TREE);
22880 tree v4si_ftype_v8hi_v8hi
22881 = build_function_type_list (V4SI_type_node,
22882 V8HI_type_node, V8HI_type_node, NULL_TREE);
22883 tree v1di_ftype_v8qi_v8qi
22884 = build_function_type_list (V1DI_type_node,
22885 V8QI_type_node, V8QI_type_node, NULL_TREE);
22886 tree v1di_ftype_v2si_v2si
22887 = build_function_type_list (V1DI_type_node,
22888 V2SI_type_node, V2SI_type_node, NULL_TREE);
22889 tree v2di_ftype_v16qi_v16qi
22890 = build_function_type_list (V2DI_type_node,
22891 V16QI_type_node, V16QI_type_node, NULL_TREE);
22892 tree v2di_ftype_v4si_v4si
22893 = build_function_type_list (V2DI_type_node,
22894 V4SI_type_node, V4SI_type_node, NULL_TREE);
22895 tree int_ftype_v16qi
22896 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22897 tree v16qi_ftype_pcchar
22898 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22899 tree void_ftype_pchar_v16qi
22900 = build_function_type_list (void_type_node,
22901 pchar_type_node, V16QI_type_node, NULL_TREE);
22903 tree v2di_ftype_v2di_unsigned_unsigned
22904 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22905 unsigned_type_node, unsigned_type_node,
22907 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22908 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22909 unsigned_type_node, unsigned_type_node,
22911 tree v2di_ftype_v2di_v16qi
22912 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22914 tree v2df_ftype_v2df_v2df_v2df
22915 = build_function_type_list (V2DF_type_node,
22916 V2DF_type_node, V2DF_type_node,
22917 V2DF_type_node, NULL_TREE);
22918 tree v4sf_ftype_v4sf_v4sf_v4sf
22919 = build_function_type_list (V4SF_type_node,
22920 V4SF_type_node, V4SF_type_node,
22921 V4SF_type_node, NULL_TREE);
22922 tree v8hi_ftype_v16qi
22923 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22925 tree v4si_ftype_v16qi
22926 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22928 tree v2di_ftype_v16qi
22929 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22931 tree v4si_ftype_v8hi
22932 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22934 tree v2di_ftype_v8hi
22935 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22937 tree v2di_ftype_v4si
22938 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22940 tree v2di_ftype_pv2di
22941 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22943 tree v16qi_ftype_v16qi_v16qi_int
22944 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22945 V16QI_type_node, integer_type_node,
22947 tree v16qi_ftype_v16qi_v16qi_v16qi
22948 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22949 V16QI_type_node, V16QI_type_node,
22951 tree v8hi_ftype_v8hi_v8hi_int
22952 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22953 V8HI_type_node, integer_type_node,
22955 tree v4si_ftype_v4si_v4si_int
22956 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22957 V4SI_type_node, integer_type_node,
22959 tree int_ftype_v2di_v2di
22960 = build_function_type_list (integer_type_node,
22961 V2DI_type_node, V2DI_type_node,
22963 tree int_ftype_v16qi_int_v16qi_int_int
22964 = build_function_type_list (integer_type_node,
22971 tree v16qi_ftype_v16qi_int_v16qi_int_int
22972 = build_function_type_list (V16QI_type_node,
22979 tree int_ftype_v16qi_v16qi_int
22980 = build_function_type_list (integer_type_node,
22986 /* SSE5 instructions */
22987 tree v2di_ftype_v2di_v2di_v2di
22988 = build_function_type_list (V2DI_type_node,
22994 tree v4si_ftype_v4si_v4si_v4si
22995 = build_function_type_list (V4SI_type_node,
23001 tree v4si_ftype_v4si_v4si_v2di
23002 = build_function_type_list (V4SI_type_node,
23008 tree v8hi_ftype_v8hi_v8hi_v8hi
23009 = build_function_type_list (V8HI_type_node,
23015 tree v8hi_ftype_v8hi_v8hi_v4si
23016 = build_function_type_list (V8HI_type_node,
23022 tree v2df_ftype_v2df_v2df_v16qi
23023 = build_function_type_list (V2DF_type_node,
23029 tree v4sf_ftype_v4sf_v4sf_v16qi
23030 = build_function_type_list (V4SF_type_node,
23036 tree v2di_ftype_v2di_si
23037 = build_function_type_list (V2DI_type_node,
23042 tree v4si_ftype_v4si_si
23043 = build_function_type_list (V4SI_type_node,
23048 tree v8hi_ftype_v8hi_si
23049 = build_function_type_list (V8HI_type_node,
23054 tree v16qi_ftype_v16qi_si
23055 = build_function_type_list (V16QI_type_node,
23059 tree v4sf_ftype_v4hi
23060 = build_function_type_list (V4SF_type_node,
23064 tree v4hi_ftype_v4sf
23065 = build_function_type_list (V4HI_type_node,
23069 tree v2di_ftype_v2di
23070 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
23072 tree v16qi_ftype_v8hi_v8hi
23073 = build_function_type_list (V16QI_type_node,
23074 V8HI_type_node, V8HI_type_node,
23076 tree v8hi_ftype_v4si_v4si
23077 = build_function_type_list (V8HI_type_node,
23078 V4SI_type_node, V4SI_type_node,
23080 tree v8hi_ftype_v16qi_v16qi
23081 = build_function_type_list (V8HI_type_node,
23082 V16QI_type_node, V16QI_type_node,
23084 tree v4hi_ftype_v8qi_v8qi
23085 = build_function_type_list (V4HI_type_node,
23086 V8QI_type_node, V8QI_type_node,
23088 tree unsigned_ftype_unsigned_uchar
23089 = build_function_type_list (unsigned_type_node,
23090 unsigned_type_node,
23091 unsigned_char_type_node,
23093 tree unsigned_ftype_unsigned_ushort
23094 = build_function_type_list (unsigned_type_node,
23095 unsigned_type_node,
23096 short_unsigned_type_node,
23098 tree unsigned_ftype_unsigned_unsigned
23099 = build_function_type_list (unsigned_type_node,
23100 unsigned_type_node,
23101 unsigned_type_node,
23103 tree uint64_ftype_uint64_uint64
23104 = build_function_type_list (long_long_unsigned_type_node,
23105 long_long_unsigned_type_node,
23106 long_long_unsigned_type_node,
23108 tree float_ftype_float
23109 = build_function_type_list (float_type_node,
23114 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
23116 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
23118 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
23120 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
23122 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
23124 tree v8sf_ftype_v8sf
23125 = build_function_type_list (V8SF_type_node,
23128 tree v8si_ftype_v8sf
23129 = build_function_type_list (V8SI_type_node,
23132 tree v8sf_ftype_v8si
23133 = build_function_type_list (V8SF_type_node,
23136 tree v4si_ftype_v4df
23137 = build_function_type_list (V4SI_type_node,
23140 tree v4df_ftype_v4df
23141 = build_function_type_list (V4DF_type_node,
23144 tree v4df_ftype_v4si
23145 = build_function_type_list (V4DF_type_node,
23148 tree v4df_ftype_v4sf
23149 = build_function_type_list (V4DF_type_node,
23152 tree v4sf_ftype_v4df
23153 = build_function_type_list (V4SF_type_node,
23156 tree v8sf_ftype_v8sf_v8sf
23157 = build_function_type_list (V8SF_type_node,
23158 V8SF_type_node, V8SF_type_node,
23160 tree v4df_ftype_v4df_v4df
23161 = build_function_type_list (V4DF_type_node,
23162 V4DF_type_node, V4DF_type_node,
23164 tree v8sf_ftype_v8sf_int
23165 = build_function_type_list (V8SF_type_node,
23166 V8SF_type_node, integer_type_node,
23168 tree v4si_ftype_v8si_int
23169 = build_function_type_list (V4SI_type_node,
23170 V8SI_type_node, integer_type_node,
23172 tree v4df_ftype_v4df_int
23173 = build_function_type_list (V4DF_type_node,
23174 V4DF_type_node, integer_type_node,
23176 tree v4sf_ftype_v8sf_int
23177 = build_function_type_list (V4SF_type_node,
23178 V8SF_type_node, integer_type_node,
23180 tree v2df_ftype_v4df_int
23181 = build_function_type_list (V2DF_type_node,
23182 V4DF_type_node, integer_type_node,
23184 tree v8sf_ftype_v8sf_v8sf_int
23185 = build_function_type_list (V8SF_type_node,
23186 V8SF_type_node, V8SF_type_node,
23189 tree v8sf_ftype_v8sf_v8sf_v8sf
23190 = build_function_type_list (V8SF_type_node,
23191 V8SF_type_node, V8SF_type_node,
23194 tree v4df_ftype_v4df_v4df_v4df
23195 = build_function_type_list (V4DF_type_node,
23196 V4DF_type_node, V4DF_type_node,
23199 tree v8si_ftype_v8si_v8si_int
23200 = build_function_type_list (V8SI_type_node,
23201 V8SI_type_node, V8SI_type_node,
23204 tree v4df_ftype_v4df_v4df_int
23205 = build_function_type_list (V4DF_type_node,
23206 V4DF_type_node, V4DF_type_node,
23209 tree v8sf_ftype_pcfloat
23210 = build_function_type_list (V8SF_type_node,
23213 tree v4df_ftype_pcdouble
23214 = build_function_type_list (V4DF_type_node,
23215 pcdouble_type_node,
23217 tree pcv4sf_type_node
23218 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
23219 tree pcv2df_type_node
23220 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
23221 tree v8sf_ftype_pcv4sf
23222 = build_function_type_list (V8SF_type_node,
23225 tree v4df_ftype_pcv2df
23226 = build_function_type_list (V4DF_type_node,
23229 tree v32qi_ftype_pcchar
23230 = build_function_type_list (V32QI_type_node,
23233 tree void_ftype_pchar_v32qi
23234 = build_function_type_list (void_type_node,
23235 pchar_type_node, V32QI_type_node,
23237 tree v8si_ftype_v8si_v4si_int
23238 = build_function_type_list (V8SI_type_node,
23239 V8SI_type_node, V4SI_type_node,
23242 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
23243 tree void_ftype_pv4di_v4di
23244 = build_function_type_list (void_type_node,
23245 pv4di_type_node, V4DI_type_node,
23247 tree v8sf_ftype_v8sf_v4sf_int
23248 = build_function_type_list (V8SF_type_node,
23249 V8SF_type_node, V4SF_type_node,
23252 tree v4df_ftype_v4df_v2df_int
23253 = build_function_type_list (V4DF_type_node,
23254 V4DF_type_node, V2DF_type_node,
23257 tree void_ftype_pfloat_v8sf
23258 = build_function_type_list (void_type_node,
23259 pfloat_type_node, V8SF_type_node,
23261 tree void_ftype_pdouble_v4df
23262 = build_function_type_list (void_type_node,
23263 pdouble_type_node, V4DF_type_node,
23265 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
23266 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
23267 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
23268 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
23269 tree pcv8sf_type_node
23270 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
23271 tree pcv4df_type_node
23272 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
23273 tree v8sf_ftype_pcv8sf_v8sf
23274 = build_function_type_list (V8SF_type_node,
23275 pcv8sf_type_node, V8SF_type_node,
23277 tree v4df_ftype_pcv4df_v4df
23278 = build_function_type_list (V4DF_type_node,
23279 pcv4df_type_node, V4DF_type_node,
23281 tree v4sf_ftype_pcv4sf_v4sf
23282 = build_function_type_list (V4SF_type_node,
23283 pcv4sf_type_node, V4SF_type_node,
23285 tree v2df_ftype_pcv2df_v2df
23286 = build_function_type_list (V2DF_type_node,
23287 pcv2df_type_node, V2DF_type_node,
23289 tree void_ftype_pv8sf_v8sf_v8sf
23290 = build_function_type_list (void_type_node,
23291 pv8sf_type_node, V8SF_type_node,
23294 tree void_ftype_pv4df_v4df_v4df
23295 = build_function_type_list (void_type_node,
23296 pv4df_type_node, V4DF_type_node,
23299 tree void_ftype_pv4sf_v4sf_v4sf
23300 = build_function_type_list (void_type_node,
23301 pv4sf_type_node, V4SF_type_node,
23304 tree void_ftype_pv2df_v2df_v2df
23305 = build_function_type_list (void_type_node,
23306 pv2df_type_node, V2DF_type_node,
23309 tree v4df_ftype_v2df
23310 = build_function_type_list (V4DF_type_node,
23313 tree v8sf_ftype_v4sf
23314 = build_function_type_list (V8SF_type_node,
23317 tree v8si_ftype_v4si
23318 = build_function_type_list (V8SI_type_node,
23321 tree v2df_ftype_v4df
23322 = build_function_type_list (V2DF_type_node,
23325 tree v4sf_ftype_v8sf
23326 = build_function_type_list (V4SF_type_node,
23329 tree v4si_ftype_v8si
23330 = build_function_type_list (V4SI_type_node,
23333 tree int_ftype_v4df
23334 = build_function_type_list (integer_type_node,
23337 tree int_ftype_v8sf
23338 = build_function_type_list (integer_type_node,
23341 tree int_ftype_v8sf_v8sf
23342 = build_function_type_list (integer_type_node,
23343 V8SF_type_node, V8SF_type_node,
23345 tree int_ftype_v4di_v4di
23346 = build_function_type_list (integer_type_node,
23347 V4DI_type_node, V4DI_type_node,
23349 tree int_ftype_v4df_v4df
23350 = build_function_type_list (integer_type_node,
23351 V4DF_type_node, V4DF_type_node,
23353 tree v8sf_ftype_v8sf_v8si
23354 = build_function_type_list (V8SF_type_node,
23355 V8SF_type_node, V8SI_type_node,
23357 tree v4df_ftype_v4df_v4di
23358 = build_function_type_list (V4DF_type_node,
23359 V4DF_type_node, V4DI_type_node,
23361 tree v4sf_ftype_v4sf_v4si
23362 = build_function_type_list (V4SF_type_node,
23363 V4SF_type_node, V4SI_type_node, NULL_TREE);
23364 tree v2df_ftype_v2df_v2di
23365 = build_function_type_list (V2DF_type_node,
23366 V2DF_type_node, V2DI_type_node, NULL_TREE);
23370 /* Add all special builtins with variable number of operands. */
23371 for (i = 0, d = bdesc_special_args;
23372 i < ARRAY_SIZE (bdesc_special_args);
23380 switch ((enum ix86_special_builtin_type) d->flag)
23382 case VOID_FTYPE_VOID:
23383 type = void_ftype_void;
23385 case V32QI_FTYPE_PCCHAR:
23386 type = v32qi_ftype_pcchar;
23388 case V16QI_FTYPE_PCCHAR:
23389 type = v16qi_ftype_pcchar;
23391 case V8SF_FTYPE_PCV4SF:
23392 type = v8sf_ftype_pcv4sf;
23394 case V8SF_FTYPE_PCFLOAT:
23395 type = v8sf_ftype_pcfloat;
23397 case V4DF_FTYPE_PCV2DF:
23398 type = v4df_ftype_pcv2df;
23400 case V4DF_FTYPE_PCDOUBLE:
23401 type = v4df_ftype_pcdouble;
23403 case V4SF_FTYPE_PCFLOAT:
23404 type = v4sf_ftype_pcfloat;
23406 case V2DI_FTYPE_PV2DI:
23407 type = v2di_ftype_pv2di;
23409 case V2DF_FTYPE_PCDOUBLE:
23410 type = v2df_ftype_pcdouble;
23412 case V8SF_FTYPE_PCV8SF_V8SF:
23413 type = v8sf_ftype_pcv8sf_v8sf;
23415 case V4DF_FTYPE_PCV4DF_V4DF:
23416 type = v4df_ftype_pcv4df_v4df;
23418 case V4SF_FTYPE_V4SF_PCV2SF:
23419 type = v4sf_ftype_v4sf_pcv2sf;
23421 case V4SF_FTYPE_PCV4SF_V4SF:
23422 type = v4sf_ftype_pcv4sf_v4sf;
23424 case V2DF_FTYPE_V2DF_PCDOUBLE:
23425 type = v2df_ftype_v2df_pcdouble;
23427 case V2DF_FTYPE_PCV2DF_V2DF:
23428 type = v2df_ftype_pcv2df_v2df;
23430 case VOID_FTYPE_PV2SF_V4SF:
23431 type = void_ftype_pv2sf_v4sf;
23433 case VOID_FTYPE_PV4DI_V4DI:
23434 type = void_ftype_pv4di_v4di;
23436 case VOID_FTYPE_PV2DI_V2DI:
23437 type = void_ftype_pv2di_v2di;
23439 case VOID_FTYPE_PCHAR_V32QI:
23440 type = void_ftype_pchar_v32qi;
23442 case VOID_FTYPE_PCHAR_V16QI:
23443 type = void_ftype_pchar_v16qi;
23445 case VOID_FTYPE_PFLOAT_V8SF:
23446 type = void_ftype_pfloat_v8sf;
23448 case VOID_FTYPE_PFLOAT_V4SF:
23449 type = void_ftype_pfloat_v4sf;
23451 case VOID_FTYPE_PDOUBLE_V4DF:
23452 type = void_ftype_pdouble_v4df;
23454 case VOID_FTYPE_PDOUBLE_V2DF:
23455 type = void_ftype_pdouble_v2df;
23457 case VOID_FTYPE_PDI_DI:
23458 type = void_ftype_pdi_di;
23460 case VOID_FTYPE_PINT_INT:
23461 type = void_ftype_pint_int;
23463 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23464 type = void_ftype_pv8sf_v8sf_v8sf;
23466 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23467 type = void_ftype_pv4df_v4df_v4df;
23469 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23470 type = void_ftype_pv4sf_v4sf_v4sf;
23472 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23473 type = void_ftype_pv2df_v2df_v2df;
23476 gcc_unreachable ();
23479 def_builtin (d->mask, d->name, type, d->code);
23482 /* Add all builtins with variable number of operands. */
23483 for (i = 0, d = bdesc_args;
23484 i < ARRAY_SIZE (bdesc_args);
23492 switch ((enum ix86_builtin_type) d->flag)
23494 case FLOAT_FTYPE_FLOAT:
23495 type = float_ftype_float;
23497 case INT_FTYPE_V8SF_V8SF_PTEST:
23498 type = int_ftype_v8sf_v8sf;
23500 case INT_FTYPE_V4DI_V4DI_PTEST:
23501 type = int_ftype_v4di_v4di;
23503 case INT_FTYPE_V4DF_V4DF_PTEST:
23504 type = int_ftype_v4df_v4df;
23506 case INT_FTYPE_V4SF_V4SF_PTEST:
23507 type = int_ftype_v4sf_v4sf;
23509 case INT_FTYPE_V2DI_V2DI_PTEST:
23510 type = int_ftype_v2di_v2di;
23512 case INT_FTYPE_V2DF_V2DF_PTEST:
23513 type = int_ftype_v2df_v2df;
23515 case INT64_FTYPE_V4SF:
23516 type = int64_ftype_v4sf;
23518 case INT64_FTYPE_V2DF:
23519 type = int64_ftype_v2df;
23521 case INT_FTYPE_V16QI:
23522 type = int_ftype_v16qi;
23524 case INT_FTYPE_V8QI:
23525 type = int_ftype_v8qi;
23527 case INT_FTYPE_V8SF:
23528 type = int_ftype_v8sf;
23530 case INT_FTYPE_V4DF:
23531 type = int_ftype_v4df;
23533 case INT_FTYPE_V4SF:
23534 type = int_ftype_v4sf;
23536 case INT_FTYPE_V2DF:
23537 type = int_ftype_v2df;
23539 case V16QI_FTYPE_V16QI:
23540 type = v16qi_ftype_v16qi;
23542 case V8SI_FTYPE_V8SF:
23543 type = v8si_ftype_v8sf;
23545 case V8SI_FTYPE_V4SI:
23546 type = v8si_ftype_v4si;
23548 case V8HI_FTYPE_V8HI:
23549 type = v8hi_ftype_v8hi;
23551 case V8HI_FTYPE_V16QI:
23552 type = v8hi_ftype_v16qi;
23554 case V8QI_FTYPE_V8QI:
23555 type = v8qi_ftype_v8qi;
23557 case V8SF_FTYPE_V8SF:
23558 type = v8sf_ftype_v8sf;
23560 case V8SF_FTYPE_V8SI:
23561 type = v8sf_ftype_v8si;
23563 case V8SF_FTYPE_V4SF:
23564 type = v8sf_ftype_v4sf;
23566 case V4SI_FTYPE_V4DF:
23567 type = v4si_ftype_v4df;
23569 case V4SI_FTYPE_V4SI:
23570 type = v4si_ftype_v4si;
23572 case V4SI_FTYPE_V16QI:
23573 type = v4si_ftype_v16qi;
23575 case V4SI_FTYPE_V8SI:
23576 type = v4si_ftype_v8si;
23578 case V4SI_FTYPE_V8HI:
23579 type = v4si_ftype_v8hi;
23581 case V4SI_FTYPE_V4SF:
23582 type = v4si_ftype_v4sf;
23584 case V4SI_FTYPE_V2DF:
23585 type = v4si_ftype_v2df;
23587 case V4HI_FTYPE_V4HI:
23588 type = v4hi_ftype_v4hi;
23590 case V4DF_FTYPE_V4DF:
23591 type = v4df_ftype_v4df;
23593 case V4DF_FTYPE_V4SI:
23594 type = v4df_ftype_v4si;
23596 case V4DF_FTYPE_V4SF:
23597 type = v4df_ftype_v4sf;
23599 case V4DF_FTYPE_V2DF:
23600 type = v4df_ftype_v2df;
23602 case V4SF_FTYPE_V4SF:
23603 case V4SF_FTYPE_V4SF_VEC_MERGE:
23604 type = v4sf_ftype_v4sf;
23606 case V4SF_FTYPE_V8SF:
23607 type = v4sf_ftype_v8sf;
23609 case V4SF_FTYPE_V4SI:
23610 type = v4sf_ftype_v4si;
23612 case V4SF_FTYPE_V4DF:
23613 type = v4sf_ftype_v4df;
23615 case V4SF_FTYPE_V2DF:
23616 type = v4sf_ftype_v2df;
23618 case V2DI_FTYPE_V2DI:
23619 type = v2di_ftype_v2di;
23621 case V2DI_FTYPE_V16QI:
23622 type = v2di_ftype_v16qi;
23624 case V2DI_FTYPE_V8HI:
23625 type = v2di_ftype_v8hi;
23627 case V2DI_FTYPE_V4SI:
23628 type = v2di_ftype_v4si;
23630 case V2SI_FTYPE_V2SI:
23631 type = v2si_ftype_v2si;
23633 case V2SI_FTYPE_V4SF:
23634 type = v2si_ftype_v4sf;
23636 case V2SI_FTYPE_V2DF:
23637 type = v2si_ftype_v2df;
23639 case V2SI_FTYPE_V2SF:
23640 type = v2si_ftype_v2sf;
23642 case V2DF_FTYPE_V4DF:
23643 type = v2df_ftype_v4df;
23645 case V2DF_FTYPE_V4SF:
23646 type = v2df_ftype_v4sf;
23648 case V2DF_FTYPE_V2DF:
23649 case V2DF_FTYPE_V2DF_VEC_MERGE:
23650 type = v2df_ftype_v2df;
23652 case V2DF_FTYPE_V2SI:
23653 type = v2df_ftype_v2si;
23655 case V2DF_FTYPE_V4SI:
23656 type = v2df_ftype_v4si;
23658 case V2SF_FTYPE_V2SF:
23659 type = v2sf_ftype_v2sf;
23661 case V2SF_FTYPE_V2SI:
23662 type = v2sf_ftype_v2si;
23664 case V16QI_FTYPE_V16QI_V16QI:
23665 type = v16qi_ftype_v16qi_v16qi;
23667 case V16QI_FTYPE_V8HI_V8HI:
23668 type = v16qi_ftype_v8hi_v8hi;
23670 case V8QI_FTYPE_V8QI_V8QI:
23671 type = v8qi_ftype_v8qi_v8qi;
23673 case V8QI_FTYPE_V4HI_V4HI:
23674 type = v8qi_ftype_v4hi_v4hi;
23676 case V8HI_FTYPE_V8HI_V8HI:
23677 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23678 type = v8hi_ftype_v8hi_v8hi;
23680 case V8HI_FTYPE_V16QI_V16QI:
23681 type = v8hi_ftype_v16qi_v16qi;
23683 case V8HI_FTYPE_V4SI_V4SI:
23684 type = v8hi_ftype_v4si_v4si;
23686 case V8HI_FTYPE_V8HI_SI_COUNT:
23687 type = v8hi_ftype_v8hi_int;
23689 case V8SF_FTYPE_V8SF_V8SF:
23690 type = v8sf_ftype_v8sf_v8sf;
23692 case V8SF_FTYPE_V8SF_V8SI:
23693 type = v8sf_ftype_v8sf_v8si;
23695 case V4SI_FTYPE_V4SI_V4SI:
23696 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23697 type = v4si_ftype_v4si_v4si;
23699 case V4SI_FTYPE_V8HI_V8HI:
23700 type = v4si_ftype_v8hi_v8hi;
23702 case V4SI_FTYPE_V4SF_V4SF:
23703 type = v4si_ftype_v4sf_v4sf;
23705 case V4SI_FTYPE_V2DF_V2DF:
23706 type = v4si_ftype_v2df_v2df;
23708 case V4SI_FTYPE_V4SI_SI_COUNT:
23709 type = v4si_ftype_v4si_int;
23711 case V4HI_FTYPE_V4HI_V4HI:
23712 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23713 type = v4hi_ftype_v4hi_v4hi;
23715 case V4HI_FTYPE_V8QI_V8QI:
23716 type = v4hi_ftype_v8qi_v8qi;
23718 case V4HI_FTYPE_V2SI_V2SI:
23719 type = v4hi_ftype_v2si_v2si;
23721 case V4HI_FTYPE_V4HI_SI_COUNT:
23722 type = v4hi_ftype_v4hi_int;
23724 case V4DF_FTYPE_V4DF_V4DF:
23725 type = v4df_ftype_v4df_v4df;
23727 case V4DF_FTYPE_V4DF_V4DI:
23728 type = v4df_ftype_v4df_v4di;
23730 case V4SF_FTYPE_V4SF_V4SF:
23731 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23732 type = v4sf_ftype_v4sf_v4sf;
23734 case V4SF_FTYPE_V4SF_V4SI:
23735 type = v4sf_ftype_v4sf_v4si;
23737 case V4SF_FTYPE_V4SF_V2SI:
23738 type = v4sf_ftype_v4sf_v2si;
23740 case V4SF_FTYPE_V4SF_V2DF:
23741 type = v4sf_ftype_v4sf_v2df;
23743 case V4SF_FTYPE_V4SF_DI:
23744 type = v4sf_ftype_v4sf_int64;
23746 case V4SF_FTYPE_V4SF_SI:
23747 type = v4sf_ftype_v4sf_int;
23749 case V2DI_FTYPE_V2DI_V2DI:
23750 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23751 type = v2di_ftype_v2di_v2di;
23753 case V2DI_FTYPE_V16QI_V16QI:
23754 type = v2di_ftype_v16qi_v16qi;
23756 case V2DI_FTYPE_V4SI_V4SI:
23757 type = v2di_ftype_v4si_v4si;
23759 case V2DI_FTYPE_V2DI_V16QI:
23760 type = v2di_ftype_v2di_v16qi;
23762 case V2DI_FTYPE_V2DF_V2DF:
23763 type = v2di_ftype_v2df_v2df;
23765 case V2DI_FTYPE_V2DI_SI_COUNT:
23766 type = v2di_ftype_v2di_int;
23768 case V2SI_FTYPE_V2SI_V2SI:
23769 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23770 type = v2si_ftype_v2si_v2si;
23772 case V2SI_FTYPE_V4HI_V4HI:
23773 type = v2si_ftype_v4hi_v4hi;
23775 case V2SI_FTYPE_V2SF_V2SF:
23776 type = v2si_ftype_v2sf_v2sf;
23778 case V2SI_FTYPE_V2SI_SI_COUNT:
23779 type = v2si_ftype_v2si_int;
23781 case V2DF_FTYPE_V2DF_V2DF:
23782 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23783 type = v2df_ftype_v2df_v2df;
23785 case V2DF_FTYPE_V2DF_V4SF:
23786 type = v2df_ftype_v2df_v4sf;
23788 case V2DF_FTYPE_V2DF_V2DI:
23789 type = v2df_ftype_v2df_v2di;
23791 case V2DF_FTYPE_V2DF_DI:
23792 type = v2df_ftype_v2df_int64;
23794 case V2DF_FTYPE_V2DF_SI:
23795 type = v2df_ftype_v2df_int;
23797 case V2SF_FTYPE_V2SF_V2SF:
23798 type = v2sf_ftype_v2sf_v2sf;
23800 case V1DI_FTYPE_V1DI_V1DI:
23801 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23802 type = v1di_ftype_v1di_v1di;
23804 case V1DI_FTYPE_V8QI_V8QI:
23805 type = v1di_ftype_v8qi_v8qi;
23807 case V1DI_FTYPE_V2SI_V2SI:
23808 type = v1di_ftype_v2si_v2si;
23810 case V1DI_FTYPE_V1DI_SI_COUNT:
23811 type = v1di_ftype_v1di_int;
23813 case UINT64_FTYPE_UINT64_UINT64:
23814 type = uint64_ftype_uint64_uint64;
23816 case UINT_FTYPE_UINT_UINT:
23817 type = unsigned_ftype_unsigned_unsigned;
23819 case UINT_FTYPE_UINT_USHORT:
23820 type = unsigned_ftype_unsigned_ushort;
23822 case UINT_FTYPE_UINT_UCHAR:
23823 type = unsigned_ftype_unsigned_uchar;
23825 case V8HI_FTYPE_V8HI_INT:
23826 type = v8hi_ftype_v8hi_int;
23828 case V8SF_FTYPE_V8SF_INT:
23829 type = v8sf_ftype_v8sf_int;
23831 case V4SI_FTYPE_V4SI_INT:
23832 type = v4si_ftype_v4si_int;
23834 case V4SI_FTYPE_V8SI_INT:
23835 type = v4si_ftype_v8si_int;
23837 case V4HI_FTYPE_V4HI_INT:
23838 type = v4hi_ftype_v4hi_int;
23840 case V4DF_FTYPE_V4DF_INT:
23841 type = v4df_ftype_v4df_int;
23843 case V4SF_FTYPE_V4SF_INT:
23844 type = v4sf_ftype_v4sf_int;
23846 case V4SF_FTYPE_V8SF_INT:
23847 type = v4sf_ftype_v8sf_int;
23849 case V2DI_FTYPE_V2DI_INT:
23850 case V2DI2TI_FTYPE_V2DI_INT:
23851 type = v2di_ftype_v2di_int;
23853 case V2DF_FTYPE_V2DF_INT:
23854 type = v2df_ftype_v2df_int;
23856 case V2DF_FTYPE_V4DF_INT:
23857 type = v2df_ftype_v4df_int;
23859 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23860 type = v16qi_ftype_v16qi_v16qi_v16qi;
23862 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23863 type = v8sf_ftype_v8sf_v8sf_v8sf;
23865 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23866 type = v4df_ftype_v4df_v4df_v4df;
23868 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23869 type = v4sf_ftype_v4sf_v4sf_v4sf;
23871 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23872 type = v2df_ftype_v2df_v2df_v2df;
23874 case V16QI_FTYPE_V16QI_V16QI_INT:
23875 type = v16qi_ftype_v16qi_v16qi_int;
23877 case V8SI_FTYPE_V8SI_V8SI_INT:
23878 type = v8si_ftype_v8si_v8si_int;
23880 case V8SI_FTYPE_V8SI_V4SI_INT:
23881 type = v8si_ftype_v8si_v4si_int;
23883 case V8HI_FTYPE_V8HI_V8HI_INT:
23884 type = v8hi_ftype_v8hi_v8hi_int;
23886 case V8SF_FTYPE_V8SF_V8SF_INT:
23887 type = v8sf_ftype_v8sf_v8sf_int;
23889 case V8SF_FTYPE_V8SF_V4SF_INT:
23890 type = v8sf_ftype_v8sf_v4sf_int;
23892 case V4SI_FTYPE_V4SI_V4SI_INT:
23893 type = v4si_ftype_v4si_v4si_int;
23895 case V4DF_FTYPE_V4DF_V4DF_INT:
23896 type = v4df_ftype_v4df_v4df_int;
23898 case V4DF_FTYPE_V4DF_V2DF_INT:
23899 type = v4df_ftype_v4df_v2df_int;
23901 case V4SF_FTYPE_V4SF_V4SF_INT:
23902 type = v4sf_ftype_v4sf_v4sf_int;
23904 case V2DI_FTYPE_V2DI_V2DI_INT:
23905 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23906 type = v2di_ftype_v2di_v2di_int;
23908 case V2DF_FTYPE_V2DF_V2DF_INT:
23909 type = v2df_ftype_v2df_v2df_int;
23911 case V2DI_FTYPE_V2DI_UINT_UINT:
23912 type = v2di_ftype_v2di_unsigned_unsigned;
23914 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23915 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23917 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23918 type = v1di_ftype_v1di_v1di_int;
23921 gcc_unreachable ();
23924 def_builtin_const (d->mask, d->name, type, d->code);
23927 /* pcmpestr[im] insns. */
23928 for (i = 0, d = bdesc_pcmpestr;
23929 i < ARRAY_SIZE (bdesc_pcmpestr);
23932 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23933 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23935 ftype = int_ftype_v16qi_int_v16qi_int_int;
23936 def_builtin_const (d->mask, d->name, ftype, d->code);
23939 /* pcmpistr[im] insns. */
23940 for (i = 0, d = bdesc_pcmpistr;
23941 i < ARRAY_SIZE (bdesc_pcmpistr);
23944 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23945 ftype = v16qi_ftype_v16qi_v16qi_int;
23947 ftype = int_ftype_v16qi_v16qi_int;
23948 def_builtin_const (d->mask, d->name, ftype, d->code);
23951 /* comi/ucomi insns. */
23952 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23953 if (d->mask == OPTION_MASK_ISA_SSE2)
23954 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23956 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23959 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23960 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23962 /* SSE or 3DNow!A */
23963 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23966 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23968 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23969 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23972 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23973 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23976 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23977 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23978 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23979 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23980 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23981 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23984 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23987 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23988 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23990 /* Access to the vec_init patterns. */
23991 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23992 integer_type_node, NULL_TREE);
23993 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23995 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23996 short_integer_type_node,
23997 short_integer_type_node,
23998 short_integer_type_node, NULL_TREE);
23999 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
24001 ftype = build_function_type_list (V8QI_type_node, char_type_node,
24002 char_type_node, char_type_node,
24003 char_type_node, char_type_node,
24004 char_type_node, char_type_node,
24005 char_type_node, NULL_TREE);
24006 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
24008 /* Access to the vec_extract patterns. */
24009 ftype = build_function_type_list (double_type_node, V2DF_type_node,
24010 integer_type_node, NULL_TREE);
24011 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
24013 ftype = build_function_type_list (long_long_integer_type_node,
24014 V2DI_type_node, integer_type_node,
24016 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
24018 ftype = build_function_type_list (float_type_node, V4SF_type_node,
24019 integer_type_node, NULL_TREE);
24020 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
24022 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
24023 integer_type_node, NULL_TREE);
24024 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
24026 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
24027 integer_type_node, NULL_TREE);
24028 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
24030 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
24031 integer_type_node, NULL_TREE);
24032 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
24034 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
24035 integer_type_node, NULL_TREE);
24036 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
24038 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
24039 integer_type_node, NULL_TREE);
24040 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
24042 /* Access to the vec_set patterns. */
24043 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
24045 integer_type_node, NULL_TREE);
24046 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
24048 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
24050 integer_type_node, NULL_TREE);
24051 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
24053 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
24055 integer_type_node, NULL_TREE);
24056 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
24058 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
24060 integer_type_node, NULL_TREE);
24061 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
24063 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
24065 integer_type_node, NULL_TREE);
24066 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
24068 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
24070 integer_type_node, NULL_TREE);
24071 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
24073 /* Add SSE5 multi-arg argument instructions */
24074 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24076 tree mtype = NULL_TREE;
24081 switch ((enum multi_arg_type)d->flag)
24083 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
24084 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
24085 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
24086 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
24087 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
24088 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
24089 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
24090 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
24091 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
24092 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
24093 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
24094 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
24095 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
24096 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
24097 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
24098 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
24099 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
24100 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
24101 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
24102 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
24103 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
24104 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
24105 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
24106 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
24107 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
24108 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
24109 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
24110 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
24111 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
24112 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
24113 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
24114 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
24115 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
24116 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
24117 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
24118 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
24119 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
24120 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
24121 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
24122 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
24123 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
24124 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
24125 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
24126 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
24127 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
24128 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
24129 case MULTI_ARG_UNKNOWN:
24131 gcc_unreachable ();
24135 def_builtin_const (d->mask, d->name, mtype, d->code);
24139 /* Internal method for ix86_init_builtins. */
/* Register the calling-convention-specific varargs builtins
   (__builtin_ms_va_start/end/copy and __builtin_sysv_va_start/end/copy).
   Each declaration is tagged with the matching "ms_abi"/"sysv_abi"
   attribute so the right va_list flavor is used regardless of the
   current function's ABI.  */
24142 ix86_init_builtins_va_builtins_abi (void)
24144 tree ms_va_ref, sysv_va_ref;
24145 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
24146 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
24147 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
24148 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists that force the ABI on each builtin declaration.  */
24152 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
24153 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* Reference types through which the builtins receive the va_list.  */
24154 ms_va_ref = build_reference_type (ms_va_list_type_node);
24156 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Build the function types: va_end and va_copy take fixed arguments,
   va_start is a varargs function type.  */
24159 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24160 fnvoid_va_start_ms =
24161 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24162 fnvoid_va_end_sysv =
24163 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
24164 fnvoid_va_start_sysv =
24165 build_varargs_function_type_list (void_type_node, sysv_va_ref,
24167 fnvoid_va_copy_ms =
24168 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
24170 fnvoid_va_copy_sysv =
24171 build_function_type_list (void_type_node, sysv_va_ref,
24172 sysv_va_ref, NULL_TREE);
/* Register each builtin under the generic BUILT_IN_VA_* codes so the
   middle end expands them exactly like the standard va_* builtins.  */
24174 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
24175 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
24176 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
24177 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
24178 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
24179 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
24180 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
24181 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24182 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
24183 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24184 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
24185 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level target hook: register all ix86 builtin types and functions.
   Creates the __float80 and __float128 types, the TFmode support
   builtins (__builtin_infq, __builtin_huge_valq, __builtin_fabsq,
   __builtin_copysignq), then delegates to the MMX/SSE and per-ABI
   varargs initializers.  */
24189 ix86_init_builtins (void)
24191 tree float128_type_node = make_node (REAL_TYPE);
24194 /* The __float80 type. */
/* If long double is already the 80-bit extended type (XFmode), just
   register it under the __float80 name; otherwise lay out a fresh
   80-bit REAL_TYPE below.  */
24195 if (TYPE_MODE (long_double_type_node) == XFmode)
24196 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
24200 /* The __float80 type. */
24201 tree float80_type_node = make_node (REAL_TYPE);
24203 TYPE_PRECISION (float80_type_node) = 80;
24204 layout_type (float80_type_node);
24205 (*lang_hooks.types.register_builtin_type) (float80_type_node,
24209 /* The __float128 type. */
24210 TYPE_PRECISION (float128_type_node) = 128;
24211 layout_type (float128_type_node);
24212 (*lang_hooks.types.register_builtin_type) (float128_type_node,
24215 /* TFmode support builtins. */
24216 ftype = build_function_type (float128_type_node, void_list_node);
24217 decl = add_builtin_function ("__builtin_infq", ftype,
24218 IX86_BUILTIN_INFQ, BUILT_IN_MD,
/* Remember the decls so ix86_expand_builtin can find them later.  */
24220 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
24222 decl = add_builtin_function ("__builtin_huge_valq", ftype,
24223 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
24225 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
24227 /* We will expand them to normal call if SSE2 isn't available since
24228 they are used by libgcc. */
/* The library fallback names (__fabstf2, __copysigntf3) are attached
   here so the builtins degrade to libgcc calls.  */
24229 ftype = build_function_type_list (float128_type_node,
24230 float128_type_node,
24232 decl = add_builtin_function ("__builtin_fabsq", ftype,
24233 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
24234 "__fabstf2", NULL_TREE);
24235 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
24236 TREE_READONLY (decl) = 1;
24238 ftype = build_function_type_list (float128_type_node,
24239 float128_type_node,
24240 float128_type_node,
24242 decl = add_builtin_function ("__builtin_copysignq", ftype,
24243 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
24244 "__copysigntf3", NULL_TREE);
24245 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
24246 TREE_READONLY (decl) = 1;
24248 ix86_init_mmx_sse_builtins ();
24250 ix86_init_builtins_va_builtins_abi ();
24253 /* Errors in the source file can cause expand_expr to return const0_rtx
24254 where we expect a vector. To avoid crashing, use one of the vector
24255 clear instructions. */
24257 safe_vector_operand (rtx x, enum machine_mode mode)
/* Replace a scalar const0_rtx by the all-zero vector constant of MODE;
   any other operand is returned unchanged.  */
24259 if (x == const0_rtx)
24260 x = CONST0_RTX (mode);
24264 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin EXP via insn pattern ICODE, emitting the
   result into TARGET (or a fresh register when TARGET is unsuitable).
   Operand modes are taken from the insn_data table for ICODE.  */
24267 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
24270 tree arg0 = CALL_EXPR_ARG (exp, 0);
24271 tree arg1 = CALL_EXPR_ARG (exp, 1);
24272 rtx op0 = expand_normal (arg0);
24273 rtx op1 = expand_normal (arg1);
24274 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24275 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24276 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against error-recovery const0_rtx operands (see
   safe_vector_operand).  */
24278 if (VECTOR_MODE_P (mode0))
24279 op0 = safe_vector_operand (op0, mode0);
24280 if (VECTOR_MODE_P (mode1))
24281 op1 = safe_vector_operand (op1, mode1);
24283 if (optimize || !target
24284 || GET_MODE (target) != tmode
24285 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24286 target = gen_reg_rtx (tmode);
/* An SImode source for a TImode operand: widen through a V4SImode
   zero-extending load and reinterpret the low part as TImode.  */
24288 if (GET_MODE (op1) == SImode && mode1 == TImode)
24290 rtx x = gen_reg_rtx (V4SImode);
24291 emit_insn (gen_sse2_loadd (x, op1));
24292 op1 = gen_lowpart (TImode, x);
/* Force operands into registers when the insn predicates reject them.  */
24295 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
24296 op0 = copy_to_mode_reg (mode0, op0);
24297 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
24298 op1 = copy_to_mode_reg (mode1, op1);
24300 pat = GEN_FCN (icode) (target, op0, op1);
24309 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expand an SSE5 multi-argument builtin EXP via pattern ICODE.  M_TYPE
   classifies the argument count/shape; SUB_CODE carries the rtx
   comparison or sub-operation code for *_CMP/*_TF forms.  Returns the
   result rtx (TARGET or a fresh register).  */
24312 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
24313 enum multi_arg_type m_type,
24314 enum rtx_code sub_code)
24319 bool comparison_p = false;
24321 bool last_arg_constant = false;
24322 int num_memory = 0;
24325 enum machine_mode mode;
24328 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: set nargs and the comparison/immediate flags.  */
24332 case MULTI_ARG_3_SF:
24333 case MULTI_ARG_3_DF:
24334 case MULTI_ARG_3_DI:
24335 case MULTI_ARG_3_SI:
24336 case MULTI_ARG_3_SI_DI:
24337 case MULTI_ARG_3_HI:
24338 case MULTI_ARG_3_HI_SI:
24339 case MULTI_ARG_3_QI:
24340 case MULTI_ARG_3_PERMPS:
24341 case MULTI_ARG_3_PERMPD:
24345 case MULTI_ARG_2_SF:
24346 case MULTI_ARG_2_DF:
24347 case MULTI_ARG_2_DI:
24348 case MULTI_ARG_2_SI:
24349 case MULTI_ARG_2_HI:
24350 case MULTI_ARG_2_QI:
/* The *_IMM forms require the last argument to be a compile-time
   constant.  */
24354 case MULTI_ARG_2_DI_IMM:
24355 case MULTI_ARG_2_SI_IMM:
24356 case MULTI_ARG_2_HI_IMM:
24357 case MULTI_ARG_2_QI_IMM:
24359 last_arg_constant = true;
24362 case MULTI_ARG_1_SF:
24363 case MULTI_ARG_1_DF:
24364 case MULTI_ARG_1_DI:
24365 case MULTI_ARG_1_SI:
24366 case MULTI_ARG_1_HI:
24367 case MULTI_ARG_1_QI:
24368 case MULTI_ARG_1_SI_DI:
24369 case MULTI_ARG_1_HI_DI:
24370 case MULTI_ARG_1_HI_SI:
24371 case MULTI_ARG_1_QI_DI:
24372 case MULTI_ARG_1_QI_SI:
24373 case MULTI_ARG_1_QI_HI:
24374 case MULTI_ARG_1_PH2PS:
24375 case MULTI_ARG_1_PS2PH:
/* Comparison forms embed SUB_CODE as an extra comparison operand.  */
24379 case MULTI_ARG_2_SF_CMP:
24380 case MULTI_ARG_2_DF_CMP:
24381 case MULTI_ARG_2_DI_CMP:
24382 case MULTI_ARG_2_SI_CMP:
24383 case MULTI_ARG_2_HI_CMP:
24384 case MULTI_ARG_2_QI_CMP:
24386 comparison_p = true;
24389 case MULTI_ARG_2_SF_TF:
24390 case MULTI_ARG_2_DF_TF:
24391 case MULTI_ARG_2_DI_TF:
24392 case MULTI_ARG_2_SI_TF:
24393 case MULTI_ARG_2_HI_TF:
24394 case MULTI_ARG_2_QI_TF:
24399 case MULTI_ARG_UNKNOWN:
24401 gcc_unreachable ();
24404 if (optimize || !target
24405 || GET_MODE (target) != tmode
24406 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24407 target = gen_reg_rtx (tmode);
24409 gcc_assert (nargs <= 4);
/* Expand each source argument and legitimize it for its insn operand
   slot; ADJUST skips the comparison operand slot in *_CMP patterns.  */
24411 for (i = 0; i < nargs; i++)
24413 tree arg = CALL_EXPR_ARG (exp, i);
24414 rtx op = expand_normal (arg);
24415 int adjust = (comparison_p) ? 1 : 0;
24416 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24418 if (last_arg_constant && i == nargs-1)
24420 if (!CONST_INT_P (op))
24422 error ("last argument must be an immediate");
/* Error recovery: return a dummy register of the right mode.  */
24423 return gen_reg_rtx (tmode);
24428 if (VECTOR_MODE_P (mode))
24429 op = safe_vector_operand (op, mode);
24431 /* If we aren't optimizing, only allow one memory operand to be
24433 if (memory_operand (op, mode))
24436 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24439 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24441 op = force_reg (mode, op);
24445 args[i].mode = mode;
/* Emit the pattern; the shape of the call depends on nargs and on
   whether a comparison operand must be interposed.  */
24451 pat = GEN_FCN (icode) (target, args[0].op);
24456 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24457 GEN_INT ((int)sub_code));
24458 else if (! comparison_p)
24459 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24462 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24466 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24471 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24475 gcc_unreachable ();
24485 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24486 insns with vec_merge. */
/* Expand a one-argument builtin EXP whose pattern ICODE is a scalar
   operation merged into a vector (the source vector doubles as the
   merge operand, see op1 = op0 below).  */
24489 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24493 tree arg0 = CALL_EXPR_ARG (exp, 0);
24494 rtx op1, op0 = expand_normal (arg0);
24495 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24496 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24498 if (optimize || !target
24499 || GET_MODE (target) != tmode
24500 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24501 target = gen_reg_rtx (tmode);
24503 if (VECTOR_MODE_P (mode0))
24504 op0 = safe_vector_operand (op0, mode0);
24506 if ((optimize && !register_operand (op0, mode0))
24507 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24508 op0 = copy_to_mode_reg (mode0, op0);
/* op1 is the merge source; it must satisfy operand 2's predicate.  */
24511 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24512 op1 = copy_to_mode_reg (mode0, op1);
24514 pat = GEN_FCN (icode) (target, op0, op1);
24521 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE vector comparison builtin described by D.  The rtx
   comparison code is taken from D->comparison and passed to the insn
   as an explicit operand.  SWAP requests exchanging the two operands
   (for comparisons not directly available in hardware).  */
24524 ix86_expand_sse_compare (const struct builtin_description *d,
24525 tree exp, rtx target, bool swap)
24528 tree arg0 = CALL_EXPR_ARG (exp, 0);
24529 tree arg1 = CALL_EXPR_ARG (exp, 1);
24530 rtx op0 = expand_normal (arg0);
24531 rtx op1 = expand_normal (arg1);
24533 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24534 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24535 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24536 enum rtx_code comparison = d->comparison;
24538 if (VECTOR_MODE_P (mode0))
24539 op0 = safe_vector_operand (op0, mode0);
24540 if (VECTOR_MODE_P (mode1))
24541 op1 = safe_vector_operand (op1, mode1);
24543 /* Swap operands if we have a comparison that isn't available in
/* Copy op1 into a fresh register first so the swap cannot clobber a
   shared rtx.  */
24547 rtx tmp = gen_reg_rtx (mode1);
24548 emit_move_insn (tmp, op1);
24553 if (optimize || !target
24554 || GET_MODE (target) != tmode
24555 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24556 target = gen_reg_rtx (tmode);
24558 if ((optimize && !register_operand (op0, mode0))
24559 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24560 op0 = copy_to_mode_reg (mode0, op0);
24561 if ((optimize && !register_operand (op1, mode1))
24562 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24563 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison rtx itself is the insn's third operand.  */
24565 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24566 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24573 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comi/ucomi builtin described by D.  The insn sets the flags;
   the result is materialized by setting the low byte of an SImode
   register from the flags comparison D->comparison, and the SImode
   register (zero-extended) is returned.  */
24576 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24580 tree arg0 = CALL_EXPR_ARG (exp, 0);
24581 tree arg1 = CALL_EXPR_ARG (exp, 1);
24582 rtx op0 = expand_normal (arg0);
24583 rtx op1 = expand_normal (arg1);
24584 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24585 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24586 enum rtx_code comparison = d->comparison;
24588 if (VECTOR_MODE_P (mode0))
24589 op0 = safe_vector_operand (op0, mode0);
24590 if (VECTOR_MODE_P (mode1))
24591 op1 = safe_vector_operand (op1, mode1);
24593 /* Swap operands if we have a comparison that isn't available in
24595 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Zero the full SImode result first, then write only its low byte via
   a QImode STRICT_LOW_PART subreg.  */
24602 target = gen_reg_rtx (SImode);
24603 emit_move_insn (target, const0_rtx);
24604 target = gen_rtx_SUBREG (QImode, target, 0);
24606 if ((optimize && !register_operand (op0, mode0))
24607 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24608 op0 = copy_to_mode_reg (mode0, op0);
24609 if ((optimize && !register_operand (op1, mode1))
24610 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24611 op1 = copy_to_mode_reg (mode1, op1);
24613 pat = GEN_FCN (d->icode) (op0, op1);
24617 emit_insn (gen_rtx_SET (VOIDmode,
24618 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24619 gen_rtx_fmt_ee (comparison, QImode,
/* Return the underlying SImode register, not the QImode subreg.  */
24623 return SUBREG_REG (target);
24626 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a ptest builtin described by D.  Like the comi expander: emit
   the flag-setting ptest insn, then set the low byte of a zeroed
   SImode register from D->comparison and return that register.  */
24629 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24633 tree arg0 = CALL_EXPR_ARG (exp, 0);
24634 tree arg1 = CALL_EXPR_ARG (exp, 1);
24635 rtx op0 = expand_normal (arg0);
24636 rtx op1 = expand_normal (arg1);
24637 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24638 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24639 enum rtx_code comparison = d->comparison;
24641 if (VECTOR_MODE_P (mode0))
24642 op0 = safe_vector_operand (op0, mode0);
24643 if (VECTOR_MODE_P (mode1))
24644 op1 = safe_vector_operand (op1, mode1);
/* Zero the result, then write only its low byte from the flags.  */
24646 target = gen_reg_rtx (SImode);
24647 emit_move_insn (target, const0_rtx);
24648 target = gen_rtx_SUBREG (QImode, target, 0);
24650 if ((optimize && !register_operand (op0, mode0))
24651 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24652 op0 = copy_to_mode_reg (mode0, op0);
24653 if ((optimize && !register_operand (op1, mode1))
24654 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24655 op1 = copy_to_mode_reg (mode1, op1);
24657 pat = GEN_FCN (d->icode) (op0, op1);
24661 emit_insn (gen_rtx_SET (VOIDmode,
24662 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24663 gen_rtx_fmt_ee (comparison, QImode,
24667 return SUBREG_REG (target);
24670 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expand a five-argument SSE4.2 pcmpestri/pcmpestrm builtin described
   by D.  The pattern has two outputs (index and mask); whichever one
   the builtin does not return is written to a scratch register.  For
   flag-testing variants (EQ against the flags register encoded in
   D->flag) the result is a 0/1 value built in an SImode register.  */
24673 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24674 tree exp, rtx target)
24677 tree arg0 = CALL_EXPR_ARG (exp, 0);
24678 tree arg1 = CALL_EXPR_ARG (exp, 1);
24679 tree arg2 = CALL_EXPR_ARG (exp, 2);
24680 tree arg3 = CALL_EXPR_ARG (exp, 3);
24681 tree arg4 = CALL_EXPR_ARG (exp, 4);
24682 rtx scratch0, scratch1;
24683 rtx op0 = expand_normal (arg0);
24684 rtx op1 = expand_normal (arg1);
24685 rtx op2 = expand_normal (arg2);
24686 rtx op3 = expand_normal (arg3);
24687 rtx op4 = expand_normal (arg4);
24688 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* Operand modes: two outputs, two vector inputs, two explicit lengths,
   one immediate control byte.  */
24690 tmode0 = insn_data[d->icode].operand[0].mode;
24691 tmode1 = insn_data[d->icode].operand[1].mode;
24692 modev2 = insn_data[d->icode].operand[2].mode;
24693 modei3 = insn_data[d->icode].operand[3].mode;
24694 modev4 = insn_data[d->icode].operand[4].mode;
24695 modei5 = insn_data[d->icode].operand[5].mode;
24696 modeimm = insn_data[d->icode].operand[6].mode;
24698 if (VECTOR_MODE_P (modev2))
24699 op0 = safe_vector_operand (op0, modev2);
24700 if (VECTOR_MODE_P (modev4))
24701 op2 = safe_vector_operand (op2, modev4);
24703 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24704 op0 = copy_to_mode_reg (modev2, op0);
24705 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24706 op1 = copy_to_mode_reg (modei3, op1);
24707 if ((optimize && !register_operand (op2, modev4))
24708 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24709 op2 = copy_to_mode_reg (modev4, op2);
24710 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24711 op3 = copy_to_mode_reg (modei5, op3);
/* The control argument must be a constant fitting the immediate
   predicate.  */
24713 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24715 error ("the fifth argument must be an 8-bit immediate");
24719 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24721 if (optimize || !target
24722 || GET_MODE (target) != tmode0
24723 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24724 target = gen_reg_rtx (tmode0);
24726 scratch1 = gen_reg_rtx (tmode1);
24728 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24730 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24732 if (optimize || !target
24733 || GET_MODE (target) != tmode1
24734 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24735 target = gen_reg_rtx (tmode1);
24737 scratch0 = gen_reg_rtx (tmode0);
24739 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-testing variant: both pattern outputs go to scratches.  */
24743 gcc_assert (d->flag);
24745 scratch0 = gen_reg_rtx (tmode0);
24746 scratch1 = gen_reg_rtx (tmode1);
24748 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Build the boolean result from the flags register D->flag.  */
24758 target = gen_reg_rtx (SImode);
24759 emit_move_insn (target, const0_rtx);
24760 target = gen_rtx_SUBREG (QImode, target, 0);
24763 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24764 gen_rtx_fmt_ee (EQ, QImode,
24765 gen_rtx_REG ((enum machine_mode) d->flag,
24768 return SUBREG_REG (target);
24775 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expand a three-argument SSE4.2 pcmpistri/pcmpistrm builtin described
   by D (implicit-length string compare).  Mirrors
   ix86_expand_sse_pcmpestr but without the two explicit length
   operands.  */
24778 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24779 tree exp, rtx target)
24782 tree arg0 = CALL_EXPR_ARG (exp, 0);
24783 tree arg1 = CALL_EXPR_ARG (exp, 1);
24784 tree arg2 = CALL_EXPR_ARG (exp, 2);
24785 rtx scratch0, scratch1;
24786 rtx op0 = expand_normal (arg0);
24787 rtx op1 = expand_normal (arg1);
24788 rtx op2 = expand_normal (arg2);
24789 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Operand modes: two outputs, two vector inputs, one immediate.  */
24791 tmode0 = insn_data[d->icode].operand[0].mode;
24792 tmode1 = insn_data[d->icode].operand[1].mode;
24793 modev2 = insn_data[d->icode].operand[2].mode;
24794 modev3 = insn_data[d->icode].operand[3].mode;
24795 modeimm = insn_data[d->icode].operand[4].mode;
24797 if (VECTOR_MODE_P (modev2))
24798 op0 = safe_vector_operand (op0, modev2);
24799 if (VECTOR_MODE_P (modev3))
24800 op1 = safe_vector_operand (op1, modev3);
24802 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24803 op0 = copy_to_mode_reg (modev2, op0);
24804 if ((optimize && !register_operand (op1, modev3))
24805 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24806 op1 = copy_to_mode_reg (modev3, op1);
/* The control argument must be a constant fitting the immediate
   predicate.  */
24808 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24810 error ("the third argument must be an 8-bit immediate")
24814 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24816 if (optimize || !target
24817 || GET_MODE (target) != tmode0
24818 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24819 target = gen_reg_rtx (tmode0);
24821 scratch1 = gen_reg_rtx (tmode1);
24823 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24825 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24827 if (optimize || !target
24828 || GET_MODE (target) != tmode1
24829 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24830 target = gen_reg_rtx (tmode1);
24832 scratch0 = gen_reg_rtx (tmode0);
24834 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-testing variant: both pattern outputs go to scratches.  */
24838 gcc_assert (d->flag);
24840 scratch0 = gen_reg_rtx (tmode0);
24841 scratch1 = gen_reg_rtx (tmode1);
24843 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Build the boolean result from the flags register D->flag.  */
24853 target = gen_reg_rtx (SImode);
24854 emit_move_insn (target, const0_rtx);
24855 target = gen_rtx_SUBREG (QImode, target, 0);
24858 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24859 gen_rtx_fmt_ee (EQ, QImode,
24860 gen_rtx_REG ((enum machine_mode) d->flag,
24863 return SUBREG_REG (target);
24869 /* Subroutine of ix86_expand_builtin to take care of insns with
24870 variable number of operands. */
24873 ix86_expand_args_builtin (const struct builtin_description *d,
24874 tree exp, rtx target)
24876 rtx pat, real_target;
24877 unsigned int i, nargs;
24878 unsigned int nargs_constant = 0;
24879 int num_memory = 0;
24883 enum machine_mode mode;
24885 bool last_arg_count = false;
24886 enum insn_code icode = d->icode;
24887 const struct insn_data *insn_p = &insn_data[icode];
24888 enum machine_mode tmode = insn_p->operand[0].mode;
24889 enum machine_mode rmode = VOIDmode;
24891 enum rtx_code comparison = d->comparison;
24893 switch ((enum ix86_builtin_type) d->flag)
24895 case INT_FTYPE_V8SF_V8SF_PTEST:
24896 case INT_FTYPE_V4DI_V4DI_PTEST:
24897 case INT_FTYPE_V4DF_V4DF_PTEST:
24898 case INT_FTYPE_V4SF_V4SF_PTEST:
24899 case INT_FTYPE_V2DI_V2DI_PTEST:
24900 case INT_FTYPE_V2DF_V2DF_PTEST:
24901 return ix86_expand_sse_ptest (d, exp, target);
24902 case FLOAT128_FTYPE_FLOAT128:
24903 case FLOAT_FTYPE_FLOAT:
24904 case INT64_FTYPE_V4SF:
24905 case INT64_FTYPE_V2DF:
24906 case INT_FTYPE_V16QI:
24907 case INT_FTYPE_V8QI:
24908 case INT_FTYPE_V8SF:
24909 case INT_FTYPE_V4DF:
24910 case INT_FTYPE_V4SF:
24911 case INT_FTYPE_V2DF:
24912 case V16QI_FTYPE_V16QI:
24913 case V8SI_FTYPE_V8SF:
24914 case V8SI_FTYPE_V4SI:
24915 case V8HI_FTYPE_V8HI:
24916 case V8HI_FTYPE_V16QI:
24917 case V8QI_FTYPE_V8QI:
24918 case V8SF_FTYPE_V8SF:
24919 case V8SF_FTYPE_V8SI:
24920 case V8SF_FTYPE_V4SF:
24921 case V4SI_FTYPE_V4SI:
24922 case V4SI_FTYPE_V16QI:
24923 case V4SI_FTYPE_V4SF:
24924 case V4SI_FTYPE_V8SI:
24925 case V4SI_FTYPE_V8HI:
24926 case V4SI_FTYPE_V4DF:
24927 case V4SI_FTYPE_V2DF:
24928 case V4HI_FTYPE_V4HI:
24929 case V4DF_FTYPE_V4DF:
24930 case V4DF_FTYPE_V4SI:
24931 case V4DF_FTYPE_V4SF:
24932 case V4DF_FTYPE_V2DF:
24933 case V4SF_FTYPE_V4SF:
24934 case V4SF_FTYPE_V4SI:
24935 case V4SF_FTYPE_V8SF:
24936 case V4SF_FTYPE_V4DF:
24937 case V4SF_FTYPE_V2DF:
24938 case V2DI_FTYPE_V2DI:
24939 case V2DI_FTYPE_V16QI:
24940 case V2DI_FTYPE_V8HI:
24941 case V2DI_FTYPE_V4SI:
24942 case V2DF_FTYPE_V2DF:
24943 case V2DF_FTYPE_V4SI:
24944 case V2DF_FTYPE_V4DF:
24945 case V2DF_FTYPE_V4SF:
24946 case V2DF_FTYPE_V2SI:
24947 case V2SI_FTYPE_V2SI:
24948 case V2SI_FTYPE_V4SF:
24949 case V2SI_FTYPE_V2SF:
24950 case V2SI_FTYPE_V2DF:
24951 case V2SF_FTYPE_V2SF:
24952 case V2SF_FTYPE_V2SI:
24955 case V4SF_FTYPE_V4SF_VEC_MERGE:
24956 case V2DF_FTYPE_V2DF_VEC_MERGE:
24957 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24958 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24959 case V16QI_FTYPE_V16QI_V16QI:
24960 case V16QI_FTYPE_V8HI_V8HI:
24961 case V8QI_FTYPE_V8QI_V8QI:
24962 case V8QI_FTYPE_V4HI_V4HI:
24963 case V8HI_FTYPE_V8HI_V8HI:
24964 case V8HI_FTYPE_V16QI_V16QI:
24965 case V8HI_FTYPE_V4SI_V4SI:
24966 case V8SF_FTYPE_V8SF_V8SF:
24967 case V8SF_FTYPE_V8SF_V8SI:
24968 case V4SI_FTYPE_V4SI_V4SI:
24969 case V4SI_FTYPE_V8HI_V8HI:
24970 case V4SI_FTYPE_V4SF_V4SF:
24971 case V4SI_FTYPE_V2DF_V2DF:
24972 case V4HI_FTYPE_V4HI_V4HI:
24973 case V4HI_FTYPE_V8QI_V8QI:
24974 case V4HI_FTYPE_V2SI_V2SI:
24975 case V4DF_FTYPE_V4DF_V4DF:
24976 case V4DF_FTYPE_V4DF_V4DI:
24977 case V4SF_FTYPE_V4SF_V4SF:
24978 case V4SF_FTYPE_V4SF_V4SI:
24979 case V4SF_FTYPE_V4SF_V2SI:
24980 case V4SF_FTYPE_V4SF_V2DF:
24981 case V4SF_FTYPE_V4SF_DI:
24982 case V4SF_FTYPE_V4SF_SI:
24983 case V2DI_FTYPE_V2DI_V2DI:
24984 case V2DI_FTYPE_V16QI_V16QI:
24985 case V2DI_FTYPE_V4SI_V4SI:
24986 case V2DI_FTYPE_V2DI_V16QI:
24987 case V2DI_FTYPE_V2DF_V2DF:
24988 case V2SI_FTYPE_V2SI_V2SI:
24989 case V2SI_FTYPE_V4HI_V4HI:
24990 case V2SI_FTYPE_V2SF_V2SF:
24991 case V2DF_FTYPE_V2DF_V2DF:
24992 case V2DF_FTYPE_V2DF_V4SF:
24993 case V2DF_FTYPE_V2DF_V2DI:
24994 case V2DF_FTYPE_V2DF_DI:
24995 case V2DF_FTYPE_V2DF_SI:
24996 case V2SF_FTYPE_V2SF_V2SF:
24997 case V1DI_FTYPE_V1DI_V1DI:
24998 case V1DI_FTYPE_V8QI_V8QI:
24999 case V1DI_FTYPE_V2SI_V2SI:
25000 if (comparison == UNKNOWN)
25001 return ix86_expand_binop_builtin (icode, exp, target);
25004 case V4SF_FTYPE_V4SF_V4SF_SWAP:
25005 case V2DF_FTYPE_V2DF_V2DF_SWAP:
25006 gcc_assert (comparison != UNKNOWN);
25010 case V8HI_FTYPE_V8HI_V8HI_COUNT:
25011 case V8HI_FTYPE_V8HI_SI_COUNT:
25012 case V4SI_FTYPE_V4SI_V4SI_COUNT:
25013 case V4SI_FTYPE_V4SI_SI_COUNT:
25014 case V4HI_FTYPE_V4HI_V4HI_COUNT:
25015 case V4HI_FTYPE_V4HI_SI_COUNT:
25016 case V2DI_FTYPE_V2DI_V2DI_COUNT:
25017 case V2DI_FTYPE_V2DI_SI_COUNT:
25018 case V2SI_FTYPE_V2SI_V2SI_COUNT:
25019 case V2SI_FTYPE_V2SI_SI_COUNT:
25020 case V1DI_FTYPE_V1DI_V1DI_COUNT:
25021 case V1DI_FTYPE_V1DI_SI_COUNT:
25023 last_arg_count = true;
25025 case UINT64_FTYPE_UINT64_UINT64:
25026 case UINT_FTYPE_UINT_UINT:
25027 case UINT_FTYPE_UINT_USHORT:
25028 case UINT_FTYPE_UINT_UCHAR:
25031 case V2DI2TI_FTYPE_V2DI_INT:
25034 nargs_constant = 1;
25036 case V8HI_FTYPE_V8HI_INT:
25037 case V8SF_FTYPE_V8SF_INT:
25038 case V4SI_FTYPE_V4SI_INT:
25039 case V4SI_FTYPE_V8SI_INT:
25040 case V4HI_FTYPE_V4HI_INT:
25041 case V4DF_FTYPE_V4DF_INT:
25042 case V4SF_FTYPE_V4SF_INT:
25043 case V4SF_FTYPE_V8SF_INT:
25044 case V2DI_FTYPE_V2DI_INT:
25045 case V2DF_FTYPE_V2DF_INT:
25046 case V2DF_FTYPE_V4DF_INT:
25048 nargs_constant = 1;
25050 case V16QI_FTYPE_V16QI_V16QI_V16QI:
25051 case V8SF_FTYPE_V8SF_V8SF_V8SF:
25052 case V4DF_FTYPE_V4DF_V4DF_V4DF:
25053 case V4SF_FTYPE_V4SF_V4SF_V4SF:
25054 case V2DF_FTYPE_V2DF_V2DF_V2DF:
25057 case V16QI_FTYPE_V16QI_V16QI_INT:
25058 case V8HI_FTYPE_V8HI_V8HI_INT:
25059 case V8SI_FTYPE_V8SI_V8SI_INT:
25060 case V8SI_FTYPE_V8SI_V4SI_INT:
25061 case V8SF_FTYPE_V8SF_V8SF_INT:
25062 case V8SF_FTYPE_V8SF_V4SF_INT:
25063 case V4SI_FTYPE_V4SI_V4SI_INT:
25064 case V4DF_FTYPE_V4DF_V4DF_INT:
25065 case V4DF_FTYPE_V4DF_V2DF_INT:
25066 case V4SF_FTYPE_V4SF_V4SF_INT:
25067 case V2DI_FTYPE_V2DI_V2DI_INT:
25068 case V2DF_FTYPE_V2DF_V2DF_INT:
25070 nargs_constant = 1;
25072 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
25075 nargs_constant = 1;
25077 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
25080 nargs_constant = 1;
25082 case V2DI_FTYPE_V2DI_UINT_UINT:
25084 nargs_constant = 2;
25086 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
25088 nargs_constant = 2;
25091 gcc_unreachable ();
25094 gcc_assert (nargs <= ARRAY_SIZE (args));
25096 if (comparison != UNKNOWN)
25098 gcc_assert (nargs == 2);
25099 return ix86_expand_sse_compare (d, exp, target, swap);
25102 if (rmode == VOIDmode || rmode == tmode)
25106 || GET_MODE (target) != tmode
25107 || ! (*insn_p->operand[0].predicate) (target, tmode))
25108 target = gen_reg_rtx (tmode);
25109 real_target = target;
25113 target = gen_reg_rtx (rmode);
25114 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
25117 for (i = 0; i < nargs; i++)
25119 tree arg = CALL_EXPR_ARG (exp, i);
25120 rtx op = expand_normal (arg);
25121 enum machine_mode mode = insn_p->operand[i + 1].mode;
25122 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
25124 if (last_arg_count && (i + 1) == nargs)
25126 /* SIMD shift insns take either an 8-bit immediate or
25127 register as count. But builtin functions take int as
25128 count. If count doesn't match, we put it in register. */
25131 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
25132 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
25133 op = copy_to_reg (op);
25136 else if ((nargs - i) <= nargs_constant)
25141 case CODE_FOR_sse4_1_roundpd:
25142 case CODE_FOR_sse4_1_roundps:
25143 case CODE_FOR_sse4_1_roundsd:
25144 case CODE_FOR_sse4_1_roundss:
25145 case CODE_FOR_sse4_1_blendps:
25146 case CODE_FOR_avx_blendpd256:
25147 case CODE_FOR_avx_vpermilv4df:
25148 case CODE_FOR_avx_roundpd256:
25149 case CODE_FOR_avx_roundps256:
25150 error ("the last argument must be a 4-bit immediate");
25153 case CODE_FOR_sse4_1_blendpd:
25154 case CODE_FOR_avx_vpermilv2df:
25155 error ("the last argument must be a 2-bit immediate");
25158 case CODE_FOR_avx_vextractf128v4df:
25159 case CODE_FOR_avx_vextractf128v8sf:
25160 case CODE_FOR_avx_vextractf128v8si:
25161 case CODE_FOR_avx_vinsertf128v4df:
25162 case CODE_FOR_avx_vinsertf128v8sf:
25163 case CODE_FOR_avx_vinsertf128v8si:
25164 error ("the last argument must be a 1-bit immediate");
25167 case CODE_FOR_avx_cmpsdv2df3:
25168 case CODE_FOR_avx_cmpssv4sf3:
25169 case CODE_FOR_avx_cmppdv2df3:
25170 case CODE_FOR_avx_cmppsv4sf3:
25171 case CODE_FOR_avx_cmppdv4df3:
25172 case CODE_FOR_avx_cmppsv8sf3:
25173 error ("the last argument must be a 5-bit immediate");
25177 switch (nargs_constant)
25180 if ((nargs - i) == nargs_constant)
25182 error ("the next to last argument must be an 8-bit immediate");
25186 error ("the last argument must be an 8-bit immediate");
25189 gcc_unreachable ();
25196 if (VECTOR_MODE_P (mode))
25197 op = safe_vector_operand (op, mode);
25199 /* If we aren't optimizing, only allow one memory operand to
25201 if (memory_operand (op, mode))
25204 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
25206 if (optimize || !match || num_memory > 1)
25207 op = copy_to_mode_reg (mode, op);
25211 op = copy_to_reg (op);
25212 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
25217 args[i].mode = mode;
25223 pat = GEN_FCN (icode) (real_target, args[0].op);
25226 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
25229 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25233 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25234 args[2].op, args[3].op);
25237 gcc_unreachable ();
/* NOTE(review): this is a numbered-listing fragment; many original source
   lines are elided between the visible ones (braces, breaks, nargs/klass
   assignments are missing from this view).  Comments below describe only
   what the visible lines establish.  */
25247 /* Subroutine of ix86_expand_builtin to take care of special insns
25248    with variable number of operands. */
25251 ix86_expand_special_args_builtin (const struct builtin_description *d,
25252 				    tree exp, rtx target)
25256 unsigned int i, nargs, arg_adjust, memory;
25260 enum machine_mode mode;
25262 enum insn_code icode = d->icode;
25263 bool last_arg_constant = false;
25264 const struct insn_data *insn_p = &insn_data[icode];
25265 enum machine_mode tmode = insn_p->operand[0].mode;
/* klass records whether the builtin is a load (produces TARGET) or a
   store (first call argument is the destination address).  */
25266 enum { load, store } klass;
/* Dispatch on the builtin's special function type to set nargs/klass/memory
   (the assignments themselves are in elided lines).  */
25268 switch ((enum ix86_special_builtin_type) d->flag)
25270 case VOID_FTYPE_VOID:
25271 emit_insn (GEN_FCN (icode) (target));
/* Pure-load shapes: single pointer argument, vector result.  */
25273 case V2DI_FTYPE_PV2DI:
25274 case V32QI_FTYPE_PCCHAR:
25275 case V16QI_FTYPE_PCCHAR:
25276 case V8SF_FTYPE_PCV4SF:
25277 case V8SF_FTYPE_PCFLOAT:
25278 case V4SF_FTYPE_PCFLOAT:
25279 case V4DF_FTYPE_PCV2DF:
25280 case V4DF_FTYPE_PCDOUBLE:
25281 case V2DF_FTYPE_PCDOUBLE:
/* Pure-store shapes: pointer destination plus value argument.  */
25286 case VOID_FTYPE_PV2SF_V4SF:
25287 case VOID_FTYPE_PV4DI_V4DI:
25288 case VOID_FTYPE_PV2DI_V2DI:
25289 case VOID_FTYPE_PCHAR_V32QI:
25290 case VOID_FTYPE_PCHAR_V16QI:
25291 case VOID_FTYPE_PFLOAT_V8SF:
25292 case VOID_FTYPE_PFLOAT_V4SF:
25293 case VOID_FTYPE_PDOUBLE_V4DF:
25294 case VOID_FTYPE_PDOUBLE_V2DF:
25295 case VOID_FTYPE_PDI_DI:
25296 case VOID_FTYPE_PINT_INT:
25299 /* Reserve memory operand for target. */
25300 memory = ARRAY_SIZE (args);
/* Mixed shapes: register operand plus a memory operand.  */
25302 case V4SF_FTYPE_V4SF_PCV2SF:
25303 case V2DF_FTYPE_V2DF_PCDOUBLE:
25308 case V8SF_FTYPE_PCV8SF_V8SF:
25309 case V4DF_FTYPE_PCV4DF_V4DF:
25310 case V4SF_FTYPE_PCV4SF_V4SF:
25311 case V2DF_FTYPE_PCV2DF_V2DF:
/* Masked-store shapes (AVX maskstore).  */
25316 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25317 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25318 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25319 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25322 /* Reserve memory operand for target. */
25323 memory = ARRAY_SIZE (args);
25326 gcc_unreachable ();
25329 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, the first call argument is the destination address; TARGET
   must not have been supplied by the caller.  */
25331 if (klass == store)
25333 arg = CALL_EXPR_ARG (exp, 0);
25334 op = expand_normal (arg);
25335 gcc_assert (target == 0);
25336 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
/* For loads: reuse TARGET when it matches operand 0, else make a fresh reg
   (the leading condition of this || chain is in an elided line).  */
25344 || GET_MODE (target) != tmode
25345 || ! (*insn_p->operand[0].predicate) (target, tmode))
25346 target = gen_reg_rtx (tmode);
/* Expand each remaining call argument into the matching insn operand.  */
25349 for (i = 0; i < nargs; i++)
25351 enum machine_mode mode = insn_p->operand[i + 1].mode;
25354 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25355 op = expand_normal (arg);
25356 match = (*insn_p->operand[i + 1].predicate) (op, mode);
25358 if (last_arg_constant && (i + 1) == nargs)
25364 error ("the last argument must be an 8-bit immediate");
25372 /* This must be the memory operand. */
25373 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25374 gcc_assert (GET_MODE (op) == mode
25375 || GET_MODE (op) == VOIDmode);
25379 /* This must be register. */
25380 if (VECTOR_MODE_P (mode))
25381 op = safe_vector_operand (op, mode);
25383 gcc_assert (GET_MODE (op) == mode
25384 || GET_MODE (op) == VOIDmode);
25385 op = copy_to_mode_reg (mode, op);
25390 args[i].mode = mode;
/* Emit the pattern with the right arity (nargs switch is partly elided).  */
25396 pat = GEN_FCN (icode) (target, args[0].op);
25399 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25402 gcc_unreachable ();
/* Stores return 0 (no value); loads return the result register.  */
25408 return klass == store ? 0 : target;
/* NOTE(review): listing fragment — braces and the non-error return path
   are in elided lines.  */
25411 /* Return the integer constant in ARG. Constrain it to be in the range
25412 of the subparts of VEC_TYPE; issue an error if not. */
25415 get_element_number (tree vec_type, tree arg)
/* max is the highest valid lane index: subparts - 1.  */
25417 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject non-constant selectors and out-of-range constants; the comma
   expression both extracts the value into elt and range-checks it.  */
25419 if (!host_integerp (arg, 1)
25420 || (elt = tree_low_cst (arg, 1), elt > max))
25422 error ("selector must be an integer constant in the range 0..%wi", max);
/* NOTE(review): listing fragment — function braces and the final return
   are in elided lines.  */
25429 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25430 ix86_expand_vector_init. We DO have language-level syntax for this, in
25431 the form of (type){ init-list }. Except that since we can't place emms
25432 instructions from inside the compiler, we can't allow the use of MMX
25433 registers unless the user explicitly asks for it. So we do *not* define
25434 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25435 we have builtins invoked by mmintrin.h that gives us license to emit
25436 these sorts of instructions. */
25439 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25441 enum machine_mode tmode = TYPE_MODE (type);
25442 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25443 int i, n_elt = GET_MODE_NUNITS (tmode);
25444 rtvec v = rtvec_alloc (n_elt);
/* The builtin takes exactly one scalar argument per vector element.  */
25446 gcc_assert (VECTOR_MODE_P (tmode));
25447 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each element and narrow it to the vector's element mode.  */
25449 for (i = 0; i < n_elt; ++i)
25451 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25452 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
/* Reuse TARGET only when it is already a register of the right mode.  */
25455 if (!target || !register_operand (target, tmode))
25456 target = gen_reg_rtx (tmode);
25458 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
/* NOTE(review): listing fragment — braces, some declarations and the final
   return are in elided lines.  */
25462 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25463 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25464 had a language-level syntax for referencing vector elements. */
25467 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25469 enum machine_mode tmode, mode0;
/* arg0 is the source vector, arg1 the constant lane selector.  */
25474 arg0 = CALL_EXPR_ARG (exp, 0);
25475 arg1 = CALL_EXPR_ARG (exp, 1);
25477 op0 = expand_normal (arg0);
/* get_element_number validates the selector and issues an error if it is
   non-constant or out of range.  */
25478 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode = element mode, mode0 = whole-vector mode.  */
25480 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25481 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25482 gcc_assert (VECTOR_MODE_P (mode0));
25484 op0 = force_reg (mode0, op0);
25486 if (optimize || !target || !register_operand (target, tmode))
25487 target = gen_reg_rtx (tmode);
25489 ix86_expand_vector_extract (true, target, op0, elt);
/* NOTE(review): listing fragment — braces, the elt declaration and the
   final `return target;` are in elided lines.  */
25494 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25495 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25496 a language-level syntax for referencing vector elements. */
25499 ix86_expand_vec_set_builtin (tree exp)
25501 enum machine_mode tmode, mode1;
25502 tree arg0, arg1, arg2;
25504 rtx op0, op1, target;
/* arg0 = source vector, arg1 = new element value, arg2 = lane selector.  */
25506 arg0 = CALL_EXPR_ARG (exp, 0);
25507 arg1 = CALL_EXPR_ARG (exp, 1);
25508 arg2 = CALL_EXPR_ARG (exp, 2);
25510 tmode = TYPE_MODE (TREE_TYPE (arg0));
25511 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25512 gcc_assert (VECTOR_MODE_P (tmode));
25514 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25515 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25516 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Convert the scalar to the element mode if expansion produced another
   mode (sign-extension flag true == unsigned conversion allowed).  */
25518 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25519 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25521 op0 = force_reg (tmode, op0);
25522 op1 = force_reg (mode1, op1);
25524 /* OP0 is the source of these builtin functions and shouldn't be
25525 modified. Create a copy, use it and return it as target. */
25526 target = gen_reg_rtx (tmode);
25527 emit_move_insn (target, op0);
25528 ix86_expand_vector_set (true, target, op1, elt);
/* NOTE(review): listing fragment — the enclosing switch statement, several
   `break;`/`return` lines and braces are elided; only the visible cases and
   dispatch loops are documented here.  */
25533 /* Expand an expression EXP that calls a built-in function,
25534 with result going to TARGET if that's convenient
25535 (and in mode MODE if that's convenient).
25536 SUBTARGET may be used as the target for computing one of EXP's operands.
25537 IGNORE is nonzero if the value is to be ignored. */
25540 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25541 enum machine_mode mode ATTRIBUTE_UNUSED,
25542 int ignore ATTRIBUTE_UNUSED)
25544 const struct builtin_description *d;
25546 enum insn_code icode;
25547 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25548 tree arg0, arg1, arg2;
25549 rtx op0, op1, op2, pat;
25550 enum machine_mode mode0, mode1, mode2;
25551 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25553 /* Determine whether the builtin function is available under the current ISA.
25554 Originally the builtin was not created if it wasn't applicable to the
25555 current ISA based on the command line switches. With function specific
25556 options, we need to check in the context of the function making the call
25557 whether it is supported. */
25558 if (ix86_builtins_isa[fcode].isa
25559 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
/* Builtin not enabled for this function's ISA: report which -m option is
   missing and bail out.  */
25561 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25562 NULL, NULL, false);
25565 error ("%qE needs unknown isa option", fndecl);
25568 gcc_assert (opts != NULL);
25569 error ("%qE needs isa option %s", fndecl, opts);
/* Hand-expanded builtins that do not fit the table-driven helpers.  */
25577 case IX86_BUILTIN_MASKMOVQ:
25578 case IX86_BUILTIN_MASKMOVDQU:
25579 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25580 ? CODE_FOR_mmx_maskmovq
25581 : CODE_FOR_sse2_maskmovdqu);
25582 /* Note the arg order is different from the operand order. */
25583 arg1 = CALL_EXPR_ARG (exp, 0);
25584 arg2 = CALL_EXPR_ARG (exp, 1);
25585 arg0 = CALL_EXPR_ARG (exp, 2);
25586 op0 = expand_normal (arg0);
25587 op1 = expand_normal (arg1);
25588 op2 = expand_normal (arg2);
25589 mode0 = insn_data[icode].operand[0].mode;
25590 mode1 = insn_data[icode].operand[1].mode;
25591 mode2 = insn_data[icode].operand[2].mode;
/* op0 is the destination address; wrap it in a MEM of the data mode.  */
25593 op0 = force_reg (Pmode, op0);
25594 op0 = gen_rtx_MEM (mode1, op0);
25596 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25597 op0 = copy_to_mode_reg (mode0, op0);
25598 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25599 op1 = copy_to_mode_reg (mode1, op1);
25600 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25601 op2 = copy_to_mode_reg (mode2, op2);
25602 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR/STMXCSR go through a virtual stack slot.  */
25608 case IX86_BUILTIN_LDMXCSR:
25609 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25610 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25611 emit_move_insn (target, op0);
25612 emit_insn (gen_sse_ldmxcsr (target));
25615 case IX86_BUILTIN_STMXCSR:
25616 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25617 emit_insn (gen_sse_stmxcsr (target));
25618 return copy_to_mode_reg (SImode, target);
25620 case IX86_BUILTIN_CLFLUSH:
25621 arg0 = CALL_EXPR_ARG (exp, 0);
25622 op0 = expand_normal (arg0);
25623 icode = CODE_FOR_sse2_clflush;
25624 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25625 op0 = copy_to_mode_reg (Pmode, op0);
25627 emit_insn (gen_sse2_clflush (op0));
/* MONITOR: address in op0, extension/hint words in op1/op2.  */
25630 case IX86_BUILTIN_MONITOR:
25631 arg0 = CALL_EXPR_ARG (exp, 0);
25632 arg1 = CALL_EXPR_ARG (exp, 1);
25633 arg2 = CALL_EXPR_ARG (exp, 2);
25634 op0 = expand_normal (arg0);
25635 op1 = expand_normal (arg1);
25636 op2 = expand_normal (arg2);
25638 op0 = copy_to_mode_reg (Pmode, op0);
25640 op1 = copy_to_mode_reg (SImode, op1);
25642 op2 = copy_to_mode_reg (SImode, op2);
25643 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25646 case IX86_BUILTIN_MWAIT:
25647 arg0 = CALL_EXPR_ARG (exp, 0);
25648 arg1 = CALL_EXPR_ARG (exp, 1);
25649 op0 = expand_normal (arg0);
25650 op1 = expand_normal (arg1);
25652 op0 = copy_to_mode_reg (SImode, op0);
25654 op1 = copy_to_mode_reg (SImode, op1);
25655 emit_insn (gen_sse3_mwait (op0, op1));
/* MMX vec_init/vec_ext/vec_set builtins delegate to the helpers above.  */
25658 case IX86_BUILTIN_VEC_INIT_V2SI:
25659 case IX86_BUILTIN_VEC_INIT_V4HI:
25660 case IX86_BUILTIN_VEC_INIT_V8QI:
25661 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25663 case IX86_BUILTIN_VEC_EXT_V2DF:
25664 case IX86_BUILTIN_VEC_EXT_V2DI:
25665 case IX86_BUILTIN_VEC_EXT_V4SF:
25666 case IX86_BUILTIN_VEC_EXT_V4SI:
25667 case IX86_BUILTIN_VEC_EXT_V8HI:
25668 case IX86_BUILTIN_VEC_EXT_V2SI:
25669 case IX86_BUILTIN_VEC_EXT_V4HI:
25670 case IX86_BUILTIN_VEC_EXT_V16QI:
25671 return ix86_expand_vec_ext_builtin (exp, target);
25673 case IX86_BUILTIN_VEC_SET_V2DI:
25674 case IX86_BUILTIN_VEC_SET_V4SF:
25675 case IX86_BUILTIN_VEC_SET_V4SI:
25676 case IX86_BUILTIN_VEC_SET_V8HI:
25677 case IX86_BUILTIN_VEC_SET_V4HI:
25678 case IX86_BUILTIN_VEC_SET_V16QI:
25679 return ix86_expand_vec_set_builtin (exp);
/* __float128 infinity: materialize the constant from the pool.  */
25681 case IX86_BUILTIN_INFQ:
25682 case IX86_BUILTIN_HUGE_VALQ:
25684 REAL_VALUE_TYPE inf;
25688 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25690 tmp = validize_mem (force_const_mem (mode, tmp));
25693 target = gen_reg_rtx (mode);
25695 emit_move_insn (target, tmp);
/* Fall back to the table-driven expanders, searched in order.  */
25703 for (i = 0, d = bdesc_special_args;
25704 i < ARRAY_SIZE (bdesc_special_args);
25706 if (d->code == fcode)
25707 return ix86_expand_special_args_builtin (d, exp, target);
25709 for (i = 0, d = bdesc_args;
25710 i < ARRAY_SIZE (bdesc_args);
25712 if (d->code == fcode)
25715 case IX86_BUILTIN_FABSQ:
25716 case IX86_BUILTIN_COPYSIGNQ:
25718 /* Emit a normal call if SSE2 isn't available. */
25719 return expand_call (exp, target, ignore);
25721 return ix86_expand_args_builtin (d, exp, target);
25724 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25725 if (d->code == fcode)
25726 return ix86_expand_sse_comi (d, exp, target);
25728 for (i = 0, d = bdesc_pcmpestr;
25729 i < ARRAY_SIZE (bdesc_pcmpestr);
25731 if (d->code == fcode)
25732 return ix86_expand_sse_pcmpestr (d, exp, target);
25734 for (i = 0, d = bdesc_pcmpistr;
25735 i < ARRAY_SIZE (bdesc_pcmpistr);
25737 if (d->code == fcode)
25738 return ix86_expand_sse_pcmpistr (d, exp, target);
25740 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25741 if (d->code == fcode)
25742 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25743 (enum multi_arg_type)d->flag,
/* Every fcode must have matched one of the tables above.  */
25746 gcc_unreachable ();
/* NOTE(review): listing fragment — the switch header, `break;`s and the
   final NULL_TREE return are in elided lines.  */
25749 /* Returns a function decl for a vectorized version of the builtin function
25750 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25751 if it is not available. */
25754 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25757 enum machine_mode in_mode, out_mode;
/* Both sides of the mapping must be vector types.  */
25760 if (TREE_CODE (type_out) != VECTOR_TYPE
25761 || TREE_CODE (type_in) != VECTOR_TYPE)
25764 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25765 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25766 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25767 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Direct SSE mappings for the few builtins with exact vector forms.  */
25771 case BUILT_IN_SQRT:
25772 if (out_mode == DFmode && out_n == 2
25773 && in_mode == DFmode && in_n == 2)
25774 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25777 case BUILT_IN_SQRTF:
25778 if (out_mode == SFmode && out_n == 4
25779 && in_mode == SFmode && in_n == 4)
25780 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25783 case BUILT_IN_LRINT:
25784 if (out_mode == SImode && out_n == 4
25785 && in_mode == DFmode && in_n == 2)
25786 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25789 case BUILT_IN_LRINTF:
25790 if (out_mode == SImode && out_n == 4
25791 && in_mode == SFmode && in_n == 4)
25792 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25799 /* Dispatch to a handler for a vectorization library. */
25800 if (ix86_veclib_handler)
25801 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
/* NOTE(review): listing fragment — braces, the switch header, the uppercase
   conversion loop body, the n_args counting and the final return are in
   elided lines.  */
25807 /* Handler for an SVML-style interface to
25808 a library with vectorized intrinsics. */
25811 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25814 tree fntype, new_fndecl, args;
25817 enum machine_mode el_mode, in_mode;
25820 /* The SVML is suitable for unsafe math only. */
25821 if (!flag_unsafe_math_optimizations)
25824 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25825 n = TYPE_VECTOR_SUBPARTS (type_out);
25826 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25827 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode/width must match.  */
25828 if (el_mode != in_mode
/* Double-precision functions: only the 2-element (V2DF) form exists.  */
25836 case BUILT_IN_LOG10:
25838 case BUILT_IN_TANH:
25840 case BUILT_IN_ATAN:
25841 case BUILT_IN_ATAN2:
25842 case BUILT_IN_ATANH:
25843 case BUILT_IN_CBRT:
25844 case BUILT_IN_SINH:
25846 case BUILT_IN_ASINH:
25847 case BUILT_IN_ASIN:
25848 case BUILT_IN_COSH:
25850 case BUILT_IN_ACOSH:
25851 case BUILT_IN_ACOS:
25852 if (el_mode != DFmode || n != 2)
/* Single-precision functions: only the 4-element (V4SF) form exists.  */
25856 case BUILT_IN_EXPF:
25857 case BUILT_IN_LOGF:
25858 case BUILT_IN_LOG10F:
25859 case BUILT_IN_POWF:
25860 case BUILT_IN_TANHF:
25861 case BUILT_IN_TANF:
25862 case BUILT_IN_ATANF:
25863 case BUILT_IN_ATAN2F:
25864 case BUILT_IN_ATANHF:
25865 case BUILT_IN_CBRTF:
25866 case BUILT_IN_SINHF:
25867 case BUILT_IN_SINF:
25868 case BUILT_IN_ASINHF:
25869 case BUILT_IN_ASINF:
25870 case BUILT_IN_COSHF:
25871 case BUILT_IN_COSF:
25872 case BUILT_IN_ACOSHF:
25873 case BUILT_IN_ACOSF:
25874 if (el_mode != SFmode || n != 4)
/* Build the SVML symbol name from the scalar builtin's name.  The +10 skips
   the "__builtin_" prefix; log gets the special "Ln" spelling.  */
25882 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25884 if (fn == BUILT_IN_LOGF)
25885 strcpy (name, "vmlsLn4");
25886 else if (fn == BUILT_IN_LOG)
25887 strcpy (name, "vmldLn2");
25890 sprintf (name, "vmls%s", bname+10);
25891 name[strlen (name)-1] = '4';
25894 sprintf (name, "vmld%s2", bname+10);
25896 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a 1- or 2-argument
   vector function type.  */
25900 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25901 args = TREE_CHAIN (args))
25905 fntype = build_function_type_list (type_out, type_in, NULL);
25907 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25909 /* Build a function declaration for the vectorized function. */
25910 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25911 TREE_PUBLIC (new_fndecl) = 1;
25912 DECL_EXTERNAL (new_fndecl) = 1;
25913 DECL_IS_NOVOPS (new_fndecl) = 1;
25914 TREE_READONLY (new_fndecl) = 1;
/* NOTE(review): listing fragment — braces, the switch header, part of the
   name-template fill-in and the final return are in elided lines.  */
25919 /* Handler for an ACML-style interface to
25920 a library with vectorized intrinsics. */
25923 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Name template: "__vr.._" — the ".." is overwritten with the width/type
   code and the suffix is appended below via sprintf (name + 7, ...).  */
25925 char name[20] = "__vr.._";
25926 tree fntype, new_fndecl, args;
25929 enum machine_mode el_mode, in_mode;
25932 /* The ACML is 64bits only and suitable for unsafe math only as
25933 it does not correctly support parts of IEEE with the required
25934 precision such as denormals. */
25936 || !flag_unsafe_math_optimizations)
25939 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25940 n = TYPE_VECTOR_SUBPARTS (type_out);
25941 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25942 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25943 if (el_mode != in_mode
/* Double-precision functions supported by ACML.  */
25953 case BUILT_IN_LOG2:
25954 case BUILT_IN_LOG10:
25957 if (el_mode != DFmode
/* Single-precision functions supported by ACML.  */
25962 case BUILT_IN_SINF:
25963 case BUILT_IN_COSF:
25964 case BUILT_IN_EXPF:
25965 case BUILT_IN_POWF:
25966 case BUILT_IN_LOGF:
25967 case BUILT_IN_LOG2F:
25968 case BUILT_IN_LOG10F:
25971 if (el_mode != SFmode
/* Append the scalar builtin's name minus the "__builtin_" prefix.  */
25980 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25981 sprintf (name + 7, "%s", bname+10);
/* Pick a 1- or 2-argument vector function type based on the scalar
   builtin's argument list.  */
25984 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25985 args = TREE_CHAIN (args))
25989 fntype = build_function_type_list (type_out, type_in, NULL);
25991 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25993 /* Build a function declaration for the vectorized function. */
25994 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25995 TREE_PUBLIC (new_fndecl) = 1;
25996 DECL_EXTERNAL (new_fndecl) = 1;
25997 DECL_IS_NOVOPS (new_fndecl) = 1;
25998 TREE_READONLY (new_fndecl) = 1;
/* NOTE(review): listing fragment — the outer switch on CODE, mode case
   labels and NULL_TREE returns are in elided lines.  */
26004 /* Returns a decl of a function that implements conversion of an integer vector
26005 into a floating-point vector, or vice-versa. TYPE is the type of the integer
26006 side of the conversion.
26007 Return NULL_TREE if it is not available. */
26010 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
26012 if (TREE_CODE (type) != VECTOR_TYPE
26013 /* There are only conversions from/to signed integers. */
26014 || TYPE_UNSIGNED (TREE_TYPE (type)))
/* int -> float direction (FLOAT_EXPR case label is elided).  */
26020 switch (TYPE_MODE (type))
26023 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int (truncating) direction.  */
26028 case FIX_TRUNC_EXPR:
26029 switch (TYPE_MODE (type))
26032 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
/* NOTE(review): listing fragment — the md_fn branch structure, switch
   headers and NULL_TREE returns are in elided lines.  */
26042 /* Returns a code for a target-specific builtin that implements
26043 reciprocal of the function, or NULL_TREE if not available. */
26046 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
26047 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations are only valid under unsafe/finite math with
   SSE math enabled and when not optimizing for size.  */
26049 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
26050 && flag_finite_math_only && !flag_trapping_math
26051 && flag_unsafe_math_optimizations))
26055 /* Machine dependent builtins. */
26058 /* Vectorized version of sqrt to rsqrt conversion. */
26059 case IX86_BUILTIN_SQRTPS_NR:
26060 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
26066 /* Normal builtins. */
26069 /* Sqrt to rsqrt conversion. */
26070 case BUILT_IN_SQRTF:
26071 return ix86_builtins[IX86_BUILTIN_RSQRTF];
/* NOTE(review): listing fragment — braces, the mode switch headers, several
   emit_insn wrappers and the final `return result;` are in elided lines.  */
26078 /* Store OPERAND to the memory after reload is completed. This means
26079 that we can't easily use assign_stack_local. */
26081 ix86_force_to_memory (enum machine_mode mode, rtx operand)
26085 gcc_assert (reload_completed);
/* With a red zone available, store below the stack pointer without
   adjusting it.  */
26086 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
26088 result = gen_rtx_MEM (mode,
26089 gen_rtx_PLUS (Pmode,
26091 GEN_INT (-RED_ZONE_SIZE)));
26092 emit_move_insn (result, operand);
/* 64-bit without red zone: push via pre-decrement of the stack pointer.  */
26094 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
26100 operand = gen_lowpart (DImode, operand);
26104 gen_rtx_SET (VOIDmode,
26105 gen_rtx_MEM (DImode,
26106 gen_rtx_PRE_DEC (DImode,
26107 stack_pointer_rtx)),
26111 gcc_unreachable ();
26113 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: split into two SImode pushes.  */
26122 split_di (&operand, 1, operands, operands + 1);
26124 gen_rtx_SET (VOIDmode,
26125 gen_rtx_MEM (SImode,
26126 gen_rtx_PRE_DEC (Pmode,
26127 stack_pointer_rtx)),
26130 gen_rtx_SET (VOIDmode,
26131 gen_rtx_MEM (SImode,
26132 gen_rtx_PRE_DEC (Pmode,
26133 stack_pointer_rtx)),
26138 /* Store HImodes as SImodes. */
26139 operand = gen_lowpart (SImode, operand);
26143 gen_rtx_SET (VOIDmode,
26144 gen_rtx_MEM (GET_MODE (operand),
26145 gen_rtx_PRE_DEC (SImode,
26146 stack_pointer_rtx)),
26150 gcc_unreachable ();
26152 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* NOTE(review): listing fragment — braces and the size computation between
   the visible lines are elided.  Pairs with ix86_force_to_memory: undoes
   the stack adjustment that function made (no-op for the red-zone case,
   which never moved the stack pointer).  */
26157 /* Free operand from the memory. */
26159 ix86_free_from_memory (enum machine_mode mode)
26161 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
26165 if (mode == DImode || TARGET_64BIT)
26169 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26170 to pop or add instruction if registers are available. */
26171 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26172 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
/* NOTE(review): listing fragment — braces and several return statements
   (e.g. the values returned inside the CONST_DOUBLE branch) are in elided
   lines.  */
26177 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26178 QImode must go into class Q_REGS.
26179 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26180 movdf to do mem-to-mem moves through integer regs. */
26182 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26184 enum machine_mode mode = GET_MODE (x);
26186 /* We're only allowed to return a subclass of CLASS. Many of the
26187 following checks fail for NO_REGS, so eliminate that early. */
26188 if (regclass == NO_REGS)
26191 /* All classes can load zeros. */
26192 if (x == CONST0_RTX (mode))
26195 /* Force constants into memory if we are loading a (nonzero) constant into
26196 an MMX or SSE register. This is because there are no MMX/SSE instructions
26197 to load from a constant. */
26199 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26202 /* Prefer SSE regs only, if we can use them for math. */
26203 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26204 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26206 /* Floating-point constants need more complex checks. */
26207 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26209 /* General regs can load everything. */
26210 if (reg_class_subset_p (regclass, GENERAL_REGS))
26213 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26214 zero above. We only want to wind up preferring 80387 registers if
26215 we plan on doing computation with them. */
26217 && standard_80387_constant_p (x))
26219 /* Limit class to non-sse. */
26220 if (regclass == FLOAT_SSE_REGS)
26222 if (regclass == FP_TOP_SSE_REGS)
26224 if (regclass == FP_SECOND_SSE_REGS)
26225 return FP_SECOND_REG;
26226 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26233 /* Generally when we see PLUS here, it's the function invariant
26234 (plus soft-fp const_int). Which can only be computed into general
26236 if (GET_CODE (x) == PLUS)
26237 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26239 /* QImode constants are easy to load, but non-constant QImode data
26240 must go into Q_REGS. */
26241 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26243 if (reg_class_subset_p (regclass, Q_REGS))
26245 if (reg_class_subset_p (Q_REGS, regclass))
/* NOTE(review): listing fragment — braces and one return in the x87 branch
   are in elided lines.  */
26253 /* Discourage putting floating-point values in SSE registers unless
26254 SSE math is being used, and likewise for the 387 registers. */
26256 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26258 enum machine_mode mode = GET_MODE (x);
26260 /* Restrict the output reload class to the register bank that we are doing
26261 math on. If we would like not to return a subset of CLASS, reject this
26262 alternative: if reload cannot do this, it will still use its choice. */
/* Redundant re-read of the mode (already fetched at declaration above);
   kept byte-identical per the original.  */
26263 mode = GET_MODE (x);
26264 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26265 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* For x87 float modes, narrow mixed FP/SSE classes to the pure-FP part.  */
26267 if (X87_FLOAT_MODE_P (mode))
26269 if (regclass == FP_TOP_SSE_REGS)
26271 else if (regclass == FP_SECOND_SSE_REGS)
26272 return FP_SECOND_REG;
26274 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* NOTE(review): listing fragment — the function body continues in elided
   lines (including the returned class for the QImode-spill case and the
   memory-operand handling announced by the trailing comment).  */
26280 static enum reg_class
26281 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
26282 enum machine_mode mode,
26283 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26285 /* QImode spills from non-QI registers require
26286 intermediate register on 32bit targets. */
26287 if (!in_p && mode == QImode && !TARGET_64BIT
26288 && (rclass == GENERAL_REGS
26289 || rclass == LEGACY_REGS
26290 || rclass == INDEX_REGS))
/* Resolve pseudos/SUBREGs to a hard register number before classifying.  */
26299 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26300 regno = true_regnum (x);
26302 /* Return Q_REGS if the operand is in memory. */
26310 /* If we are copying between general and FP registers, we need a memory
26311 location. The same is true for SSE and MMX registers.
26313 To optimize register_move_cost performance, allow inline variant.
26315 The macro can't work reliably when one of the CLASSES is class containing
26316 registers from multiple units (SSE, MMX, integer). We avoid this by never
26317 combining those units in single alternative in the machine description.
26318 Ensure that this constraint holds to avoid unexpected surprises.
26320 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26321 enforce these sanity checks. */
/* Core test for whether a move between CLASS1 and CLASS2 in MODE must go
   through memory.  Inlined so register_move_cost table building stays
   cheap.  STRICT enables the sanity assertion that no class mixes units
   (x87/SSE/MMX/integer); when called from cost estimation STRICT is 0.
   NOTE(review): several "return" lines are missing from this extraction;
   visible code kept byte-identical.  */
26324 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26325 enum machine_mode mode, int strict)
/* A class that *may* contain a unit's registers but is not purely that
   unit mixes units -- disallowed by the machine description contract.  */
26327 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26328 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26329 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26330 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26331 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26332 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26334 gcc_assert (!strict);
26338 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26341 /* ??? This is a lie. We do have moves between mmx/general, and for
26342 mmx/sse2. But by saying we need secondary memory we discourage the
26343 register allocator from using the mmx registers unless needed. */
26344 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26347 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26349 /* SSE1 doesn't have any direct moves from other classes. */
26353 /* If the target says that inter-unit moves are more expensive
26354 than moving through memory, then don't generate them. */
26355 if (!TARGET_INTER_UNIT_MOVES)
26358 /* Between SSE and general, we have moves no larger than word size. */
26359 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper for SECONDARY_MEMORY_NEEDED; simply forwards to the
   inline worker above.  */
26367 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26368 enum machine_mode mode, int strict)
26370 return inline_secondary_memory_needed (class1, class2, mode, strict);
26373 /* Return true if the registers in CLASS cannot represent the change from
26374 modes FROM to TO. */
/* CANNOT_CHANGE_MODE_CLASS: return true if REGCLASS cannot view a value of
   mode FROM as mode TO via subreg.  x87 registers never allow it (values
   are stored in extended precision); vector registers refuse narrow (<4
   byte) FROM modes and size-shrinking subregs.  NOTE(review): extraction
   gaps hide the return-value lines; code kept byte-identical.  */
26377 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26378 enum reg_class regclass)
26383 /* x87 registers can't do subreg at all, as all values are reformatted
26384 to extended precision. */
26385 if (MAYBE_FLOAT_CLASS_P (regclass))
26388 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26390 /* Vector registers do not support QI or HImode loads. If we don't
26391 disallow a change to these modes, reload will assume it's ok to
26392 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26393 the vec_dupv4hi pattern. */
26394 if (GET_MODE_SIZE (from) < 4)
26397 /* Vector registers do not support subreg with nonzero offsets, which
26398 are otherwise valid for integer registers. Since we can't see
26399 whether we have a nonzero offset from here, prohibit all
26400 nonparadoxical subregs changing size. */
26401 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26408 /* Return the cost of moving data of mode M between a
26409 register and memory. A value of 2 is the default; this cost is
26410 relative to those in `REGISTER_MOVE_COST'.
26412 This function is used extensively by register_move_cost that is used to
26413 build tables at startup. Make it inline in this case.
26414 When IN is 2, return maximum of in and out move cost.
26416 If moving between registers and memory is more expensive than
26417 between two registers, you should define this macro to express the
26420 Model also increased moving costs of QImode registers in non
/* Worker for MEMORY_MOVE_COST: cost of moving MODE between REGCLASS and
   memory, looked up per unit (x87, SSE, MMX, integer) from the active
   cost table.  IN selects load (nonzero) vs. store cost; IN == 2 asks
   for the max of both.  NOTE(review): the switch labels and index setup
   are partly missing in this extraction; code kept byte-identical.  */
26424 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26428 if (FLOAT_CLASS_P (regclass))
26446 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26447 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26449 if (SSE_CLASS_P (regclass))
26452 switch (GET_MODE_SIZE (mode))
26467 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26468 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26470 if (MMX_CLASS_P (regclass))
26473 switch (GET_MODE_SIZE (mode))
26485 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26486 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers: cost depends on operand size.  */
26488 switch (GET_MODE_SIZE (mode))
/* Byte moves: cheap only from QI-capable regs (or on 64-bit).  */
26491 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26494 return ix86_cost->int_store[0];
/* On partial-register-dependency targets, loads of QImode are done
   with movzbl to avoid the partial stall, when optimizing for speed.  */
26495 if (TARGET_PARTIAL_REG_DEPENDENCY
26496 && optimize_function_for_speed_p (cfun))
26497 cost = ix86_cost->movzbl_load;
26499 cost = ix86_cost->int_load[0];
26501 return MAX (cost, ix86_cost->int_store[0]);
26507 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26509 return ix86_cost->movzbl_load;
26511 return ix86_cost->int_store[0] + 4;
26516 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26517 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26519 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26520 if (mode == TFmode)
26523 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26525 cost = ix86_cost->int_load[2];
26527 cost = ix86_cost->int_store[2];
/* Scale per-word cost by the number of words moved.  */
26528 return (cost * (((int) GET_MODE_SIZE (mode)
26529 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line MEMORY_MOVE_COST entry point; forwards to the inline worker.  */
26534 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26536 return inline_memory_move_cost (mode, regclass, in);
26540 /* Return the cost of moving data from a register in class CLASS1 to
26541 one in class CLASS2.
26543 It is not required that the cost always equal 2 when FROM is the same as TO;
26544 on some machines it is expensive to move between registers if they are not
26545 general registers. */
/* REGISTER_MOVE_COST: cost of moving MODE from CLASS1 to CLASS2.  When a
   secondary memory location would be needed, charge a symmetric
   store+load (IN == 2 form) so the allocator never prefers the register
   path over the memory path it would actually take.  NOTE(review):
   extraction gaps hide some locals/returns; code kept byte-identical.  */
26548 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26549 enum reg_class class2)
26551 /* In case we require secondary memory, compute cost of the store followed
26552 by load. In order to avoid bad register allocation choices, we need
26553 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26555 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26559 cost += inline_memory_move_cost (mode, class1, 2);
26560 cost += inline_memory_move_cost (mode, class2, 2);
26562 /* In case of copying from general_purpose_register we may emit multiple
26563 stores followed by single load causing memory size mismatch stall.
26564 Count this as arbitrarily high cost of 20. */
26565 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26568 /* In the case of FP/MMX moves, the registers actually overlap, and we
26569 have to switch modes in order to treat them differently. */
26570 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26571 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26577 /* Moves between SSE/MMX and integer unit are expensive. */
26578 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26579 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26581 /* ??? By keeping returned value relatively high, we limit the number
26582 of moves between integer and MMX/SSE registers for all targets.
26583 Additionally, high value prevents problem with x86_modes_tieable_p(),
26584 where integer modes in MMX/SSE registers are not tieable
26585 because of missing QImode and HImode moves to, from or between
26586 MMX/SSE registers. */
26587 return MAX (8, ix86_cost->mmxsse_to_integer);
26589 if (MAYBE_FLOAT_CLASS_P (class1))
26590 return ix86_cost->fp_move;
26591 if (MAYBE_SSE_CLASS_P (class1))
26592 return ix86_cost->sse_move;
26593 if (MAYBE_MMX_CLASS_P (class1))
26594 return ix86_cost->mmx_move;
26598 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* HARD_REGNO_MODE_OK: return nonzero if hard register REGNO can hold a
   value of MODE.  Flags regs take only CCmode; x87, SSE and MMX regs take
   their respective valid-mode sets; general regs take integer, FP, DFP
   and 8-byte MMX modes, with special care for QImode in non-QI regs.
   NOTE(review): extraction gaps hide some return-value lines; code kept
   byte-identical.  */
26601 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26603 /* Flags and only flags can only hold CCmode values. */
26604 if (CC_REGNO_P (regno))
26605 return GET_MODE_CLASS (mode) == MODE_CC;
26606 if (GET_MODE_CLASS (mode) == MODE_CC
26607 || GET_MODE_CLASS (mode) == MODE_RANDOM
26608 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26610 if (FP_REGNO_P (regno))
26611 return VALID_FP_MODE_P (mode);
26612 if (SSE_REGNO_P (regno))
26614 /* We implement the move patterns for all vector modes into and
26615 out of SSE registers, even when no operation instructions
26616 are available. OImode move is available only when AVX is
26618 return ((TARGET_AVX && mode == OImode)
26619 || VALID_AVX256_REG_MODE (mode)
26620 || VALID_SSE_REG_MODE (mode)
26621 || VALID_SSE2_REG_MODE (mode)
26622 || VALID_MMX_REG_MODE (mode)
26623 || VALID_MMX_REG_MODE_3DNOW (mode));
26625 if (MMX_REGNO_P (regno))
26627 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26628 so if the register is available at all, then we can move data of
26629 the given mode into or out of it. */
26630 return (VALID_MMX_REG_MODE (mode)
26631 || VALID_MMX_REG_MODE_3DNOW (mode));
26634 if (mode == QImode)
26636 /* Take care for QImode values - they can be in non-QI regs,
26637 but then they do cause partial register stalls. */
26638 if (regno <= BX_REG || TARGET_64BIT)
26640 if (!TARGET_PARTIAL_REG_STALL)
/* During reload we cannot be choosy about register class, so allow it.  */
26642 return reload_in_progress || reload_completed;
26644 /* We handle both integer and floats in the general purpose registers. */
26645 else if (VALID_INT_MODE_P (mode))
26647 else if (VALID_FP_MODE_P (mode))
26649 else if (VALID_DFP_MODE_P (mode))
26651 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26652 on to use that value in smaller contexts, this can easily force a
26653 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26654 supporting DImode, allow it. */
26655 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26661 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26662 tieable integer mode. */
/* Helper for ix86_modes_tieable_p: true if MODE is an integer mode that
   can be tied with other integer modes.  NOTE(review): the switch/case
   lines are missing from this extraction; the visible returns appear to
   correspond to the HImode and SI/DImode cases respectively -- confirm
   against full source.  */
26665 ix86_tieable_integer_mode_p (enum machine_mode mode)
26674 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26677 return TARGET_64BIT;
26684 /* Return true if MODE1 is accessible in a register that can hold MODE2
26685 without copying. That is, all register classes that can hold MODE2
26686 can also hold MODE1. */
/* MODES_TIEABLE_P: true if every register class that can hold MODE2 can
   also hold MODE1 without copying -- identical modes, mutually tieable
   integer modes, narrower FP modes under XF/DF, and same-size SSE or MMX
   vector modes.  */
26689 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26691 if (mode1 == mode2)
26694 if (ix86_tieable_integer_mode_p (mode1)
26695 && ix86_tieable_integer_mode_p (mode2))
26698 /* MODE2 being XFmode implies fp stack or general regs, which means we
26699 can tie any smaller floating point modes to it. Note that we do not
26700 tie this with TFmode. */
26701 if (mode2 == XFmode)
26702 return mode1 == SFmode || mode1 == DFmode;
26704 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26705 that we can tie it with SFmode. */
26706 if (mode2 == DFmode)
26707 return mode1 == SFmode;
26709 /* If MODE2 is only appropriate for an SSE register, then tie with
26710 any other mode acceptable to SSE registers. */
26711 if (GET_MODE_SIZE (mode2) == 16
26712 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26713 return (GET_MODE_SIZE (mode1) == 16
26714 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26716 /* If MODE2 is appropriate for an MMX register, then tie
26717 with any other mode acceptable to MMX registers. */
26718 if (GET_MODE_SIZE (mode2) == 8
26719 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26720 return (GET_MODE_SIZE (mode1) == 8
26721 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26726 /* Compute a (partial) cost for rtx X. Return true if the complete
26727 cost has been computed, and false if subexpressions should be
26728 scanned. In either case, *TOTAL contains the cost result. */
26731 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26733 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26734 enum machine_mode mode = GET_MODE (x);
26735 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26743 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26745 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26747 else if (flag_pic && SYMBOLIC_CONST (x)
26749 || (!GET_CODE (x) != LABEL_REF
26750 && (GET_CODE (x) != SYMBOL_REF
26751 || !SYMBOL_REF_LOCAL_P (x)))))
26758 if (mode == VOIDmode)
26761 switch (standard_80387_constant_p (x))
26766 default: /* Other constants */
26771 /* Start with (MEM (SYMBOL_REF)), since that's where
26772 it'll probably end up. Add a penalty for size. */
26773 *total = (COSTS_N_INSNS (1)
26774 + (flag_pic != 0 && !TARGET_64BIT)
26775 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26781 /* The zero extensions is often completely free on x86_64, so make
26782 it as cheap as possible. */
26783 if (TARGET_64BIT && mode == DImode
26784 && GET_MODE (XEXP (x, 0)) == SImode)
26786 else if (TARGET_ZERO_EXTEND_WITH_AND)
26787 *total = cost->add;
26789 *total = cost->movzx;
26793 *total = cost->movsx;
26797 if (CONST_INT_P (XEXP (x, 1))
26798 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26800 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26803 *total = cost->add;
26806 if ((value == 2 || value == 3)
26807 && cost->lea <= cost->shift_const)
26809 *total = cost->lea;
26819 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26821 if (CONST_INT_P (XEXP (x, 1)))
26823 if (INTVAL (XEXP (x, 1)) > 32)
26824 *total = cost->shift_const + COSTS_N_INSNS (2);
26826 *total = cost->shift_const * 2;
26830 if (GET_CODE (XEXP (x, 1)) == AND)
26831 *total = cost->shift_var * 2;
26833 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26838 if (CONST_INT_P (XEXP (x, 1)))
26839 *total = cost->shift_const;
26841 *total = cost->shift_var;
26846 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26848 /* ??? SSE scalar cost should be used here. */
26849 *total = cost->fmul;
26852 else if (X87_FLOAT_MODE_P (mode))
26854 *total = cost->fmul;
26857 else if (FLOAT_MODE_P (mode))
26859 /* ??? SSE vector cost should be used here. */
26860 *total = cost->fmul;
26865 rtx op0 = XEXP (x, 0);
26866 rtx op1 = XEXP (x, 1);
26868 if (CONST_INT_P (XEXP (x, 1)))
26870 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26871 for (nbits = 0; value != 0; value &= value - 1)
26875 /* This is arbitrary. */
26878 /* Compute costs correctly for widening multiplication. */
26879 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26880 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26881 == GET_MODE_SIZE (mode))
26883 int is_mulwiden = 0;
26884 enum machine_mode inner_mode = GET_MODE (op0);
26886 if (GET_CODE (op0) == GET_CODE (op1))
26887 is_mulwiden = 1, op1 = XEXP (op1, 0);
26888 else if (CONST_INT_P (op1))
26890 if (GET_CODE (op0) == SIGN_EXTEND)
26891 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26894 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26898 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26901 *total = (cost->mult_init[MODE_INDEX (mode)]
26902 + nbits * cost->mult_bit
26903 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26912 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26913 /* ??? SSE cost should be used here. */
26914 *total = cost->fdiv;
26915 else if (X87_FLOAT_MODE_P (mode))
26916 *total = cost->fdiv;
26917 else if (FLOAT_MODE_P (mode))
26918 /* ??? SSE vector cost should be used here. */
26919 *total = cost->fdiv;
26921 *total = cost->divide[MODE_INDEX (mode)];
26925 if (GET_MODE_CLASS (mode) == MODE_INT
26926 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26928 if (GET_CODE (XEXP (x, 0)) == PLUS
26929 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26930 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26931 && CONSTANT_P (XEXP (x, 1)))
26933 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26934 if (val == 2 || val == 4 || val == 8)
26936 *total = cost->lea;
26937 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26938 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26939 outer_code, speed);
26940 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26944 else if (GET_CODE (XEXP (x, 0)) == MULT
26945 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26947 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26948 if (val == 2 || val == 4 || val == 8)
26950 *total = cost->lea;
26951 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26952 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26956 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26958 *total = cost->lea;
26959 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26960 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26961 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26968 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26970 /* ??? SSE cost should be used here. */
26971 *total = cost->fadd;
26974 else if (X87_FLOAT_MODE_P (mode))
26976 *total = cost->fadd;
26979 else if (FLOAT_MODE_P (mode))
26981 /* ??? SSE vector cost should be used here. */
26982 *total = cost->fadd;
26990 if (!TARGET_64BIT && mode == DImode)
26992 *total = (cost->add * 2
26993 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26994 << (GET_MODE (XEXP (x, 0)) != DImode))
26995 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26996 << (GET_MODE (XEXP (x, 1)) != DImode)));
27002 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27004 /* ??? SSE cost should be used here. */
27005 *total = cost->fchs;
27008 else if (X87_FLOAT_MODE_P (mode))
27010 *total = cost->fchs;
27013 else if (FLOAT_MODE_P (mode))
27015 /* ??? SSE vector cost should be used here. */
27016 *total = cost->fchs;
27022 if (!TARGET_64BIT && mode == DImode)
27023 *total = cost->add * 2;
27025 *total = cost->add;
27029 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
27030 && XEXP (XEXP (x, 0), 1) == const1_rtx
27031 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
27032 && XEXP (x, 1) == const0_rtx)
27034 /* This kind of construct is implemented using test[bwl].
27035 Treat it as if we had an AND. */
27036 *total = (cost->add
27037 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
27038 + rtx_cost (const1_rtx, outer_code, speed));
27044 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
27049 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27050 /* ??? SSE cost should be used here. */
27051 *total = cost->fabs;
27052 else if (X87_FLOAT_MODE_P (mode))
27053 *total = cost->fabs;
27054 else if (FLOAT_MODE_P (mode))
27055 /* ??? SSE vector cost should be used here. */
27056 *total = cost->fabs;
27060 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27061 /* ??? SSE cost should be used here. */
27062 *total = cost->fsqrt;
27063 else if (X87_FLOAT_MODE_P (mode))
27064 *total = cost->fsqrt;
27065 else if (FLOAT_MODE_P (mode))
27066 /* ??? SSE vector cost should be used here. */
27067 *total = cost->fsqrt;
27071 if (XINT (x, 1) == UNSPEC_TP)
27082 static int current_machopic_label_num;
27084 /* Given a symbol name and its associated stub, write out the
27085 definition of the stub. */
/* Darwin (32-bit only): emit the assembly for a lazy symbol stub -- the
   stub body, its binder trampoline, and the lazy pointer slot -- for
   symbol SYMB with stub name STUB into FILE.  NOTE(review): the #if
   MACHOPIC_PURE / #else alternations are missing from this extraction,
   which is why both PIC and non-PIC emission sequences appear
   back-to-back; code kept byte-identical.  */
27088 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27090 unsigned int length;
27091 char *binder_name, *symbol_name, lazy_ptr_name[32];
27092 int label = ++current_machopic_label_num;
27094 /* For 64-bit we shouldn't get here. */
27095 gcc_assert (!TARGET_64BIT);
27097 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27098 symb = (*targetm.strip_name_encoding) (symb);
27100 length = strlen (stub);
27101 binder_name = XALLOCAVEC (char, length + 32);
27102 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27104 length = strlen (symb);
27105 symbol_name = XALLOCAVEC (char, length + 32);
27106 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27108 sprintf (lazy_ptr_name, "L%d$lz", label);
27111 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27113 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27115 fprintf (file, "%s:\n", stub);
27116 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27120 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27121 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27122 fprintf (file, "\tjmp\t*%%edx\n");
27125 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27127 fprintf (file, "%s:\n", binder_name);
27131 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27132 fprintf (file, "\tpushl\t%%eax\n");
27135 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27137 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* The lazy pointer initially points at the binder; dyld patches it.  */
27139 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27140 fprintf (file, "%s:\n", lazy_ptr_name);
27141 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27142 fprintf (file, "\t.long %s\n", binder_name);
/* Darwin end-of-file hook; delegates to the generic darwin_file_end.  */
27146 darwin_x86_file_end (void)
27148 darwin_file_end ();
27151 #endif /* TARGET_MACHO */
27153 /* Order the registers for register allocator. */
/* Fill reg_alloc_order: caller-saved GPRs first, then callee-saved GPRs,
   then x87 or SSE depending on which unit does FP math, then MMX, then
   zero-fill the remainder.  */
27156 x86_order_regs_for_local_alloc (void)
27161 /* First allocate the local general purpose registers. */
27162 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27163 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27164 reg_alloc_order [pos++] = i;
27166 /* Global general purpose registers. */
27167 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27168 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27169 reg_alloc_order [pos++] = i;
27171 /* x87 registers come first in case we are doing FP math
27173 if (!TARGET_SSE_MATH)
27174 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27175 reg_alloc_order [pos++] = i;
27177 /* SSE registers. */
27178 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27179 reg_alloc_order [pos++] = i;
27180 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27181 reg_alloc_order [pos++] = i;
27183 /* x87 registers. */
27184 if (TARGET_SSE_MATH)
27185 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27186 reg_alloc_order [pos++] = i;
27188 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27189 reg_alloc_order [pos++] = i;
27191 /* Initialize the rest of array as we do not allocate some registers
27193 while (pos < FIRST_PSEUDO_REGISTER)
27194 reg_alloc_order [pos++] = 0;
27197 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
27198 struct attribute_spec.handler. */
/* Attribute handler for "ms_abi"/"sysv_abi": only valid on function-like
   declarations, only on 64-bit targets, and the two attributes are
   mutually exclusive on the same type.  Sets *NO_ADD_ATTRS to drop the
   attribute on any failure.  NOTE(review): extraction gaps hide the
   TARGET_64BIT check and several returns; code kept byte-identical.  */
27200 ix86_handle_abi_attribute (tree *node, tree name,
27201 tree args ATTRIBUTE_UNUSED,
27202 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27204 if (TREE_CODE (*node) != FUNCTION_TYPE
27205 && TREE_CODE (*node) != METHOD_TYPE
27206 && TREE_CODE (*node) != FIELD_DECL
27207 && TREE_CODE (*node) != TYPE_DECL)
27209 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27211 *no_add_attrs = true;
27216 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27218 *no_add_attrs = true;
27222 /* Can combine regparm with all attributes but fastcall. */
27223 if (is_attribute_p ("ms_abi", name))
27225 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27227 error ("ms_abi and sysv_abi attributes are not compatible");
27232 else if (is_attribute_p ("sysv_abi", name))
27234 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27236 error ("ms_abi and sysv_abi attributes are not compatible");
27245 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27246 struct attribute_spec.handler. */
/* Attribute handler for "ms_struct"/"gcc_struct": applies only to
   RECORD/UNION types and rejects the combination of both attributes on
   one type; warns and sets *NO_ADD_ATTRS otherwise.  */
27248 ix86_handle_struct_attribute (tree *node, tree name,
27249 tree args ATTRIBUTE_UNUSED,
27250 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* For decls, resolve to the underlying type being annotated.  */
27253 if (DECL_P (*node))
27255 if (TREE_CODE (*node) == TYPE_DECL)
27256 type = &TREE_TYPE (*node);
27261 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27262 || TREE_CODE (*type) == UNION_TYPE)))
27264 warning (OPT_Wattributes, "%qE attribute ignored",
27266 *no_add_attrs = true;
27269 else if ((is_attribute_p ("ms_struct", name)
27270 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27271 || ((is_attribute_p ("gcc_struct", name)
27272 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27274 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27276 *no_add_attrs = true;
/* Return whether RECORD_TYPE should use MS bitfield layout: either the
   target default requests it and "gcc_struct" does not override, or
   "ms_struct" explicitly requests it.  */
27283 ix86_ms_bitfield_layout_p (const_tree record_type)
27285 return (TARGET_MS_BITFIELD_LAYOUT &&
27286 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27287 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
27290 /* Returns an expression indicating where the this parameter is
27291 located on entry to the FUNCTION. */
/* Return an rtx (REG or MEM) for where the "this" pointer of FUNCTION
   arrives on entry.  AGGR accounts for a hidden aggregate-return pointer
   occupying the first slot.  64-bit: an ABI-dependent parameter register.
   32-bit: a regparm/fastcall register if available, else a stack slot.
   NOTE(review): extraction gaps hide the 32-bit regparm branch details;
   code kept byte-identical.  */
27294 x86_this_parameter (tree function)
27296 tree type = TREE_TYPE (function);
27297 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27302 const int *parm_regs;
27304 if (ix86_function_type_abi (type) == MS_ABI)
27305 parm_regs = x86_64_ms_abi_int_parameter_registers;
27307 parm_regs = x86_64_int_parameter_registers;
27308 return gen_rtx_REG (DImode, parm_regs[aggr]);
27311 nregs = ix86_function_regparm (type, function);
27313 if (nregs > 0 && !stdarg_p (type))
27317 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27318 regno = aggr ? DX_REG : CX_REG;
27326 return gen_rtx_MEM (SImode,
27327 plus_constant (stack_pointer_rtx, 4));
27330 return gen_rtx_REG (SImode, regno);
/* Default 32-bit case: "this" is on the stack, after the return address
   (and after the hidden aggregate pointer when AGGR).  */
27333 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27336 /* Determine whether x86_output_mi_thunk can succeed. */
/* TARGET_ASM_CAN_OUTPUT_MI_THUNK: decide whether the thunk emitter below
   can handle this DELTA/VCALL_OFFSET/FUNCTION combination.  64-bit always
   can; 32-bit needs a free scratch register.  NOTE(review): extraction
   gaps hide the return-value lines; code kept byte-identical.  */
27339 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27340 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27341 HOST_WIDE_INT vcall_offset, const_tree function)
27343 /* 64-bit can handle anything. */
27347 /* For 32-bit, everything's fine if we have one free register. */
27348 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27351 /* Need a free register for vcall_offset. */
27355 /* Need a free register for GOT references. */
27356 if (flag_pic && !(*targetm.binds_local_p) (function))
27359 /* Otherwise ok. */
27363 /* Output the assembler code for a thunk function. THUNK_DECL is the
27364 declaration for the thunk function itself, FUNCTION is the decl for
27365 the target function. DELTA is an immediate constant offset to be
27366 added to THIS. If VCALL_OFFSET is nonzero, the word at
27367 *(*this + vcall_offset) should be added to THIS. */
/* TARGET_ASM_OUTPUT_MI_THUNK: emit the assembly for a vtable thunk.
   Adds DELTA to the incoming "this", optionally adds the word at
   *(*this + VCALL_OFFSET), then tail-jumps to FUNCTION.  NOTE(review):
   this extraction is missing many lines (TARGET_64BIT conditionals,
   braces, Mach-O #if blocks), so adjacent emission sequences from
   different configurations appear interleaved; code kept byte-identical.  */
27370 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
27371 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27372 HOST_WIDE_INT vcall_offset, tree function)
27375 rtx this_param = x86_this_parameter (function);
27378 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27379 pull it in now and let DELTA benefit. */
27380 if (REG_P (this_param))
27381 this_reg = this_param;
27382 else if (vcall_offset)
27384 /* Put the this parameter into %eax. */
27385 xops[0] = this_param;
27386 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27387 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27390 this_reg = NULL_RTX;
27392 /* Adjust the this parameter by a fixed constant. */
27395 xops[0] = GEN_INT (delta);
27396 xops[1] = this_reg ? this_reg : this_param;
/* A 64-bit DELTA that is not a valid immediate must be materialized in
   a scratch register (r10) first.  */
27399 if (!x86_64_general_operand (xops[0], DImode))
27401 tmp = gen_rtx_REG (DImode, R10_REG);
27403 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27405 xops[1] = this_param;
27407 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27410 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27413 /* Adjust the this parameter by a value stored in the vtable. */
27417 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit scratch: %ecx normally, %eax for fastcall (ecx holds "this").  */
27420 int tmp_regno = CX_REG;
27421 if (lookup_attribute ("fastcall",
27422 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27423 tmp_regno = AX_REG;
27424 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer (*this) into the scratch register.  */
27427 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27429 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27431 /* Adjust the this parameter. */
27432 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27433 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27435 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27436 xops[0] = GEN_INT (vcall_offset);
27438 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27439 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27441 xops[1] = this_reg;
27442 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27445 /* If necessary, drop THIS back to its stack slot. */
27446 if (this_reg && this_reg != this_param)
27448 xops[0] = this_reg;
27449 xops[1] = this_param;
27450 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
/* Finally, tail-call the real function -- directly when it binds
   locally, otherwise through the GOT or a Mach-O stub.  */
27453 xops[0] = XEXP (DECL_RTL (function), 0);
27456 if (!flag_pic || (*targetm.binds_local_p) (function))
27457 output_asm_insn ("jmp\t%P0", xops);
27458 /* All thunks should be in the same object as their target,
27459 and thus binds_local_p should be true. */
27460 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27461 gcc_unreachable ();
27464 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27465 tmp = gen_rtx_CONST (Pmode, tmp);
27466 tmp = gen_rtx_MEM (QImode, tmp);
27468 output_asm_insn ("jmp\t%A0", xops);
27473 if (!flag_pic || (*targetm.binds_local_p) (function))
27474 output_asm_insn ("jmp\t%P0", xops);
27479 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27480 tmp = (gen_rtx_SYMBOL_REF
27482 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27483 tmp = gen_rtx_MEM (QImode, tmp);
27485 output_asm_insn ("jmp\t%0", xops);
27488 #endif /* TARGET_MACHO */
27490 tmp = gen_rtx_REG (SImode, CX_REG);
27491 output_set_got (tmp, NULL_RTX);
27494 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27495 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START: emit the standard preamble plus the optional
   .version, __fltused and Intel-syntax directives the configuration
   requires.  */
27501 x86_file_start (void)
27503 default_file_start ();
27505 darwin_file_start ();
27507 if (X86_FILE_START_VERSION_DIRECTIVE)
27508 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27509 if (X86_FILE_START_FLTUSED)
27510 fputs ("\t.global\t__fltused\n", asm_out_file);
27511 if (ix86_asm_dialect == ASM_INTEL)
27512 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: on 32-bit targets without -malign-double,
   cap the alignment of 8-byte scalar fields (DFmode/DCmode and integer
   modes) at 32 bits for i386 ABI compatibility.  */
27516 x86_field_alignment (tree field, int computed)
27518 enum machine_mode mode;
27519 tree type = TREE_TYPE (field);
27521 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27523 mode = TYPE_MODE (strip_array_types (type));
27524 if (mode == DFmode || mode == DCmode
27525 || GET_MODE_CLASS (mode) == MODE_INT
27526 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27527 return MIN (32, computed);
27531 /* Output assembler code to FILE to increment profiler label # LABELNO
27532 for profiling a function entry. */
/* FUNCTION_PROFILER: emit the mcount call sequence for profiling entry
   LABELNO.  NOTE(review): the TARGET_64BIT / flag_pic #if alternations
   are missing from this extraction, which is why 64-bit, PIC and plain
   32-bit sequences appear consecutively; code kept byte-identical.  */
27534 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27538 #ifndef NO_PROFILE_COUNTERS
27539 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27542 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27543 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27545 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27549 #ifndef NO_PROFILE_COUNTERS
27550 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27551 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27553 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
27557 #ifndef NO_PROFILE_COUNTERS
27558 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27559 PROFILE_COUNT_REGISTER);
27561 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27565 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27566 /* We don't have exact information about the insn sizes, but we may assume
27567 quite safely that we are informed about all 1 byte insns and memory
27568 address sizes. This is enough to eliminate unnecessary padding in
/* Estimate a safe lower bound on the byte size of INSN, used by the
   K8 jump-mispredict padding pass.  Exact for 1-byte insns and address
   sizes; conservative elsewhere.  NOTE(review): extraction gaps hide the
   returned constants for several branches; code kept byte-identical.  */
27572 min_insn_size (rtx insn)
27576 if (!INSN_P (insn) || !active_insn_p (insn))
27579 /* Discard alignments we've emit and jump instructions. */
27580 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27581 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27583 if (JUMP_TABLE_DATA_P (insn))
27586 /* Important case - calls are always 5 bytes.
27587 It is common to have many calls in the row. */
27589 && symbolic_reference_mentioned_p (PATTERN (insn))
27590 && !SIBLING_CALL_P (insn))
27592 len = get_attr_length (insn);
27596 /* For normal instructions we rely on get_attr_length being exact,
27597 with a few exceptions. */
27598 if (!JUMP_P (insn))
27600 enum attr_type type = get_attr_type (insn);
/* Inline asm length is a guess, not exact.  */
27605 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27606 || asm_noperands (PATTERN (insn)) >= 0)
27613 /* Otherwise trust get_attr_length. */
27617 l = get_attr_length_address (insn);
27618 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27627 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Insert padding so that no 16-byte window contains four jumps.
   NOTE(review): lines are elided in this listing (braces, some
   conditions); comments cover only visible code.  */
27631 ix86_avoid_jump_mispredicts (void)
27633 rtx insn, start = get_insns ();
27634 int nbytes = 0, njumps = 0;
27637 /* Look for all minimal intervals of instructions containing 4 jumps.
27638 The intervals are bounded by START and INSN. NBYTES is the total
27639 size of instructions in the interval including INSN and not including
27640 START. When the NBYTES is smaller than 16 bytes, it is possible
27641 that the end of START and INSN ends up in the same 16byte page.
27643 The smallest offset in the page INSN can start is the case where START
27644 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
27645 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
27647 for (insn = start; insn; insn = NEXT_INSN (insn))
27651 if (LABEL_P (insn))
27653 int align = label_to_alignment (insn);
27654 int max_skip = label_to_max_skip (insn);
27658 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27659 already in the current 16 byte page, because otherwise
27660 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27661 bytes to reach 16 byte boundary. */
27663 || (align <= 3 && max_skip != (1 << align) - 1))
27666 fprintf (dump_file, "Label %i with max_skip %i\n",
27667 INSN_UID (insn), max_skip);
/* Shrink the window from the front until the label's skip fits,
   un-counting any jump that leaves the interval.  */
27670 while (nbytes + max_skip >= 16)
27672 start = NEXT_INSN (start);
27673 if ((JUMP_P (start)
27674 && GET_CODE (PATTERN (start)) != ADDR_VEC
27675 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27677 njumps--, isjump = 1;
27680 nbytes -= min_insn_size (start);
27686 min_size = min_insn_size (insn);
27687 nbytes += min_size;
27689 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27690 INSN_UID (insn), min_size);
/* Jump tables (ADDR_VEC/ADDR_DIFF_VEC) do not count as jumps here.  */
27692 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27693 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Once a 4th jump enters the window, advance START until only three
   remain (same un-counting as above).  */
27701 start = NEXT_INSN (start);
27702 if ((JUMP_P (start)
27703 && GET_CODE (PATTERN (start)) != ADDR_VEC
27704 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27706 njumps--, isjump = 1;
27709 nbytes -= min_insn_size (start);
27711 gcc_assert (njumps >= 0);
27713 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27714 INSN_UID (start), INSN_UID (insn), nbytes);
/* Three prior jumps plus this one within < 16 bytes: pad so INSN's end
   lands on a 16-byte boundary.  */
27716 if (njumps == 3 && isjump && nbytes < 16)
27718 int padsize = 15 - nbytes + min_insn_size (insn);
27721 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27722 INSN_UID (insn), padsize);
27723 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27729 /* AMD Athlon works faster
27730 when RET is not destination of conditional jump or directly preceded
27731 by other jump instruction. We avoid the penalty by inserting NOP just
27732 before the RET instructions in such cases. */
/* NOTE(review): elided lines include the REPLACE branch that rewrites
   the return; comments cover only visible code.  */
27734 ix86_pad_returns (void)
/* Walk every predecessor edge of the exit block — each candidate RET.  */
27739 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27741 basic_block bb = e->src;
27742 rtx ret = BB_END (bb);
27744 bool replace = false;
/* Skip non-return block ends, and don't pad when optimizing for size.  */
27746 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27747 || optimize_bb_for_size_p (bb))
/* Find the nearest active insn or label before the RET.  */
27749 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27750 if (active_insn_p (prev) || LABEL_P (prev))
27752 if (prev && LABEL_P (prev))
/* RET directly after a label: penalized if any non-fallthru edge
   (i.e. a jump) targets it.  */
27757 FOR_EACH_EDGE (e, ei, bb->preds)
27758 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27759 && !(e->flags & EDGE_FALLTHRU))
27764 prev = prev_active_insn (ret);
/* RET directly preceded by a (conditional) jump also penalized.  */
27766 && ((JUMP_P (prev) && any_condjump_p (prev))
27769 /* Empty functions get branch mispredict even when the jump destination
27770 is not visible to us. */
27771 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
/* Replace the short return with the long form (effectively a padded
   RET) to break the pairing.  */
27776 emit_jump_insn_before (gen_return_internal_long (), ret);
27782 /* Implement machine specific optimizations. We implement padding of returns
27783 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function's declaration line is elided in this
   listing — presumably the machine-dependent reorg hook (ix86_reorg);
   confirm against the full source.  */
27787 if (optimize && optimize_function_for_speed_p (cfun))
27789 if (TARGET_PAD_RETURNS)
27790 ix86_pad_returns ();
27791 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27792 if (TARGET_FOUR_JUMP_LIMIT)
27793 ix86_avoid_jump_mispredicts ();
27798 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scan INSN's extracted operands for a register above BX (i.e. one whose
   QImode form needs a REX prefix on x86-64).  NOTE(review): return
   statements are elided in this listing.  */
27801 x86_extended_QIreg_mentioned_p (rtx insn)
27804 extract_insn_cached (insn);
27805 for (i = 0; i < recog_data.n_operands; i++)
27806 if (REG_P (recog_data.operand[i])
27807 && REGNO (recog_data.operand[i]) > BX_REG)
27812 /* Return nonzero when P points to register encoded via REX prefix.
27813 Called via for_each_rtx. */
27815 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27817 unsigned int regno;
/* NOTE(review): the REG_P guard on *P is elided in this listing.  */
27820 regno = REGNO (*p);
/* True for r8-r15 and xmm8-xmm15 — the REX-encoded register files.  */
27821 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27824 /* Return true when INSN mentions register that must be encoded using REX
/* Walk the whole pattern (or the bare rtx when INSN is not an insn)
   with the predicate above.  */
27827 x86_extended_reg_mentioned_p (rtx insn)
27829 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27830 extended_reg_mentioned_1, NULL);
27833 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27834 optabs would emit if we didn't have TFmode patterns. */
27837 x86_emit_floatuns (rtx operands[2])
27839 rtx neglab, donelab, i0, i1, f0, in, out;
27840 enum machine_mode mode, inmode;
27842 inmode = GET_MODE (operands[1]);
27843 gcc_assert (inmode == SImode || inmode == DImode);
/* NOTE(review): the assignment of OUT from operands[0] is elided in
   this listing.  */
27846 in = force_reg (inmode, operands[1]);
27847 mode = GET_MODE (out);
27848 neglab = gen_label_rtx ();
27849 donelab = gen_label_rtx ();
27850 f0 = gen_reg_rtx (mode);
/* Non-negative input: a plain signed conversion is already correct.  */
27852 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27854 expand_float (out, in, 0);
27856 emit_jump_insn (gen_jump (donelab));
27859 emit_label (neglab);
/* Negative (high bit set): convert (in >> 1) | (in & 1) — halving while
   preserving rounding via the sticky low bit — then double the result.  */
27861 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27863 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27865 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27867 expand_float (f0, i0, 0);
/* out = f0 + f0 (the doubling).  */
27869 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27871 emit_label (donelab);
27874 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27875 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): this listing elides the mode switch labels and several
   returns; each fragment below is one case of a switch over MODE —
   grouping inferred, confirm against the full source.  */
27878 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27879 rtx target, rtx val)
27881 enum machine_mode hmode, smode, wsmode, wvmode;
/* Simple case: hardware has a direct vec_duplicate pattern.  */
27896 val = force_reg (GET_MODE_INNER (mode), val);
27897 x = gen_rtx_VEC_DUPLICATE (mode, val);
27898 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V4HI-style case via SSE/3DNow!: truncate an SImode copy and
   duplicate it.  */
27904 if (TARGET_SSE || TARGET_3DNOW_A)
27906 val = gen_lowpart (SImode, val);
27907 x = gen_rtx_TRUNCATE (HImode, val);
27908 x = gen_rtx_VEC_DUPLICATE (mode, x);
27909 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V8HImode broadcast via SSE2 integer shuffles.  */
27931 /* Extend HImode to SImode using a paradoxical SUBREG. */
27932 tmp1 = gen_reg_rtx (SImode);
27933 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27934 /* Insert the SImode value as low element of V4SImode vector. */
27935 tmp2 = gen_reg_rtx (V4SImode);
27936 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27937 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27938 CONST0_RTX (V4SImode),
27940 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27941 /* Cast the V4SImode vector back to a V8HImode vector. */
27942 tmp1 = gen_reg_rtx (V8HImode);
27943 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27944 /* Duplicate the low short through the whole low SImode word. */
27945 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27946 /* Cast the V8HImode vector back to a V4SImode vector. */
27947 tmp2 = gen_reg_rtx (V4SImode);
27948 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27949 /* Replicate the low element of the V4SImode vector. */
27950 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27951 /* Cast the V2SImode back to V8HImode, and store in target. */
27952 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
/* V16QImode broadcast: same scheme, with two punpcklbw to spread the
   byte through a full SImode word first.  */
27963 /* Extend QImode to SImode using a paradoxical SUBREG. */
27964 tmp1 = gen_reg_rtx (SImode);
27965 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27966 /* Insert the SImode value as low element of V4SImode vector. */
27967 tmp2 = gen_reg_rtx (V4SImode);
27968 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27969 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27970 CONST0_RTX (V4SImode),
27972 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27973 /* Cast the V4SImode vector back to a V16QImode vector. */
27974 tmp1 = gen_reg_rtx (V16QImode);
27975 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27976 /* Duplicate the low byte through the whole low SImode word. */
27977 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27978 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27979 /* Cast the V16QImode vector back to a V4SImode vector. */
27980 tmp2 = gen_reg_rtx (V4SImode);
27981 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27982 /* Replicate the low element of the V4SImode vector. */
27983 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27984 /* Cast the V2SImode back to V16QImode, and store in target. */
27985 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
/* Generic fallback: widen the scalar by OR-ing it beside itself, then
   recurse with the wider vector mode and bitcast back.  */
27993 /* Replicate the value once into the next wider mode and recurse. */
27994 val = convert_modes (wsmode, smode, val, true);
27995 x = expand_simple_binop (wsmode, ASHIFT, val,
27996 GEN_INT (GET_MODE_BITSIZE (smode)),
27997 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27998 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
28000 x = gen_reg_rtx (wvmode);
28001 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
28002 gcc_unreachable ();
28003 emit_move_insn (target, gen_lowpart (mode, x));
/* 256-bit case: build a half-width broadcast, then concatenate it with
   itself.  */
28026 rtx tmp = gen_reg_rtx (hmode);
28027 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
28028 emit_insn (gen_rtx_SET (VOIDmode, target,
28029 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
28038 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28039 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* NOTE(review): mode switch labels and several returns are elided in
   this listing; case grouping inferred.  */
28043 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28044 rtx target, rtx var, int one_var)
28046 enum machine_mode vsimode;
28049 bool use_vector_set = false;
28054 /* For SSE4.1, we normally use vector set. But if the second
28055 element is zero and inter-unit moves are OK, we use movq
28057 use_vector_set = (TARGET_64BIT
28059 && !(TARGET_INTER_UNIT_MOVES
28065 use_vector_set = TARGET_SSE4_1;
28068 use_vector_set = TARGET_SSE2;
28071 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28078 use_vector_set = TARGET_AVX;
28081 /* Use ix86_expand_vector_set in 64bit mode only. */
28082 use_vector_set = TARGET_AVX && TARGET_64BIT;
/* Preferred path: zero the vector, then insert VAR at ONE_VAR.  */
28088 if (use_vector_set)
28090 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28091 var = force_reg (GET_MODE_INNER (mode), var);
28092 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element vectors with ONE_VAR == 0: VEC_CONCAT with zero.  */
28108 var = force_reg (GET_MODE_INNER (mode), var);
28109 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28110 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Four-element case: build (var, 0, 0, 0) into a pseudo first so we can
   shuffle VAR into position afterwards.  Hard registers can't be
   retyped by the shuffles below, hence the fresh pseudo.  */
28115 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28116 new_target = gen_reg_rtx (mode);
28118 new_target = target;
28119 var = force_reg (GET_MODE_INNER (mode), var);
28120 x = gen_rtx_VEC_DUPLICATE (mode, var);
28121 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28122 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28125 /* We need to shuffle the value to the correct position, so
28126 create a new pseudo to store the intermediate result. */
28128 /* With SSE2, we can use the integer shuffle insns. */
28129 if (mode != V4SFmode && TARGET_SSE2)
/* pshufd selector: element ONE_VAR takes index 0, others take 0/1 so
   the zeros stay zero.  */
28131 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28133 GEN_INT (one_var == 1 ? 0 : 1),
28134 GEN_INT (one_var == 2 ? 0 : 1),
28135 GEN_INT (one_var == 3 ? 0 : 1)));
28136 if (target != new_target)
28137 emit_move_insn (target, new_target);
28141 /* Otherwise convert the intermediate result to V4SFmode and
28142 use the SSE1 shuffle instructions. */
28143 if (mode != V4SFmode)
28145 tmp = gen_reg_rtx (V4SFmode);
28146 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
/* shufps: +4 selects from the second source operand.  */
28151 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28153 GEN_INT (one_var == 1 ? 0 : 1),
28154 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28155 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28157 if (mode != V4SFmode)
28158 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28159 else if (tmp != target)
28160 emit_move_insn (target, tmp);
28162 else if (target != new_target)
28163 emit_move_insn (target, new_target);
/* Narrow-element modes: zero-extend the scalar to SImode and recurse
   in the matching SImode vector mode, then bitcast back.  */
28168 vsimode = V4SImode;
28174 vsimode = V2SImode;
28180 /* Zero extend the variable element to SImode and recurse. */
28181 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28183 x = gen_reg_rtx (vsimode);
28184 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28186 gcc_unreachable ();
28188 emit_move_insn (target, gen_lowpart (mode, x));
28196 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28197 consisting of the values in VALS. It is known that all elements
28198 except ONE_VAR are constants. Return true if successful. */
28201 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28202 rtx target, rtx vals, int one_var)
28204 rtx var = XVECEXP (vals, 0, one_var);
28205 enum machine_mode wmode;
/* Build the all-constant vector with the variable slot zeroed; it will
   be loaded from the constant pool and then patched.  */
28208 const_vec = copy_rtx (vals);
28209 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28210 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28218 /* For the two element vectors, it's just as easy to use
28219 the general case. */
28223 /* Use ix86_expand_vector_set in 64bit mode only. */
28246 /* There's no way to set one QImode entry easily. Combine
28247 the variable value with its adjacent constant value, and
28248 promote to an HImode set. */
/* Adjacent constant partner of VAR within the HImode pair.  */
28249 x = XVECEXP (vals, 0, one_var ^ 1);
/* Pack VAR and its neighbor into one HImode value; which one lands in
   the high byte depends on the parity of ONE_VAR (elided test).  */
28252 var = convert_modes (HImode, QImode, var, true);
28253 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28254 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28255 x = GEN_INT (INTVAL (x) & 0xff);
28259 var = convert_modes (HImode, QImode, var, true);
28260 x = gen_int_mode (INTVAL (x) << 8, HImode);
28262 if (x != const0_rtx)
28263 var = expand_simple_binop (HImode, IOR, var, x, var,
28264 1, OPTAB_LIB_WIDEN);
/* Do the insertion in the wider (HImode-element) vector mode WMODE.  */
28266 x = gen_reg_rtx (wmode);
28267 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28268 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28270 emit_move_insn (target, gen_lowpart (mode, x));
/* Default path: load the constant vector, then overwrite slot ONE_VAR.  */
28277 emit_move_insn (target, const_vec);
28278 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28282 /* A subroutine of ix86_expand_vector_init_general. Use vector
28283 concatenate to handle the most general case: all values variable,
28284 and none identical. */
/* NOTE(review): the switch over N and the per-N mode selection are
   elided in this listing; comments cover only visible code.  */
28287 ix86_expand_vector_init_concat (enum machine_mode mode,
28288 rtx target, rtx *ops, int n)
28290 enum machine_mode cmode, hmode = VOIDmode;
28291 rtx first[8], second[4];
28331 gcc_unreachable ();
/* N == 2: one VEC_CONCAT of the two (register-forced) operands.  */
28334 if (!register_operand (ops[1], cmode))
28335 ops[1] = force_reg (cmode, ops[1]);
28336 if (!register_operand (ops[0], cmode))
28337 ops[0] = force_reg (cmode, ops[0]);
28338 emit_insn (gen_rtx_SET (VOIDmode, target,
28339 gen_rtx_VEC_CONCAT (mode, ops[0],
28359 gcc_unreachable ();
28375 gcc_unreachable ();
28380 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* Pairwise-combine OPS into half-width vectors FIRST[], recursing
   through ix86_expand_vector_init for each pair.  */
28383 for (; i > 0; i -= 2, j--)
28385 first[j] = gen_reg_rtx (cmode);
28386 v = gen_rtvec (2, ops[i - 1], ops[i]);
28387 ix86_expand_vector_init (false, first[j],
28388 gen_rtx_PARALLEL (cmode, v));
/* When another halving level exists (HMODE), combine FIRST[] into
   SECOND[] and recurse; otherwise concat FIRST[] directly.  */
28394 gcc_assert (hmode != VOIDmode);
28395 for (i = j = 0; i < n; i += 2, j++)
28397 second[j] = gen_reg_rtx (hmode);
28398 ix86_expand_vector_init_concat (hmode, second [j],
28402 ix86_expand_vector_init_concat (mode, target, second, n);
28405 ix86_expand_vector_init_concat (mode, target, first, n);
28409 gcc_unreachable ();
28413 /* A subroutine of ix86_expand_vector_init_general. Use vector
28414 interleave to handle the most general case: all values variable,
28415 and none identical. */
28418 ix86_expand_vector_init_interleave (enum machine_mode mode,
28419 rtx target, rtx *ops, int n)
28421 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
/* Per-mode insn generators: element insert, then the two (or three)
   levels of low-half interleave.  */
28424 rtx (*gen_load_even) (rtx, rtx, rtx);
28425 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28426 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* V8HImode configuration.  */
28431 gen_load_even = gen_vec_setv8hi;
28432 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28433 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28434 inner_mode = HImode;
28435 first_imode = V4SImode;
28436 second_imode = V2DImode;
28437 third_imode = VOIDmode;
/* V16QImode configuration (needs one more interleave level).  */
28440 gen_load_even = gen_vec_setv16qi;
28441 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28442 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28443 inner_mode = QImode;
28444 first_imode = V8HImode;
28445 second_imode = V4SImode;
28446 third_imode = V2DImode;
28449 gcc_unreachable ();
/* Stage 1: pack element pairs (2*i, 2*i+1) into N vectors.  */
28452 for (i = 0; i < n; i++)
28454 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
28455 op0 = gen_reg_rtx (SImode);
28456 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28458 /* Insert the SImode value as low element of V4SImode vector. */
28459 op1 = gen_reg_rtx (V4SImode);
28460 op0 = gen_rtx_VEC_MERGE (V4SImode,
28461 gen_rtx_VEC_DUPLICATE (V4SImode,
28463 CONST0_RTX (V4SImode),
28465 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28467 /* Cast the V4SImode vector back to a vector in orignal mode. */
28468 op0 = gen_reg_rtx (mode);
28469 emit_move_insn (op0, gen_lowpart (mode, op1));
28471 /* Load even elements into the second positon. */
28472 emit_insn ((*gen_load_even) (op0,
28473 force_reg (inner_mode,
28477 /* Cast vector to FIRST_IMODE vector. */
28478 ops[i] = gen_reg_rtx (first_imode);
28479 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28482 /* Interleave low FIRST_IMODE vectors. */
28483 for (i = j = 0; i < n; i += 2, j++)
28485 op0 = gen_reg_rtx (first_imode);
28486 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28488 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28489 ops[j] = gen_reg_rtx (second_imode);
28490 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28493 /* Interleave low SECOND_IMODE vectors. */
28494 switch (second_imode)
/* V4SImode (QI element path): one extra interleave level, then fall
   through to the V2DI final combine.  */
28497 for (i = j = 0; i < n / 2; i += 2, j++)
28499 op0 = gen_reg_rtx (second_imode);
28500 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28503 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28505 ops[j] = gen_reg_rtx (third_imode);
28506 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28508 second_imode = V2DImode;
28509 gen_interleave_second_low = gen_vec_interleave_lowv2di;
/* Final combine of the last two vectors into TARGET.  */
28513 op0 = gen_reg_rtx (second_imode);
28514 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28517 /* Cast the SECOND_IMODE vector back to a vector on original
28519 emit_insn (gen_rtx_SET (VOIDmode, target,
28520 gen_lowpart (mode, op0)));
28524 gcc_unreachable ();
28528 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28529 all values variable, and none identical. */
/* NOTE(review): the mode switch labels are elided in this listing; case
   grouping inferred.  */
28532 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28533 rtx target, rtx vals)
28535 rtx ops[32], op0, op1;
28536 enum machine_mode half_mode = VOIDmode;
28543 if (!mmx_ok && !TARGET_SSE)
/* Wide-element modes: build by recursive concatenation.  */
28555 n = GET_MODE_NUNITS (mode);
28556 for (i = 0; i < n; i++)
28557 ops[i] = XVECEXP (vals, 0, i);
28558 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit QI/HI vectors: interleave each 128-bit half, then concat.  */
28562 half_mode = V16QImode;
28566 half_mode = V8HImode;
28570 n = GET_MODE_NUNITS (mode);
28571 for (i = 0; i < n; i++)
28572 ops[i] = XVECEXP (vals, 0, i);
28573 op0 = gen_reg_rtx (half_mode);
28574 op1 = gen_reg_rtx (half_mode);
28575 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28577 ix86_expand_vector_init_interleave (half_mode, op1,
28578 &ops [n >> 1], n >> 2);
28579 emit_insn (gen_rtx_SET (VOIDmode, target,
28580 gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* 128-bit QI/HI vectors: interleave directly (SSE4.1 gate for V16QI).  */
28584 if (!TARGET_SSE4_1)
28592 /* Don't use ix86_expand_vector_init_interleave if we can't
28593 move from GPR to SSE register directly. */
28594 if (!TARGET_INTER_UNIT_MOVES)
28597 n = GET_MODE_NUNITS (mode);
28598 for (i = 0; i < n; i++)
28599 ops[i] = XVECEXP (vals, 0, i);
28600 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28608 gcc_unreachable ();
/* Fallback: assemble elements into word_mode scalars, then move the
   words into the vector register.  */
28612 int i, j, n_elts, n_words, n_elt_per_word;
28613 enum machine_mode inner_mode;
28614 rtx words[4], shift;
28616 inner_mode = GET_MODE_INNER (mode);
28617 n_elts = GET_MODE_NUNITS (mode);
28618 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28619 n_elt_per_word = n_elts / n_words;
28620 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28622 for (i = 0; i < n_words; ++i)
28624 rtx word = NULL_RTX;
/* Elements are folded in from highest to lowest within the word:
   shift the accumulator left and OR in the next element.  */
28626 for (j = 0; j < n_elt_per_word; ++j)
28628 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28629 elt = convert_modes (word_mode, inner_mode, elt, true);
28635 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28636 word, 1, OPTAB_LIB_WIDEN);
28637 word = expand_simple_binop (word_mode, IOR, word, elt,
28638 word, 1, OPTAB_LIB_WIDEN);
/* One word: plain bitcast move.  */
28646 emit_move_insn (target, gen_lowpart (mode, words[0]));
28647 else if (n_words == 2)
/* Two words: clobber the pseudo first so the two subreg stores are not
   treated as a partial def of an uninitialized register.  */
28649 rtx tmp = gen_reg_rtx (mode);
28650 emit_clobber (tmp);
28651 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28652 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28653 emit_move_insn (target, tmp);
28655 else if (n_words == 4)
/* Four SImode words: recurse as a V4SImode build.  */
28657 rtx tmp = gen_reg_rtx (V4SImode);
28658 gcc_assert (word_mode == SImode);
28659 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28660 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28661 emit_move_insn (target, gen_lowpart (mode, tmp));
28664 gcc_unreachable ();
28668 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28669 instructions unless MMX_OK is true. */
28672 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28674 enum machine_mode mode = GET_MODE (target);
28675 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28676 int n_elts = GET_MODE_NUNITS (mode);
28677 int n_var = 0, one_var = -1;
28678 bool all_same = true, all_const_zero = true;
/* Classify the initializer: count variable elements, remember the last
   variable index, and track all-same / all-zero.  */
28682 for (i = 0; i < n_elts; ++i)
28684 x = XVECEXP (vals, 0, i);
28685 if (!(CONST_INT_P (x)
28686 || GET_CODE (x) == CONST_DOUBLE
28687 || GET_CODE (x) == CONST_FIXED))
28688 n_var++, one_var = i;
28689 else if (x != CONST0_RTX (inner_mode))
28690 all_const_zero = false;
28691 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28695 /* Constants are best loaded from the constant pool. */
28698 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28702 /* If all values are identical, broadcast the value. */
28704 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28705 XVECEXP (vals, 0, 0)))
28708 /* Values where only one field is non-constant are best loaded from
28709 the pool and overwritten via move later. */
28713 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28714 XVECEXP (vals, 0, one_var),
28718 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Fully general fallback.  */
28722 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET, using MMX insns
   only when MMX_OK.  NOTE(review): the mode switch labels are elided in
   this listing; case grouping inferred.  */
28726 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28728 enum machine_mode mode = GET_MODE (target);
28729 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28730 enum machine_mode half_mode;
28731 bool use_vec_merge = false;
/* AVX 256-bit support tables, indexed by [mode-row][hi/lo half].  */
28733 static rtx (*gen_extract[6][2]) (rtx, rtx)
28735 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28736 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28737 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28738 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28739 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28740 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28742 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28744 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28745 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28746 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28747 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28748 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28749 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element MMX case: extract the untouched element, then rebuild the
   pair with VEC_CONCAT in the right order.  */
28759 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28760 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28762 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28764 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28765 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28771 use_vec_merge = TARGET_SSE4_1;
28779 /* For the two element vectors, we implement a VEC_CONCAT with
28780 the extraction of the other element. */
28782 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28783 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28786 op0 = val, op1 = tmp;
28788 op0 = tmp, op1 = val;
28790 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28791 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28796 use_vec_merge = TARGET_SSE4_1;
28803 use_vec_merge = true;
/* V4SF without SSE4.1: shufps choreography per element index.  */
28807 /* tmp = target = A B C D */
28808 tmp = copy_to_reg (target);
28809 /* target = A A B B */
28810 emit_insn (gen_sse_unpcklps (target, target, target));
28811 /* target = X A B B */
28812 ix86_expand_vector_set (false, target, val, 0);
28813 /* target = A X C D */
28814 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28815 GEN_INT (1), GEN_INT (0),
28816 GEN_INT (2+4), GEN_INT (3+4)));
28820 /* tmp = target = A B C D */
28821 tmp = copy_to_reg (target);
28822 /* tmp = X B C D */
28823 ix86_expand_vector_set (false, tmp, val, 0);
28824 /* target = A B X D */
28825 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28826 GEN_INT (0), GEN_INT (1),
28827 GEN_INT (0+4), GEN_INT (3+4)));
28831 /* tmp = target = A B C D */
28832 tmp = copy_to_reg (target);
28833 /* tmp = X B C D */
28834 ix86_expand_vector_set (false, tmp, val, 0);
28835 /* target = A B X D */
28836 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28837 GEN_INT (0), GEN_INT (1),
28838 GEN_INT (2+4), GEN_INT (0+4)));
28842 gcc_unreachable ();
28847 use_vec_merge = TARGET_SSE4_1;
28851 /* Element 0 handled by vec_merge below. */
28854 use_vec_merge = true;
28860 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28861 store into element 0, then shuffle them back. */
28865 order[0] = GEN_INT (elt);
28866 order[1] = const1_rtx;
28867 order[2] = const2_rtx;
28868 order[3] = GEN_INT (3);
28869 order[elt] = const0_rtx;
28871 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28872 order[1], order[2], order[3]));
28874 ix86_expand_vector_set (false, target, val, 0);
/* The same permutation is its own inverse (a transposition).  */
28876 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28877 order[1], order[2], order[3]));
28881 /* For SSE1, we have to reuse the V4SF code. */
28882 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28883 gen_lowpart (SFmode, val), elt);
28888 use_vec_merge = TARGET_SSE2;
28891 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28895 use_vec_merge = TARGET_SSE4_1;
/* AVX 256-bit modes: pick the 128-bit half mode for the tables above.  */
28902 half_mode = V16QImode;
28908 half_mode = V8HImode;
28914 half_mode = V4SImode;
28920 half_mode = V2DImode;
28926 half_mode = V4SFmode;
28932 half_mode = V2DFmode;
28938 /* Compute offset. */
28942 gcc_assert (i <= 1);
28944 /* Extract the half. */
28945 tmp = gen_reg_rtx (half_mode);
28946 emit_insn ((*gen_extract[j][i]) (tmp, target));
28948 /* Put val in tmp at elt. */
28949 ix86_expand_vector_set (false, tmp, val, elt);
/* Write the modified half back into TARGET.  */
28952 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
/* Generic VEC_MERGE path: duplicate VAL and merge one lane by mask.  */
28961 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28962 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28963 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: bounce through a stack slot.  */
28967 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28969 emit_move_insn (mem, target);
28971 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28972 emit_move_insn (tmp, val);
28974 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET, using MMX insns
   only when MMX_OK.  NOTE(review): mode switch labels are elided in
   this listing; case grouping inferred.  */
28979 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28981 enum machine_mode mode = GET_MODE (vec);
28982 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28983 bool use_vec_extr = false;
28996 use_vec_extr = true;
29000 use_vec_extr = TARGET_SSE4_1;
/* V4SF, non-trivial ELT: broadcast the wanted element to lane 0 with
   shufps, then fall through to the scalar extract.  */
29012 tmp = gen_reg_rtx (mode);
29013 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
29014 GEN_INT (elt), GEN_INT (elt),
29015 GEN_INT (elt+4), GEN_INT (elt+4)));
/* High-half case: unpckhps brings elements 2/3 down.  */
29019 tmp = gen_reg_rtx (mode);
29020 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
29024 gcc_unreachable ();
29027 use_vec_extr = true;
29032 use_vec_extr = TARGET_SSE4_1;
/* V4SI with SSE2: pshufd splat of the wanted element to lane 0.  */
29046 tmp = gen_reg_rtx (mode);
29047 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29048 GEN_INT (elt), GEN_INT (elt),
29049 GEN_INT (elt), GEN_INT (elt)));
29053 tmp = gen_reg_rtx (mode);
29054 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
29058 gcc_unreachable ();
29061 use_vec_extr = true;
29066 /* For SSE1, we have to reuse the V4SF code. */
29067 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29068 gen_lowpart (V4SFmode, vec), elt);
29074 use_vec_extr = TARGET_SSE2;
29077 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29081 use_vec_extr = TARGET_SSE4_1;
29085 /* ??? Could extract the appropriate HImode element and shift. */
/* VEC_SELECT path for modes with a direct extract pattern.  */
29092 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29093 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29095 /* Let the rtl optimizers know about the zero extension performed. */
29096 if (inner_mode == QImode || inner_mode == HImode)
29098 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29099 target = gen_lowpart (SImode, target);
29102 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: bounce through a stack slot.  */
29106 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29108 emit_move_insn (mem, vec);
29110 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29111 emit_move_insn (target, tmp);
29115 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29116 pattern to reduce; DEST is the destination; IN is the input vector. */
29119 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29121 rtx tmp1, tmp2, tmp3;
29123 tmp1 = gen_reg_rtx (V4SFmode);
29124 tmp2 = gen_reg_rtx (V4SFmode);
29125 tmp3 = gen_reg_rtx (V4SFmode);
/* Fold the high pair onto the low pair: movhlps + FN.  */
29127 emit_insn (gen_sse_movhlps (tmp1, in, in));
29128 emit_insn (fn (tmp2, tmp1, in));
/* Fold the remaining two lanes: splat lane 1 and combine with lane 0.  */
29130 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29131 GEN_INT (1), GEN_INT (1),
29132 GEN_INT (1+4), GEN_INT (1+4)));
29133 emit_insn (fn (dest, tmp2, tmp3));
29136 /* Target hook for scalar_mode_supported_p. */
/* NOTE(review): the return values for the two special cases are elided
   in this listing.  */
29138 ix86_scalar_mode_supported_p (enum machine_mode mode)
29140 if (DECIMAL_FLOAT_MODE_P (mode))
29142 else if (mode == TFmode)
29145 return default_scalar_mode_supported_p (mode);
29148 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when the strongest ISA extension that can
   hold it is enabled; elided lines hold the `return true` bodies.  */
29150 ix86_vector_mode_supported_p (enum machine_mode mode)
29152 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29154 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29156 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29158 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29160 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29165 /* Target hook for c_mode_for_suffix. */
/* NOTE(review): the function body is elided in this listing.  */
29166 static enum machine_mode
29167 ix86_c_mode_for_suffix (char suffix)
29177 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29179 We do this in the new i386 backend to maintain source compatibility
29180 with the old cc0-based compiler. */
/* Implicitly clobber the flags and x87 status registers for every asm,
   as the cc0 compiler assumed.  NOTE(review): the "dirflag" cons and
   return are elided in this listing — confirm.  */
29183 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29184 tree inputs ATTRIBUTE_UNUSED,
29187 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29189 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29194 /* Implements target vector targetm.asm.encode_section_info. This
29195 is not used by netware. */
29197 static void ATTRIBUTE_UNUSED
29198 ix86_encode_section_info (tree decl, rtx rtl, int first)
29200 default_encode_section_info (decl, rtl, first);
/* Mark variables living in the large data section so addressing code
   knows they need far (64-bit) addresses.  */
29202 if (TREE_CODE (decl) == VAR_DECL
29203 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29204 && ix86_in_large_data_p (decl))
29205 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29208 /* Worker function for REVERSE_CONDITION. */
29211 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
/* Floating-point compares (CCFP/CCFPU) must use the maybe-unordered
   reversal so NaN operands keep IEEE semantics; integer CC modes can
   use the plain reversal.  */
29213 return (mode != CCFPmode && mode != CCFPUmode
29214 ? reverse_condition (code)
29215 : reverse_condition_maybe_unordered (code));
29218 /* Output code to perform an x87 FP register move, from OPERANDS[1]
29222 output_387_reg_move (rtx insn, rtx *operands)
29224 if (REG_P (operands[0]))
/* Register destination: if the source register dies here, use a popping
   store so its stack slot is freed.  */
29226 if (REG_P (operands[1])
29227 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
/* Dead source moving onto the stack top is a pure pop (ffreep).  */
29229 if (REGNO (operands[0]) == FIRST_STACK_REG)
29230 return output_387_ffreep (operands, 0);
29231 return "fstp\t%y0";
29233 if (STACK_TOP_P (operands[0]))
29234 return "fld%Z1\t%y1";
29237 else if (MEM_P (operands[0]))
29239 gcc_assert (REG_P (operands[1]));
/* Store to memory: pop when the source register dies with this insn.  */
29240 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29241 return "fstp%Z0\t%y0";
29244 /* There is no non-popping store to memory for XFmode.
29245 So if we need one, follow the store with a load. */
29246 if (GET_MODE (operands[0]) == XFmode)
29247 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29249 return "fst%Z0\t%y0";
29256 /* Output code to perform a conditional jump to LABEL, if C2 flag in
29257 FP status register is set. */
29260 ix86_emit_fp_unordered_jump (rtx label)
29262 rtx reg = gen_reg_rtx (HImode);
/* Copy the x87 status word into a general register.  */
29265 emit_insn (gen_x86_fnstsw_1 (reg));
/* Fast path: SAHF moves the status flags straight into EFLAGS, letting
   us branch on the UNORDERED condition directly.  */
29267 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29269 emit_insn (gen_x86_sahf_1 (reg));
29271 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29272 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* Fallback: test the C2 bit (0x04) of the status word explicitly.  */
29276 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29278 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29279 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29282 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29283 gen_rtx_LABEL_REF (VOIDmode, label),
29285 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29287 emit_jump_insn (temp);
/* Unordered operands are rare; predict the branch mostly not taken.  */
29288 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29291 /* Output code to perform a log1p XFmode calculation. */
29293 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29295 rtx label1 = gen_label_rtx ();
29296 rtx label2 = gen_label_rtx ();
29298 rtx tmp = gen_reg_rtx (XFmode);
29299 rtx tmp2 = gen_reg_rtx (XFmode);
29302 emit_insn (gen_absxf2 (tmp, op1));
/* The threshold 0.2928... is 1 - sqrt(2)/2: inside |x| < 1-sqrt(2)/2 the
   FYL2XP1 instruction is accurate (its documented input range); outside
   it we must fall back to FYL2X on 1+x.  */
29303 test = gen_rtx_GE (VOIDmode, tmp,
29304 CONST_DOUBLE_FROM_REAL_VALUE (
29305 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29307 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
/* Small |x|: op0 = ln(2) * log2(1 + op1) via fyl2xp1.  */
29309 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29310 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29311 emit_jump (label2);
/* Large |x|: op0 = ln(2) * log2(1 + op1) via explicit add and fyl2x.  */
29313 emit_label (label1);
29314 emit_move_insn (tmp, CONST1_RTX (XFmode));
29315 emit_insn (gen_addxf3 (tmp, op1, tmp));
29316 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29317 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29319 emit_label (label2);
29322 /* Output code to perform a Newton-Raphson approximation of a single precision
29323 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29325 void ix86_emit_swdivsf (rtx res, rtx a, enum machine_mode mode)
29327 rtx x0, x1, e0, e1, two;
29329 x0 = gen_reg_rtx (mode);
29330 e0 = gen_reg_rtx (mode);
29331 e1 = gen_reg_rtx (mode);
29332 x1 = gen_reg_rtx (mode);
29334 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
/* For vector modes, replicate the 2.0 constant across all lanes.  */
29336 if (VECTOR_MODE_P (mode))
29337 two = ix86_build_const_vector (SFmode, true, two);
29339 two = force_reg (mode, two);
29341 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
29343 /* x0 = rcp(b) estimate */
29344 emit_insn (gen_rtx_SET (VOIDmode, x0,
29345 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0 (should be close to 1.0).  */
29348 emit_insn (gen_rtx_SET (VOIDmode, e0,
29349 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 : the Newton-Raphson correction factor.  */
29351 emit_insn (gen_rtx_SET (VOIDmode, e1,
29352 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1 : refined reciprocal of b.  */
29354 emit_insn (gen_rtx_SET (VOIDmode, x1,
29355 gen_rtx_MULT (mode, x0, e1)));
/* res = a * (1/b).  */
29357 emit_insn (gen_rtx_SET (VOIDmode, res,
29358 gen_rtx_MULT (mode, a, x1)));
29361 /* Output code to perform a Newton-Raphson approximation of a
29362 single precision floating point [reciprocal] square root. */
29364 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29367 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29370 x0 = gen_reg_rtx (mode);
29371 e0 = gen_reg_rtx (mode);
29372 e1 = gen_reg_rtx (mode);
29373 e2 = gen_reg_rtx (mode);
29374 e3 = gen_reg_rtx (mode);
/* mthree = -3.0, mhalf = -0.5, built as REAL_VALUE constants.  */
29376 real_from_integer (&r, VOIDmode, -3, -1, 0);
29377 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29379 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29380 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29382 if (VECTOR_MODE_P (mode))
29384 mthree = ix86_build_const_vector (SFmode, true, mthree);
29385 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29388 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29389 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29391 /* x0 = rsqrt(a) estimate */
29392 emit_insn (gen_rtx_SET (VOIDmode, x0,
29393 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29396 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
29401 zero = gen_reg_rtx (mode);
29402 mask = gen_reg_rtx (mode);
/* mask = (a != 0) ? all-ones : all-zeros; AND forces x0 to 0 for a == 0,
   so sqrt(0.0) yields 0 rather than 0 * inf = NaN below.  */
29404 zero = force_reg (mode, CONST0_RTX(mode));
29405 emit_insn (gen_rtx_SET (VOIDmode, mask,
29406 gen_rtx_NE (mode, zero, a)));
29408 emit_insn (gen_rtx_SET (VOIDmode, x0,
29409 gen_rtx_AND (mode, x0, mask)));
/* e0 = a * x0.  */
29413 emit_insn (gen_rtx_SET (VOIDmode, e0,
29414 gen_rtx_MULT (mode, x0, a)));
/* e1 = a * x0 * x0 (should be close to 1.0).  */
29416 emit_insn (gen_rtx_SET (VOIDmode, e1,
29417 gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3.0 (mthree is -3.0, hence the PLUS).  */
29420 mthree = force_reg (mode, mthree);
29421 emit_insn (gen_rtx_SET (VOIDmode, e2,
29422 gen_rtx_PLUS (mode, e1, mthree)));
29424 mhalf = force_reg (mode, mhalf);
/* NOTE(review): the two e3 assignments below are the rsqrt and sqrt
   alternatives; the selecting conditional is elided in this excerpt.  */
29426 /* e3 = -.5 * x0 */
29427 emit_insn (gen_rtx_SET (VOIDmode, e3,
29428 gen_rtx_MULT (mode, x0, mhalf)));
29430 /* e3 = -.5 * e0 */
29431 emit_insn (gen_rtx_SET (VOIDmode, e3,
29432 gen_rtx_MULT (mode, e0, mhalf)));
29433 /* ret = e2 * e3 */
29434 emit_insn (gen_rtx_SET (VOIDmode, res,
29435 gen_rtx_MULT (mode, e2, e3)));
29438 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29440 static void ATTRIBUTE_UNUSED
29441 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29444 /* With Binutils 2.15, the "@unwind" marker must be specified on
29445 every occurrence of the ".eh_frame" section, not just the first
29448 && strcmp (name, ".eh_frame") == 0)
/* Emit the section directive by hand so the @unwind type is included;
   writability still controls the "aw" vs "a" attribute string.  */
29450 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29451 flags & SECTION_WRITE ? "aw" : "a");
/* All other sections use the generic ELF handling.  */
29454 default_elf_asm_named_section (name, flags, decl);
29457 /* Return the mangling of TYPE if it is an extended fundamental type. */
29459 static const char *
29460 ix86_mangle_type (const_tree type)
29462 type = TYPE_MAIN_VARIANT (type);
/* Only scalar arithmetic types can be extended fundamental types; the
   NULL return for other codes is elided in this excerpt.  */
29464 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29465 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29468 switch (TYPE_MODE (type))
29471 /* __float128 is "g". */
29474 /* "long double" or __float80 is "e". */
29481 /* For 32-bit code we can save PIC register setup by using
29482 __stack_chk_fail_local hidden function instead of calling
29483 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
29484 register, so it is better to call __stack_chk_fail directly. */
29487 ix86_stack_protect_fail (void)
/* Select the failure callee per the rationale above.  */
29489 return TARGET_64BIT
29490 ? default_external_stack_protect_fail ()
29491 : default_hidden_stack_protect_fail ();
29494 /* Select a format to encode pointers in exception handling data. CODE
29495 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29496 true if the symbol may be affected by dynamic relocations.
29498 ??? All x86 object file formats are capable of representing this.
29499 After all, the relocation needed is the same as for the call insn.
29500 Whether or not a particular assembler allows us to enter such, I
29501 guess we'll have to see. */
29503 asm_preferred_eh_data_format (int code, int global)
/* PIC path (the guarding condition is partly elided in this excerpt):
   default to 8-byte pc-relative data, narrowing to 4 bytes for small
   code models; indirect encoding when dynamic relocation is possible.  */
29507 int type = DW_EH_PE_sdata8;
29509 || ix86_cmodel == CM_SMALL_PIC
29510 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29511 type = DW_EH_PE_sdata4;
29512 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: small models can use 4-byte absolute data; otherwise fall
   back to full absolute pointers.  */
29514 if (ix86_cmodel == CM_SMALL
29515 || (ix86_cmodel == CM_MEDIUM && code))
29516 return DW_EH_PE_udata4;
29517 return DW_EH_PE_absptr;
29520 /* Expand copysign from SIGN to the positive value ABS_VALUE
29521 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
29524 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29526 enum machine_mode mode = GET_MODE (sign);
29527 rtx sgn = gen_reg_rtx (mode);
29528 if (mask == NULL_RTX)
/* No caller-supplied mask: build the canonical sign-bit mask.  */
29530 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29531 if (!VECTOR_MODE_P (mode))
29533 /* We need to generate a scalar mode mask in this case. */
29534 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29535 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29536 mask = gen_reg_rtx (mode);
29537 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* Caller-supplied mask selects the magnitude bits, so invert it here to
   extract just the sign bit of SIGN.  */
29541 mask = gen_rtx_NOT (mode, mask);
29542 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29543 gen_rtx_AND (mode, mask, sign)));
/* result = abs_value | (sign & signbit-mask).  */
29544 emit_insn (gen_rtx_SET (VOIDmode, result,
29545 gen_rtx_IOR (mode, abs_value, sgn)));
29548 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29549 mask for masking out the sign-bit is stored in *SMASK, if that is
29552 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29554 enum machine_mode mode = GET_MODE (op0);
29557 xa = gen_reg_rtx (mode);
/* Mask with all bits set except the sign bit (third arg true).  */
29558 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29559 if (!VECTOR_MODE_P (mode))
29561 /* We need to generate a scalar mode mask in this case. */
29562 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29563 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29564 mask = gen_reg_rtx (mode);
29565 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* xa = op0 & ~signbit — clears the sign, i.e. fabs.  */
29567 emit_insn (gen_rtx_SET (VOIDmode, xa,
29568 gen_rtx_AND (mode, op0, mask)));
29576 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29577 swapping the operands if SWAP_OPERANDS is true. The expanded
29578 code is a forward jump to a newly created label in case the
29579 comparison is true. The generated label rtx is returned. */
29581 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29582 bool swap_operands)
/* CCFPUmode: unordered FP compare, so UN* codes behave correctly on
   NaN operands.  */
29593 label = gen_label_rtx ();
29594 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29595 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29596 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29597 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29598 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29599 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29600 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Record the target so later passes can maintain LABEL_NUSES.  */
29601 JUMP_LABEL (tmp) = label;
29606 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29607 using comparison code CODE. Operands are swapped for the comparison if
29608 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29610 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29611 bool swap_operands)
29613 enum machine_mode mode = GET_MODE (op0);
29614 rtx mask = gen_reg_rtx (mode);
/* cmpsd/cmpss produce an all-ones/all-zeros mask in MASK.  */
29623 if (mode == DFmode)
29624 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29625 gen_rtx_fmt_ee (code, mode, op0, op1)));
29627 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29628 gen_rtx_fmt_ee (code, mode, op0, op1)));
29633 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29634 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29636 ix86_gen_TWO52 (enum machine_mode mode)
29638 REAL_VALUE_TYPE TWO52r;
/* 2^52 for double, 2^23 for float: adding then subtracting this value
   rounds away the fractional bits of any smaller magnitude.  */
29641 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29642 TWO52 = const_double_from_real_value (TWO52r, mode);
29643 TWO52 = force_reg (mode, TWO52);
29648 /* Expand SSE sequence for computing lround from OP1 storing
29651 ix86_expand_lround (rtx op0, rtx op1)
29653 /* C code for the stuff we're doing below:
29654 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29657 enum machine_mode mode = GET_MODE (op1);
29658 const struct real_format *fmt;
29659 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29662 /* load nextafter (0.5, 0.0) */
/* nextafter(0.5, 0.0) = 0.5 - 2^(-p-1); using the predecessor of 0.5
   avoids rounding x.5-epsilon up when the addition is performed.  */
29663 fmt = REAL_MODE_FORMAT (mode);
29664 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29665 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29667 /* adj = copysign (0.5, op1) */
29668 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29669 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29671 /* adj = op1 + adj */
29672 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29674 /* op0 = (imode)adj */
/* Truncating conversion — the bias added above turns it into round.  */
29675 expand_fix (op0, adj, 0);
29678 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
29681 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29683 /* C code for the stuff we're doing below (for do_floor):
29685 xi -= (double)xi > op1 ? 1 : 0;
29688 enum machine_mode fmode = GET_MODE (op1);
29689 enum machine_mode imode = GET_MODE (op0);
29690 rtx ireg, freg, label, tmp;
29692 /* reg = (long)op1 */
29693 ireg = gen_reg_rtx (imode);
29694 expand_fix (ireg, op1, 0);
29696 /* freg = (double)reg */
29697 freg = gen_reg_rtx (fmode);
29698 expand_float (freg, ireg, 0);
29700 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* UNLE jump skips the adjustment; operands are swapped for the ceil
   variant so the same comparison code serves both directions.  */
29701 label = ix86_expand_sse_compare_and_jump (UNLE,
29702 freg, op1, !do_floor);
29703 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29704 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29705 emit_move_insn (ireg, tmp);
29707 emit_label (label);
29708 LABEL_NUSES (label) = 1;
29710 emit_move_insn (op0, ireg);
29713 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29714 result in OPERAND0. */
29716 ix86_expand_rint (rtx operand0, rtx operand1)
29718 /* C code for the stuff we're doing below:
29719 xa = fabs (operand1);
29720 if (!isless (xa, 2**52))
29722 xa = xa + 2**52 - 2**52;
29723 return copysign (xa, operand1);
29725 enum machine_mode mode = GET_MODE (operand0);
29726 rtx res, xa, label, TWO52, mask;
29728 res = gen_reg_rtx (mode);
29729 emit_move_insn (res, operand1);
29731 /* xa = abs (operand1) */
29732 xa = ix86_expand_sse_fabs (res, &mask);
29734 /* if (!isless (xa, TWO52)) goto label; */
/* Values >= 2^52 (2^23 for float) are already integral, as are NaNs;
   in that case the input is returned unchanged.  */
29735 TWO52 = ix86_gen_TWO52 (mode);
29736 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding and subtracting 2^52 rounds to integer in the current
   (round-to-nearest) mode; the sign is restored afterwards.  */
29738 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29739 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29741 ix86_sse_copysign_to_positive (res, xa, res, mask);
29743 emit_label (label);
29744 LABEL_NUSES (label) = 1;
29746 emit_move_insn (operand0, res);
29749 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29752 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29754 /* C code for the stuff we expand below.
29755 double xa = fabs (x), x2;
29756 if (!isless (xa, TWO52))
29758 xa = xa + TWO52 - TWO52;
29759 x2 = copysign (xa, x);
29768 enum machine_mode mode = GET_MODE (operand0);
29769 rtx xa, TWO52, tmp, label, one, res, mask;
29771 TWO52 = ix86_gen_TWO52 (mode);
29773 /* Temporary for holding the result, initialized to the input
29774 operand to ease control flow. */
29775 res = gen_reg_rtx (mode);
29776 emit_move_insn (res, operand1);
29778 /* xa = abs (operand1) */
29779 xa = ix86_expand_sse_fabs (res, &mask);
29781 /* if (!isless (xa, TWO52)) goto label; */
29782 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29784 /* xa = xa + TWO52 - TWO52; */
/* Round-to-nearest via the 2^52 trick; works without needing DImode
   conversions, hence usable on 32-bit targets.  */
29785 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29786 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29788 /* xa = copysign (xa, operand1) */
29789 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29791 /* generate 1.0 or -1.0 */
/* -1.0 for ceil: the unconditional MINUS below then adds 1 when the
   comparison mask is set.  */
29792 one = force_reg (mode,
29793 const_double_from_real_value (do_floor
29794 ? dconst1 : dconstm1, mode));
29796 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29797 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29798 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29799 gen_rtx_AND (mode, one, tmp)));
29800 /* We always need to subtract here to preserve signed zero. */
29801 tmp = expand_simple_binop (mode, MINUS,
29802 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29803 emit_move_insn (res, tmp);
29805 emit_label (label);
29806 LABEL_NUSES (label) = 1;
29808 emit_move_insn (operand0, res);
29811 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29814 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29816 /* C code for the stuff we expand below.
29817 double xa = fabs (x), x2;
29818 if (!isless (xa, TWO52))
29820 x2 = (double)(long)x;
29827 if (HONOR_SIGNED_ZEROS (mode))
29828 return copysign (x2, x);
29831 enum machine_mode mode = GET_MODE (operand0);
29832 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29834 TWO52 = ix86_gen_TWO52 (mode);
29836 /* Temporary for holding the result, initialized to the input
29837 operand to ease control flow. */
29838 res = gen_reg_rtx (mode);
29839 emit_move_insn (res, operand1);
29841 /* xa = abs (operand1) */
29842 xa = ix86_expand_sse_fabs (res, &mask);
29844 /* if (!isless (xa, TWO52)) goto label; */
29845 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29847 /* xa = (double)(long)x */
/* Round toward zero via fix/float round trip; needs cvttsd2siq for
   DFmode, so this variant is for 64-bit-capable conversions.  */
29848 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29849 expand_fix (xi, res, 0);
29850 expand_float (xa, xi, 0);
29853 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29855 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
/* Truncation rounded toward zero; step by 1 in the floor/ceil direction
   when the truncated value overshot.  */
29856 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29857 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29858 gen_rtx_AND (mode, one, tmp)));
29859 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29860 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29861 emit_move_insn (res, tmp);
29863 if (HONOR_SIGNED_ZEROS (mode))
29864 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29866 emit_label (label);
29867 LABEL_NUSES (label) = 1;
29869 emit_move_insn (operand0, res);
29872 /* Expand SSE sequence for computing round from OPERAND1 storing
29873 into OPERAND0. Sequence that works without relying on DImode truncation
29874 via cvttsd2siq that is only available on 64bit targets. */
29876 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29878 /* C code for the stuff we expand below.
29879 double xa = fabs (x), xa2, x2;
29880 if (!isless (xa, TWO52))
29882 Using the absolute value and copying back sign makes
29883 -0.0 -> -0.0 correct.
29884 xa2 = xa + TWO52 - TWO52;
29889 else if (dxa > 0.5)
29891 x2 = copysign (xa2, x);
29894 enum machine_mode mode = GET_MODE (operand0);
29895 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29897 TWO52 = ix86_gen_TWO52 (mode);
29899 /* Temporary for holding the result, initialized to the input
29900 operand to ease control flow. */
29901 res = gen_reg_rtx (mode);
29902 emit_move_insn (res, operand1);
29904 /* xa = abs (operand1) */
29905 xa = ix86_expand_sse_fabs (res, &mask);
29907 /* if (!isless (xa, TWO52)) goto label; */
29908 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29910 /* xa2 = xa + TWO52 - TWO52; */
29911 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29912 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29914 /* dxa = xa2 - xa; */
/* dxa is the (signed) rounding error of the 2^52 trick; it is used to
   convert round-to-even into round-half-away-from-zero.  */
29915 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29917 /* generate 0.5, 1.0 and -0.5 */
29918 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29919 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29920 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
/* NOTE(review): the gen_reg_rtx result below is immediately overwritten
   by ix86_expand_sse_compare_mask — the assignment looks dead.  */
29924 tmp = gen_reg_rtx (mode);
29925 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29926 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29927 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29928 gen_rtx_AND (mode, one, tmp)));
29929 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29930 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29931 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29932 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29933 gen_rtx_AND (mode, one, tmp)));
29934 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29936 /* res = copysign (xa2, operand1) */
29937 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29939 emit_label (label);
29940 LABEL_NUSES (label) = 1;
29942 emit_move_insn (operand0, res);
29945 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29948 ix86_expand_trunc (rtx operand0, rtx operand1)
29950 /* C code for SSE variant we expand below.
29951 double xa = fabs (x), x2;
29952 if (!isless (xa, TWO52))
29954 x2 = (double)(long)x;
29955 if (HONOR_SIGNED_ZEROS (mode))
29956 return copysign (x2, x);
29959 enum machine_mode mode = GET_MODE (operand0);
29960 rtx xa, xi, TWO52, label, res, mask;
29962 TWO52 = ix86_gen_TWO52 (mode);
29964 /* Temporary for holding the result, initialized to the input
29965 operand to ease control flow. */
29966 res = gen_reg_rtx (mode);
29967 emit_move_insn (res, operand1);
29969 /* xa = abs (operand1) */
29970 xa = ix86_expand_sse_fabs (res, &mask);
29972 /* if (!isless (xa, TWO52)) goto label; */
29973 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29975 /* x = (double)(long)x */
/* The fix/float round trip truncates toward zero directly — exactly
   trunc's semantics, so no compensation step is needed.  */
29976 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29977 expand_fix (xi, res, 0);
29978 expand_float (res, xi, 0);
29980 if (HONOR_SIGNED_ZEROS (mode))
29981 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29983 emit_label (label);
29984 LABEL_NUSES (label) = 1;
29986 emit_move_insn (operand0, res);
29989 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29992 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29994 enum machine_mode mode = GET_MODE (operand0);
29995 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29997 /* C code for SSE variant we expand below.
29998 double xa = fabs (x), x2;
29999 if (!isless (xa, TWO52))
30001 xa2 = xa + TWO52 - TWO52;
30005 x2 = copysign (xa2, x);
30009 TWO52 = ix86_gen_TWO52 (mode);
30011 /* Temporary for holding the result, initialized to the input
30012 operand to ease control flow. */
30013 res = gen_reg_rtx (mode);
30014 emit_move_insn (res, operand1);
30016 /* xa = abs (operand1) */
30017 xa = ix86_expand_sse_fabs (res, &smask);
30019 /* if (!isless (xa, TWO52)) goto label; */
30020 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30022 /* res = xa + TWO52 - TWO52; */
/* Round-to-nearest via the 2^52 trick (32-bit safe, no DImode fix);
   the compensation below converts it into truncation.  */
30023 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30024 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
30025 emit_move_insn (res, tmp);
30028 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30030 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
/* If rounding went up past |x|, step back down by 1 so the magnitude
   is truncated toward zero.  */
30031 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
30032 emit_insn (gen_rtx_SET (VOIDmode, mask,
30033 gen_rtx_AND (mode, mask, one)));
30034 tmp = expand_simple_binop (mode, MINUS,
30035 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
30036 emit_move_insn (res, tmp);
30038 /* res = copysign (res, operand1) */
30039 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
30041 emit_label (label);
30042 LABEL_NUSES (label) = 1;
30044 emit_move_insn (operand0, res);
30047 /* Expand SSE sequence for computing round from OPERAND1 storing
30050 ix86_expand_round (rtx operand0, rtx operand1)
30052 /* C code for the stuff we're doing below:
30053 double xa = fabs (x);
30054 if (!isless (xa, TWO52))
30056 xa = (double)(long)(xa + nextafter (0.5, 0.0));
30057 return copysign (xa, x);
30059 enum machine_mode mode = GET_MODE (operand0);
30060 rtx res, TWO52, xa, label, xi, half, mask;
30061 const struct real_format *fmt;
30062 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30064 /* Temporary for holding the result, initialized to the input
30065 operand to ease control flow. */
30066 res = gen_reg_rtx (mode);
30067 emit_move_insn (res, operand1);
30069 TWO52 = ix86_gen_TWO52 (mode);
30070 xa = ix86_expand_sse_fabs (res, &mask);
30071 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30073 /* load nextafter (0.5, 0.0) */
/* Using the value just below 0.5 keeps x.5-epsilon from being biased
   up to the next integer by the addition.  */
30074 fmt = REAL_MODE_FORMAT (mode);
30075 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30076 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30078 /* xa = xa + 0.5 */
30079 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
30080 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
30082 /* xa = (double)(int64_t)xa */
30083 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30084 expand_fix (xi, xa, 0);
30085 expand_float (xa, xi, 0);
30087 /* res = copysign (xa, operand1) */
30088 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
30090 emit_label (label);
30091 LABEL_NUSES (label) = 1;
30093 emit_move_insn (operand0, res);
30097 /* Validate whether a SSE5 instruction is valid or not.
30098 OPERANDS is the array of operands.
30099 NUM is the number of operands.
30100 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
30101 NUM_MEMORY is the maximum number of memory operands to accept.
30102 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
30105 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
30106 bool uses_oc0, int num_memory, bool commutative)
30112 /* Count the number of memory arguments */
30115 for (i = 0; i < num; i++)
30117 enum machine_mode mode = GET_MODE (operands[i]);
30118 if (register_operand (operands[i], mode))
30121 else if (memory_operand (operands[i], mode))
30123 mem_mask |= (1 << i);
30129 rtx pattern = PATTERN (insn);
30131 /* allow 0 for pcmov */
30132 if (GET_CODE (pattern) != SET
30133 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
30135 || operands[i] != CONST0_RTX (mode))
30140 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
30141 a memory operation. */
30142 if (num_memory < 0)
30144 num_memory = -num_memory;
30145 if ((mem_mask & (1 << (num-1))) != 0)
30147 mem_mask &= ~(1 << (num-1));
30152 /* If there were no memory operations, allow the insn */
30156 /* Do not allow the destination register to be a memory operand. */
30157 else if (mem_mask & (1 << 0))
30160 /* If there are too many memory operations, disallow the instruction. While
30161 the hardware only allows 1 memory reference, before register allocation
30162 for some insns, we allow two memory operations sometimes in order to allow
30163 code like the following to be optimized:
30165 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
30167 or similar cases that are vectorized into using the fmaddss
30169 else if (mem_count > num_memory)
30172 /* Don't allow more than one memory operation if not optimizing. */
30173 else if (mem_count > 1 && !optimize)
30176 else if (num == 4 && mem_count == 1)
30178 /* formats (destination is the first argument), example fmaddss:
30179 xmm1, xmm1, xmm2, xmm3/mem
30180 xmm1, xmm1, xmm2/mem, xmm3
30181 xmm1, xmm2, xmm3/mem, xmm1
30182 xmm1, xmm2/mem, xmm3, xmm1 */
30184 return ((mem_mask == (1 << 1))
30185 || (mem_mask == (1 << 2))
30186 || (mem_mask == (1 << 3)));
30188 /* format, example pmacsdd:
30189 xmm1, xmm2, xmm3/mem, xmm1 */
30191 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
30193 return (mem_mask == (1 << 2));
30196 else if (num == 4 && num_memory == 2)
30198 /* If there are two memory operations, we can load one of the memory ops
30199 into the destination register. This is for optimizing the
30200 multiply/add ops, which the combiner has optimized both the multiply
30201 and the add insns to have a memory operation. We have to be careful
30202 that the destination doesn't overlap with the inputs. */
30203 rtx op0 = operands[0];
30205 if (reg_mentioned_p (op0, operands[1])
30206 || reg_mentioned_p (op0, operands[2])
30207 || reg_mentioned_p (op0, operands[3]))
30210 /* formats (destination is the first argument), example fmaddss:
30211 xmm1, xmm1, xmm2, xmm3/mem
30212 xmm1, xmm1, xmm2/mem, xmm3
30213 xmm1, xmm2, xmm3/mem, xmm1
30214 xmm1, xmm2/mem, xmm3, xmm1
30216 For the oc0 case, we will load either operands[1] or operands[3] into
30217 operands[0], so any combination of 2 memory operands is ok. */
30221 /* format, example pmacsdd:
30222 xmm1, xmm2, xmm3/mem, xmm1
30224 For the integer multiply/add instructions be more restrictive and
30225 require operands[2] and operands[3] to be the memory operands. */
30227 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
30229 return (mem_mask == ((1 << 2) | (1 << 3)));
30232 else if (num == 3 && num_memory == 1)
30234 /* formats, example protb:
30235 xmm1, xmm2, xmm3/mem
30236 xmm1, xmm2/mem, xmm3 */
30238 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
30240 /* format, example comeq:
30241 xmm1, xmm2, xmm3/mem */
30243 return (mem_mask == (1 << 2));
30247 gcc_unreachable ();
30253 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
30254 hardware will allow by using the destination register to load one of the
30255 memory operations. Presently this is used by the multiply/add routines to
30256 allow 2 memory references. */
30259 ix86_expand_sse5_multiple_memory (rtx operands[],
30261 enum machine_mode mode)
30263 rtx op0 = operands[0];
/* The destination must be a register that does not overlap any input;
   ix86_sse5_valid_op_p is expected to have guaranteed this already.  */
30265 || memory_operand (op0, mode)
30266 || reg_mentioned_p (op0, operands[1])
30267 || reg_mentioned_p (op0, operands[2])
30268 || reg_mentioned_p (op0, operands[3]))
30269 gcc_unreachable ();
30271 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
30272 the destination register. */
30273 if (memory_operand (operands[1], mode))
30275 emit_move_insn (op0, operands[1]);
30278 else if (memory_operand (operands[3], mode))
30280 emit_move_insn (op0, operands[3]);
/* Neither candidate was a memory operand — caller violated the contract.  */
30284 gcc_unreachable ();
30290 /* Table of valid machine attributes. */
30291 static const struct attribute_spec ix86_attribute_table[] =
30293 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30294 /* Stdcall attribute says callee is responsible for popping arguments
30295 if they are not variable. */
30296 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30297 /* Fastcall attribute says callee is responsible for popping arguments
30298 if they are not variable. */
30299 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30300 /* Cdecl attribute says the callee is a normal C declaration */
30301 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30302 /* Regparm attribute specifies how many integer arguments are to be
30303 passed in registers. */
30304 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30305 /* Sseregparm attribute says we are using x86_64 calling conventions
30306 for FP arguments. */
30307 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30308 /* force_align_arg_pointer says this function realigns the stack at entry. */
/* The attribute name is taken from a runtime string so the option flag
   and the attribute spelling stay in sync.  */
30309 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30310 false, true, true, ix86_handle_cconv_attribute },
30311 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30312 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30313 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30314 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30316 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30317 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30318 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30319 SUBTARGET_ATTRIBUTE_TABLE,
30321 /* ms_abi and sysv_abi calling convention function attributes. */
30322 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30323 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
/* End-of-table sentinel.  */
30325 { NULL, 0, 0, false, false, false, NULL }
30328 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30330 x86_builtin_vectorization_cost (bool runtime_test)
30332 /* If the branch of the runtime test is taken - i.e. - the vectorized
30333 version is skipped - this incurs a misprediction cost (because the
30334 vectorized version is expected to be the fall-through). So we subtract
30335 the latency of a mispredicted branch from the costs that are incured
30336 when the vectorized version is executed.
30338 TODO: The values in individual target tables have to be tuned or new
30339 fields may be needed. For eg. on K8, the default branch path is the
30340 not-taken path. If the taken path is predicted correctly, the minimum
30341 penalty of going down the taken-path is 1 cycle. If the taken-path is
30342 not predicted correctly, then the minimum penalty is 10 cycles. */
30346 return (-(ix86_cost->cond_taken_branch_cost));
30352 /* This function returns the calling abi specific va_list type node.
30353 It returns the FNDECL specific va_list type. */
30356 ix86_fn_abi_va_list (tree fndecl)
30359 return va_list_type_node;
30360 gcc_assert (fndecl != NULL_TREE);
30362 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
30363 return ms_va_list_type_node;
30365 return sysv_va_list_type_node;
30368 /* Returns the canonical va_list type specified by TYPE. If there
30369 is no valid TYPE provided, it return NULL_TREE. */
30372 ix86_canonical_va_list_type (tree type)
30376 /* Resolve references and pointers to va_list type. */
30377 if (INDIRECT_REF_P (type))
30378 type = TREE_TYPE (type);
30379 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
30380 type = TREE_TYPE (type);
30384 wtype = va_list_type_node;
30385 gcc_assert (wtype != NULL_TREE);
30387 if (TREE_CODE (wtype) == ARRAY_TYPE)
30389 /* If va_list is an array type, the argument may have decayed
30390 to a pointer type, e.g. by being passed to another function.
30391 In that case, unwrap both types so that we can compare the
30392 underlying records. */
30393 if (TREE_CODE (htype) == ARRAY_TYPE
30394 || POINTER_TYPE_P (htype))
30396 wtype = TREE_TYPE (wtype);
30397 htype = TREE_TYPE (htype);
30400 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30401 return va_list_type_node;
30402 wtype = sysv_va_list_type_node;
30403 gcc_assert (wtype != NULL_TREE);
30405 if (TREE_CODE (wtype) == ARRAY_TYPE)
30407 /* If va_list is an array type, the argument may have decayed
30408 to a pointer type, e.g. by being passed to another function.
30409 In that case, unwrap both types so that we can compare the
30410 underlying records. */
30411 if (TREE_CODE (htype) == ARRAY_TYPE
30412 || POINTER_TYPE_P (htype))
30414 wtype = TREE_TYPE (wtype);
30415 htype = TREE_TYPE (htype);
30418 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30419 return sysv_va_list_type_node;
30420 wtype = ms_va_list_type_node;
30421 gcc_assert (wtype != NULL_TREE);
30423 if (TREE_CODE (wtype) == ARRAY_TYPE)
30425 /* If va_list is an array type, the argument may have decayed
30426 to a pointer type, e.g. by being passed to another function.
30427 In that case, unwrap both types so that we can compare the
30428 underlying records. */
30429 if (TREE_CODE (htype) == ARRAY_TYPE
30430 || POINTER_TYPE_P (htype))
30432 wtype = TREE_TYPE (wtype);
30433 htype = TREE_TYPE (htype);
30436 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30437 return ms_va_list_type_node;
30440 return std_canonical_va_list_type (type);
30443 /* Iterate through the target-specific builtin types for va_list.
30444 IDX denotes the iterator, *PTREE is set to the result type of
30445 the va_list builtin, and *PNAME to its internal type.
30446 Returns zero if there is no element for this index, otherwise
30447 IDX should be increased upon the next call.
30448 Note, do not iterate a base builtin's name like __builtin_va_list.
30449 Used from c_common_nodes_and_builtins. */
30452 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
30458 *ptree = ms_va_list_type_node;
30459 *pname = "__builtin_ms_va_list";
30462 *ptree = sysv_va_list_type_node;
30463 *pname = "__builtin_sysv_va_list";
30471 /* Initialize the GCC target structure. */
/* NOTE(review): each hook below #undefs the generic default (presumably
   supplied by target-def.h, included above) and redefines it to the
   i386-specific implementation.  Several conditional-compilation lines
   (#endif / #else / #ifdef guards) appear to have been lost from this
   copy — verify the guard structure against upstream i386.c before
   building.  */
30472 #undef TARGET_RETURN_IN_MEMORY
30473 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30475 #undef TARGET_LEGITIMIZE_ADDRESS
30476 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
30478 #undef TARGET_ATTRIBUTE_TABLE
30479 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30480 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30481 # undef TARGET_MERGE_DECL_ATTRIBUTES
30482 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30485 #undef TARGET_COMP_TYPE_ATTRIBUTES
30486 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
/* Builtin-function expansion hooks.  */
30488 #undef TARGET_INIT_BUILTINS
30489 #define TARGET_INIT_BUILTINS ix86_init_builtins
30490 #undef TARGET_EXPAND_BUILTIN
30491 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30493 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30494 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30495 ix86_builtin_vectorized_function
30497 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30498 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30500 #undef TARGET_BUILTIN_RECIPROCAL
30501 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
/* Assembler output hooks.  */
30503 #undef TARGET_ASM_FUNCTION_EPILOGUE
30504 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30506 #undef TARGET_ENCODE_SECTION_INFO
30507 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30508 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30510 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30513 #undef TARGET_ASM_OPEN_PAREN
30514 #define TARGET_ASM_OPEN_PAREN ""
30515 #undef TARGET_ASM_CLOSE_PAREN
30516 #define TARGET_ASM_CLOSE_PAREN ""
30518 #undef TARGET_ASM_ALIGNED_HI_OP
30519 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30520 #undef TARGET_ASM_ALIGNED_SI_OP
30521 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30523 #undef TARGET_ASM_ALIGNED_DI_OP
30524 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
30527 #undef TARGET_ASM_UNALIGNED_HI_OP
30528 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30529 #undef TARGET_ASM_UNALIGNED_SI_OP
30530 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30531 #undef TARGET_ASM_UNALIGNED_DI_OP
30532 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Instruction scheduling hooks.  */
30534 #undef TARGET_SCHED_ADJUST_COST
30535 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30536 #undef TARGET_SCHED_ISSUE_RATE
30537 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30538 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30539 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30540 ia32_multipass_dfa_lookahead
30542 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30543 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30546 #undef TARGET_HAVE_TLS
30547 #define TARGET_HAVE_TLS true
30549 #undef TARGET_CANNOT_FORCE_CONST_MEM
30550 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30551 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30552 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30554 #undef TARGET_DELEGITIMIZE_ADDRESS
30555 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30557 #undef TARGET_MS_BITFIELD_LAYOUT_P
30558 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* NOTE(review): the darwin_binds_local_p definition below is normally
   wrapped in #if TARGET_MACHO in upstream sources — the guard lines
   seem to be among those missing here; confirm before use.  */
30561 #undef TARGET_BINDS_LOCAL_P
30562 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30564 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30565 #undef TARGET_BINDS_LOCAL_P
30566 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30569 #undef TARGET_ASM_OUTPUT_MI_THUNK
30570 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30571 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30572 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30574 #undef TARGET_ASM_FILE_START
30575 #define TARGET_ASM_FILE_START x86_file_start
30577 #undef TARGET_DEFAULT_TARGET_FLAGS
30578 #define TARGET_DEFAULT_TARGET_FLAGS \
30580 | TARGET_SUBTARGET_DEFAULT \
30581 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
30583 #undef TARGET_HANDLE_OPTION
30584 #define TARGET_HANDLE_OPTION ix86_handle_option
/* Cost-model hooks.  */
30586 #undef TARGET_RTX_COSTS
30587 #define TARGET_RTX_COSTS ix86_rtx_costs
30588 #undef TARGET_ADDRESS_COST
30589 #define TARGET_ADDRESS_COST ix86_address_cost
30591 #undef TARGET_FIXED_CONDITION_CODE_REGS
30592 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30593 #undef TARGET_CC_MODES_COMPATIBLE
30594 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
30596 #undef TARGET_MACHINE_DEPENDENT_REORG
30597 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30599 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30600 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
/* va_list / varargs handling hooks (see ix86_fn_abi_va_list etc. above).  */
30602 #undef TARGET_BUILD_BUILTIN_VA_LIST
30603 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30605 #undef TARGET_FN_ABI_VA_LIST
30606 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30608 #undef TARGET_CANONICAL_VA_LIST_TYPE
30609 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30611 #undef TARGET_EXPAND_BUILTIN_VA_START
30612 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
30614 #undef TARGET_MD_ASM_CLOBBERS
30615 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
/* Argument-passing convention hooks.  */
30617 #undef TARGET_PROMOTE_PROTOTYPES
30618 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30619 #undef TARGET_STRUCT_VALUE_RTX
30620 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30621 #undef TARGET_SETUP_INCOMING_VARARGS
30622 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30623 #undef TARGET_MUST_PASS_IN_STACK
30624 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30625 #undef TARGET_PASS_BY_REFERENCE
30626 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30627 #undef TARGET_INTERNAL_ARG_POINTER
30628 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
30629 #undef TARGET_UPDATE_STACK_BOUNDARY
30630 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30631 #undef TARGET_GET_DRAP_RTX
30632 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
30633 #undef TARGET_STRICT_ARGUMENT_NAMING
30634 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
30636 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
30637 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
30639 #undef TARGET_SCALAR_MODE_SUPPORTED_P
30640 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30642 #undef TARGET_VECTOR_MODE_SUPPORTED_P
30643 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30645 #undef TARGET_C_MODE_FOR_SUFFIX
30646 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
30649 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30650 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
30653 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30654 #undef TARGET_INSERT_ATTRIBUTES
30655 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30658 #undef TARGET_MANGLE_TYPE
30659 #define TARGET_MANGLE_TYPE ix86_mangle_type
30661 #undef TARGET_STACK_PROTECT_FAIL
30662 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30664 #undef TARGET_FUNCTION_VALUE
30665 #define TARGET_FUNCTION_VALUE ix86_function_value
30667 #undef TARGET_SECONDARY_RELOAD
30668 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30670 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30671 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
/* Per-function target-option hooks (attribute((target)) support).  */
30673 #undef TARGET_SET_CURRENT_FUNCTION
30674 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30676 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30677 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30679 #undef TARGET_OPTION_SAVE
30680 #define TARGET_OPTION_SAVE ix86_function_specific_save
30682 #undef TARGET_OPTION_RESTORE
30683 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30685 #undef TARGET_OPTION_PRINT
30686 #define TARGET_OPTION_PRINT ix86_function_specific_print
30688 #undef TARGET_OPTION_CAN_INLINE_P
30689 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
30691 #undef TARGET_EXPAND_TO_RTL_HOOK
30692 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
30694 #undef TARGET_LEGITIMATE_ADDRESS_P
30695 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
/* Instantiate the concrete target hook vector from the macros above.  */
30697 struct gcc_target targetm = TARGET_INITIALIZER;
/* Garbage-collector roots generated by gengtype for this file.  */
30699 #include "gt-i386.h"