1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
53 #include "tm-constrs.h"
/* Forward declarations for static helpers defined later in this file.  */
static int x86_builtin_vectorization_cost (bool);
static rtx legitimize_dllimport_symbol (rtx, bool);
/* Default stack-probe limit; the #ifndef guard lets target headers
   predefine it.  -1 means no specific limit.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   The table rows are ordered QI, HI, SI, DI, other; anything that is
   not one of the four named integer modes falls through to index 4.
   (The original final ": 4)" arm had been lost, which left the macro
   unterminated -- the trailing backslash would have continued it onto
   the following line.)  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop strategy entry; used below as the second member of
   each strategy pair (presumably the 64-bit variant on 32-bit-only
   tunings -- confirm against the stringop_algs definition).  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when optimizing for size: every entry is measured in
   bytes of generated code (COSTS_N_BYTES) rather than in cycles, so the
   expanders prefer the shortest encoding.  */
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  /* NOTE(review): the "large insn" and MOVE_RATIO entries carried by the
     other tables at this point do not appear here -- verify against the
     processor_costs struct layout.  */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  /* String operation strategy tables (presumably memcpy then memset,
     matching the wording of the k8/amdfam10 comments below -- confirm).  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
148 /* Processor costs (relative to an add) */
/* Cost table for the original Intel 386.  Entries are in units of
   COSTS_N_INSNS, i.e. relative to a single add.  */
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/* HI */
   COSTS_N_INSNS (6),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (23),			/* SI */
   COSTS_N_INSNS (23),			/* DI */
   COSTS_N_INSNS (23)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cost table for the Intel 486.  */
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/* HI */
   COSTS_N_INSNS (12),			/* SI */
   COSTS_N_INSNS (12),			/* DI */
   COSTS_N_INSNS (12)},			/* other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/* HI */
   COSTS_N_INSNS (40),			/* SI */
   COSTS_N_INSNS (40),			/* DI */
   COSTS_N_INSNS (40)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cost table for the Intel Pentium (P5).  */
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/* HI */
   COSTS_N_INSNS (11),			/* SI */
   COSTS_N_INSNS (11),			/* DI */
   COSTS_N_INSNS (11)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/* HI */
   COSTS_N_INSNS (25),			/* SI */
   COSTS_N_INSNS (25),			/* DI */
   COSTS_N_INSNS (25)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cost table for the Intel PentiumPro / P6 family.  */
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (4)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/* HI */
   COSTS_N_INSNS (17),			/* SI */
   COSTS_N_INSNS (17),			/* DI */
   COSTS_N_INSNS (17)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache  */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is way
     to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cost table for the AMD Geode.  */
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (7),			/* SI */
   COSTS_N_INSNS (7),			/* DI */
   COSTS_N_INSNS (7)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (39),			/* SI */
   COSTS_N_INSNS (39),			/* DI */
   COSTS_N_INSNS (39)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cost table for the AMD K6.  */
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/* HI */
   COSTS_N_INSNS (18),			/* SI */
   COSTS_N_INSNS (18),			/* DI */
   COSTS_N_INSNS (18)},			/* other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cost table for the AMD Athlon (K7).  */
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/* HI */
   COSTS_N_INSNS (5),			/* SI */
   COSTS_N_INSNS (5),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cost table for the AMD K8 (Opteron/Athlon 64).  */
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
/* Cost table for AMD Family 10h (Barcelona).  */
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  /* The following latency table had lost its comment delimiters; the
     lines are instruction/latency notes, not initializers:
       MOVD reg64, xmmreg  Double  FSTORE 4
       MOVD reg32, xmmreg  Double  FSTORE 4
     On AMDFAM10:
       MOVD reg64, xmmreg  Double  FADD 3
       MOVD reg32, xmmreg  Double  FADD 3  */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Cost table for the Pentium 4 (NetBurst).  Values are relative to an add
   instruction via COSTS_N_INSNS.  NOTE(review): this extract appears to be
   missing the MOVE_RATIO and branch-cost entries and the closing "};" --
   confirm against the complete file before editing.  */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* memcpy algorithm table, then memset; NOTE(review): the closing braces of
   the second entry (original line 877) are not visible in this extract.  */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
/* Cost table for Nocona (64-bit-capable NetBurst Xeon).  NOTE(review): this
   extract appears to be missing the MOVE_RATIO and branch-cost entries and
   the closing "};" -- confirm against the complete file.  */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* memcpy table (32-bit / 64-bit variants), then memset; NOTE(review): the
   closing braces of the first memset entry (original line 949) are not
   visible in this extract.  */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
/* Cost table for Core 2.  NOTE(review): this extract appears to be missing
   the MOVE_RATIO entry (original line 985) and the closing "};" -- confirm
   against the complete file.  */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
/* lea is deliberately costed slightly above an add (see the generic64
   comment later in this file for the synth_mult rationale).  */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
995 in SFmode, DFmode and XFmode */
996 2, /* cost of moving MMX register */
997 {6, 6}, /* cost of loading MMX registers
998 in SImode and DImode */
999 {4, 4}, /* cost of storing MMX registers
1000 in SImode and DImode */
1001 2, /* cost of moving SSE register */
1002 {6, 6, 6}, /* cost of loading SSE registers
1003 in SImode, DImode and TImode */
1004 {4, 4, 4}, /* cost of storing SSE registers
1005 in SImode, DImode and TImode */
1006 2, /* MMX or SSE register to integer */
1007 32, /* size of l1 cache. */
1008 2048, /* size of l2 cache. */
1009 128, /* size of prefetch block */
1010 8, /* number of parallel prefetches */
1011 3, /* Branch cost */
1012 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1013 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1014 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1015 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1016 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1017 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* memcpy algorithm table (32-bit / 64-bit variants), then memset.  */
1018 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1019 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 {{libcall, {{8, loop}, {15, unrolled_loop},
1022 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1023 {libcall, {{24, loop}, {32, unrolled_loop},
1024 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1025 1, /* scalar_stmt_cost. */
1026 1, /* scalar load_cost. */
1027 1, /* scalar_store_cost. */
1028 1, /* vec_stmt_cost. */
1029 1, /* vec_to_scalar_cost. */
1030 1, /* scalar_to_vec_cost. */
1031 1, /* vec_align_load_cost. */
1032 2, /* vec_unalign_load_cost. */
1033 1, /* vec_store_cost. */
1034 3, /* cond_taken_branch_cost. */
1035 1, /* cond_not_taken_branch_cost. */
/* Cost table for Atom (in-order).  NOTE(review): the closing "};" is not
   visible in this extract -- confirm against the complete file.  */
1039 struct processor_costs atom_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1042 COSTS_N_INSNS (1), /* variable shift costs */
1043 COSTS_N_INSNS (1), /* constant shift costs */
1044 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1045 COSTS_N_INSNS (4), /* HI */
1046 COSTS_N_INSNS (3), /* SI */
1047 COSTS_N_INSNS (4), /* DI */
1048 COSTS_N_INSNS (2)}, /* other */
1049 0, /* cost of multiply per each bit set */
1050 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1051 COSTS_N_INSNS (26), /* HI */
1052 COSTS_N_INSNS (42), /* SI */
1053 COSTS_N_INSNS (74), /* DI */
1054 COSTS_N_INSNS (74)}, /* other */
1055 COSTS_N_INSNS (1), /* cost of movsx */
1056 COSTS_N_INSNS (1), /* cost of movzx */
1057 8, /* "large" insn */
1058 17, /* MOVE_RATIO */
1059 2, /* cost for loading QImode using movzbl */
1060 {4, 4, 4}, /* cost of loading integer registers
1061 in QImode, HImode and SImode.
1062 Relative to reg-reg move (2). */
1063 {4, 4, 4}, /* cost of storing integer registers */
1064 4, /* cost of reg,reg fld/fst */
1065 {12, 12, 12}, /* cost of loading fp registers
1066 in SFmode, DFmode and XFmode */
1067 {6, 6, 8}, /* cost of storing fp registers
1068 in SFmode, DFmode and XFmode */
1069 2, /* cost of moving MMX register */
1070 {8, 8}, /* cost of loading MMX registers
1071 in SImode and DImode */
1072 {8, 8}, /* cost of storing MMX registers
1073 in SImode and DImode */
1074 2, /* cost of moving SSE register */
1075 {8, 8, 8}, /* cost of loading SSE registers
1076 in SImode, DImode and TImode */
1077 {8, 8, 8}, /* cost of storing SSE registers
1078 in SImode, DImode and TImode */
1079 5, /* MMX or SSE register to integer */
1080 32, /* size of l1 cache. */
1081 256, /* size of l2 cache. */
1082 64, /* size of prefetch block */
1083 6, /* number of parallel prefetches */
1084 3, /* Branch cost */
1085 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1086 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1087 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1088 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1089 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1090 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy algorithm table (32-bit / 64-bit variants), then memset.  */
1091 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1092 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1093 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1094 {{libcall, {{8, loop}, {15, unrolled_loop},
1095 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1096 {libcall, {{24, loop}, {32, unrolled_loop},
1097 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1111 /* Generic64 should produce code tuned for Nocona and K8. */
/* NOTE(review): the closing "};" of this initializer is not visible in this
   extract -- confirm against the complete file.  */
1113 struct processor_costs generic64_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 /* On all chips taken into consideration lea is 2 cycles and more. With
1116 this cost however our current implementation of synth_mult results in
1117 use of unnecessary temporary registers causing regression on several
1118 SPECfp benchmarks. */
1119 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1120 COSTS_N_INSNS (1), /* variable shift costs */
1121 COSTS_N_INSNS (1), /* constant shift costs */
1122 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1123 COSTS_N_INSNS (4), /* HI */
1124 COSTS_N_INSNS (3), /* SI */
1125 COSTS_N_INSNS (4), /* DI */
1126 COSTS_N_INSNS (2)}, /* other */
1127 0, /* cost of multiply per each bit set */
1128 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1129 COSTS_N_INSNS (26), /* HI */
1130 COSTS_N_INSNS (42), /* SI */
1131 COSTS_N_INSNS (74), /* DI */
1132 COSTS_N_INSNS (74)}, /* other */
1133 COSTS_N_INSNS (1), /* cost of movsx */
1134 COSTS_N_INSNS (1), /* cost of movzx */
1135 8, /* "large" insn */
1136 17, /* MOVE_RATIO */
1137 4, /* cost for loading QImode using movzbl */
1138 {4, 4, 4}, /* cost of loading integer registers
1139 in QImode, HImode and SImode.
1140 Relative to reg-reg move (2). */
1141 {4, 4, 4}, /* cost of storing integer registers */
1142 4, /* cost of reg,reg fld/fst */
1143 {12, 12, 12}, /* cost of loading fp registers
1144 in SFmode, DFmode and XFmode */
1145 {6, 6, 8}, /* cost of storing fp registers
1146 in SFmode, DFmode and XFmode */
1147 2, /* cost of moving MMX register */
1148 {8, 8}, /* cost of loading MMX registers
1149 in SImode and DImode */
1150 {8, 8}, /* cost of storing MMX registers
1151 in SImode and DImode */
1152 2, /* cost of moving SSE register */
1153 {8, 8, 8}, /* cost of loading SSE registers
1154 in SImode, DImode and TImode */
1155 {8, 8, 8}, /* cost of storing SSE registers
1156 in SImode, DImode and TImode */
1157 5, /* MMX or SSE register to integer */
1158 32, /* size of l1 cache. */
1159 512, /* size of l2 cache. */
1160 64, /* size of prefetch block */
1161 6, /* number of parallel prefetches */
1162 /* Benchmarks show large regressions on K8 sixtrack benchmark when this value
1163 is increased to perhaps more appropriate value of 5. */
1164 3, /* Branch cost */
1165 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1166 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1167 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1168 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1169 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1170 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* 32-bit entries are dummies: this table is only consulted for 64-bit
   generic tuning.  */
1171 {DUMMY_STRINGOP_ALGS,
1172 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1173 {DUMMY_STRINGOP_ALGS,
1174 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1175 1, /* scalar_stmt_cost. */
1176 1, /* scalar load_cost. */
1177 1, /* scalar_store_cost. */
1178 1, /* vec_stmt_cost. */
1179 1, /* vec_to_scalar_cost. */
1180 1, /* scalar_to_vec_cost. */
1181 1, /* vec_align_load_cost. */
1182 2, /* vec_unalign_load_cost. */
1183 1, /* vec_store_cost. */
1184 3, /* cond_taken_branch_cost. */
1185 1, /* cond_not_taken_branch_cost. */
1188 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* NOTE(review): the closing "};" of this initializer is not visible in this
   extract -- confirm against the complete file.  */
1190 struct processor_costs generic32_cost = {
1191 COSTS_N_INSNS (1), /* cost of an add instruction */
1192 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1193 COSTS_N_INSNS (1), /* variable shift costs */
1194 COSTS_N_INSNS (1), /* constant shift costs */
1195 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1196 COSTS_N_INSNS (4), /* HI */
1197 COSTS_N_INSNS (3), /* SI */
1198 COSTS_N_INSNS (4), /* DI */
1199 COSTS_N_INSNS (2)}, /* other */
1200 0, /* cost of multiply per each bit set */
1201 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1202 COSTS_N_INSNS (26), /* HI */
1203 COSTS_N_INSNS (42), /* SI */
1204 COSTS_N_INSNS (74), /* DI */
1205 COSTS_N_INSNS (74)}, /* other */
1206 COSTS_N_INSNS (1), /* cost of movsx */
1207 COSTS_N_INSNS (1), /* cost of movzx */
1208 8, /* "large" insn */
1209 17, /* MOVE_RATIO */
1210 4, /* cost for loading QImode using movzbl */
1211 {4, 4, 4}, /* cost of loading integer registers
1212 in QImode, HImode and SImode.
1213 Relative to reg-reg move (2). */
1214 {4, 4, 4}, /* cost of storing integer registers */
1215 4, /* cost of reg,reg fld/fst */
1216 {12, 12, 12}, /* cost of loading fp registers
1217 in SFmode, DFmode and XFmode */
1218 {6, 6, 8}, /* cost of storing fp registers
1219 in SFmode, DFmode and XFmode */
1220 2, /* cost of moving MMX register */
1221 {8, 8}, /* cost of loading MMX registers
1222 in SImode and DImode */
1223 {8, 8}, /* cost of storing MMX registers
1224 in SImode and DImode */
1225 2, /* cost of moving SSE register */
1226 {8, 8, 8}, /* cost of loading SSE registers
1227 in SImode, DImode and TImode */
1228 {8, 8, 8}, /* cost of storing SSE registers
1229 in SImode, DImode and TImode */
1230 5, /* MMX or SSE register to integer */
1231 32, /* size of l1 cache. */
1232 256, /* size of l2 cache. */
1233 64, /* size of prefetch block */
1234 6, /* number of parallel prefetches */
1235 3, /* Branch cost */
1236 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1237 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1238 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1239 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1240 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1241 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* 64-bit entries are dummies: this table is only consulted for 32-bit
   generic tuning.  */
1242 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1243 DUMMY_STRINGOP_ALGS},
1244 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1245 DUMMY_STRINGOP_ALGS},
1246 1, /* scalar_stmt_cost. */
1247 1, /* scalar load_cost. */
1248 1, /* scalar_store_cost. */
1249 1, /* vec_stmt_cost. */
1250 1, /* vec_to_scalar_cost. */
1251 1, /* scalar_to_vec_cost. */
1252 1, /* vec_align_load_cost. */
1253 2, /* vec_unalign_load_cost. */
1254 1, /* vec_store_cost. */
1255 3, /* cond_taken_branch_cost. */
1256 1, /* cond_not_taken_branch_cost. */
/* Pointer to the cost table currently in effect; initialized to the
   Pentium table.  */
1259 const struct processor_costs *ix86_cost = &pentium_cost;
1261 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator; these masks are OR-ed together in the
   tuning tables below to say which CPUs a given tuning applies to.  */
1262 #define m_386 (1<<PROCESSOR_I386)
1263 #define m_486 (1<<PROCESSOR_I486)
1264 #define m_PENT (1<<PROCESSOR_PENTIUM)
1265 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1266 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1267 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1268 #define m_CORE2 (1<<PROCESSOR_CORE2)
1269 #define m_ATOM (1<<PROCESSOR_ATOM)
1271 #define m_GEODE (1<<PROCESSOR_GEODE)
1272 #define m_K6 (1<<PROCESSOR_K6)
/* Convenience unions of related CPU families.  */
1273 #define m_K6_GEODE (m_K6 | m_GEODE)
1274 #define m_K8 (1<<PROCESSOR_K8)
1275 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1276 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1277 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1278 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1280 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1281 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1283 /* Generic instruction choice should be common subset of supported CPUs
1284 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1285 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1287 /* Feature tests against the various tunings. */
1288 unsigned char ix86_tune_features[X86_TUNE_LAST];
1290 /* Feature tests against the various tunings used to create ix86_tune_features
1291 based on the processor mask. */
/* NOTE(review): in this extract many X86_TUNE_* entries show only their
   comment with the mask value line missing (original line numbers skip),
   and the closing "};" is not visible -- confirm against the complete
   file before editing.  Entry order must match the X86_TUNE_* enum.  */
1292 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1293 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1294 negatively, so enabling for Generic64 seems like good code size
1295 tradeoff. We can't enable it for 32bit generic because it does not
1296 work well with PPro base chips. */
1297 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1299 /* X86_TUNE_PUSH_MEMORY */
1300 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1301 | m_NOCONA | m_CORE2 | m_GENERIC,
1303 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1306 /* X86_TUNE_UNROLL_STRLEN */
1307 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1308 | m_CORE2 | m_GENERIC,
1310 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1311 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1313 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1314 on simulation result. But after P4 was made, no performance benefit
1315 was observed with branch hints. It also increases the code size.
1316 As a result, icc never generates branch hints. */
1319 /* X86_TUNE_DOUBLE_WITH_ADD */
1322 /* X86_TUNE_USE_SAHF */
1323 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1324 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1327 partial dependencies. */
1328 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1329 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1331 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1332 register stalls on Generic32 compilation setting as well. However
1333 in current implementation the partial register stalls are not eliminated
1334 very well - they can be introduced via subregs synthesized by combine
1335 and can happen in caller/callee saving sequences. Because this option
1336 pays back little on PPro based chips and is in conflict with partial reg
1337 dependencies used by Athlon/P4 based chips, it is better to leave it off
1338 for generic32 for now. */
1341 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1342 m_CORE2 | m_GENERIC,
1344 /* X86_TUNE_USE_HIMODE_FIOP */
1345 m_386 | m_486 | m_K6_GEODE,
1347 /* X86_TUNE_USE_SIMODE_FIOP */
1348 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1350 /* X86_TUNE_USE_MOV0 */
1353 /* X86_TUNE_USE_CLTD */
1354 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1356 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1359 /* X86_TUNE_SPLIT_LONG_MOVES */
1362 /* X86_TUNE_READ_MODIFY_WRITE */
1365 /* X86_TUNE_READ_MODIFY */
1368 /* X86_TUNE_PROMOTE_QIMODE */
1369 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1370 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1372 /* X86_TUNE_FAST_PREFIX */
1373 ~(m_PENT | m_486 | m_386),
1375 /* X86_TUNE_SINGLE_STRINGOP */
1376 m_386 | m_PENT4 | m_NOCONA,
1378 /* X86_TUNE_QIMODE_MATH */
1381 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1382 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1383 might be considered for Generic32 if our scheme for avoiding partial
1384 stalls was more effective. */
1387 /* X86_TUNE_PROMOTE_QI_REGS */
1390 /* X86_TUNE_PROMOTE_HI_REGS */
1393 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1394 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1395 | m_CORE2 | m_GENERIC,
1397 /* X86_TUNE_ADD_ESP_8 */
1398 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1399 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_SUB_ESP_4 */
1402 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1405 /* X86_TUNE_SUB_ESP_8 */
1406 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1407 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1409 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1410 for DFmode copies */
1411 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1412 | m_GENERIC | m_GEODE),
1414 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1415 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1417 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1418 conflict here in between PPro/Pentium4 based chips that treat 128bit
1419 SSE registers as single units versus K8 based chips that divide SSE
1420 registers to two 64bit halves. This knob promotes all store destinations
1421 to be 128bit to allow register renaming on 128bit SSE units, but usually
1422 results in one extra microop on 64bit SSE units. Experimental results
1423 shows that disabling this option on P4 brings over 20% SPECfp regression,
1424 while enabling it on K8 brings roughly 2.4% regression that can be partly
1425 masked by careful scheduling of moves. */
1426 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1429 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1432 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1433 are resolved on SSE register parts instead of whole registers, so we may
1434 maintain just lower part of scalar values in proper format leaving the
1435 upper part undefined. */
1438 /* X86_TUNE_SSE_TYPELESS_STORES */
1441 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1442 m_PPRO | m_PENT4 | m_NOCONA,
1444 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1445 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1447 /* X86_TUNE_PROLOGUE_USING_MOVE */
1448 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1450 /* X86_TUNE_EPILOGUE_USING_MOVE */
1451 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1453 /* X86_TUNE_SHIFT1 */
1456 /* X86_TUNE_USE_FFREEP */
1459 /* X86_TUNE_INTER_UNIT_MOVES */
1460 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1462 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1465 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1466 than 4 branch instructions in the 16 byte window. */
1467 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1470 /* X86_TUNE_SCHEDULE */
1471 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1474 /* X86_TUNE_USE_BT */
1475 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1477 /* X86_TUNE_USE_INCDEC */
1478 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1480 /* X86_TUNE_PAD_RETURNS */
1481 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1483 /* X86_TUNE_EXT_80387_CONSTANTS */
1484 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_SHORTEN_X87_SSE */
1490 /* X86_TUNE_AVOID_VECTOR_DECODE */
1493 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1494 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1497 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1498 vector path on AMD machines. */
1499 m_K8 | m_GENERIC64 | m_AMDFAM10,
1501 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1503 m_K8 | m_GENERIC64 | m_AMDFAM10,
1505 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1509 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1510 but one byte longer. */
1513 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1514 operand that cannot be represented using a modRM byte. The XOR
1515 replacement is long decoded, so this split helps here as well. */
1518 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1520 m_AMDFAM10 | m_GENERIC,
1522 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1523 from integer to FP. */
1526 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1527 with a subsequent conditional jump instruction into a single
1528 compare-and-branch uop. */
1531 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1532 will impact LEA instruction selection. */
1536 /* Feature tests against the various architecture variations. */
1537 unsigned char ix86_arch_features[X86_ARCH_LAST];
1539 /* Feature tests against the various architecture variations, used to create
1540 ix86_arch_features based on the processor mask. */
/* NOTE(review): only the first entry's mask is visible in this extract; the
   remaining mask values and the closing "};" appear to be missing -- confirm
   against the complete file.  */
1541 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1542 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1543 ~(m_386 | m_486 | m_PENT | m_K6),
1545 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1548 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1551 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1554 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* CPU masks for tunings kept outside the main table.  NOTE(review): the
   first initializer appears truncated in this extract (no terminating
   semicolon visible) -- confirm against the complete file.  */
1558 static const unsigned int x86_accumulate_outgoing_args
1559 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
/* CPUs on which 387 math is always preferred when available.  */
1562 static const unsigned int x86_arch_always_fancy_math_387
1563 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1564 | m_NOCONA | m_CORE2 | m_GENERIC;
/* String-operation algorithm forced by command line; no_stringop means
   use the per-CPU cost tables instead.  */
1566 static enum stringop_alg stringop_alg = no_stringop;
1568 /* In case the average insn count for single function invocation is
1569 lower than this constant, emit fast (but longer) prologue and
1571 #define FAST_PROLOGUE_INSN_COUNT 20
1573 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1574 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1575 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1576 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1578 /* Array of the smallest class containing reg number REGNO, indexed by
1579 REGNO. Used by REGNO_REG_CLASS in i386.h. */
/* NOTE(review): the opening "{" of the initializer, some group comments,
   and the closing "};" are not visible in this extract -- confirm against
   the complete file.  */
1581 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1583 /* ax, dx, cx, bx */
1584 AREG, DREG, CREG, BREG,
1585 /* si, di, bp, sp */
1586 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* x87 stack registers st(0)..st(7).  */
1588 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1589 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1592 /* flags, fpsr, fpcr, frame */
1593 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* SSE registers xmm0..xmm7.  */
1595 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* MMX registers mm0..mm7.  */
1598 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
/* REX integer registers r8..r15.  */
1601 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 /* SSE REX registers */
1604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1608 /* The "default" register map used in 32bit mode. */
/* Maps GCC hard register numbers to debugger (DBX/stabs) register numbers;
   -1 marks registers with no debugger encoding.  NOTE(review): the opening
   "{" and closing "};" are not visible in this extract.  */
1610 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1612 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1613 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1614 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1615 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1616 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1617 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1621 /* The "default" register map used in 64bit mode. */
/* 64-bit counterpart of dbx_register_map; here the extended integer and SSE
   registers do have encodings.  NOTE(review): the opening "{" and closing
   "};" are not visible in this extract.  */
1623 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1625 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1626 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1627 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1628 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1629 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1630 8,9,10,11,12,13,14,15, /* extended integer registers */
1631 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1634 /* Define the register numbers to be used in Dwarf debugging information.
1635 The SVR4 reference port C compiler uses the following register numbers
1636 in its Dwarf output code:
1637 0 for %eax (gcc regno = 0)
1638 1 for %ecx (gcc regno = 2)
1639 2 for %edx (gcc regno = 1)
1640 3 for %ebx (gcc regno = 3)
1641 4 for %esp (gcc regno = 7)
1642 5 for %ebp (gcc regno = 6)
1643 6 for %esi (gcc regno = 4)
1644 7 for %edi (gcc regno = 5)
1645 The following three DWARF register numbers are never generated by
1646 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1647 believes these numbers have these meanings.
1648 8 for %eip (no gcc equivalent)
1649 9 for %eflags (gcc regno = 17)
1650 10 for %trapno (no gcc equivalent)
1651 It is not at all clear how we should number the FP stack registers
1652 for the x86 architecture. If the version of SDB on x86/svr4 were
1653 a bit less brain dead with respect to floating-point then we would
1654 have a precedent to follow with respect to DWARF register numbers
1655 for x86 FP registers, but the SDB on x86/svr4 is so completely
1656 broken with respect to FP registers that it is hardly worth thinking
1657 of it as something to strive for compatibility with.
1658 The version of x86/svr4 SDB I have at the moment does (partially)
1659 seem to believe that DWARF register number 11 is associated with
1660 the x86 register %st(0), but that's about all. Higher DWARF
1661 register numbers don't seem to be associated with anything in
1662 particular, and even for DWARF regno 11, SDB only seems to under-
1663 stand that it should say that a variable lives in %st(0) (when
1664 asked via an `=' command) if we said it was in DWARF regno 11,
1665 but SDB still prints garbage when asked for the value of the
1666 variable in question (via a `/' command).
1667 (Also note that the labels SDB prints for various FP stack regs
1668 when doing an `x' command are all wrong.)
1669 Note that these problems generally don't affect the native SVR4
1670 C compiler because it doesn't allow the use of -O with -g and
1671 because when it is *not* optimizing, it allocates a memory
1672 location for each floating-point variable, and the memory
1673 location is what gets described in the DWARF AT_location
1674 attribute for the variable in question.
1675 Regardless of the severe mental illness of the x86/svr4 SDB, we
1676 do something sensible here and we use the following DWARF
1677 register numbers. Note that these are all stack-top-relative
1679 11 for %st(0) (gcc regno = 8)
1680 12 for %st(1) (gcc regno = 9)
1681 13 for %st(2) (gcc regno = 10)
1682 14 for %st(3) (gcc regno = 11)
1683 15 for %st(4) (gcc regno = 12)
1684 16 for %st(5) (gcc regno = 13)
1685 17 for %st(6) (gcc regno = 14)
1686 18 for %st(7) (gcc regno = 15)
1688 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1690 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1691 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1692 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1693 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1694 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1695 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1699 /* Test and compare insns in i386.md store the information needed to
1700 generate branch and scc insns here. */
1702 rtx ix86_compare_op0 = NULL_RTX; /* First operand of the pending comparison.  */
1703 rtx ix86_compare_op1 = NULL_RTX; /* Second operand of the pending comparison.  */
1705 /* Define parameter passing and return registers. */
/* Integer argument registers in argument order: %rdi, %rsi, %rdx, %rcx,
   %r8, %r9 -- the SysV AMD64 ABI ordering.  */
1707 static int const x86_64_int_parameter_registers[6] =
1709 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
/* Integer argument registers in argument order for the Microsoft x64
   ABI: %rcx, %rdx, %r8, %r9.  */
1712 static int const x86_64_ms_abi_int_parameter_registers[4] =
1714 CX_REG, DX_REG, R8_REG, R9_REG
/* Registers used for returning integer values.  */
1717 static int const x86_64_int_return_registers[4] =
1719 AX_REG, DX_REG, DI_REG, SI_REG
1722 /* Define the structure for the machine field in struct function. */
1724 struct GTY(()) stack_local_entry {
1725 unsigned short mode;
1728 struct stack_local_entry *next;
1731 /* Structure describing stack frame layout.
1732 Stack grows downward:
1738 saved frame pointer if frame_pointer_needed
1739 <- HARD_FRAME_POINTER
1748 [va_arg registers] (
1749 > to_allocate <- FRAME_POINTER
1761 HOST_WIDE_INT frame;
1763 int outgoing_arguments_size;
1766 HOST_WIDE_INT to_allocate;
1767 /* The offsets relative to ARG_POINTER. */
1768 HOST_WIDE_INT frame_pointer_offset;
1769 HOST_WIDE_INT hard_frame_pointer_offset;
1770 HOST_WIDE_INT stack_pointer_offset;
1772 /* When save_regs_using_mov is set, emit prologue using
1773 move instead of push instructions. */
1774 bool save_regs_using_mov;
1777 /* Code model option. */
1778 enum cmodel ix86_cmodel;
1780 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1782 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1784 /* Which unit we are generating floating point math for. */
1785 enum fpmath_unit ix86_fpmath;
1787 /* Which cpu are we scheduling for. */
1788 enum attr_cpu ix86_schedule;
1790 /* Which cpu are we optimizing for. */
1791 enum processor_type ix86_tune;
1793 /* Which instruction set architecture to use. */
1794 enum processor_type ix86_arch;
1796 /* true if sse prefetch instruction is not NOOP. */
1797 int x86_prefetch_sse;
1799 /* ix86_regparm_string as a number */
1800 static int ix86_regparm;
1802 /* -mstackrealign option */
1803 extern int ix86_force_align_arg_pointer;
1804 static const char ix86_force_align_arg_pointer_string[]
1805 = "force_align_arg_pointer";
/* Pointers to RTL insn-generator functions.  NOTE(review): the names
   suggest these are dispatched on target mode (e.g. 32- vs 64-bit
   variants of leave/pop/add/sub/etc.), but the assignment site is not
   visible here -- confirm where they are initialized.  */
1807 static rtx (*ix86_gen_leave) (void);
1808 static rtx (*ix86_gen_pop1) (rtx);
1809 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1810 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1811 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1812 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1813 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1814 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1816 /* Preferred alignment for stack boundary in bits. */
1817 unsigned int ix86_preferred_stack_boundary;
1819 /* Alignment for incoming stack boundary in bits specified at
1821 static unsigned int ix86_user_incoming_stack_boundary;
1823 /* Default alignment for incoming stack boundary in bits. */
1824 static unsigned int ix86_default_incoming_stack_boundary;
1826 /* Alignment for incoming stack boundary in bits. */
1827 unsigned int ix86_incoming_stack_boundary;
1829 /* The abi used by target. */
1830 enum calling_abi ix86_abi;
1832 /* Values 1-5: see jump.c */
1833 int ix86_branch_cost;
1835 /* Calling abi specific va_list type nodes. */
1836 static GTY(()) tree sysv_va_list_type_node;
1837 static GTY(()) tree ms_va_list_type_node;
1839 /* Variables which are this size or smaller are put in the data/bss
1840 or ldata/lbss sections. */
1842 int ix86_section_threshold = 65536;
1844 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1845 char internal_label_prefix[16];
1846 int internal_label_prefix_len;
1848 /* Fence to use after loop using movnt. */
1851 /* Register class used for passing given 64bit part of the argument.
1852 These represent classes as documented by the PS ABI, with the exception
1853 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1854 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1856 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1857 whenever possible (upper half does contain padding). */
1858 enum x86_64_reg_class
1861 X86_64_INTEGER_CLASS,
1862 X86_64_INTEGERSI_CLASS,
1869 X86_64_COMPLEX_X87_CLASS,
1873 #define MAX_CLASSES 4
1875 /* Table of constants used by fldpi, fldln2, etc.... */
1876 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1877 static bool ext_80387_constants_init = 0;
1880 static struct machine_function * ix86_init_machine_status (void);
1881 static rtx ix86_function_value (const_tree, const_tree, bool);
1882 static int ix86_function_regparm (const_tree, const_tree);
1883 static void ix86_compute_frame_layout (struct ix86_frame *);
1884 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1886 static void ix86_add_new_builtins (int);
1888 enum ix86_function_specific_strings
1890 IX86_FUNCTION_SPECIFIC_ARCH,
1891 IX86_FUNCTION_SPECIFIC_TUNE,
1892 IX86_FUNCTION_SPECIFIC_FPMATH,
1893 IX86_FUNCTION_SPECIFIC_MAX
1896 static char *ix86_target_string (int, int, const char *, const char *,
1897 const char *, bool);
1898 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1899 static void ix86_function_specific_save (struct cl_target_option *);
1900 static void ix86_function_specific_restore (struct cl_target_option *);
1901 static void ix86_function_specific_print (FILE *, int,
1902 struct cl_target_option *);
1903 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1904 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1905 static bool ix86_can_inline_p (tree, tree);
1906 static void ix86_set_current_function (tree);
1908 static enum calling_abi ix86_function_abi (const_tree);
1911 /* The svr4 ABI for the i386 says that records and unions are returned
1913 #ifndef DEFAULT_PCC_STRUCT_RETURN
1914 #define DEFAULT_PCC_STRUCT_RETURN 1
1917 /* Whether -mtune= or -march= were specified */
1918 static int ix86_tune_defaulted;
1919 static int ix86_arch_specified;
1921 /* Bit flags that specify the ISA we are compiling for. */
1922 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1924 /* A mask of ix86_isa_flags that includes bit X if X
1925 was set or cleared on the command line. */
1926 static int ix86_isa_flags_explicit;
1928 /* Define a set of ISAs which are available when a given ISA is
1929 enabled. MMX and SSE ISAs are handled separately. */
1931 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1932 #define OPTION_MASK_ISA_3DNOW_SET \
1933 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1935 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1936 #define OPTION_MASK_ISA_SSE2_SET \
1937 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1938 #define OPTION_MASK_ISA_SSE3_SET \
1939 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1940 #define OPTION_MASK_ISA_SSSE3_SET \
1941 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1942 #define OPTION_MASK_ISA_SSE4_1_SET \
1943 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1944 #define OPTION_MASK_ISA_SSE4_2_SET \
1945 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1946 #define OPTION_MASK_ISA_AVX_SET \
1947 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1948 #define OPTION_MASK_ISA_FMA_SET \
1949 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1951 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1953 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1955 #define OPTION_MASK_ISA_SSE4A_SET \
1956 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1957 #define OPTION_MASK_ISA_SSE5_SET \
1958 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1960 /* AES and PCLMUL need SSE2 because they use xmm registers */
1961 #define OPTION_MASK_ISA_AES_SET \
1962 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1963 #define OPTION_MASK_ISA_PCLMUL_SET \
1964 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1966 #define OPTION_MASK_ISA_ABM_SET \
1967 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1969 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1970 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1971 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1972 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
1974 /* Define a set of ISAs which aren't available when a given ISA is
1975 disabled. MMX and SSE ISAs are handled separately. */
1977 #define OPTION_MASK_ISA_MMX_UNSET \
1978 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1979 #define OPTION_MASK_ISA_3DNOW_UNSET \
1980 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1981 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1983 #define OPTION_MASK_ISA_SSE_UNSET \
1984 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1985 #define OPTION_MASK_ISA_SSE2_UNSET \
1986 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1987 #define OPTION_MASK_ISA_SSE3_UNSET \
1988 (OPTION_MASK_ISA_SSE3 \
1989 | OPTION_MASK_ISA_SSSE3_UNSET \
1990 | OPTION_MASK_ISA_SSE4A_UNSET )
1991 #define OPTION_MASK_ISA_SSSE3_UNSET \
1992 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1993 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1994 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1995 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1996 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1997 #define OPTION_MASK_ISA_AVX_UNSET \
1998 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1999 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2001 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2003 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2005 #define OPTION_MASK_ISA_SSE4A_UNSET \
2006 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2007 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2008 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2009 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2010 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2011 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2012 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2013 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2014 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2016 /* Vectorization library interface and handlers. */
2017 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2018 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2019 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2021 /* Processor target table, indexed by processor number */
2024 const struct processor_costs *cost; /* Processor costs */
2025 const int align_loop; /* Default alignments. */
2026 const int align_loop_max_skip;
2027 const int align_jump;
2028 const int align_jump_max_skip;
2029 const int align_func;
2032 static const struct ptt processor_target_table[PROCESSOR_max] =
2034 {&i386_cost, 4, 3, 4, 3, 4},
2035 {&i486_cost, 16, 15, 16, 15, 16},
2036 {&pentium_cost, 16, 7, 16, 7, 16},
2037 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2038 {&geode_cost, 0, 0, 0, 0, 0},
2039 {&k6_cost, 32, 7, 32, 7, 32},
2040 {&athlon_cost, 16, 7, 16, 7, 16},
2041 {&pentium4_cost, 0, 0, 0, 0, 0},
2042 {&k8_cost, 16, 7, 16, 7, 16},
2043 {&nocona_cost, 0, 0, 0, 0, 0},
2044 {&core2_cost, 16, 10, 16, 10, 16},
2045 {&generic32_cost, 16, 7, 16, 7, 16},
2046 {&generic64_cost, 16, 10, 16, 10, 16},
2047 {&amdfam10_cost, 32, 24, 32, 7, 32},
2048 {&atom_cost, 16, 7, 16, 7, 16}
2051 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2077 /* Implement TARGET_HANDLE_OPTION. */
2080 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2087 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2088 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2092 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2093 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2100 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2101 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2105 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2106 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2116 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2117 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2121 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2122 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2129 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2130 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2134 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2135 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2142 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2143 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2147 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2148 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2155 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2156 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2160 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2161 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2168 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2169 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2173 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2174 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2181 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2182 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2186 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2187 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2194 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2195 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2199 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2200 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2207 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2208 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2212 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2213 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2218 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2219 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2223 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2224 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2230 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2231 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2235 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2236 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2243 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2244 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2248 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2249 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2256 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2257 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2261 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2262 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2269 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2270 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2274 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2275 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2282 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2283 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2287 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2288 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2295 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2296 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2300 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2301 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2308 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2309 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2313 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2314 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2321 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2322 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2326 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2327 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2334 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2335 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2339 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2340 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2349 /* Return a string the documents the current -m options. The caller is
2350 responsible for freeing the string. */
2353 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2354 const char *fpmath, bool add_nl_p)
2356 struct ix86_target_opts
2358 const char *option; /* option string */
2359 int mask; /* isa mask options */
2362 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2363 preceding options will match those first. */
2364 static struct ix86_target_opts isa_opts[] =
2366 { "-m64", OPTION_MASK_ISA_64BIT },
2367 { "-msse5", OPTION_MASK_ISA_SSE5 },
2368 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2369 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2370 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2371 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2372 { "-msse3", OPTION_MASK_ISA_SSE3 },
2373 { "-msse2", OPTION_MASK_ISA_SSE2 },
2374 { "-msse", OPTION_MASK_ISA_SSE },
2375 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2376 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2377 { "-mmmx", OPTION_MASK_ISA_MMX },
2378 { "-mabm", OPTION_MASK_ISA_ABM },
2379 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2380 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2381 { "-maes", OPTION_MASK_ISA_AES },
2382 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2386 static struct ix86_target_opts flag_opts[] =
2388 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2389 { "-m80387", MASK_80387 },
2390 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2391 { "-malign-double", MASK_ALIGN_DOUBLE },
2392 { "-mcld", MASK_CLD },
2393 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2394 { "-mieee-fp", MASK_IEEE_FP },
2395 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2396 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2397 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2398 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2399 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2400 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2401 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2402 { "-mno-red-zone", MASK_NO_RED_ZONE },
2403 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2404 { "-mrecip", MASK_RECIP },
2405 { "-mrtd", MASK_RTD },
2406 { "-msseregparm", MASK_SSEREGPARM },
2407 { "-mstack-arg-probe", MASK_STACK_PROBE },
2408 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2411 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2414 char target_other[40];
2423 memset (opts, '\0', sizeof (opts));
2425 /* Add -march= option. */
2428 opts[num][0] = "-march=";
2429 opts[num++][1] = arch;
2432 /* Add -mtune= option. */
2435 opts[num][0] = "-mtune=";
2436 opts[num++][1] = tune;
2439 /* Pick out the options in isa options. */
2440 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2442 if ((isa & isa_opts[i].mask) != 0)
2444 opts[num++][0] = isa_opts[i].option;
/* Clear each recognized bit so leftover bits can be reported below.  */
2445 isa &= ~ isa_opts[i].mask;
/* Any isa bits not matched by the table are dumped numerically.  */
2449 if (isa && add_nl_p)
2451 opts[num++][0] = isa_other;
2452 sprintf (isa_other, "(other isa: 0x%x)", isa);
2455 /* Add flag options. */
2456 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2458 if ((flags & flag_opts[i].mask) != 0)
2460 opts[num++][0] = flag_opts[i].option;
2461 flags &= ~ flag_opts[i].mask;
2465 if (flags && add_nl_p)
2467 opts[num++][0] = target_other;
/* NOTE(review): BUG -- the message says "other flags" but the value
   printed is ISA, not FLAGS.  This looks like a copy/paste slip from the
   isa_other sprintf above; it should presumably print FLAGS.  */
2468 sprintf (target_other, "(other flags: 0x%x)", isa);
2471 /* Add -fpmath= option. */
2474 opts[num][0] = "-mfpmath=";
2475 opts[num++][1] = fpmath;
2482 gcc_assert (num < ARRAY_SIZE (opts));
2484 /* Size the string. */
2486 sep_len = (add_nl_p) ? 3 : 1;
2487 for (i = 0; i < num; i++)
2490 for (j = 0; j < 2; j++)
2492 len += strlen (opts[i][j]);
2495 /* Build the string. */
2496 ret = ptr = (char *) xmalloc (len);
2499 for (i = 0; i < num; i++)
2503 for (j = 0; j < 2; j++)
2504 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2511 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2519 for (j = 0; j < 2; j++)
2522 memcpy (ptr, opts[i][j], len2[j]);
2524 line_len += len2[j];
2529 gcc_assert (ret + len >= ptr);
2534 /* Function that is callable from the debugger to print the current
2537 ix86_debug_options (void)
2539 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2540 ix86_arch_string, ix86_tune_string,
2541 ix86_fpmath_string, true);
2545 fprintf (stderr, "%s\n\n", opts);
2549 fprintf (stderr, "<no options>\n\n");
2554 /* Sometimes certain combinations of command options do not make
2555 sense on a particular target machine. You can define a macro
2556 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2557 defined, is executed once just after all the command options have
2560 Don't use this macro to turn on various extra optimizations for
2561 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2564 override_options (bool main_args_p)
2567 unsigned int ix86_arch_mask, ix86_tune_mask;
2572 /* Comes from final.c -- no real reason to change it. */
2573 #define MAX_CODE_ALIGN 16
2581 PTA_PREFETCH_SSE = 1 << 4,
2583 PTA_3DNOW_A = 1 << 6,
2587 PTA_POPCNT = 1 << 10,
2589 PTA_SSE4A = 1 << 12,
2590 PTA_NO_SAHF = 1 << 13,
2591 PTA_SSE4_1 = 1 << 14,
2592 PTA_SSE4_2 = 1 << 15,
2595 PTA_PCLMUL = 1 << 18,
2603 const char *const name; /* processor name or nickname. */
2604 const enum processor_type processor;
2605 const enum attr_cpu schedule;
2606 const unsigned /*enum pta_flags*/ flags;
2608 const processor_alias_table[] =
2610 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2611 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2612 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2613 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2614 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2615 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2616 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2617 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2618 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2619 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2620 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2621 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2622 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2624 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2626 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2627 PTA_MMX | PTA_SSE | PTA_SSE2},
2628 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2629 PTA_MMX |PTA_SSE | PTA_SSE2},
2630 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2631 PTA_MMX | PTA_SSE | PTA_SSE2},
2632 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2633 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2634 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2635 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2636 | PTA_CX16 | PTA_NO_SAHF},
2637 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2638 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2639 | PTA_SSSE3 | PTA_CX16},
2640 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2641 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2642 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2643 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2644 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2645 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2646 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2647 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2648 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2649 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2650 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2651 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2652 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2653 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2654 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2655 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2656 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2657 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2658 {"x86-64", PROCESSOR_K8, CPU_K8,
2659 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2660 {"k8", PROCESSOR_K8, CPU_K8,
2661 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2662 | PTA_SSE2 | PTA_NO_SAHF},
2663 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2664 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2665 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2666 {"opteron", PROCESSOR_K8, CPU_K8,
2667 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2668 | PTA_SSE2 | PTA_NO_SAHF},
2669 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2670 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2671 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2672 {"athlon64", PROCESSOR_K8, CPU_K8,
2673 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2674 | PTA_SSE2 | PTA_NO_SAHF},
2675 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2676 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2677 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2678 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2679 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2680 | PTA_SSE2 | PTA_NO_SAHF},
2681 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2682 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2683 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2684 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2685 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2686 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2687 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2688 0 /* flags are only used for -march switch. */ },
2689 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2690 PTA_64BIT /* flags are only used for -march switch. */ },
2693 int const pta_size = ARRAY_SIZE (processor_alias_table);
2695 /* Set up prefix/suffix so the error messages refer to either the command
2696 line argument, or the attribute(target). */
2705 prefix = "option(\"";
2710 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2711 SUBTARGET_OVERRIDE_OPTIONS;
2714 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2715 SUBSUBTARGET_OVERRIDE_OPTIONS;
2718 /* -fPIC is the default for x86_64. */
2719 if (TARGET_MACHO && TARGET_64BIT)
2722 /* Set the default values for switches whose default depends on TARGET_64BIT
2723 in case they weren't overwritten by command line options. */
2726 /* Mach-O doesn't support omitting the frame pointer for now. */
2727 if (flag_omit_frame_pointer == 2)
2728 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2729 if (flag_asynchronous_unwind_tables == 2)
2730 flag_asynchronous_unwind_tables = 1;
2731 if (flag_pcc_struct_return == 2)
2732 flag_pcc_struct_return = 0;
2736 if (flag_omit_frame_pointer == 2)
2737 flag_omit_frame_pointer = 0;
2738 if (flag_asynchronous_unwind_tables == 2)
2739 flag_asynchronous_unwind_tables = 0;
2740 if (flag_pcc_struct_return == 2)
2741 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2744 /* Need to check -mtune=generic first. */
2745 if (ix86_tune_string)
2747 if (!strcmp (ix86_tune_string, "generic")
2748 || !strcmp (ix86_tune_string, "i686")
2749 /* As special support for cross compilers we read -mtune=native
2750 as -mtune=generic. With native compilers we won't see the
2751 -mtune=native, as it was changed by the driver. */
2752 || !strcmp (ix86_tune_string, "native"))
2755 ix86_tune_string = "generic64";
2757 ix86_tune_string = "generic32";
2759 /* If this call is for setting the option attribute, allow the
2760 generic32/generic64 that was previously set. */
2761 else if (!main_args_p
2762 && (!strcmp (ix86_tune_string, "generic32")
2763 || !strcmp (ix86_tune_string, "generic64")))
2765 else if (!strncmp (ix86_tune_string, "generic", 7))
2766 error ("bad value (%s) for %stune=%s %s",
2767 ix86_tune_string, prefix, suffix, sw);
2771 if (ix86_arch_string)
2772 ix86_tune_string = ix86_arch_string;
2773 if (!ix86_tune_string)
2775 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2776 ix86_tune_defaulted = 1;
2779 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2780 need to use a sensible tune option. */
2781 if (!strcmp (ix86_tune_string, "generic")
2782 || !strcmp (ix86_tune_string, "x86-64")
2783 || !strcmp (ix86_tune_string, "i686"))
2786 ix86_tune_string = "generic64";
2788 ix86_tune_string = "generic32";
2791 if (ix86_stringop_string)
2793 if (!strcmp (ix86_stringop_string, "rep_byte"))
2794 stringop_alg = rep_prefix_1_byte;
2795 else if (!strcmp (ix86_stringop_string, "libcall"))
2796 stringop_alg = libcall;
2797 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2798 stringop_alg = rep_prefix_4_byte;
2799 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2801 /* rep; movq isn't available in 32-bit code. */
2802 stringop_alg = rep_prefix_8_byte;
2803 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2804 stringop_alg = loop_1_byte;
2805 else if (!strcmp (ix86_stringop_string, "loop"))
2806 stringop_alg = loop;
2807 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2808 stringop_alg = unrolled_loop;
2810 error ("bad value (%s) for %sstringop-strategy=%s %s",
2811 ix86_stringop_string, prefix, suffix, sw);
2813 if (!strcmp (ix86_tune_string, "x86-64"))
2814 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2815 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2816 prefix, suffix, prefix, suffix, prefix, suffix);
2818 if (!ix86_arch_string)
2819 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2821 ix86_arch_specified = 1;
2823 if (!strcmp (ix86_arch_string, "generic"))
2824 error ("generic CPU can be used only for %stune=%s %s",
2825 prefix, suffix, sw);
2826 if (!strncmp (ix86_arch_string, "generic", 7))
2827 error ("bad value (%s) for %sarch=%s %s",
2828 ix86_arch_string, prefix, suffix, sw);
2830 /* Validate -mabi= value. */
2831 if (ix86_abi_string)
2833 if (strcmp (ix86_abi_string, "sysv") == 0)
2834 ix86_abi = SYSV_ABI;
2835 else if (strcmp (ix86_abi_string, "ms") == 0)
2838 error ("unknown ABI (%s) for %sabi=%s %s",
2839 ix86_abi_string, prefix, suffix, sw);
2842 ix86_abi = DEFAULT_ABI;
2844 if (ix86_cmodel_string != 0)
2846 if (!strcmp (ix86_cmodel_string, "small"))
2847 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2848 else if (!strcmp (ix86_cmodel_string, "medium"))
2849 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2850 else if (!strcmp (ix86_cmodel_string, "large"))
2851 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2853 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2854 else if (!strcmp (ix86_cmodel_string, "32"))
2855 ix86_cmodel = CM_32;
2856 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2857 ix86_cmodel = CM_KERNEL;
2859 error ("bad value (%s) for %scmodel=%s %s",
2860 ix86_cmodel_string, prefix, suffix, sw);
2864 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2865 use of rip-relative addressing. This eliminates fixups that
2866 would otherwise be needed if this object is to be placed in a
2867 DLL, and is essentially just as efficient as direct addressing. */
2868 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2869 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2870 else if (TARGET_64BIT)
2871 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2873 ix86_cmodel = CM_32;
2875 if (ix86_asm_string != 0)
2878 && !strcmp (ix86_asm_string, "intel"))
2879 ix86_asm_dialect = ASM_INTEL;
2880 else if (!strcmp (ix86_asm_string, "att"))
2881 ix86_asm_dialect = ASM_ATT;
2883 error ("bad value (%s) for %sasm=%s %s",
2884 ix86_asm_string, prefix, suffix, sw);
2886 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2887 error ("code model %qs not supported in the %s bit mode",
2888 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2889 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2890 sorry ("%i-bit mode not compiled in",
2891 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2893 for (i = 0; i < pta_size; i++)
2894 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2896 ix86_schedule = processor_alias_table[i].schedule;
2897 ix86_arch = processor_alias_table[i].processor;
2898 /* Default cpu tuning to the architecture. */
2899 ix86_tune = ix86_arch;
2901 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2902 error ("CPU you selected does not support x86-64 "
2905 if (processor_alias_table[i].flags & PTA_MMX
2906 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2907 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2908 if (processor_alias_table[i].flags & PTA_3DNOW
2909 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2910 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2911 if (processor_alias_table[i].flags & PTA_3DNOW_A
2912 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2913 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2914 if (processor_alias_table[i].flags & PTA_SSE
2915 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2916 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2917 if (processor_alias_table[i].flags & PTA_SSE2
2918 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2919 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2920 if (processor_alias_table[i].flags & PTA_SSE3
2921 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2922 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2923 if (processor_alias_table[i].flags & PTA_SSSE3
2924 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2925 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2926 if (processor_alias_table[i].flags & PTA_SSE4_1
2927 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2928 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2929 if (processor_alias_table[i].flags & PTA_SSE4_2
2930 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2931 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2932 if (processor_alias_table[i].flags & PTA_AVX
2933 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2934 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2935 if (processor_alias_table[i].flags & PTA_FMA
2936 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2937 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2938 if (processor_alias_table[i].flags & PTA_SSE4A
2939 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2940 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2941 if (processor_alias_table[i].flags & PTA_SSE5
2942 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2943 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2944 if (processor_alias_table[i].flags & PTA_ABM
2945 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2946 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2947 if (processor_alias_table[i].flags & PTA_CX16
2948 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2949 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2950 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2951 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2952 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2953 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2954 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2955 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2956 if (processor_alias_table[i].flags & PTA_MOVBE
2957 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
2958 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
2959 if (processor_alias_table[i].flags & PTA_AES
2960 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2961 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2962 if (processor_alias_table[i].flags & PTA_PCLMUL
2963 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2964 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2965 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2966 x86_prefetch_sse = true;
2972 error ("bad value (%s) for %sarch=%s %s",
2973 ix86_arch_string, prefix, suffix, sw);
2975 ix86_arch_mask = 1u << ix86_arch;
2976 for (i = 0; i < X86_ARCH_LAST; ++i)
2977 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2979 for (i = 0; i < pta_size; i++)
2980 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2982 ix86_schedule = processor_alias_table[i].schedule;
2983 ix86_tune = processor_alias_table[i].processor;
2984 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2986 if (ix86_tune_defaulted)
2988 ix86_tune_string = "x86-64";
2989 for (i = 0; i < pta_size; i++)
2990 if (! strcmp (ix86_tune_string,
2991 processor_alias_table[i].name))
2993 ix86_schedule = processor_alias_table[i].schedule;
2994 ix86_tune = processor_alias_table[i].processor;
2997 error ("CPU you selected does not support x86-64 "
3000 /* Intel CPUs have always interpreted SSE prefetch instructions as
3001 NOPs; so, we can enable SSE prefetch instructions even when
3002 -mtune (rather than -march) points us to a processor that has them.
3003 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3004 higher processors. */
3006 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3007 x86_prefetch_sse = true;
3011 error ("bad value (%s) for %stune=%s %s",
3012 ix86_tune_string, prefix, suffix, sw);
3014 ix86_tune_mask = 1u << ix86_tune;
3015 for (i = 0; i < X86_TUNE_LAST; ++i)
3016 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3019 ix86_cost = &ix86_size_cost;
3021 ix86_cost = processor_target_table[ix86_tune].cost;
3023 /* Arrange to set up i386_stack_locals for all functions. */
3024 init_machine_status = ix86_init_machine_status;
3026 /* Validate -mregparm= value. */
3027 if (ix86_regparm_string)
3030 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3031 i = atoi (ix86_regparm_string);
3032 if (i < 0 || i > REGPARM_MAX)
3033 error ("%sregparm=%d%s is not between 0 and %d",
3034 prefix, i, suffix, REGPARM_MAX);
3039 ix86_regparm = REGPARM_MAX;
3041 /* If the user has provided any of the -malign-* options,
3042 warn and use that value only if -falign-* is not set.
3043 Remove this code in GCC 3.2 or later. */
3044 if (ix86_align_loops_string)
3046 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3047 prefix, suffix, suffix);
3048 if (align_loops == 0)
3050 i = atoi (ix86_align_loops_string);
3051 if (i < 0 || i > MAX_CODE_ALIGN)
3052 error ("%salign-loops=%d%s is not between 0 and %d",
3053 prefix, i, suffix, MAX_CODE_ALIGN);
3055 align_loops = 1 << i;
3059 if (ix86_align_jumps_string)
3061 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3062 prefix, suffix, suffix);
3063 if (align_jumps == 0)
3065 i = atoi (ix86_align_jumps_string);
3066 if (i < 0 || i > MAX_CODE_ALIGN)
3067 error ("%salign-loops=%d%s is not between 0 and %d",
3068 prefix, i, suffix, MAX_CODE_ALIGN);
3070 align_jumps = 1 << i;
3074 if (ix86_align_funcs_string)
3076 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3077 prefix, suffix, suffix);
3078 if (align_functions == 0)
3080 i = atoi (ix86_align_funcs_string);
3081 if (i < 0 || i > MAX_CODE_ALIGN)
3082 error ("%salign-loops=%d%s is not between 0 and %d",
3083 prefix, i, suffix, MAX_CODE_ALIGN);
3085 align_functions = 1 << i;
3089 /* Default align_* from the processor table. */
3090 if (align_loops == 0)
3092 align_loops = processor_target_table[ix86_tune].align_loop;
3093 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3095 if (align_jumps == 0)
3097 align_jumps = processor_target_table[ix86_tune].align_jump;
3098 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3100 if (align_functions == 0)
3102 align_functions = processor_target_table[ix86_tune].align_func;
3105 /* Validate -mbranch-cost= value, or provide default. */
3106 ix86_branch_cost = ix86_cost->branch_cost;
3107 if (ix86_branch_cost_string)
3109 i = atoi (ix86_branch_cost_string);
3111 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3113 ix86_branch_cost = i;
3115 if (ix86_section_threshold_string)
3117 i = atoi (ix86_section_threshold_string);
3119 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3121 ix86_section_threshold = i;
3124 if (ix86_tls_dialect_string)
3126 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3127 ix86_tls_dialect = TLS_DIALECT_GNU;
3128 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3129 ix86_tls_dialect = TLS_DIALECT_GNU2;
3130 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3131 ix86_tls_dialect = TLS_DIALECT_SUN;
3133 error ("bad value (%s) for %stls-dialect=%s %s",
3134 ix86_tls_dialect_string, prefix, suffix, sw);
3137 if (ix87_precision_string)
3139 i = atoi (ix87_precision_string);
3140 if (i != 32 && i != 64 && i != 80)
3141 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3146 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3148 /* Enable by default the SSE and MMX builtins. Do allow the user to
3149 explicitly disable any of these. In particular, disabling SSE and
3150 MMX for kernel code is extremely useful. */
3151 if (!ix86_arch_specified)
3153 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3154 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3157 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3161 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3163 if (!ix86_arch_specified)
3165 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3167 /* i386 ABI does not specify red zone. It still makes sense to use it
3168 when programmer takes care to stack from being destroyed. */
3169 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3170 target_flags |= MASK_NO_RED_ZONE;
3173 /* Keep nonleaf frame pointers. */
3174 if (flag_omit_frame_pointer)
3175 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3176 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3177 flag_omit_frame_pointer = 1;
3179 /* If we're doing fast math, we don't care about comparison order
3180 wrt NaNs. This lets us use a shorter comparison sequence. */
3181 if (flag_finite_math_only)
3182 target_flags &= ~MASK_IEEE_FP;
3184 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3185 since the insns won't need emulation. */
3186 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3187 target_flags &= ~MASK_NO_FANCY_MATH_387;
3189 /* Likewise, if the target doesn't have a 387, or we've specified
3190 software floating point, don't use 387 inline intrinsics. */
3192 target_flags |= MASK_NO_FANCY_MATH_387;
3194 /* Turn on MMX builtins for -msse. */
3197 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3198 x86_prefetch_sse = true;
3201 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3202 if (TARGET_SSE4_2 || TARGET_ABM)
3203 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3205 /* Validate -mpreferred-stack-boundary= value or default it to
3206 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3207 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3208 if (ix86_preferred_stack_boundary_string)
3210 i = atoi (ix86_preferred_stack_boundary_string);
3211 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3212 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3213 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3215 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3218 /* Set the default value for -mstackrealign. */
3219 if (ix86_force_align_arg_pointer == -1)
3220 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3222 /* Validate -mincoming-stack-boundary= value or default it to
3223 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3224 if (ix86_force_align_arg_pointer)
3225 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3227 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3228 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3229 if (ix86_incoming_stack_boundary_string)
3231 i = atoi (ix86_incoming_stack_boundary_string);
3232 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3233 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3234 i, TARGET_64BIT ? 4 : 2);
3237 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3238 ix86_incoming_stack_boundary
3239 = ix86_user_incoming_stack_boundary;
3243 /* Accept -msseregparm only if at least SSE support is enabled. */
3244 if (TARGET_SSEREGPARM
3246 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3248 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3249 if (ix86_fpmath_string != 0)
3251 if (! strcmp (ix86_fpmath_string, "387"))
3252 ix86_fpmath = FPMATH_387;
3253 else if (! strcmp (ix86_fpmath_string, "sse"))
3257 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3258 ix86_fpmath = FPMATH_387;
3261 ix86_fpmath = FPMATH_SSE;
3263 else if (! strcmp (ix86_fpmath_string, "387,sse")
3264 || ! strcmp (ix86_fpmath_string, "387+sse")
3265 || ! strcmp (ix86_fpmath_string, "sse,387")
3266 || ! strcmp (ix86_fpmath_string, "sse+387")
3267 || ! strcmp (ix86_fpmath_string, "both"))
3271 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3272 ix86_fpmath = FPMATH_387;
3274 else if (!TARGET_80387)
3276 warning (0, "387 instruction set disabled, using SSE arithmetics");
3277 ix86_fpmath = FPMATH_SSE;
3280 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3283 error ("bad value (%s) for %sfpmath=%s %s",
3284 ix86_fpmath_string, prefix, suffix, sw);
3287 /* If the i387 is disabled, then do not return values in it. */
3289 target_flags &= ~MASK_FLOAT_RETURNS;
3291 /* Use external vectorized library in vectorizing intrinsics. */
3292 if (ix86_veclibabi_string)
3294 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3295 ix86_veclib_handler = ix86_veclibabi_svml;
3296 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3297 ix86_veclib_handler = ix86_veclibabi_acml;
3299 error ("unknown vectorization library ABI type (%s) for "
3300 "%sveclibabi=%s %s", ix86_veclibabi_string,
3301 prefix, suffix, sw);
3304 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3305 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3307 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3309 /* ??? Unwind info is not correct around the CFG unless either a frame
3310 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3311 unwind info generation to be aware of the CFG and propagating states
3313 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3314 || flag_exceptions || flag_non_call_exceptions)
3315 && flag_omit_frame_pointer
3316 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3318 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3319 warning (0, "unwind tables currently require either a frame pointer "
3320 "or %saccumulate-outgoing-args%s for correctness",
3322 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3325 /* If stack probes are required, the space used for large function
3326 arguments on the stack must also be probed, so enable
3327 -maccumulate-outgoing-args so this happens in the prologue. */
3328 if (TARGET_STACK_PROBE
3329 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3331 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3332 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3333 "for correctness", prefix, suffix);
3334 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3337 /* For sane SSE instruction set generation we need fcomi instruction.
3338 It is safe to enable all CMOVE instructions. */
3342 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3345 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3346 p = strchr (internal_label_prefix, 'X');
3347 internal_label_prefix_len = p - internal_label_prefix;
3351 /* When scheduling description is not available, disable scheduler pass
3352 so it won't slow down the compilation and make x87 code slower. */
3353 if (!TARGET_SCHEDULE)
3354 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3356 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3357 set_param_value ("simultaneous-prefetches",
3358 ix86_cost->simultaneous_prefetches);
3359 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3360 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3361 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3362 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3363 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3364 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3366 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3367 can be optimized to ap = __builtin_next_arg (0). */
3369 targetm.expand_builtin_va_start = NULL;
3373 ix86_gen_leave = gen_leave_rex64;
3374 ix86_gen_pop1 = gen_popdi1;
3375 ix86_gen_add3 = gen_adddi3;
3376 ix86_gen_sub3 = gen_subdi3;
3377 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3378 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3379 ix86_gen_monitor = gen_sse3_monitor64;
3380 ix86_gen_andsp = gen_anddi3;
3384 ix86_gen_leave = gen_leave;
3385 ix86_gen_pop1 = gen_popsi1;
3386 ix86_gen_add3 = gen_addsi3;
3387 ix86_gen_sub3 = gen_subsi3;
3388 ix86_gen_sub3_carry = gen_subsi3_carry;
3389 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3390 ix86_gen_monitor = gen_sse3_monitor;
3391 ix86_gen_andsp = gen_andsi3;
3395 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3397 target_flags |= MASK_CLD & ~target_flags_explicit;
3400 /* Save the initial options in case the user does function specific options */
3402 target_option_default_node = target_option_current_node
3403 = build_target_option_node ();
3406 /* Save the current options */
/* Save the current x86 target-option globals into *PTR
   (struct cl_target_option) so that attribute((target(...))) /
   function-specific option switching can later restore them; the
   inverse operation is ix86_function_specific_restore below.
   NOTE(review): this listing is elided — the embedded original line
   numbers skip values, so braces and some statements are missing.  */
3409 ix86_function_specific_save (struct cl_target_option *ptr)
/* The values are stored into narrow structure fields — presumably
   8-bit, given the 0..255 range checks; assert they fit before the
   implicit truncation.  */
3411 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3412 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3413 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3414 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3415 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
/* Snapshot each global option variable into the save area.  */
3417 ptr->arch = ix86_arch;
3418 ptr->schedule = ix86_schedule;
3419 ptr->tune = ix86_tune;
3420 ptr->fpmath = ix86_fpmath;
3421 ptr->branch_cost = ix86_branch_cost;
3422 ptr->tune_defaulted = ix86_tune_defaulted;
3423 ptr->arch_specified = ix86_arch_specified;
/* Also record which ISA / target flags the user set explicitly, so a
   restore can distinguish explicit settings from defaults.  */
3424 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3425 ptr->target_flags_explicit = target_flags_explicit;
3428 /* Restore the current options */
/* Restore the x86 target-option globals from *PTR (previously filled
   in by ix86_function_specific_save), and rebuild the derived
   arch/tune feature arrays when the arch or tune actually changed.
   NOTE(review): this listing is elided — the embedded original line
   numbers skip values, so braces and some statements are missing.  */
3431 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Remember the old values so we only recompute the (moderately
   expensive) feature bitmaps when something really changed.  */
3433 enum processor_type old_tune = ix86_tune;
3434 enum processor_type old_arch = ix86_arch;
3435 unsigned int ix86_arch_mask, ix86_tune_mask;
/* The save-area fields are narrow integers; cast them back to their
   enum types on the way out.  */
3438 ix86_arch = (enum processor_type) ptr->arch;
3439 ix86_schedule = (enum attr_cpu) ptr->schedule;
3440 ix86_tune = (enum processor_type) ptr->tune;
3441 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3442 ix86_branch_cost = ptr->branch_cost;
3443 ix86_tune_defaulted = ptr->tune_defaulted;
3444 ix86_arch_specified = ptr->arch_specified;
3445 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3446 target_flags_explicit = ptr->target_flags_explicit;
3448 /* Recreate the arch feature tests if the arch changed */
3449 if (old_arch != ix86_arch)
3451 ix86_arch_mask = 1u << ix86_arch;
3452 for (i = 0; i < X86_ARCH_LAST; ++i)
3453 ix86_arch_features[i]
3454 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3457 /* Recreate the tune optimization tests */
3458 if (old_tune != ix86_tune)
3460 ix86_tune_mask = 1u << ix86_tune;
3461 for (i = 0; i < X86_TUNE_LAST; ++i)
3462 ix86_tune_features[i]
3463 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3467 /* Print the current options */
/* Dump the contents of a cl_target_option save area *PTR to FILE at
   the given INDENT, for debugging function-specific option handling.
   NOTE(review): this listing is elided — the embedded original line
   numbers skip values; the variable receiving ix86_target_string
   (presumably target_string, freed at the end) is declared on a
   missing line.  */
3470 ix86_function_specific_print (FILE *file, int indent,
3471 struct cl_target_option *ptr)
/* Build a human-readable "-m..." style string from the saved ISA and
   target flags; the result is heap-allocated and freed below.  */
3474 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3475 NULL, NULL, NULL, false);
/* Print arch/tune both numerically and, when in range of the names
   table, symbolically.  */
3477 fprintf (file, "%*sarch = %d (%s)\n",
3480 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3481 ? cpu_names[ptr->arch]
3484 fprintf (file, "%*stune = %d (%s)\n",
3487 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3488 ? cpu_names[ptr->tune]
/* fpmath is a bitmask: 387, SSE, or both.  */
3491 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3492 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3493 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3494 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3498 fprintf (file, "%*s%s\n", indent, "", target_string);
3499 free (target_string);
3504 /* Inner function to process the attribute((target(...))), take an argument and
3505 set the current options from the argument. If we have a list, recursively go
/* Parse the argument(s) of attribute((target("..."))) — ARGS is
   either a TREE_LIST (handled recursively) or a STRING_CST holding a
   comma-separated option list — and apply each recognized option to
   the current target state.  String-valued options (arch=/tune=/
   fpmath=) are stored into P_STRINGS for the caller to process.
   NOTE(review): this listing is elided — the embedded original line
   numbers skip values, so the attrs[] table declaration, several
   braces, return statements and error paths are not visible here.  */
3509 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Table-entry constructors: each records the option string, its
   length (sizeof(S)-1), a dispatch type, the OPT_* enumerator, and a
   target_flags mask (0 where unused).  */
3514 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3515 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3516 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3517 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3532 enum ix86_opt_type type;
/* ISA options: toggled through ix86_handle_option below.  */
3537 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3538 IX86_ATTR_ISA ("abm", OPT_mabm),
3539 IX86_ATTR_ISA ("aes", OPT_maes),
3540 IX86_ATTR_ISA ("avx", OPT_mavx),
3541 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3542 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3543 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3544 IX86_ATTR_ISA ("sse", OPT_msse),
3545 IX86_ATTR_ISA ("sse2", OPT_msse2),
3546 IX86_ATTR_ISA ("sse3", OPT_msse3),
3547 IX86_ATTR_ISA ("sse4", OPT_msse4),
3548 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3549 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3550 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3551 IX86_ATTR_ISA ("sse5", OPT_msse5),
3552 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3554 /* string options */
3555 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3556 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3557 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Flag options: set or clear a mask in target_flags.  The _NO
   variants carry inverted-sense masks (e.g. MASK_NO_FANCY_MATH_387),
   so "no-xxx" handling below flips opt_set_p for them.  */
3560 IX86_ATTR_YES ("cld",
3564 IX86_ATTR_NO ("fancy-math-387",
3565 OPT_mfancy_math_387,
3566 MASK_NO_FANCY_MATH_387),
3568 IX86_ATTR_NO ("fused-madd",
3570 MASK_NO_FUSED_MADD),
3572 IX86_ATTR_YES ("ieee-fp",
3576 IX86_ATTR_YES ("inline-all-stringops",
3577 OPT_minline_all_stringops,
3578 MASK_INLINE_ALL_STRINGOPS),
3580 IX86_ATTR_YES ("inline-stringops-dynamically",
3581 OPT_minline_stringops_dynamically,
3582 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3584 IX86_ATTR_NO ("align-stringops",
3585 OPT_mno_align_stringops,
3586 MASK_NO_ALIGN_STRINGOPS),
3588 IX86_ATTR_YES ("recip",
3594 /* If this is a list, recurse to get the options. */
3595 if (TREE_CODE (args) == TREE_LIST)
3599 for (; args; args = TREE_CHAIN (args))
3600 if (TREE_VALUE (args)
3601 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3607 else if (TREE_CODE (args) != STRING_CST)
3610 /* Handle multiple arguments separated by commas. */
3611 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3613 while (next_optstr && *next_optstr != '\0')
3615 char *p = next_optstr;
3617 char *comma = strchr (next_optstr, ',');
3618 const char *opt_string;
3619 size_t len, opt_len;
3624 enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the token before the comma and advance past it.  */
3630 len = comma - next_optstr;
3631 next_optstr = comma + 1;
3639 /* Recognize no-xxx. */
3640 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3649 /* Find the option. */
3652 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3654 type = attrs[i].type;
3655 opt_len = attrs[i].len;
/* Cheap first-character test, then: string options match as a prefix
   (len > opt_len leaves room for the value), all others must match
   exactly (len == opt_len).  */
3656 if (ch == attrs[i].string[0]
3657 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3658 && memcmp (p, attrs[i].string, opt_len) == 0)
3661 mask = attrs[i].mask;
3662 opt_string = attrs[i].string;
3667 /* Process the option. */
3670 error ("attribute(target(\"%s\")) is unknown", orig_p);
/* ISA options go through the common -m option handler.  */
3674 else if (type == ix86_opt_isa)
3675 ix86_handle_option (opt, p, opt_set_p);
3677 else if (type == ix86_opt_yes || type == ix86_opt_no)
/* _NO entries store inverted-sense masks; flip the set/clear
   decision so "no-" on them re-inverts correctly.  */
3679 if (type == ix86_opt_no)
3680 opt_set_p = !opt_set_p;
3683 target_flags |= mask;
3685 target_flags &= ~mask;
3688 else if (type == ix86_opt_str)
/* Each string option (arch=/tune=/fpmath=) may appear only once.  */
3692 error ("option(\"%s\") was already specified", opt_string);
/* Ownership note: the xstrdup'd value is freed by the caller
   (ix86_valid_target_attribute_tree).  */
3696 p_strings[opt] = xstrdup (p + opt_len);
3706 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Build and return a TARGET_OPTION_NODE tree for the options listed
   in ARGS (the argument of attribute((target(...)))), or NULL on a
   parse failure.  Temporarily swaps the option-string globals, reruns
   override_options, snapshots the result, then restores the globals.
   NOTE(review): this listing is elided — the embedded original line
   numbers skip values, so braces, the declarations of t/i, and the
   early-return path are not visible here.  */
3709 ix86_valid_target_attribute_tree (tree args)
/* Save the option-string globals so they can be restored after the
   temporary override_options run below.  */
3711 const char *orig_arch_string = ix86_arch_string;
3712 const char *orig_tune_string = ix86_tune_string;
3713 const char *orig_fpmath_string = ix86_fpmath_string;
3714 int orig_tune_defaulted = ix86_tune_defaulted;
3715 int orig_arch_specified = ix86_arch_specified;
/* One slot per IX86_FUNCTION_SPECIFIC_* string option, filled in by
   the inner parser; entries are xstrdup'd and freed at the end.  */
3716 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3719 struct cl_target_option *def
3720 = TREE_TARGET_OPTION (target_option_default_node);
3722 /* Process each of the options on the chain. */
3723 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3726 /* If the changed options are different from the default, rerun override_options,
3727 and then save the options away. The string options are attribute options,
3728 and will be undone when we copy the save structure. */
3729 if (ix86_isa_flags != def->ix86_isa_flags
3730 || target_flags != def->target_flags
3731 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3732 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3733 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3735 /* If we are using the default tune= or arch=, undo the string assigned,
3736 and use the default. */
3737 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3738 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3739 else if (!orig_arch_specified)
3740 ix86_arch_string = NULL;
3742 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3743 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3744 else if (orig_tune_defaulted)
3745 ix86_tune_string = NULL;
3747 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3748 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3749 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3750 else if (!TARGET_64BIT && TARGET_SSE)
3751 ix86_fpmath_string = "sse,387";
3753 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3754 override_options (false);
3756 /* Add any builtin functions with the new isa if any. */
3757 ix86_add_new_builtins (ix86_isa_flags);
3759 /* Save the current options unless we are validating options for
3761 t = build_target_option_node ();
/* Put the original option strings back; the attribute copies live in
   the saved cl_target_option node, not in these globals.  */
3763 ix86_arch_string = orig_arch_string;
3764 ix86_tune_string = orig_tune_string;
3765 ix86_fpmath_string = orig_fpmath_string;
3767 /* Free up memory allocated to hold the strings */
3768 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3769 if (option_strings[i])
3770 free (option_strings[i]);
3776 /* Hook to validate attribute((target("string"))). */
/* Target hook: validate attribute((target("string"))) on FNDECL and
   record the resulting target/optimization nodes on the decl.  The
   current global option state is saved and fully restored before
   returning, so validation has no lasting side effects.
   NOTE(review): this listing is elided — the embedded original line
   numbers skip values; the `args` parameter, some braces and the
   return statement are on missing lines.  */
3779 ix86_valid_target_attribute_p (tree fndecl,
3780 tree ARG_UNUSED (name),
3782 int ARG_UNUSED (flags))
3784 struct cl_target_option cur_target;
3786 tree old_optimize = build_optimization_node ();
3787 tree new_target, new_optimize;
3788 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3790 /* If the function changed the optimization levels as well as setting target
3791 options, start with the optimizations specified. */
3792 if (func_optimize && func_optimize != old_optimize)
3793 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3795 /* The target attributes may also change some optimization flags, so update
3796 the optimization options if necessary. */
3797 cl_target_option_save (&cur_target);
3798 new_target = ix86_valid_target_attribute_tree (args);
3799 new_optimize = build_optimization_node ();
/* Attach the computed nodes to the function declaration so later
   passes (and ix86_set_current_function) can switch to them.  */
3806 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3808 if (old_optimize != new_optimize)
3809 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary option changes made during validation.  */
3812 cl_target_option_restore (&cur_target);
3814 if (old_optimize != new_optimize)
3815 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3821 /* Hook to determine if one function can safely inline another. */
/* Target hook: decide whether CALLEE may be inlined into CALLER when
   either carries function-specific target options.  The callee's ISA
   flags must be a subset of the caller's, and the remaining saved
   options must match exactly.
   NOTE(review): this listing is elided — the embedded original line
   numbers skip values; the `ret` variable, braces and return are on
   missing lines.  */
3824 ix86_can_inline_p (tree caller, tree callee)
3827 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3828 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3830 /* If callee has no option attributes, then it is ok to inline. */
3834 /* If caller has no option attributes, but callee does then it is not ok to
3836 else if (!caller_tree)
3841 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3842 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3844 /* Callee's isa options should be a subset of the caller's, i.e. a SSE5 function
3845 can inline a SSE2 function but a SSE2 function can't inline a SSE5
/* Subset test: masking the callee's flags by the caller's must leave
   them unchanged.  */
3847 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3848 != callee_opts->ix86_isa_flags)
3851 /* See if we have the same non-isa options. */
3852 else if (caller_opts->target_flags != callee_opts->target_flags)
3855 /* See if arch, tune, etc. are the same. */
3856 else if (caller_opts->arch != callee_opts->arch)
3859 else if (caller_opts->tune != callee_opts->tune)
3862 else if (caller_opts->fpmath != callee_opts->fpmath)
3865 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3876 /* Remember the last target of ix86_set_current_function. */
3877 static GTY(()) tree ix86_previous_fndecl;
3879 /* Establish appropriate back-end context for processing the function
3880 FNDECL. The argument might be NULL to indicate processing at top
3881 level, outside of any function scope. */
/* Switch the back end's global option state to match FNDECL's
   function-specific target options (or back to the current defaults
   when the function has none).  FNDECL may be NULL for top-level
   processing; ix86_previous_fndecl caches the last decl so repeated
   calls for the same function are cheap.
   NOTE(review): this listing is elided — the embedded original line
   numbers skip values; braces, the NULL_TREE arms of the two
   conditionals, and an early return are on missing lines.  */
3883 ix86_set_current_function (tree fndecl)
3885 /* Only change the context if the function changes. This hook is called
3886 several times in the course of compiling a function, and we don't want to
3887 slow things down too much or call target_reinit when it isn't safe. */
3888 if (fndecl && fndecl != ix86_previous_fndecl)
3890 tree old_tree = (ix86_previous_fndecl
3891 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3894 tree new_tree = (fndecl
3895 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3898 ix86_previous_fndecl = fndecl;
/* Same target node as last time — nothing to switch.  */
3899 if (old_tree == new_tree)
/* Function has its own target options: restore that saved state.  */
3904 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* Otherwise fall back to the current global defaults.  */
3910 struct cl_target_option *def
3911 = TREE_TARGET_OPTION (target_option_current_node);
3913 cl_target_option_restore (def);
3920 /* Return true if this goes in large data/bss. */
/* Return true if EXP should be placed in the large data/bss sections
   (.ldata/.lbss) used by the x86-64 medium code model.  Only relevant
   for CM_MEDIUM / CM_MEDIUM_PIC; decided by an explicit section name
   or by comparing the object's size against ix86_section_threshold.
   NOTE(review): this listing is elided — the embedded original line
   numbers skip values; the return statements and braces are on
   missing lines.  */
3923 ix86_in_large_data_p (tree exp)
/* Large data only exists under the medium code models.  */
3925 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3928 /* Functions are never large data. */
3929 if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit section attribute of .ldata/.lbss forces large data.  */
3932 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3934 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3935 if (strcmp (section, ".ldata") == 0
3936 || strcmp (section, ".lbss") == 0)
3942 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3944 /* If this is an incomplete type with size 0, then we can't put it
3945 in data because it might be too big when completed. */
3946 if (!size || size > ix86_section_threshold)
3953 /* Switch to the appropriate section for output of DECL.
3954 DECL is either a `VAR_DECL' node or a constant of some sort.
3955 RELOC indicates whether forming the initial value of DECL requires
3956 link-time relocations. */
/* Forward declaration for the select_section target hook below.  */
3958 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
/* Select the output section for DECL.  Under the medium code models,
   large-data objects are routed to the corresponding .ldata* / .lbss*
   sections chosen from the SECCAT_* category; everything else falls
   through to default_elf_select_section.
   NOTE(review): this listing is elided — the embedded original line
   numbers skip values; several case labels, `break;` statements, and
   some sname assignments are on missing lines.  */
3962 x86_64_elf_select_section (tree decl, int reloc,
3963 unsigned HOST_WIDE_INT align)
3965 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3966 && ix86_in_large_data_p (decl))
3968 const char *sname = NULL;
/* Large-model data sections are writable by default; BSS adds the
   SECTION_BSS flag below.  */
3969 unsigned int flags = SECTION_WRITE;
3970 switch (categorize_decl_for_section (decl, reloc))
3975 case SECCAT_DATA_REL:
3976 sname = ".ldata.rel";
3978 case SECCAT_DATA_REL_LOCAL:
3979 sname = ".ldata.rel.local";
3981 case SECCAT_DATA_REL_RO:
3982 sname = ".ldata.rel.ro";
3984 case SECCAT_DATA_REL_RO_LOCAL:
3985 sname = ".ldata.rel.ro.local";
3989 flags |= SECTION_BSS;
3992 case SECCAT_RODATA_MERGE_STR:
3993 case SECCAT_RODATA_MERGE_STR_INIT:
3994 case SECCAT_RODATA_MERGE_CONST:
3998 case SECCAT_SRODATA:
4005 /* We don't split these for medium model. Place them into
4006 default sections and hope for best. */
4008 case SECCAT_EMUTLS_VAR:
4009 case SECCAT_EMUTLS_TMPL:
4014 /* We might get called with string constants, but get_named_section
4015 doesn't like them as they are not DECLs. Also, we need to set
4016 flags in that case. */
4018 return get_section (sname, flags, NULL);
4019 return get_named_section (decl, sname, reloc);
4022 return default_elf_select_section (decl, reloc, align);
4025 /* Build up a unique section name, expressed as a
4026 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4027 RELOC indicates whether the initial value of EXP requires
4028 link-time relocations. */
/* Build a unique section name for DECL and store it in
   DECL_SECTION_NAME.  RELOC says whether DECL's initializer needs
   link-time relocations.  For large data under the medium code model
   the name is "<.gnu.linkonce?><.l-prefix>.<assembler-name>"; otherwise
   default_unique_section handles it.
   NOTE(review): some case labels/breaks are missing from this excerpt;
   verify against the full file.  */
4030 static void ATTRIBUTE_UNUSED
4031 x86_64_elf_unique_section (tree decl, int reloc)
4033 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4034 && ix86_in_large_data_p (decl))
4036 const char *prefix = NULL;
4037 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4038 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
/* Pick the large-model section prefix by category; the short ".ld",
   ".lb", ".lr" forms are used for one-only (link-once) sections.  */
4040 switch (categorize_decl_for_section (decl, reloc))
4043 case SECCAT_DATA_REL:
4044 case SECCAT_DATA_REL_LOCAL:
4045 case SECCAT_DATA_REL_RO:
4046 case SECCAT_DATA_REL_RO_LOCAL:
4047 prefix = one_only ? ".ld" : ".ldata";
4050 prefix = one_only ? ".lb" : ".lbss";
4053 case SECCAT_RODATA_MERGE_STR:
4054 case SECCAT_RODATA_MERGE_STR_INIT:
4055 case SECCAT_RODATA_MERGE_CONST:
4056 prefix = one_only ? ".lr" : ".lrodata";
4058 case SECCAT_SRODATA:
4065 /* We don't split these for medium model. Place them into
4066 default sections and hope for best. */
4068 case SECCAT_EMUTLS_VAR:
4069 prefix = targetm.emutls.var_section;
4071 case SECCAT_EMUTLS_TMPL:
4072 prefix = targetm.emutls.tmpl_section;
4077 const char *name, *linkonce;
/* Use the decl's mangled assembler name, with any target encoding
   (e.g. leading '*') stripped.  */
4080 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4081 name = targetm.strip_name_encoding (name);
4083 /* If we're using one_only, then there needs to be a .gnu.linkonce
4084 prefix to the section name. */
4085 linkonce = one_only ? ".gnu.linkonce" : "";
4087 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4089 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4093 default_unique_section (decl, reloc);
4096 #ifdef COMMON_ASM_OP
4097 /* This says how to output assembler code to declare an
4098 uninitialized external linkage data object.
4100 For medium model x86-64 we need to use .largecomm opcode for
/* Emit the assembler directive declaring an uninitialized common
   symbol NAME of SIZE bytes.  Under the medium code model, objects
   larger than ix86_section_threshold use ".largecomm" instead of the
   target's COMMON_ASM_OP; the size and byte alignment follow the
   symbol name.  */
4103 x86_elf_aligned_common (FILE *file,
4104 const char *name, unsigned HOST_WIDE_INT size,
4107 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4108 && size > (unsigned int)ix86_section_threshold)
4109 fprintf (file, ".largecomm\t");
4111 fprintf (file, "%s", COMMON_ASM_OP);
4112 assemble_name (file, name);
/* ALIGN is in bits; the directive wants bytes.  */
4113 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4114 size, align / BITS_PER_UNIT);
4118 /* Utility function for targets to use in implementing
4119 ASM_OUTPUT_ALIGNED_BSS. */
/* Implement ASM_OUTPUT_ALIGNED_BSS: place DECL/NAME of SIZE bytes in
   .bss (or .lbss for large objects under the medium code model),
   align it, emit its label, and reserve the space.  */
4122 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4123 const char *name, unsigned HOST_WIDE_INT size,
4126 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4127 && size > (unsigned int)ix86_section_threshold)
4128 switch_to_section (get_named_section (decl, ".lbss", 0));
4130 switch_to_section (bss_section);
4131 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4132 #ifdef ASM_DECLARE_OBJECT_NAME
4133 last_assemble_variable_decl = decl;
4134 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4136 /* Standard thing is just output label for the object. */
4137 ASM_OUTPUT_LABEL (file, name);
4138 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so the symbol has a distinct address.  */
4139 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Adjust optimization flag defaults for the given -O LEVEL before the
   command line is fully processed.  Several flags are set to the
   sentinel value 2 ("not set by the user") and resolved later in
   override_options once TARGET_64BIT is known.  */
4143 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4145 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4146 make the problem with not enough registers even worse. */
4147 #ifdef INSN_SCHEDULING
4149 flag_schedule_insns = 0;
4153 /* The Darwin libraries never set errno, so we might as well
4154 avoid calling them when that's the only reason we would. */
4155 flag_errno_math = 0;
4157 /* The default values of these switches depend on the TARGET_64BIT
4158 that is not known at this moment. Mark these values with 2 and
4159 let the user override these. In case there is no command line option
4160 specifying them, we will set the defaults in override_options. */
4162 flag_omit_frame_pointer = 2;
4163 flag_pcc_struct_return = 2;
4164 flag_asynchronous_unwind_tables = 2;
4165 flag_vect_cost_model = 1;
/* Give subtargets a hook to tweak further defaults.  */
4166 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4167 SUBTARGET_OPTIMIZATION_OPTIONS;
4171 /* Decide whether we can make a sibling call to a function. DECL is the
4172 declaration of the function being targeted by the call and EXP is the
4173 CALL_EXPR representing the call. */
/* Decide whether the call EXP to DECL (NULL for an indirect call) can
   be turned into a sibling (tail) call.  Rejects cases where the PLT,
   the x87 return stack, regparm argument registers, or stack
   realignment would make a sibcall unsafe.  */
4176 ix86_function_ok_for_sibcall (tree decl, tree exp)
4181 /* If we are generating position-independent code, we cannot sibcall
4182 optimize any indirect call, or a direct call to a global function,
4183 as the PLT requires %ebx be live. */
4184 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Get the callee's FUNCTION_TYPE from the call expression.  */
4191 func = TREE_TYPE (CALL_EXPR_FN (exp));
4192 if (POINTER_TYPE_P (func))
4193 func = TREE_TYPE (func);
4196 /* Check that the return value locations are the same. Like
4197 if we are returning floats on the 80387 register stack, we cannot
4198 make a sibcall from a function that doesn't return a float to a
4199 function that does or, conversely, from a function that does return
4200 a float to a function that doesn't; the necessary stack adjustment
4201 would not be executed. This is also the place we notice
4202 differences in the return value ABI. Note that it is ok for one
4203 of the functions to have void return type as long as the return
4204 value of the other is passed in a register. */
4205 a = ix86_function_value (TREE_TYPE (exp), func, false);
4206 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4208 if (STACK_REG_P (a) || STACK_REG_P (b))
4210 if (!rtx_equal_p (a, b))
4213 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4215 else if (!rtx_equal_p (a, b))
4218 /* If this call is indirect, we'll need to be able to use a call-clobbered
4219 register for the address of the target function. Make sure that all
4220 such registers are not used for passing parameters. */
4221 if (!decl && !TARGET_64BIT)
4225 /* We're looking at the CALL_EXPR, we need the type of the function. */
4226 type = CALL_EXPR_FN (exp); /* pointer expression */
4227 type = TREE_TYPE (type); /* pointer type */
4228 type = TREE_TYPE (type); /* function type */
/* With regparm(3) all of eax/edx/ecx may carry arguments, leaving
   no call-clobbered register free for the call target address.  */
4230 if (ix86_function_regparm (type, NULL) >= 3)
4232 /* ??? Need to count the actual number of registers to be used,
4233 not the possible number of registers. Fix later. */
4238 /* Dllimport'd functions are also called indirectly. */
4239 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4241 && decl && DECL_DLLIMPORT_P (decl)
4242 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4245 /* If we need to align the outgoing stack, then sibcalling would
4246 unalign the stack, which may break the called function. */
4247 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4250 /* Otherwise okay. That also includes certain types of indirect calls. */
4254 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4255 calling convention attributes;
4256 arguments as in struct attribute_spec.handler. */
/* Attribute handler for the i386 calling-convention attributes
   ("cdecl", "stdcall", "fastcall", "regparm", "sseregparm").
   Validates the attribute target and argument and diagnoses mutually
   incompatible combinations; sets *no_add_attrs to drop an invalid
   attribute.  Arguments as in struct attribute_spec.handler.
   NOTE(review): several returns/braces are missing from this excerpt;
   verify control flow against the full file.  */
4259 ix86_handle_cconv_attribute (tree *node, tree name,
4261 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function types (or decls that
   carry one).  */
4264 if (TREE_CODE (*node) != FUNCTION_TYPE
4265 && TREE_CODE (*node) != METHOD_TYPE
4266 && TREE_CODE (*node) != FIELD_DECL
4267 && TREE_CODE (*node) != TYPE_DECL)
4269 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4271 *no_add_attrs = true;
4275 /* Can combine regparm with all attributes but fastcall. */
4276 if (is_attribute_p ("regparm", name))
4280 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4282 error ("fastcall and regparm attributes are not compatible");
/* regparm takes one integer-constant argument, at most REGPARM_MAX.  */
4285 cst = TREE_VALUE (args);
4286 if (TREE_CODE (cst) != INTEGER_CST)
4288 warning (OPT_Wattributes,
4289 "%qE attribute requires an integer constant argument",
4291 *no_add_attrs = true;
4293 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4295 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4297 *no_add_attrs = true;
4305 /* Do not warn when emulating the MS ABI. */
4306 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4307 warning (OPT_Wattributes, "%qE attribute ignored",
4309 *no_add_attrs = true;
4313 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4314 if (is_attribute_p ("fastcall", name))
4316 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4318 error ("fastcall and cdecl attributes are not compatible");
4320 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4322 error ("fastcall and stdcall attributes are not compatible");
4324 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4326 error ("fastcall and regparm attributes are not compatible");
4330 /* Can combine stdcall with fastcall (redundant), regparm and
4332 else if (is_attribute_p ("stdcall", name))
4334 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4336 error ("stdcall and cdecl attributes are not compatible");
4338 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4340 error ("stdcall and fastcall attributes are not compatible");
4344 /* Can combine cdecl with regparm and sseregparm. */
4345 else if (is_attribute_p ("cdecl", name))
4347 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4349 error ("stdcall and cdecl attributes are not compatible");
4351 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4353 error ("fastcall and cdecl attributes are not compatible");
4357 /* Can combine sseregparm with all attributes. */
4362 /* Return 0 if the attributes for two types are incompatible, 1 if they
4363 are compatible, and 2 if they are nearly compatible (which causes a
4364 warning to be generated). */
/* Target hook: compare the calling-convention attributes of TYPE1 and
   TYPE2.  Returns 0 if incompatible, 1 if compatible, 2 if nearly
   compatible (warning).  Mismatched fastcall/regparm/sseregparm or
   cdecl-vs-stdcall return conventions make the types incompatible.  */
4367 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4369 /* Check for mismatch of non-default calling convention. */
/* With -mrtd the default is stdcall, so "cdecl" is the deviation;
   otherwise "stdcall" is.  */
4370 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4372 if (TREE_CODE (type1) != FUNCTION_TYPE
4373 && TREE_CODE (type1) != METHOD_TYPE)
4376 /* Check for mismatched fastcall/regparm types. */
4377 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4378 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4379 || (ix86_function_regparm (type1, NULL)
4380 != ix86_function_regparm (type2, NULL)))
4383 /* Check for mismatched sseregparm types. */
4384 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4385 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4388 /* Check for mismatched return types (cdecl vs stdcall). */
4389 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4390 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4396 /* Return the regparm value for a function with the indicated TYPE and DECL.
4397 DECL may be NULL when calling function indirectly
4398 or considering a libcall. */
/* Return the number of integer registers used for passing arguments
   to a function with the given TYPE and DECL.  DECL may be NULL for
   indirect calls or libcalls.  Considers -mregparm, the regparm and
   fastcall attributes, and (for local functions) how many regparm
   registers are actually free of fixed-register uses.  */
4401 ix86_function_regparm (const_tree type, const_tree decl)
/* Emit the nested-function regparm(3) error only once.  */
4406 static bool error_issued;
/* 64-bit ABIs have fixed register counts; pick by the type's ABI.  */
4409 return (ix86_function_type_abi (type) == SYSV_ABI
4410 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
4412 regparm = ix86_regparm;
4413 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4417 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4419 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4421 /* We can't use regparm(3) for nested functions because
4422 these pass static chain pointer in %ecx register. */
4423 if (!error_issued && regparm == 3
4424 && decl_function_context (decl)
4425 && !DECL_NO_STATIC_CHAIN (decl))
4427 error ("nested functions are limited to 2 register parameters");
4428 error_issued = true;
/* fastcall implies its own fixed register convention.  */
4436 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4439 /* Use register calling convention for local functions when possible. */
4441 && TREE_CODE (decl) == FUNCTION_DECL
4445 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4446 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4449 int local_regparm, globals = 0, regno;
4452 /* Make sure no regparm register is taken by a
4453 fixed register variable. */
4454 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4455 if (fixed_regs[local_regparm])
4458 /* We can't use regparm(3) for nested functions as these use
4459 static chain pointer in third argument. */
4460 if (local_regparm == 3
4461 && decl_function_context (decl)
4462 && !DECL_NO_STATIC_CHAIN (decl))
4465 /* If the function realigns its stackpointer, the prologue will
4466 clobber %ecx. If we've already generated code for the callee,
4467 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4468 scanning the attributes for the self-realigning property. */
4469 f = DECL_STRUCT_FUNCTION (decl);
4470 /* Since current internal arg pointer won't conflict with
4471 parameter passing regs, so no need to change stack
4472 realignment and adjust regparm number.
4474 Each fixed register usage increases register pressure,
4475 so less registers should be used for argument passing.
4476 This functionality can be overriden by an explicit
4478 for (regno = 0; regno <= DI_REG; regno++)
4479 if (fixed_regs[regno])
/* Reduce the count by the number of globally-fixed registers.  */
4483 = globals < local_regparm ? local_regparm - globals : 0;
4485 if (local_regparm > regparm)
4486 regparm = local_regparm;
4493 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4494 DFmode (2) arguments in SSE registers for a function with the
4495 indicated TYPE and DECL. DECL may be NULL when calling function
4496 indirectly or considering a libcall. Otherwise return 0. */
/* Return 2 if SFmode and DFmode arguments are passed in SSE registers
   for TYPE/DECL, 1 for SFmode only, 0 otherwise (32-bit only; see the
   gcc_assert).  DECL may be NULL for indirect calls or libcalls.
   If WARN, diagnose sseregparm use without SSE enabled.  */
4499 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4501 gcc_assert (!TARGET_64BIT);
4503 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4504 by the sseregparm attribute. */
4505 if (TARGET_SSEREGPARM
4506 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* The attribute is meaningless without SSE/SSE2; error and bail.  */
4513 error ("Calling %qD with attribute sseregparm without "
4514 "SSE/SSE2 enabled", decl);
4516 error ("Calling %qT with attribute sseregparm without "
4517 "SSE/SSE2 enabled", type);
4525 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4526 (and DFmode for SSE2) arguments in SSE registers. */
4527 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4529 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4530 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4532 return TARGET_SSE2 ? 2 : 1;
4538 /* Return true if EAX is live at the start of the function. Used by
4539 ix86_expand_prologue to determine if we need special help before
4540 calling allocate_stack_worker. */
/* Return true if %eax (hard register 0) is live on entry to the
   current function, judged from dataflow live-out of the entry block.
   Used by ix86_expand_prologue before calling allocate_stack_worker.  */
4543 ix86_eax_live_at_start_p (void)
4545 /* Cheat. Don't bother working forward from ix86_function_regparm
4546 to the function type to whether an actual argument is located in
4547 eax. Instead just look at cfg info, which is still close enough
4548 to correct at this point. This gives false positives for broken
4549 functions that might use uninitialized data that happens to be
4550 allocated in eax, but who cares? */
4551 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4554 /* Value is the number of bytes of arguments automatically
4555 popped when returning from a subroutine call.
4556 FUNDECL is the declaration node of the function (as a tree),
4557 FUNTYPE is the data type of the function (as a tree),
4558 or for a library call it is an identifier node for the subroutine name.
4559 SIZE is the number of bytes of arguments passed on the stack.
4561 On the 80386, the RTD insn may be used to pop them if the number
4562 of args is fixed, but if the number is variable then the caller
4563 must pop them all. RTD can't be used for library calls now
4564 because the library is compiled with the Unix compiler.
4565 Use of RTD is a selectable option, since it is incompatible with
4566 standard Unix calling sequences. If the option is not selected,
4567 the caller must always pop the args.
4569 The attribute stdcall is equivalent to RTD on a per module basis. */
/* Return the number of bytes of stack arguments the callee pops on
   return (0 if the caller pops).  FUNDECL/FUNTYPE describe the callee;
   SIZE is the bytes of stack arguments.  Implements the RTD/stdcall/
   fastcall conventions described in the comment above.  */
4572 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4576 /* None of the 64-bit ABIs pop arguments. */
/* -mrtd applies only to real functions, not libcall identifiers.  */
4580 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4582 /* Cdecl functions override -mrtd, and never pop the stack. */
4583 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4585 /* Stdcall and fastcall functions will pop the stack if not
4587 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4588 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* RTD semantics only apply to non-variadic functions.  */
4591 if (rtd && ! stdarg_p (funtype))
4595 /* Lose any fake structure return argument if it is passed on the stack. */
4596 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4597 && !KEEP_AGGREGATE_RETURN_POINTER)
4599 int nregs = ix86_function_regparm (funtype, fundecl);
/* The hidden aggregate-return pointer is one word on the stack.  */
4601 return GET_MODE_SIZE (Pmode);
4607 /* Argument support functions. */
4609 /* Return true when register may be used to pass function parameters. */
/* Return true if hard register REGNO may be used to pass function
   parameters, for either the 32-bit conventions (regparm/MMX/SSE) or
   the 64-bit SysV/MS ABIs.
   NOTE(review): several branches/returns are missing from this
   excerpt; verify control flow against the full file.  */
4611 ix86_function_arg_regno_p (int regno)
4614 const int *parm_regs;
4619 return (regno < REGPARM_MAX
4620 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4622 return (regno < REGPARM_MAX
4623 || (TARGET_MMX && MMX_REGNO_P (regno)
4624 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4625 || (TARGET_SSE && SSE_REGNO_P (regno)
4626 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4631 if (SSE_REGNO_P (regno) && TARGET_SSE)
4636 if (TARGET_SSE && SSE_REGNO_P (regno)
4637 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4641 /* TODO: The function should depend on current function ABI but
4642 builtins.c would need updating then. Therefore we use the
4645 /* RAX is used as hidden argument to va_arg functions. */
4646 if (ix86_abi == SYSV_ABI && regno == AX_REG)
/* Otherwise check REGNO against the integer parameter registers of
   the global ABI.  */
4649 if (ix86_abi == MS_ABI)
4650 parm_regs = x86_64_ms_abi_int_parameter_registers;
4652 parm_regs = x86_64_int_parameter_registers;
4653 for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX
4654 : X86_64_REGPARM_MAX); i++)
4655 if (regno == parm_regs[i])
4660 /* Return if we do not know how to pass TYPE solely in registers. */
/* Return true if an argument of MODE/TYPE cannot be passed solely in
   registers and must go on the stack.  */
4663 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4665 if (must_pass_in_stack_var_size_or_pad (mode, type))
4668 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4669 The layout_type routine is crafty and tries to trick us into passing
4670 currently unsupported vector types on the stack by using TImode. */
4671 return (!TARGET_64BIT && mode == TImode
4672 && type && TREE_CODE (type) != VECTOR_TYPE);
4675 /* It returns the size, in bytes, of the area reserved for arguments passed
4676 in registers for the function represented by fndecl dependent to the used
/* FNDECL may be a FUNCTION_DECL or a function type; only the MS ABI
   reserves a (shadow) register-parameter area.  */
4679 ix86_reg_parm_stack_space (const_tree fndecl)
4681 enum calling_abi call_abi = SYSV_ABI;
4682 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4683 call_abi = ix86_function_abi (fndecl);
4685 call_abi = ix86_function_type_abi (fndecl);
4686 if (call_abi == MS_ABI)
4691 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* Determine the calling ABI of FNTYPE: start from the global default
   (ix86_abi) and let the "ms_abi"/"sysv_abi" type attributes override
   it.  Only meaningful for 64-bit targets.  */
4694 ix86_function_type_abi (const_tree fntype)
4696 if (TARGET_64BIT && fntype != NULL)
4698 enum calling_abi abi = ix86_abi;
4699 if (abi == SYSV_ABI)
4701 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4704 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* Return the calling ABI of function declaration FNDECL by delegating
   to ix86_function_type_abi on its type.  */
4711 static enum calling_abi
4712 ix86_function_abi (const_tree fndecl)
4716 return ix86_function_type_abi (TREE_TYPE (fndecl));
4719 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* The cached per-function ABI is only meaningful when a function is
   being compiled for a 64-bit target.  */
4722 ix86_cfun_abi (void)
4724 if (! cfun || ! TARGET_64BIT)
4726 return cfun->machine->call_abi;
4730 extern void init_regs (void);
4732 /* Implementation of call abi switching target hook. Specific to FNDECL
4733 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4734 for more details. */
/* Record in cfun the ABI of FNDECL (the global default when FNDECL is
   NULL, e.g. for libcalls).  */
4736 ix86_call_abi_override (const_tree fndecl)
4738 if (fndecl == NULL_TREE)
4739 cfun->machine->call_abi = ix86_abi;
4741 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4744 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4745 re-initialization of init_regs each time we switch function context since
4746 this is needed only during RTL expansion. */
4748 ix86_maybe_switch_abi (void)
/* %esi is call-used under MS ABI but call-saved under SysV; if the
   table disagrees with the current function's ABI, re-init.  */
4751 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4755 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4756 for a call to a function whose data type is FNTYPE.
4757 For a library call, FNTYPE is 0. */
/* Initialize CUM for scanning the arguments of a call to a function
   of type FNTYPE (0 for a libcall named LIBNAME).  Sets the per-call
   ABI, the integer/SSE/MMX register budgets, and the varargs and
   fastcall/regparm adjustments.
   NOTE(review): some conditionals/braces are missing from this
   excerpt; verify against the full file.  */
4760 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4761 tree fntype, /* tree ptr for function decl */
4762 rtx libname, /* SYMBOL_REF of library name or 0 */
4765 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4766 memset (cum, 0, sizeof (*cum));
4769 cum->call_abi = ix86_function_abi (fndecl);
4771 cum->call_abi = ix86_function_type_abi (fntype);
4772 /* Set up the number of registers to use for passing arguments. */
4774 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4775 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it");
4776 cum->nregs = ix86_regparm;
/* Cross-ABI calls use the callee ABI's register counts rather than
   the compiler-wide default.  */
4779 if (cum->call_abi != ix86_abi)
4780 cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX
4785 cum->sse_nregs = SSE_REGPARM_MAX;
4788 if (cum->call_abi != ix86_abi)
4789 cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4790 : X64_SSE_REGPARM_MAX;
4794 cum->mmx_nregs = MMX_REGPARM_MAX;
4795 cum->warn_avx = true;
4796 cum->warn_sse = true;
4797 cum->warn_mmx = true;
4799 /* Because type might mismatch in between caller and callee, we need to
4800 use actual type of function for local calls.
4801 FIXME: cgraph_analyze can be told to actually record if function uses
4802 va_start so for local functions maybe_vaarg can be made aggressive
4804 FIXME: once typesytem is fixed, we won't need this code anymore. */
4806 fntype = TREE_TYPE (fndecl);
4807 cum->maybe_vaarg = (fntype
4808 ? (!prototype_p (fntype) || stdarg_p (fntype))
4813 /* If there are variable arguments, then we won't pass anything
4814 in registers in 32-bit mode. */
4815 if (stdarg_p (fntype))
4826 /* Use ecx and edx registers if function has fastcall attribute,
4827 else look for regparm information. */
4830 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4836 cum->nregs = ix86_function_regparm (fntype, fndecl);
4839 /* Set up the number of SSE registers used for passing SFmode
4840 and DFmode arguments. Warn for mismatching ABI. */
4841 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4845 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4846 But in the case of vector types, it is some vector mode.
4848 When we have only some of our vector isa extensions enabled, then there
4849 are some modes for which vector_mode_supported_p is false. For these
4850 modes, the generic vector support in gcc will choose some non-vector mode
4851 in order to implement the type. By computing the natural mode, we'll
4852 select the proper ABI location for the operand and not depend on whatever
4853 the middle-end decides to do with these vector types.
4855 The midde-end can't deal with the vector types > 16 bytes. In this
4856 case, we return the original mode and warn ABI change if CUM isn't
/* Return the "natural" machine mode for TYPE (see the comment above):
   TYPE_MODE in most cases, but for vector types whose TYPE_MODE is not
   a vector mode, search for a real vector mode with the same inner
   mode and element count.  Warns about the GCC 4.4 AVX ABI change for
   32-byte vectors when AVX is disabled.  */
4859 static enum machine_mode
4860 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4862 enum machine_mode mode = TYPE_MODE (type);
4864 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4866 HOST_WIDE_INT size = int_size_in_bytes (type);
4867 if ((size == 8 || size == 16 || size == 32)
4868 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4869 && TYPE_VECTOR_SUBPARTS (type) > 1)
4871 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Start the search at the first vector mode of the right class.  */
4873 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4874 mode = MIN_MODE_VECTOR_FLOAT;
4876 mode = MIN_MODE_VECTOR_INT;
4878 /* Get the mode which has this inner mode and number of units. */
4879 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4880 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4881 && GET_MODE_INNER (mode) == innermode)
4883 if (size == 32 && !TARGET_AVX)
/* Warn once per compilation about the 4.4 ABI change.  */
4885 static bool warnedavx;
4892 warning (0, "AVX vector argument without AVX "
4893 "enabled changes the ABI");
4895 return TYPE_MODE (type);
4908 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4909 this may not agree with the mode that the type system has chosen for the
4910 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4911 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
/* Return an rtx for passing a value in register REGNO whose natural
   mode is MODE.  If the type system chose a real mode (ORIG_MODE !=
   BLKmode), use that directly; otherwise wrap a MODE register in a
   one-element PARALLEL so the BLKmode value is described correctly.  */
4914 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4919 if (orig_mode != BLKmode)
4920 tmp = gen_rtx_REG (orig_mode, regno);
4923 tmp = gen_rtx_REG (mode, regno);
4924 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4925 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4931 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4932 of this code is to classify each 8bytes of incoming argument by the register
4933 class and assign registers accordingly. */
4935 /* Return the union class of CLASS1 and CLASS2.
4936 See the x86-64 PS ABI for details. */
/* Merge two x86-64 argument-classification classes per the SysV
   psABI merging rules (rules #1-#6 below) and return the result.  */
4938 static enum x86_64_reg_class
4939 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4941 /* Rule #1: If both classes are equal, this is the resulting class. */
4942 if (class1 == class2)
4945 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4947 if (class1 == X86_64_NO_CLASS)
4949 if (class2 == X86_64_NO_CLASS)
4952 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4953 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4954 return X86_64_MEMORY_CLASS;
4956 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays INTEGERSI (both describe a 4-byte slot).  */
4957 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4958 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4959 return X86_64_INTEGERSI_CLASS;
4960 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4961 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4962 return X86_64_INTEGER_CLASS;
4964 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4966 if (class1 == X86_64_X87_CLASS
4967 || class1 == X86_64_X87UP_CLASS
4968 || class1 == X86_64_COMPLEX_X87_CLASS
4969 || class2 == X86_64_X87_CLASS
4970 || class2 == X86_64_X87UP_CLASS
4971 || class2 == X86_64_COMPLEX_X87_CLASS)
4972 return X86_64_MEMORY_CLASS;
4974 /* Rule #6: Otherwise class SSE is used. */
4975 return X86_64_SSE_CLASS;
4978 /* Classify the argument of type TYPE and mode MODE.
4979 CLASSES will be filled by the register class used to pass each word
4980 of the operand. The number of words is returned. In case the parameter
4981 should be passed in memory, 0 is returned. As a special case for zero
4982 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4984 BIT_OFFSET is used internally for handling records and specifies offset
4985 of the offset in bits modulo 256 to avoid overflow cases.
4987 See the x86-64 PS ABI for details.
4991 classify_argument (enum machine_mode mode, const_tree type,
4992 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4994 HOST_WIDE_INT bytes =
4995 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4996 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4998 /* Variable sized entities are always passed/returned in memory. */
5002 if (mode != VOIDmode
5003 && targetm.calls.must_pass_in_stack (mode, type))
5006 if (type && AGGREGATE_TYPE_P (type))
5010 enum x86_64_reg_class subclasses[MAX_CLASSES];
5012 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5016 for (i = 0; i < words; i++)
5017 classes[i] = X86_64_NO_CLASS;
5019 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5020 signalize memory class, so handle it as special case. */
5023 classes[0] = X86_64_NO_CLASS;
5027 /* Classify each field of record and merge classes. */
5028 switch (TREE_CODE (type))
5031 /* And now merge the fields of structure. */
5032 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5034 if (TREE_CODE (field) == FIELD_DECL)
5038 if (TREE_TYPE (field) == error_mark_node)
5041 /* Bitfields are always classified as integer. Handle them
5042 early, since later code would consider them to be
5043 misaligned integers. */
5044 if (DECL_BIT_FIELD (field))
5046 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5047 i < ((int_bit_position (field) + (bit_offset % 64))
5048 + tree_low_cst (DECL_SIZE (field), 0)
5051 merge_classes (X86_64_INTEGER_CLASS,
5058 type = TREE_TYPE (field);
5060 /* Flexible array member is ignored. */
5061 if (TYPE_MODE (type) == BLKmode
5062 && TREE_CODE (type) == ARRAY_TYPE
5063 && TYPE_SIZE (type) == NULL_TREE
5064 && TYPE_DOMAIN (type) != NULL_TREE
5065 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5070 if (!warned && warn_psabi)
5073 inform (input_location,
5074 "The ABI of passing struct with"
5075 " a flexible array member has"
5076 " changed in GCC 4.4");
5080 num = classify_argument (TYPE_MODE (type), type,
5082 (int_bit_position (field)
5083 + bit_offset) % 256);
5086 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5087 for (i = 0; i < num && (i + pos) < words; i++)
5089 merge_classes (subclasses[i], classes[i + pos]);
5096 /* Arrays are handled as small records. */
5099 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5100 TREE_TYPE (type), subclasses, bit_offset);
5104 /* The partial classes are now full classes. */
5105 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5106 subclasses[0] = X86_64_SSE_CLASS;
5107 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5108 && !((bit_offset % 64) == 0 && bytes == 4))
5109 subclasses[0] = X86_64_INTEGER_CLASS;
5111 for (i = 0; i < words; i++)
5112 classes[i] = subclasses[i % num];
5117 case QUAL_UNION_TYPE:
5118 /* Unions are similar to RECORD_TYPE but offset is always 0.
5120 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5122 if (TREE_CODE (field) == FIELD_DECL)
5126 if (TREE_TYPE (field) == error_mark_node)
5129 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5130 TREE_TYPE (field), subclasses,
5134 for (i = 0; i < num; i++)
5135 classes[i] = merge_classes (subclasses[i], classes[i]);
5146 /* When size > 16 bytes, if the first one isn't
5147 X86_64_SSE_CLASS or any other ones aren't
5148 X86_64_SSEUP_CLASS, everything should be passed in
5150 if (classes[0] != X86_64_SSE_CLASS)
5153 for (i = 1; i < words; i++)
5154 if (classes[i] != X86_64_SSEUP_CLASS)
5158 /* Final merger cleanup. */
5159 for (i = 0; i < words; i++)
5161 /* If one class is MEMORY, everything should be passed in
5163 if (classes[i] == X86_64_MEMORY_CLASS)
5166 /* The X86_64_SSEUP_CLASS should be always preceded by
5167 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5168 if (classes[i] == X86_64_SSEUP_CLASS
5169 && classes[i - 1] != X86_64_SSE_CLASS
5170 && classes[i - 1] != X86_64_SSEUP_CLASS)
5172 /* The first one should never be X86_64_SSEUP_CLASS. */
5173 gcc_assert (i != 0);
5174 classes[i] = X86_64_SSE_CLASS;
5177 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5178 everything should be passed in memory. */
5179 if (classes[i] == X86_64_X87UP_CLASS
5180 && (classes[i - 1] != X86_64_X87_CLASS))
5184 /* The first one should never be X86_64_X87UP_CLASS. */
5185 gcc_assert (i != 0);
5186 if (!warned && warn_psabi)
5189 inform (input_location,
5190 "The ABI of passing union with long double"
5191 " has changed in GCC 4.4");
5199 /* Compute alignment needed. We align all types to natural boundaries with
5200 exception of XFmode that is aligned to 64bits. */
5201 if (mode != VOIDmode && mode != BLKmode)
5203 int mode_alignment = GET_MODE_BITSIZE (mode);
5206 mode_alignment = 128;
5207 else if (mode == XCmode)
5208 mode_alignment = 256;
5209 if (COMPLEX_MODE_P (mode))
5210 mode_alignment /= 2;
5211 /* Misaligned fields are always returned in memory. */
5212 if (bit_offset % mode_alignment)
5216 /* for V1xx modes, just use the base mode */
5217 if (VECTOR_MODE_P (mode) && mode != V1DImode
5218 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5219 mode = GET_MODE_INNER (mode);
5221 /* Classification of atomic types. */
5226 classes[0] = X86_64_SSE_CLASS;
5229 classes[0] = X86_64_SSE_CLASS;
5230 classes[1] = X86_64_SSEUP_CLASS;
5240 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5244 classes[0] = X86_64_INTEGERSI_CLASS;
5247 else if (size <= 64)
5249 classes[0] = X86_64_INTEGER_CLASS;
5252 else if (size <= 64+32)
5254 classes[0] = X86_64_INTEGER_CLASS;
5255 classes[1] = X86_64_INTEGERSI_CLASS;
5258 else if (size <= 64+64)
5260 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5268 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5272 /* OImode shouldn't be used directly. */
5277 if (!(bit_offset % 64))
5278 classes[0] = X86_64_SSESF_CLASS;
5280 classes[0] = X86_64_SSE_CLASS;
5283 classes[0] = X86_64_SSEDF_CLASS;
5286 classes[0] = X86_64_X87_CLASS;
5287 classes[1] = X86_64_X87UP_CLASS;
5290 classes[0] = X86_64_SSE_CLASS;
5291 classes[1] = X86_64_SSEUP_CLASS;
5294 classes[0] = X86_64_SSE_CLASS;
5295 if (!(bit_offset % 64))
5301 if (!warned && warn_psabi)
5304 inform (input_location,
5305 "The ABI of passing structure with complex float"
5306 " member has changed in GCC 4.4");
5308 classes[1] = X86_64_SSESF_CLASS;
5312 classes[0] = X86_64_SSEDF_CLASS;
5313 classes[1] = X86_64_SSEDF_CLASS;
5316 classes[0] = X86_64_COMPLEX_X87_CLASS;
5319 /* This modes is larger than 16 bytes. */
5327 classes[0] = X86_64_SSE_CLASS;
5328 classes[1] = X86_64_SSEUP_CLASS;
5329 classes[2] = X86_64_SSEUP_CLASS;
5330 classes[3] = X86_64_SSEUP_CLASS;
5338 classes[0] = X86_64_SSE_CLASS;
5339 classes[1] = X86_64_SSEUP_CLASS;
5346 classes[0] = X86_64_SSE_CLASS;
5352 gcc_assert (VECTOR_MODE_P (mode));
5357 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5359 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5360 classes[0] = X86_64_INTEGERSI_CLASS;
5362 classes[0] = X86_64_INTEGER_CLASS;
5363 classes[1] = X86_64_INTEGER_CLASS;
5364 return 1 + (bytes > 8);
5368 /* Examine the argument and return set number of register required in each
5369 class. Return 0 iff parameter should be passed in memory. */
/* On success, *INT_NREGS and *SSE_NREGS receive the number of
   general-purpose and SSE registers the argument needs, as derived
   from its classify_argument () classes.  NOTE(review): the increment
   statements for the INTEGER/SSE cases are elided in this extract;
   behavior inferred from the case grouping — confirm against full source.  */
5371 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5372 int *int_nregs, int *sse_nregs)
5374 enum x86_64_reg_class regclass[MAX_CLASSES];
5375 int n = classify_argument (mode, type, regclass, 0);
/* Walk the classes backwards, tallying register demand per class.  */
5381 for (n--; n >= 0; n--)
5382 switch (regclass[n])
5384 case X86_64_INTEGER_CLASS:
5385 case X86_64_INTEGERSI_CLASS:
5388 case X86_64_SSE_CLASS:
5389 case X86_64_SSESF_CLASS:
5390 case X86_64_SSEDF_CLASS:
5393 case X86_64_NO_CLASS:
5394 case X86_64_SSEUP_CLASS:
5396 case X86_64_X87_CLASS:
5397 case X86_64_X87UP_CLASS:
/* COMPLEX_X87 values can be returned in registers (two x87 slots)
   but are never register-passed as arguments.  */
5401 case X86_64_COMPLEX_X87_CLASS:
5402 return in_return ? 2 : 0;
5403 case X86_64_MEMORY_CLASS:
5409 /* Construct container for the argument used by GCC interface. See
5410 FUNCTION_ARG for the detailed description. */
/* Builds the rtx describing where an argument (or return value, when
   IN_RETURN) of MODE/TYPE lives: a single REG for the simple cases,
   or a PARALLEL of EXPR_LISTs mapping each eightbyte to a register.
   INTREG points at the integer-register sequence to allocate from;
   SSE_REGNO is the first SSE register to use.  Returns NULL (in the
   elided paths) when the value must go in memory.  */
5413 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5414 const_tree type, int in_return, int nintregs, int nsseregs,
5415 const int *intreg, int sse_regno)
5417 /* The following variables hold the static issued_error state. */
/* Static so each diagnostic is emitted at most once per compilation.  */
5418 static bool issued_sse_arg_error;
5419 static bool issued_sse_ret_error;
5420 static bool issued_x87_ret_error;
5422 enum machine_mode tmpmode;
5424 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5425 enum x86_64_reg_class regclass[MAX_CLASSES];
5429 int needed_sseregs, needed_intregs;
5430 rtx exp[MAX_CLASSES];
5433 n = classify_argument (mode, type, regclass, 0);
5436 if (!examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of the required kinds: pass in memory.  */
5439 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5442 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5443 some less clueful developer tries to use floating-point anyway. */
5444 if (needed_sseregs && !TARGET_SSE)
5448 if (!issued_sse_ret_error)
5450 error ("SSE register return with SSE disabled");
5451 issued_sse_ret_error = true;
5454 else if (!issued_sse_arg_error)
5456 error ("SSE register argument with SSE disabled");
5457 issued_sse_arg_error = true;
5462 /* Likewise, error if the ABI requires us to return values in the
5463 x87 registers and the user specified -mno-80387. */
5464 if (!TARGET_80387 && in_return)
5465 for (i = 0; i < n; i++)
5466 if (regclass[i] == X86_64_X87_CLASS
5467 || regclass[i] == X86_64_X87UP_CLASS
5468 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5470 if (!issued_x87_ret_error)
5472 error ("x87 register return with x87 disabled");
5473 issued_x87_ret_error = true;
5478 /* First construct simple cases. Avoid SCmode, since we want to use
5479 single register to pass this type. */
5480 if (n == 1 && mode != SCmode)
5481 switch (regclass[0])
5483 case X86_64_INTEGER_CLASS:
5484 case X86_64_INTEGERSI_CLASS:
5485 return gen_rtx_REG (mode, intreg[0]);
5486 case X86_64_SSE_CLASS:
5487 case X86_64_SSESF_CLASS:
5488 case X86_64_SSEDF_CLASS:
5489 if (mode != BLKmode)
5490 return gen_reg_or_parallel (mode, orig_mode,
5491 SSE_REGNO (sse_regno))
5493 case X86_64_X87_CLASS:
5494 case X86_64_COMPLEX_X87_CLASS:
5495 return gen_rtx_REG (mode, FIRST_STACK_REG);
5496 case X86_64_NO_CLASS:
5497 /* Zero sized array, struct or class. */
/* Two-eightbyte SSE value (e.g. a 16-byte vector): one XMM register.  */
5502 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5503 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5504 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5506 && regclass[0] == X86_64_SSE_CLASS
5507 && regclass[1] == X86_64_SSEUP_CLASS
5508 && regclass[2] == X86_64_SSEUP_CLASS
5509 && regclass[3] == X86_64_SSEUP_CLASS
5511 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5514 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5515 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* A pair of consecutive integer registers can hold CDImode/TImode/TFmode
   directly, avoiding the PARALLEL.  */
5516 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5517 && regclass[1] == X86_64_INTEGER_CLASS
5518 && (mode == CDImode || mode == TImode || mode == TFmode)
5519 && intreg[0] + 1 == intreg[1])
5520 return gen_rtx_REG (mode, intreg[0]);
5522 /* Otherwise figure out the entries of the PARALLEL. */
5523 for (i = 0; i < n; i++)
5527 switch (regclass[i])
5529 case X86_64_NO_CLASS:
5531 case X86_64_INTEGER_CLASS:
5532 case X86_64_INTEGERSI_CLASS:
5533 /* Merge TImodes on aligned occasions here too. */
/* Trailing partial eightbyte: pick an integer mode of the exact
   remaining size.  */
5534 if (i * 8 + 8 > bytes)
5535 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5536 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5540 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5541 if (tmpmode == BLKmode)
5543 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5544 gen_rtx_REG (tmpmode, *intreg),
5548 case X86_64_SSESF_CLASS:
5549 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5550 gen_rtx_REG (SFmode,
5551 SSE_REGNO (sse_regno)),
5555 case X86_64_SSEDF_CLASS:
5556 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5557 gen_rtx_REG (DFmode,
5558 SSE_REGNO (sse_regno)),
5562 case X86_64_SSE_CLASS:
5570 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5580 && regclass[1] == X86_64_SSEUP_CLASS
5581 && regclass[2] == X86_64_SSEUP_CLASS
5582 && regclass[3] == X86_64_SSEUP_CLASS);
5589 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5590 gen_rtx_REG (tmpmode,
5591 SSE_REGNO (sse_regno)),
5600 /* Empty aligned struct, union or class. */
/* Assemble the collected EXPR_LIST entries into the final PARALLEL.  */
5604 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5605 for (i = 0; i < nexps; i++)
5606 XVECEXP (ret, 0, i) = exp [i];
5610 /* Update the data in CUM to advance over an argument of mode MODE
5611 and data type TYPE. (TYPE is null for libcalls where that information
5612 may not be available.) */
/* 32-bit variant: depending on MODE (the mode dispatch is elided in
   this extract), consumes WORDS words of integer registers, or one
   SSE register, or one MMX register; BYTES is the raw argument size.  */
5615 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5616 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-register path: advance all three counters in lockstep.  */
5632 cum->words += words;
5633 cum->nregs -= words;
5634 cum->regno += words;
5636 if (cum->nregs <= 0)
5644 /* OImode shouldn't be used directly. */
5648 if (cum->float_in_sse < 2)
5651 if (cum->float_in_sse < 1)
/* SSE registers are only used for non-aggregate values here.  */
5668 if (!type || !AGGREGATE_TYPE_P (type))
5670 cum->sse_words += words;
5671 cum->sse_nregs -= 1;
5672 cum->sse_regno += 1;
5673 if (cum->sse_nregs <= 0)
/* MMX path mirrors the SSE path, one MMX register per argument.  */
5686 if (!type || !AGGREGATE_TYPE_P (type))
5688 cum->mmx_words += words;
5689 cum->mmx_nregs -= 1;
5690 cum->mmx_regno += 1;
5691 if (cum->mmx_nregs <= 0)
/* 64-bit SysV variant of argument advance: either the argument fits in
   the remaining integer/SSE registers (consume them), or it is passed
   on the stack (consume WORDS stack words only).  */
5702 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5703 tree type, HOST_WIDE_INT words, int named)
5705 int int_nregs, sse_nregs;
5707 /* Unnamed 256bit vector mode parameters are passed on stack. */
5708 if (!named && VALID_AVX256_REG_MODE (mode))
/* examine_argument returning 0 means "pass in memory".  */
5711 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5712 cum->words += words;
5713 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5715 cum->nregs -= int_nregs;
5716 cum->sse_nregs -= sse_nregs;
5717 cum->regno += int_nregs;
5718 cum->sse_regno += sse_nregs;
/* Register allocation failed: the whole argument goes on the stack.  */
5721 cum->words += words;
/* Win64 ABI variant: every argument occupies exactly one slot; anything
   whose size is not 1, 2, 4 or 8 bytes must already have been converted
   to pass-by-reference (see ix86_pass_by_reference).  */
5725 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5726 HOST_WIDE_INT words)
5728 /* Otherwise, this should be passed indirect. */
5729 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5731 cum->words += words;
/* Top-level FUNCTION_ARG_ADVANCE worker: compute the argument size in
   bytes and words, then dispatch to the ABI-specific helper
   (Win64, 64-bit SysV, or 32-bit).  */
5740 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5741 tree type, int named)
5743 HOST_WIDE_INT bytes, words;
/* BLKmode carries no size; query the tree type instead.  */
5745 if (mode == BLKmode)
5746 bytes = int_size_in_bytes (type);
5748 bytes = GET_MODE_SIZE (mode);
5749 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5752 mode = type_natural_mode (type, NULL);
5754 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5755 function_arg_advance_ms_64 (cum, bytes, words);
5756 else if (TARGET_64BIT)
5757 function_arg_advance_64 (cum, mode, type, words, named);
5759 function_arg_advance_32 (cum, mode, type, bytes, words);
5762 /* Define where to put the arguments to a function.
5763 Value is zero to push the argument on the stack,
5764 or a hard register in which to store the argument.
5766 MODE is the argument's machine mode.
5767 TYPE is the data type of the argument (as a tree).
5768 This is null for libcalls where that information may
5770 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5771 the preceding args and about the function being called.
5772 NAMED is nonzero if this argument is a named parameter
5773 (otherwise it is an extra parameter matching an ellipsis). */
/* 32-bit register-passing: integer regs for regparm/fastcall, SSE regs
   for vectors and (with -mfpmath=sse attributes) scalar floats, MMX
   regs for 8-byte vectors.  Mode dispatch lines are elided here.  */
5776 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5777 enum machine_mode orig_mode, tree type,
5778 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* One-shot warning flags, shared across all calls.  */
5780 static bool warnedsse, warnedmmx;
5782 /* Avoid the AL settings for the Unix64 ABI. */
5783 if (mode == VOIDmode)
5799 if (words <= cum->nregs)
5801 int regno = cum->regno;
5803 /* Fastcall allocates the first two DWORD (SImode) or
5804 smaller arguments to ECX and EDX if it isn't an
5810 || (type && AGGREGATE_TYPE_P (type)))
5813 /* ECX not EAX is the first allocated register. */
5814 if (regno == AX_REG)
5817 return gen_rtx_REG (mode, regno);
5822 if (cum->float_in_sse < 2)
5825 if (cum->float_in_sse < 1)
5829 /* In 32bit, we pass TImode in xmm registers. */
5836 if (!type || !AGGREGATE_TYPE_P (type))
5838 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5841 warning (0, "SSE vector argument without SSE enabled "
5845 return gen_reg_or_parallel (mode, orig_mode,
5846 cum->sse_regno + FIRST_SSE_REG);
5851 /* OImode shouldn't be used directly. */
5860 if (!type || !AGGREGATE_TYPE_P (type))
5863 return gen_reg_or_parallel (mode, orig_mode,
5864 cum->sse_regno + FIRST_SSE_REG);
/* 8-byte vector arguments go in MMX registers, with a one-time warning
   when MMX code generation is disabled.  */
5873 if (!type || !AGGREGATE_TYPE_P (type))
5875 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5878 warning (0, "MMX vector argument without MMX enabled "
5882 return gen_reg_or_parallel (mode, orig_mode,
5883 cum->mmx_regno + FIRST_MMX_REG);
/* 64-bit SysV argument passing: delegates the real work to
   construct_container; also implements the hidden AL register that
   tells a varargs callee how many SSE registers were used.  */
5892 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5893 enum machine_mode orig_mode, tree type, int named)
5895 /* Handle a hidden AL argument containing number of registers
5896 for varargs x86-64 functions. */
5897 if (mode == VOIDmode)
5898 return GEN_INT (cum->maybe_vaarg
5899 ? (cum->sse_nregs < 0
5900 ? (cum->call_abi == ix86_abi
5902 : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5903 : X64_SSE_REGPARM_MAX))
5918 /* Unnamed 256bit vector mode parameters are passed on stack. */
5924 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5926 &x86_64_int_parameter_registers [cum->regno],
/* Win64 ABI argument passing: one register slot per argument (integer
   register by position; SFmode/DFmode use the positionally matching
   SSE register), with the unnamed-float double-assignment quirk.  */
5931 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5932 enum machine_mode orig_mode, int named,
5933 HOST_WIDE_INT bytes)
5937 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5938 We use value of -2 to specify that current function call is MSABI. */
5939 if (mode == VOIDmode)
5940 return GEN_INT (-2);
5942 /* If we've run out of registers, it goes on the stack. */
5943 if (cum->nregs == 0)
5946 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5948 /* Only floating point modes are passed in anything but integer regs. */
5949 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5952 regno = cum->regno + FIRST_SSE_REG;
5957 /* Unnamed floating parameters are passed in both the
5958 SSE and integer registers. */
/* Build a two-entry PARALLEL so the value is materialized in both
   the SSE register and the integer register.  */
5959 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5960 t2 = gen_rtx_REG (mode, regno);
5961 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5962 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5963 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5966 /* Handle aggregated types passed in register. */
/* Small BLKmode aggregates are retyped to the integer mode of their
   size (SImode up to 4 bytes, DImode up to 8).  */
5967 if (orig_mode == BLKmode)
5969 if (bytes > 0 && bytes <= 8)
5970 mode = (bytes > 4 ? DImode : SImode);
5971 if (mode == BLKmode)
5975 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Top-level FUNCTION_ARG worker: normalize vector types to their
   natural machine mode, then dispatch to the ABI-specific helper
   (Win64, 64-bit SysV, or 32-bit).  */
5979 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5980 tree type, int named)
5982 enum machine_mode mode = omode;
5983 HOST_WIDE_INT bytes, words;
5985 if (mode == BLKmode)
5986 bytes = int_size_in_bytes (type);
5988 bytes = GET_MODE_SIZE (mode);
5989 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5991 /* To simplify the code below, represent vector types with a vector mode
5992 even if MMX/SSE are not active. */
5993 if (type && TREE_CODE (type) == VECTOR_TYPE)
5994 mode = type_natural_mode (type, cum);
5996 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5997 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5998 else if (TARGET_64BIT)
5999 return function_arg_64 (cum, mode, omode, type, named);
6001 return function_arg_32 (cum, mode, omode, type, bytes, words);
6004 /* A C expression that indicates when an argument must be passed by
6005 reference. If nonzero for an argument, a copy of that argument is
6006 made in memory and a pointer to the argument is passed instead of
6007 the argument itself. The pointer is passed in whatever way is
6008 appropriate for passing a pointer to that type. */
/* Win64: arrays, aggregates not sized 1/2/4/8 bytes, and (per the
   elided cases) __m128-sized values are passed by reference.
   SysV 64-bit: only variable-sized types (int_size_in_bytes == -1).  */
6011 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6012 enum machine_mode mode ATTRIBUTE_UNUSED,
6013 const_tree type, bool named ATTRIBUTE_UNUSED)
6015 /* See Windows x64 Software Convention. */
6016 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6018 int msize = (int) GET_MODE_SIZE (mode);
6021 /* Arrays are passed by reference. */
6022 if (TREE_CODE (type) == ARRAY_TYPE)
6025 if (AGGREGATE_TYPE_P (type))
6027 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6028 are passed by reference. */
6029 msize = int_size_in_bytes (type);
6033 /* __m128 is passed by reference. */
6035 case 1: case 2: case 4: case 8:
6041 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6047 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* Returns true for 128-bit-aligned SSE-mode scalars/vectors and,
   recursively, for any aggregate containing such a field.  Types whose
   alignment is below 128 bits are rejected early.  */
6050 contains_aligned_value_p (tree type)
6052 enum machine_mode mode = TYPE_MODE (type);
6053 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6057 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6059 if (TYPE_ALIGN (type) < 128)
6062 if (AGGREGATE_TYPE_P (type))
6064 /* Walk the aggregates recursively. */
6065 switch (TREE_CODE (type))
6069 case QUAL_UNION_TYPE:
6073 /* Walk all the structure fields. */
6074 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6076 if (TREE_CODE (field) == FIELD_DECL
6077 && contains_aligned_value_p (TREE_TYPE (field)))
6084 /* Just for use if some languages passes arrays by value. */
/* Array case: an array is aligned iff its element type is.  */
6085 if (contains_aligned_value_p (TREE_TYPE (type)))
6096 /* Gives the alignment boundary, in bits, of an argument with the
6097 specified mode and type. */
/* Starts from the type's (canonicalized) alignment, clamps it to at
   least PARM_BOUNDARY, then on 32-bit demotes everything except
   SSE-mode values / 128-bit-aligned aggregates back to PARM_BOUNDARY.
   Result is capped at BIGGEST_ALIGNMENT.  */
6100 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6105 /* Since canonical type is used for call, we convert it to
6106 canonical type if needed. */
6107 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6108 type = TYPE_CANONICAL (type);
6109 align = TYPE_ALIGN (type);
/* No type available (libcall): fall back to the mode's alignment.  */
6112 align = GET_MODE_ALIGNMENT (mode);
6113 if (align < PARM_BOUNDARY)
6114 align = PARM_BOUNDARY;
6115 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6116 natural boundaries. */
6117 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6119 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6120 make an exception for SSE modes since these require 128bit
6123 The handling here differs from field_alignment. ICC aligns MMX
6124 arguments to 4 byte boundaries, while structure fields are aligned
6125 to 8 byte boundaries. */
6128 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6129 align = PARM_BOUNDARY;
6133 if (!contains_aligned_value_p (type))
6134 align = PARM_BOUNDARY;
6137 if (align > BIGGEST_ALIGNMENT)
6138 align = BIGGEST_ALIGNMENT;
6142 /* Return true if N is a possible register number of function value. */
/* NOTE(review): most case labels of this switch are elided in this
   extract; only the FIRST_FLOAT_REG arm is visible.  */
6145 ix86_function_value_regno_p (int regno)
6152 case FIRST_FLOAT_REG:
6153 /* TODO: The function should depend on current function ABI but
6154 builtins.c would need updating then. Therefore we use the
/* Win64 never returns values in x87 st(0).  */
6156 if (TARGET_64BIT && ix86_abi == MS_ABI)
6158 return TARGET_FLOAT_RETURNS_IN_80387;
6164 if (TARGET_MACHO || TARGET_64BIT)
6172 /* Define how to find the value returned by a function.
6173 VALTYPE is the data type of the value (as a tree).
6174 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6175 otherwise, FUNC is 0. */
/* 32-bit return-register selection: MM0 for 8-byte vectors, XMM0 for
   TImode/16-byte vectors, YMM0 for 32-byte vectors, st(0) for x87
   floats, %eax otherwise — with an sseregparm override to XMM0 for
   SFmode/DFmode.  */
6178 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6179 const_tree fntype, const_tree fn)
6183 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6184 we normally prevent this case when mmx is not available. However
6185 some ABIs may require the result to be returned like DImode. */
6186 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6187 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6189 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6190 we prevent this case when sse is not available. However some ABIs
6191 may require the result to be returned like integer TImode. */
6192 else if (mode == TImode
6193 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6194 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6196 /* 32-byte vector modes in %ymm0. */
6197 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6198 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6200 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6201 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6202 regno = FIRST_FLOAT_REG;
6204 /* Most things go in %eax. */
6207 /* Override FP return register with %xmm0 for local functions when
6208 SSE math is enabled or for functions with sseregparm attribute. */
6209 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6211 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6212 if ((sse_level >= 1 && mode == SFmode)
6213 || (sse_level == 2 && mode == DFmode))
6214 regno = FIRST_SSE_REG;
6217 /* OImode shouldn't be used directly. */
6218 gcc_assert (mode != OImode);
6220 return gen_rtx_REG (orig_mode, regno);
/* 64-bit SysV return-value selection: libcalls (no VALTYPE) use fixed
   registers; typed values go through construct_container with the
   return-register set.  */
6224 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6229 /* Handle libcalls, which don't provide a type node. */
6230 if (valtype == NULL)
6242 return gen_rtx_REG (mode, FIRST_SSE_REG);
6245 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6249 return gen_rtx_REG (mode, AX_REG);
6253 ret = construct_container (mode, orig_mode, valtype, 1,
6254 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6255 x86_64_int_return_registers, 0);
6257 /* For zero sized structures, construct_container returns NULL, but we
6258 need to keep rest of compiler happy by returning meaningful value. */
6260 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Win64 return-value selection: defaults to %rax; 16-byte
   non-complex integer/vector values and SFmode/DFmode scalars
   come back in XMM0.  */
6266 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6268 unsigned int regno = AX_REG;
6272 switch (GET_MODE_SIZE (mode))
6275 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6276 && !COMPLEX_MODE_P (mode))
6277 regno = FIRST_SSE_REG;
6281 if (mode == SFmode || mode == DFmode)
6282 regno = FIRST_SSE_REG;
6288 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value:
   resolve FNTYPE_OR_DECL to a function type, then dispatch to the
   ABI-specific return-value helper.  */
6292 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6293 enum machine_mode orig_mode, enum machine_mode mode)
6295 const_tree fn, fntype;
6298 if (fntype_or_decl && DECL_P (fntype_or_decl))
6299 fn = fntype_or_decl;
6300 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6302 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6303 return function_value_ms_64 (orig_mode, mode);
6304 else if (TARGET_64BIT)
6305 return function_value_64 (orig_mode, mode, valtype);
6307 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: normalize VALTYPE to its natural mode
   and delegate to ix86_function_value_1.  */
6311 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6312 bool outgoing ATTRIBUTE_UNUSED)
6314 enum machine_mode mode, orig_mode;
6316 orig_mode = TYPE_MODE (valtype);
6317 mode = type_natural_mode (valtype, NULL);
6318 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* LIBCALL_VALUE worker: libcalls have no type node, so pass NULL
   valtype/fntype and use MODE for both mode arguments.  */
6322 ix86_libcall_value (enum machine_mode mode)
6324 return ix86_function_value_1 (NULL, NULL, mode, mode);
6327 /* Return true iff type is returned in memory. */
/* 32-bit rules: small aggregates may stay in registers under
   MS_AGGREGATE_RETURN; vector returns depend on the matching ISA
   (MMX/SSE/AVX) being enabled.  Several size branches are elided
   in this extract.  */
6329 static int ATTRIBUTE_UNUSED
6330 return_in_memory_32 (const_tree type, enum machine_mode mode)
6334 if (mode == BLKmode)
6337 size = int_size_in_bytes (type);
6339 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6342 if (VECTOR_MODE_P (mode) || mode == TImode)
6344 /* User-created vectors small enough to fit in EAX. */
6348 /* MMX/3dNow values are returned in MM0,
6349 except when it doesn't exits. */
6351 return (TARGET_MMX ? 0 : 1);
6353 /* SSE values are returned in XMM0, except when it doesn't exist. */
6355 return (TARGET_SSE ? 0 : 1);
6357 /* AVX values are returned in YMM0, except when it doesn't exist. */
6359 return TARGET_AVX ? 0 : 1;
6368 /* OImode shouldn't be used directly. */
6369 gcc_assert (mode != OImode);
/* 64-bit SysV: a value is returned in memory exactly when
   examine_argument classifies it as not register-passable.  */
6374 static int ATTRIBUTE_UNUSED
6375 return_in_memory_64 (const_tree type, enum machine_mode mode)
6377 int needed_intregs, needed_sseregs;
6378 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Win64: 16-byte non-complex integer/vector values return in xmm0;
   everything else must be exactly 1, 2, 4 or 8 bytes to stay in a
   register.  */
6381 static int ATTRIBUTE_UNUSED
6382 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6384 HOST_WIDE_INT size = int_size_in_bytes (type);
6386 /* __m128 is returned in xmm0. */
6387 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6388 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6391 /* Otherwise, the size must be exactly in [1248]. */
6392 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: let the subtarget override entirely,
   otherwise dispatch on word size and ABI.  */
6396 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6398 #ifdef SUBTARGET_RETURN_IN_MEMORY
6399 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6401 const enum machine_mode mode = type_natural_mode (type, NULL);
6405 if (ix86_function_type_abi (fntype) == MS_ABI)
6406 return return_in_memory_ms_64 (type, mode);
6408 return return_in_memory_64 (type, mode);
6411 return return_in_memory_32 (type, mode);
6415 /* Return false iff TYPE is returned in memory. This version is used
6416 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6417 but differs notably in that when MMX is available, 8-byte vectors
6418 are returned in memory, rather than in MMX registers. */
6421 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6424 enum machine_mode mode = type_natural_mode (type, NULL);
/* 64-bit shares the generic SysV decision.  */
6427 return return_in_memory_64 (type, mode);
6429 if (mode == BLKmode)
6432 size = int_size_in_bytes (type);
6434 if (VECTOR_MODE_P (mode))
6436 /* Return in memory only if MMX registers *are* available. This
6437 seems backwards, but it is consistent with the existing
6444 else if (mode == TImode)
6446 else if (mode == XFmode)
6452 /* When returning SSE vector types, we have a choice of either
6453 (1) being abi incompatible with a -march switch, or
6454 (2) generating an error.
6455 Given no good solution, I think the safest thing is one warning.
6456 The user won't be able to use -Werror, but....
6458 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6459 called in response to actually generating a caller or callee that
6460 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6461 via aggregate_value_p for general type probing from tree-ssa. */
6464 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* One-shot warning flags, shared across all calls.  */
6466 static bool warnedsse, warnedmmx;
6468 if (!TARGET_64BIT && type)
6470 /* Look at the return type of the function, not the function type. */
6471 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6473 if (!TARGET_SSE && !warnedsse)
6476 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6479 warning (0, "SSE vector return without SSE enabled "
6484 if (!TARGET_MMX && !warnedmmx)
6486 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6489 warning (0, "MMX vector return without MMX enabled "
6499 /* Create the va_list data type. */
6501 /* Returns the calling convention specific va_list date type.
6502 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
/* i386 and Win64 use a plain char pointer; SysV x86-64 builds the
   four-field __va_list_tag record (gp_offset, fp_offset,
   overflow_arg_area, reg_save_area) wrapped in a one-element array.  */
6505 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6507 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6509 /* For i386 we use plain pointer to argument area. */
6510 if (!TARGET_64BIT || abi == MS_ABI)
6511 return build_pointer_type (char_type_node);
6513 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6514 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6516 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6517 unsigned_type_node);
6518 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6519 unsigned_type_node);
6520 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6522 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so va_arg expansion can find them.  */
6525 va_list_gpr_counter_field = f_gpr;
6526 va_list_fpr_counter_field = f_fpr;
6528 DECL_FIELD_CONTEXT (f_gpr) = record;
6529 DECL_FIELD_CONTEXT (f_fpr) = record;
6530 DECL_FIELD_CONTEXT (f_ovf) = record;
6531 DECL_FIELD_CONTEXT (f_sav) = record;
/* Chain the fields in declaration order and lay the record out.  */
6533 TREE_CHAIN (record) = type_decl;
6534 TYPE_NAME (record) = type_decl;
6535 TYPE_FIELDS (record) = f_gpr;
6536 TREE_CHAIN (f_gpr) = f_fpr;
6537 TREE_CHAIN (f_fpr) = f_ovf;
6538 TREE_CHAIN (f_ovf) = f_sav;
6540 layout_type (record);
6542 /* The correct type is an array type of one element. */
6543 return build_array_type (record, build_index_type (size_zero_node));
6546 /* Setup the builtin va_list data type and for 64-bit the additional
6547 calling convention specific va_list data types. */
6550 ix86_build_builtin_va_list (void)
6552 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6554 /* Initialize abi specific va_list builtin types. */
/* Build the va_list type for the *other* ABI as a distinct variant,
   so MS and SysV va_lists never compare equal.  Conditional structure
   is partially elided in this extract.  */
6558 if (ix86_abi == MS_ABI)
6560 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6561 if (TREE_CODE (t) != RECORD_TYPE)
6562 t = build_variant_type_copy (t);
6563 sysv_va_list_type_node = t;
6568 if (TREE_CODE (t) != RECORD_TYPE)
6569 t = build_variant_type_copy (t);
6570 sysv_va_list_type_node = t;
6572 if (ix86_abi != MS_ABI)
6574 t = ix86_build_builtin_va_list_abi (MS_ABI);
6575 if (TREE_CODE (t) != RECORD_TYPE)
6576 t = build_variant_type_copy (t);
6577 ms_va_list_type_node = t;
6582 if (TREE_CODE (t) != RECORD_TYPE)
6583 t = build_variant_type_copy (t);
6584 ms_va_list_type_node = t;
6591 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* SysV x86-64 varargs prologue: dump the unused named-parameter GPRs
   into the register save area, then (when SSE is live) emit the
   sse_prologue_save computed-jump sequence that stores only the SSE
   registers actually used, as counted by the hidden AL argument.  */
6594 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6603 int regparm = ix86_regparm;
6605 if (cum->call_abi != ix86_abi)
6606 regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6608 /* GPR size of varargs save area. */
6609 if (cfun->va_list_gpr_size)
6610 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6612 ix86_varargs_gpr_size = 0;
6614 /* FPR size of varargs save area. We don't need it if we don't pass
6615 anything in SSE registers. */
6616 if (cum->sse_nregs && cfun->va_list_fpr_size)
6617 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6619 ix86_varargs_fpr_size = 0;
/* Nothing to save at all: no save area is needed.  */
6621 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6624 save_area = frame_pointer_rtx;
6625 set = get_varargs_alias_set ();
/* Save each remaining integer parameter register into its word slot.  */
6627 for (i = cum->regno;
6629 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6632 mem = gen_rtx_MEM (Pmode,
6633 plus_constant (save_area, i * UNITS_PER_WORD));
6634 MEM_NOTRAP_P (mem) = 1;
6635 set_mem_alias_set (mem, set);
6636 emit_move_insn (mem, gen_rtx_REG (Pmode,
6637 x86_64_int_parameter_registers[i]));
6640 if (ix86_varargs_fpr_size)
6642 /* Now emit code to save SSE registers. The AX parameter contains number
6643 of SSE parameter registers used to call this function. We use
6644 sse_prologue_save insn template that produces computed jump across
6645 SSE saves. We need some preparation work to get this working. */
6647 label = gen_label_rtx ();
6648 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6650 /* Compute address to jump to :
6651 label - eax*4 + nnamed_sse_arguments*4 Or
6652 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6653 tmp_reg = gen_reg_rtx (Pmode);
6654 nsse_reg = gen_reg_rtx (Pmode);
6655 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6656 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6657 gen_rtx_MULT (Pmode, nsse_reg,
6660 /* vmovaps is one byte longer than movaps. */
6662 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6663 gen_rtx_PLUS (Pmode, tmp_reg,
6669 gen_rtx_CONST (DImode,
6670 gen_rtx_PLUS (DImode,
6672 GEN_INT (cum->sse_regno
6673 * (TARGET_AVX ? 5 : 4)))));
6675 emit_move_insn (nsse_reg, label_ref);
6676 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6678 /* Compute address of memory block we save into. We always use pointer
6679 pointing 127 bytes after first byte to store - this is needed to keep
6680 instruction size limited by 4 bytes (5 bytes for AVX) with one
6681 byte displacement. */
6682 tmp_reg = gen_reg_rtx (Pmode);
6683 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6684 plus_constant (save_area,
6685 ix86_varargs_gpr_size + 127)));
6686 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6687 MEM_NOTRAP_P (mem) = 1;
6688 set_mem_alias_set (mem, set);
6689 set_mem_align (mem, BITS_PER_WORD);
6691 /* And finally do the dirty job! */
6692 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6693 GEN_INT (cum->sse_regno), label));
/* Win64 varargs prologue: spill every parameter register from the
   first unused one (cum->regno) up to X64_REGPARM_MAX into the
   caller-allocated home slots above the incoming args pointer.  */
6698 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6700 alias_set_type set = get_varargs_alias_set ();
6703 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6707 mem = gen_rtx_MEM (Pmode,
6708 plus_constant (virtual_incoming_args_rtx,
6709 i * UNITS_PER_WORD));
6710 MEM_NOTRAP_P (mem) = 1;
6711 set_mem_alias_set (mem, set);
6713 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6714 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: advance a copy of CUM past the
   last named argument (for stdarg functions) and dispatch to the
   ABI-specific register-save worker.  */
6719 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6720 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6723 CUMULATIVE_ARGS next_cum;
6726 /* This argument doesn't appear to be used anymore. Which is good,
6727 because the old code here didn't suppress rtl generation. */
6728 gcc_assert (!no_rtl);
6733 fntype = TREE_TYPE (current_function_decl);
6735 /* For varargs, we do not want to skip the dummy va_dcl argument.
6736 For stdargs, we do want to skip the last named argument. */
6738 if (stdarg_p (fntype))
6739 function_arg_advance (&next_cum, mode, type, 1);
6741 if (cum->call_abi == MS_ABI)
6742 setup_incoming_varargs_ms_64 (&next_cum);
6744 setup_incoming_varargs_64 (&next_cum);
6747 /* Checks if TYPE is of kind va_list char *.  Returns nonzero when the
   canonical form of TYPE is the simple pointer-style va_list (always the
   case for 32-bit, and for the MS-ABI va_list on 64-bit).  NOTE(review):
   the return type line, braces and the 32-bit early return are elided in
   this listing.  */
6750 is_va_list_char_pointer (tree type)
6754   /* For 32-bit it is always true.  */
/* On 64-bit, compare against the canonical va_list types: the MS one, or
   the default va_list when the whole TU uses the MS ABI.  */
6757   canonic = ix86_canonical_va_list_type (type);
6758   return (canonic == ms_va_list_type_node
6759           || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6762 /* Implement va_start.  */
/* Initialize the va_list object VALIST for the current function:
   fill in gp_offset, fp_offset, overflow_arg_area and reg_save_area
   per the SysV AMD64 ABI.  Only the 64-bit SysV layout needs this;
   pointer-style va_lists fall back to the generic expander.
   NOTE(review): fragmentary listing -- braces and a few declarations
   (e.g. "tree type") are elided.  */
6765 ix86_va_start (tree valist, rtx nextarg)
6767   HOST_WIDE_INT words, n_gpr, n_fpr;
6768   tree f_gpr, f_fpr, f_ovf, f_sav;
6769   tree gpr, fpr, ovf, sav, t;
6772   /* Only 64bit target needs something special.  */
6773   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6775       std_expand_builtin_va_start (valist, nextarg);
/* Locate the four fields of the SysV va_list record type.  */
6779   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6780   f_fpr = TREE_CHAIN (f_gpr);
6781   f_ovf = TREE_CHAIN (f_fpr);
6782   f_sav = TREE_CHAIN (f_ovf);
/* Build COMPONENT_REFs for each field of *valist.  */
6784   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6785   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6786   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6787   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6788   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6790   /* Count number of gp and fp argument registers used.  */
6791   words = crtl->args.info.words;
6792   n_gpr = crtl->args.info.regno;
6793   n_fpr = crtl->args.info.sse_regno;
/* gp_offset = bytes of integer registers already consumed (8 each).  */
6795   if (cfun->va_list_gpr_size)
6797       type = TREE_TYPE (gpr);
6798       t = build2 (MODIFY_EXPR, type,
6799 		  gpr, build_int_cst (type, n_gpr * 8));
6800       TREE_SIDE_EFFECTS (t) = 1;
6801       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset starts after the whole integer save area (8 * REGPARM_MAX)
   and each SSE register slot is 16 bytes.  */
6804   if (TARGET_SSE && cfun->va_list_fpr_size)
6806       type = TREE_TYPE (fpr);
6807       t = build2 (MODIFY_EXPR, type, fpr,
6808 		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6809       TREE_SIDE_EFFECTS (t) = 1;
6810       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6813   /* Find the overflow area.  */
6814   type = TREE_TYPE (ovf);
6815   t = make_tree (type, crtl->args.internal_arg_pointer);
6817   t = build2 (POINTER_PLUS_EXPR, type, t,
6818 	      size_int (words * UNITS_PER_WORD));
6819   t = build2 (MODIFY_EXPR, type, ovf, t);
6820   TREE_SIDE_EFFECTS (t) = 1;
6821   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6823   if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6825       /* Find the register save area.
6826 	 Prologue of the function save it right above stack frame.  */
6827       type = TREE_TYPE (sav);
6828       t = make_tree (type, frame_pointer_rtx);
/* If no GPRs were saved, the save area begins below the frame pointer by
   the full (unsaved) integer area.  */
6829       if (!ix86_varargs_gpr_size)
6830 	t = build2 (POINTER_PLUS_EXPR, type, t,
6831 		    size_int (-8 * X86_64_REGPARM_MAX));
6832       t = build2 (MODIFY_EXPR, type, sav, t);
6833       TREE_SIDE_EFFECTS (t) = 1;
6834       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6838 /* Implement va_arg.  */
/* Gimplify a VA_ARG_EXPR for the 64-bit SysV ABI: emit GIMPLE that either
   pulls the value from the register save area (when enough int/SSE slots
   remain) or from the stack overflow area.  Appends statements to PRE_P
   and returns the dereferenced result.  NOTE(review): fragmentary listing
   -- braces, several declarations and some statements (e.g. the AVX
   256-bit-on-stack handling and the need_temp alignment cases) are
   elided, so comments below describe only the visible code.  */
6841 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6844   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6845   tree f_gpr, f_fpr, f_ovf, f_sav;
6846   tree gpr, fpr, ovf, sav, t;
6848   tree lab_false, lab_over = NULL_TREE;
6853   enum machine_mode nat_mode;
6856   /* Only 64bit target needs something special.  */
6857   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6858     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* Locate the four fields of the SysV va_list record.  */
6860   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6861   f_fpr = TREE_CHAIN (f_gpr);
6862   f_ovf = TREE_CHAIN (f_fpr);
6863   f_sav = TREE_CHAIN (f_ovf);
6865   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6866 		build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6867   valist = build_va_arg_indirect_ref (valist);
6868   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6869   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6870   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Arguments passed by reference are fetched as a pointer and dereferenced
   at the end.  */
6872   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6874     type = build_pointer_type (type);
6875   size = int_size_in_bytes (type);
6876   rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6878   nat_mode = type_natural_mode (type, NULL);
6887       /* Unnamed 256bit vector mode parameters are passed on stack.  */
6888       if (ix86_cfun_abi () == SYSV_ABI)
/* Classify TYPE; CONTAINER is NULL when the value lives on the stack.  */
6895   container = construct_container (nat_mode, TYPE_MODE (type),
6896 				   type, 0, X86_64_REGPARM_MAX,
6897 				   X86_64_SSE_REGPARM_MAX, intreg,
6902   /* Pull the value out of the saved registers.  */
6904   addr = create_tmp_var (ptr_type_node, "addr");
6905   DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6909       int needed_intregs, needed_sseregs;
6911       tree int_addr, sse_addr;
6913       lab_false = create_artificial_label ();
6914       lab_over = create_artificial_label ();
6916       examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary is needed when the pieces cannot be read directly out of
   the save area, e.g. over-aligned aggregates.  */
6918       need_temp = (!REG_P (container)
6919 		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
6920 		       || TYPE_ALIGN (type) > 128));
6922       /* In case we are passing structure, verify that it is consecutive block
6923          on the register save area.  If not we need to do moves.  */
6924       if (!need_temp && !REG_P (container))
6926 	  /* Verify that all registers are strictly consecutive  */
6927 	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
/* SSE pieces: must occupy consecutive 16-byte slots starting at
   FIRST_SSE_REG.  */
6931 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6933 		  rtx slot = XVECEXP (container, 0, i);
6934 		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6935 		      || INTVAL (XEXP (slot, 1)) != i * 16)
/* Integer pieces: consecutive 8-byte slots starting at register 0.  */
6943 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6945 		  rtx slot = XVECEXP (container, 0, i);
6946 		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6947 		      || INTVAL (XEXP (slot, 1)) != i * 8)
6959 	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
6960 	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6961 	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6962 	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6965       /* First ensure that we fit completely in registers.  */
/* Branch to lab_false (stack path) if gp_offset is past the last usable
   slot for NEEDED_INTREGS registers.  */
6968 	  t = build_int_cst (TREE_TYPE (gpr),
6969 			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6970 	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6971 	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6972 	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6973 	  gimplify_and_add (t, pre_p);
/* Same check for the SSE slots (16 bytes each, after the int area).  */
6977 	  t = build_int_cst (TREE_TYPE (fpr),
6978 			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6979 			     + X86_64_REGPARM_MAX * 8);
6980 	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6981 	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6982 	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6983 	  gimplify_and_add (t, pre_p);
6986       /* Compute index to start of area used for integer regs.  */
6989 	  /* int_addr = gpr + sav; */
6990 	  t = fold_convert (sizetype, gpr);
6991 	  t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6992 	  gimplify_assign (int_addr, t, pre_p);
6996 	  /* sse_addr = fpr + sav; */
6997 	  t = fold_convert (sizetype, fpr);
6998 	  t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6999 	  gimplify_assign (sse_addr, t, pre_p);
/* need_temp path: copy each register-sized piece into a stack temporary
   and read the value from there.  */
7004 	  tree temp = create_tmp_var (type, "va_arg_tmp");
7007 	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7008 	  gimplify_assign (addr, t, pre_p);
7010 	  for (i = 0; i < XVECLEN (container, 0); i++)
7012 	      rtx slot = XVECEXP (container, 0, i);
7013 	      rtx reg = XEXP (slot, 0);
7014 	      enum machine_mode mode = GET_MODE (reg);
7015 	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
7016 	      tree addr_type = build_pointer_type (piece_type);
7017 	      tree daddr_type = build_pointer_type_for_mode (piece_type,
7021 	      tree dest_addr, dest;
/* Pick the source area (SSE vs. integer) and offset for this piece.  */
7023 	      if (SSE_REGNO_P (REGNO (reg)))
7025 		  src_addr = sse_addr;
7026 		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7030 		  src_addr = int_addr;
7031 		  src_offset = REGNO (reg) * 8;
7033 	      src_addr = fold_convert (addr_type, src_addr);
7034 	      src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7035 				      size_int (src_offset));
7036 	      src = build_va_arg_indirect_ref (src_addr);
7038 	      dest_addr = fold_convert (daddr_type, addr);
7039 	      dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7040 				      size_int (INTVAL (XEXP (slot, 1))));
7041 	      dest = build_va_arg_indirect_ref (dest_addr);
7043 	      gimplify_assign (dest, src, pre_p);
/* Bump gp_offset/fp_offset past the registers just consumed.  */
7049 	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7050 		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7051 	  gimplify_assign (gpr, t, pre_p);
7056 	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7057 		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7058 	  gimplify_assign (fpr, t, pre_p);
7061       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7063       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7066   /* ... otherwise out of the overflow area.  */
7068   /* When we align parameter on stack for caller, if the parameter
7069      alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7070      aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
7071      here with caller.  */
7072   arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7073   if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7074     arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7076   /* Care for on-stack alignment if needed.  */
7077   if (arg_boundary <= 64
7078       || integer_zerop (TYPE_SIZE (type)))
/* Round ovf up to the required alignment: ovf = (ovf + align-1) & -align.  */
7082       HOST_WIDE_INT align = arg_boundary / 8;
7083       t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7084 		  size_int (align - 1));
7085       t = fold_convert (sizetype, t);
7086       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7088       t = fold_convert (TREE_TYPE (ovf), t);
7090   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7091   gimplify_assign (addr, t, pre_p);
/* Advance the overflow pointer past the argument (rounded to words).  */
7093   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7094 	      size_int (rsize * UNITS_PER_WORD));
7095   gimplify_assign (unshare_expr (ovf), t, pre_p);
7098     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7100   ptrtype = build_pointer_type (type);
7101   addr = fold_convert (ptrtype, addr);
/* For pass-by-reference arguments, an extra dereference is required.  */
7104     addr = build_va_arg_indirect_ref (addr);
7105   return build_va_arg_indirect_ref (addr);
7108 /* Return nonzero if OPNUM's MEM should be matched
7109    in movabs* patterns.  */
/* INSN is the candidate instruction; OPNUM selects which side of its SET
   (0 = dest, 1 = src) holds the memory operand to validate.  Rejects
   volatile MEMs unless volatile_ok is in effect.  */
7112 ix86_check_movabs (rtx insn, int opnum)
7116   set = PATTERN (insn);
/* A PARALLEL wraps the SET together with clobbers; look at element 0.  */
7117   if (GET_CODE (set) == PARALLEL)
7118     set = XVECEXP (set, 0, 0);
7119   gcc_assert (GET_CODE (set) == SET);
7120   mem = XEXP (set, opnum);
/* Strip any SUBREG wrappers to reach the underlying MEM.  */
7121   while (GET_CODE (mem) == SUBREG)
7122     mem = SUBREG_REG (mem);
7123   gcc_assert (MEM_P (mem));
7124   return (volatile_ok || !MEM_VOLATILE_P (mem));
7127 /* Initialize the table of extra 80387 mathematical constants.  */
/* Parses the decimal strings for lg2, ln2, log2(e), log2(10) and pi into
   ext_80387_constants_table, rounded to XFmode, and sets the init flag so
   this runs at most once (callers check ext_80387_constants_init).  */
7130 init_ext_80387_constants (void)
7132   static const char * cst[5] =
7134      "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
7135      "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
7136      "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
7137      "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
7138      "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
7142   for (i = 0; i < 5; i++)
7144       real_from_string (&ext_80387_constants_table[i], cst[i]);
7145       /* Ensure each constant is rounded to XFmode precision.  */
7146       real_convert (&ext_80387_constants_table[i],
7147 		    XFmode, &ext_80387_constants_table[i]);
7150   ext_80387_constants_init = 1;
7153 /* Return true if the constant is something that can be loaded with
7154    a special instruction.  */
/* Given CONST_DOUBLE X in an x87 float mode, return a positive index
   identifying the dedicated load instruction (fldz/fld1/fldlg2/...), or a
   value the caller treats as "no special load".  NOTE(review): fragmentary
   listing -- the return-value constants for each case and the final
   return are elided, so the exact encoding cannot be confirmed here.  */
7157 standard_80387_constant_p (rtx x)
7159   enum machine_mode mode = GET_MODE (x);
7163   if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
/* 0.0 and 1.0 have dedicated fldz/fld1 loads in every x87 mode.  */
7166   if (x == CONST0_RTX (mode))
7168   if (x == CONST1_RTX (mode))
7171   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7173   /* For XFmode constants, try to find a special 80387 instruction when
7174      optimizing for size or on those CPUs that benefit from them.  */
7176       && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7180       if (! ext_80387_constants_init)
7181 	init_ext_80387_constants ();
7183       for (i = 0; i < 5; i++)
7184         if (real_identical (&r, &ext_80387_constants_table[i]))
7188   /* Load of the constant -0.0 or -1.0 will be split as
7189      fldz;fchs or fld1;fchs sequence.  */
7190   if (real_isnegzero (&r))
7192   if (real_identical (&r, &dconstm1))
7198 /* Return the opcode of the special instruction to be used to load
/* Maps the index returned by standard_80387_constant_p to the matching
   assembler mnemonic.  NOTE(review): the switch cases are entirely elided
   in this listing; only the dispatch on standard_80387_constant_p is
   visible.  */
7202 standard_80387_constant_opcode (rtx x)
7204   switch (standard_80387_constant_p (x))
7228 /* Return the CONST_DOUBLE representing the 80387 constant that is
7229    loaded by the specified special instruction.  The argument IDX
7230    matches the return value from standard_80387_constant_p.  */
/* NOTE(review): the mapping from IDX to table index I and the mode
   argument of CONST_DOUBLE_FROM_REAL_VALUE are elided in this listing.  */
7233 standard_80387_constant_rtx (int idx)
/* Lazily build the extended-constant table on first use.  */
7237   if (! ext_80387_constants_init)
7238     init_ext_80387_constants ();
7254   return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7258 /* Return 1 if mode is a valid mode for sse.  */
/* NOTE(review): the function body (presumably a switch over the 128-bit
   SSE vector modes) is entirely elided in this listing.  */
7260 standard_sse_mode_p (enum machine_mode mode)
7277 /* Return 1 if X is all 0s.  For all 1s, return 2 if X is in 128bit
7278    SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7279    modes and AVX is enabled.  */
/* Negative returns (-2/-3) flag an all-ones constant whose required ISA
   extension (SSE2/AVX) is not enabled.  */
7282 standard_sse_constant_p (rtx x)
7284   enum machine_mode mode = GET_MODE (x);
7286   if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7288   if (vector_all_ones_operand (x, mode))
7290     if (standard_sse_mode_p (mode))
7291       return TARGET_SSE2 ? 2 : -2;
7292     else if (VALID_AVX256_REG_MODE (mode))
7293       return TARGET_AVX ? 3 : -3;
7299 /* Return the opcode of the special instruction to be used to load
/* For a standard SSE constant X (all-zeros or all-ones, per
   standard_sse_constant_p), return the assembler template that
   materializes it in place: xor-self for zeros, pcmpeq-self for ones.
   The template is chosen by the insn's mode attribute and by whether AVX
   (v-prefixed, three-operand) encodings are in use.  NOTE(review): the
   case labels and some mode cases are elided in this listing.  */
7303 standard_sse_constant_opcode (rtx insn, rtx x)
7305   switch (standard_sse_constant_p (x))
/* All-zeros: register xor'ed with itself.  */
7308       switch (get_attr_mode (insn))
7311 	  return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7313 	  return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7315 	  return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7317 	  return "vxorps\t%x0, %x0, %x0";
7319 	  return "vxorpd\t%x0, %x0, %x0";
7321 	  return "vpxor\t%x0, %x0, %x0";
/* All-ones: compare a register for equality with itself.  */
7327       switch (get_attr_mode (insn))
7332 	  return "vpcmpeqd\t%0, %0, %0";
7338 	  return "pcmpeqd\t%0, %0";
7343 /* Returns 1 if OP contains a symbol reference */
/* Recursively walks the RTL expression OP looking for any SYMBOL_REF or
   LABEL_REF, using the format string to visit sub-expressions ('e') and
   vectors ('E').  */
7346 symbolic_reference_mentioned_p (rtx op)
7351   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7354   fmt = GET_RTX_FORMAT (GET_CODE (op));
7355   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
/* 'E' operands are rtvecs; recurse into every element.  */
7361 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7362 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7366       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7373 /* Return 1 if it is appropriate to emit `ret' instructions in the
7374    body of a function.  Do this only if the epilogue is simple, needing a
7375    couple of insns.  Prior to reloading, we can't tell how many registers
7376    must be saved, so return 0 then.  Return 0 if there is no frame
7377    marker to de-allocate.  */
7380 ix86_can_use_return_insn_p (void)
7382   struct ix86_frame frame;
/* Before reload the frame layout is unknown; with a frame pointer the
   epilogue must at least restore it.  */
7384   if (! reload_completed || frame_pointer_needed)
7387   /* Don't allow more than 32 pop, since that's all we can do
7388      with one instruction.  */
7389   if (crtl->args.pops_args
7390       && crtl->args.size >= 32768)
/* A bare `ret' works only if nothing was allocated and no registers
   (GP or SSE) need restoring.  */
7393   ix86_compute_frame_layout (&frame);
7394   return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7397 /* Value should be nonzero if functions must have frame pointers.
7398    Zero means the frame pointer need not be set up (and parms may
7399    be accessed via the stack pointer) in functions that seem suitable.  */
7402 ix86_frame_pointer_required (void)
7404   /* If we accessed previous frames, then the generated code expects
7405      to be able to access the saved ebp value in our frame.  */
7406   if (cfun->machine->accesses_prev_frame)
7409   /* Several x86 os'es need a frame pointer for other reasons,
7410      usually pertaining to setjmp.  */
7411   if (SUBTARGET_FRAME_POINTER_REQUIRED)
7414   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7415      the frame pointer by default.  Turn it back on now if we've not
7416      got a leaf function.  */
/* TLS-descriptor calls behave like calls, so the function is not
   effectively a leaf even if current_function_is_leaf says so.  */
7417   if (TARGET_OMIT_LEAF_FRAME_POINTER
7418       && (!current_function_is_leaf
7419 	  || ix86_current_function_calls_tls_descriptor))
7428 /* Record that the current function accesses previous call frames.  */
/* Called when __builtin_frame_address/__builtin_return_address need outer
   frames; forces ix86_frame_pointer_required to keep %ebp.  */
7431 ix86_setup_frame_addresses (void)
7433   cfun->machine->accesses_prev_frame = 1;
7436 #ifndef USE_HIDDEN_LINKONCE
7437 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7438 # define USE_HIDDEN_LINKONCE 1
7440 # define USE_HIDDEN_LINKONCE 0
7444 static int pic_labels_used;
7446 /* Fills in the label name that should be used for a pc thunk for
7447    the given register.  */
/* NAME receives either the well-known hidden-linkonce symbol
   "__i686.get_pc_thunk.<reg>" or a plain internal label, depending on
   assembler/linker support.  32-bit only.  */
7450 get_pc_thunk_name (char name[32], unsigned int regno)
7452   gcc_assert (!TARGET_64BIT);
7454   if (USE_HIDDEN_LINKONCE)
7455     sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7457     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7461 /* This function generates code for -fpic that loads %ebx with
7462    the return address of the caller and then returns.  */
/* End-of-file hook: emit one get-pc thunk per register recorded in
   pic_labels_used.  Each thunk loads its register from the top of stack
   (the return address) and returns.  NOTE(review): fragmentary listing --
   braces, the TARGET_MACHO conditional structure and some declarations
   are elided.  */
7465 ix86_file_end (void)
7470   for (regno = 0; regno < 8; ++regno)
/* Skip registers for which no thunk call was ever emitted.  */
7474       if (! ((pic_labels_used >> regno) & 1))
7477       get_pc_thunk_name (name, regno);
/* Mach-O: coalesced text section with weak/private-extern symbol.  */
7482 	  switch_to_section (darwin_sections[text_coal_section]);
7483 	  fputs ("\t.weak_definition\t", asm_out_file);
7484 	  assemble_name (asm_out_file, name);
7485 	  fputs ("\n\t.private_extern\t", asm_out_file);
7486 	  assemble_name (asm_out_file, name);
7487 	  fputs ("\n", asm_out_file);
7488 	  ASM_OUTPUT_LABEL (asm_out_file, name);
/* ELF with hidden-linkonce support: emit the thunk as a public, hidden,
   one-only function so duplicates across objects coalesce.  */
7492       if (USE_HIDDEN_LINKONCE)
7496 	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
7498 	  TREE_PUBLIC (decl) = 1;
7499 	  TREE_STATIC (decl) = 1;
7500 	  DECL_ONE_ONLY (decl) = 1;
7502 	  (*targetm.asm_out.unique_section) (decl, 0);
7503 	  switch_to_section (get_named_section (decl, NULL, 0));
7505 	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
7506 	  fputs ("\t.hidden\t", asm_out_file);
7507 	  assemble_name (asm_out_file, name);
7508 	  fputc ('\n', asm_out_file);
7509 	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7513 	  switch_to_section (text_section);
7514 	  ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: mov (%esp), %reg ; ret.  */
7517       xops[0] = gen_rtx_REG (Pmode, regno);
7518       xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7519       output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7520       output_asm_insn ("ret", xops);
7523   if (NEED_INDICATE_EXEC_STACK)
7524     file_end_indicate_exec_stack ();
7527 /* Emit code for the SET_GOT patterns.  */
/* Emit the assembly that loads the PIC register DEST with the GOT base.
   Three strategies are visible: VxWorks RTP (load GOTT_BASE/GOTT_INDEX),
   the call/pop sequence (no deep branch prediction), and the pc-thunk
   call (deep branch prediction).  Returns an assembler template string.
   NOTE(review): fragmentary listing -- braces, several returns and the
   TARGET_MACHO conditionals' structure are elided.  */
7530 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7536   if (TARGET_VXWORKS_RTP && flag_pic)
7538       /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
7539       xops[2] = gen_rtx_MEM (Pmode,
7540 			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7541       output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7543       /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7544 	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7545 	 an unadorned address.  */
7546       xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7547       SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7548       output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7552   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7554   if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
/* call/pop sequence: call the next instruction, then pop the return
   address (the pc) into DEST.  */
7556       xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7559 	output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7561 	output_asm_insn ("call\t%a2", xops);
7564       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7565          is what will be referenced by the Mach-O PIC subsystem.  */
7567 	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7570       (*targetm.asm_out.internal_label) (asm_out_file, "L",
7571 					 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7574 	output_asm_insn ("pop%z0\t%0", xops);
/* pc-thunk path: call __i686.get_pc_thunk.<reg>, recording its use so
   ix86_file_end emits the thunk body.  */
7579       get_pc_thunk_name (name, REGNO (dest));
7580       pic_labels_used |= 1 << REGNO (dest);
7582       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7583       xops[2] = gen_rtx_MEM (QImode, xops[2]);
7584       output_asm_insn ("call\t%X2", xops);
7585       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7586          is what will be referenced by the Mach-O PIC subsystem.  */
7589 	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7591 	  targetm.asm_out.internal_label (asm_out_file, "L",
7592 					  CODE_LABEL_NUMBER (label));
/* Finally add the GOT symbol (adjusted by the pc label offset when using
   the call/pop sequence) to DEST.  */
7599   if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7600     output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7602     output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7607 /* Generate an "push" pattern for input ARG.  */
/* Builds a SET that stores ARG to memory at a pre-decremented stack
   pointer, i.e. a push.  NOTE(review): the function signature line and
   part of the MEM construction are elided in this listing; presumably the
   function is "static rtx gen_push (rtx arg)" -- confirm against the
   full source.  */
7612   return gen_rtx_SET (VOIDmode,
7614 				  gen_rtx_PRE_DEC (Pmode,
7615 						   stack_pointer_rtx)),
7619 /* Return >= 0 if there is an unused call-clobbered register available
7620    for the entire function.  */
/* Only attempted in leaf, unprofiled functions with no TLS-descriptor
   calls; scans %eax/%ecx/%edx (regnos 2..0) for one never live and not
   used as the DRAP register.  Returns INVALID_REGNUM on failure.  */
7623 ix86_select_alt_pic_regnum (void)
7625   if (current_function_is_leaf && !crtl->profile
7626       && !ix86_current_function_calls_tls_descriptor)
7629       /* Can't use the same register for both PIC and DRAP.  */
7631 	drap = REGNO (crtl->drap_reg);
7634       for (i = 2; i >= 0; --i)
7635         if (i != drap && !df_regs_ever_live_p (i))
7639   return INVALID_REGNUM;
7642 /* Return 1 if we need to save REGNO.  */
/* MAYBE_EH_RETURN selects whether the EH-return data registers count as
   needing a save.  NOTE(review): fragmentary listing -- several returns,
   the EH loop body and the DRAP condition's first line are elided.  */
7644 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved when it is actually needed (live, EH
   return, or constant-pool use) and no alternate register is free.  */
7646   if (pic_offset_table_rtx
7647       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7648       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7650 	  || crtl->calls_eh_return
7651 	  || crtl->uses_const_pool))
7653       if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* EH return: registers in EH_RETURN_DATA_REGNO must be preserved.  */
7658   if (crtl->calls_eh_return && maybe_eh_return)
7663 	  unsigned test = EH_RETURN_DATA_REGNO (i);
7664 	  if (test == INVALID_REGNUM)
/* The DRAP register (dynamic realign argument pointer) must be saved.  */
7672       && regno == REGNO (crtl->drap_reg))
/* Default rule: save any live, call-saved, non-fixed register, except
   the hard frame pointer when it is already maintained as such.  */
7675   return (df_regs_ever_live_p (regno)
7676 	  && !call_used_regs[regno]
7677 	  && !fixed_regs[regno]
7678 	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7681 /* Return number of saved general prupose registers.  */
/* Counts every non-SSE hard register that ix86_save_reg says needs
   saving (EH-return registers included).  */
7684 ix86_nsaved_regs (void)
7689   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7690     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7695 /* Return number of saved SSE registrers.  */
/* Only the MS ABI has callee-saved SSE registers; other ABIs save none.  */
7698 ix86_nsaved_sseregs (void)
7703   if (ix86_cfun_abi () != MS_ABI)
7705   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7706     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7711 /* Given FROM and TO register numbers, say whether this elimination is
7712    allowed.  If stack alignment is needed, we can only replace argument
7713    pointer with hard frame pointer, or replace frame pointer with stack
7714    pointer.  Otherwise, frame pointer elimination is automatically
7715    handled and all other eliminations are valid.  */
7718 ix86_can_eliminate (int from, int to)
/* With frame-pointer-based stack realignment only the two listed
   eliminations are meaningful.  */
7720   if (stack_realign_fp)
7721     return ((from == ARG_POINTER_REGNUM
7722              && to == HARD_FRAME_POINTER_REGNUM)
7723             || (from == FRAME_POINTER_REGNUM
7724                 && to == STACK_POINTER_REGNUM));
/* Otherwise eliminating to the stack pointer is blocked only when a
   frame pointer is required.  */
7726     return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7729 /* Return the offset between two registers, one to be eliminated, and the other
7730    its replacement, at the start of a routine.  */
/* Computes the frame layout and returns the constant offset to add when
   replacing FROM with TO; only the four legal (FROM, TO) pairs are
   handled, others trip the gcc_asserts.  */
7733 ix86_initial_elimination_offset (int from, int to)
7735   struct ix86_frame frame;
7736   ix86_compute_frame_layout (&frame);
7738   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7739     return frame.hard_frame_pointer_offset;
7740   else if (from == FRAME_POINTER_REGNUM
7741 	   && to == HARD_FRAME_POINTER_REGNUM)
7742     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases eliminate to the stack pointer.  */
7745       gcc_assert (to == STACK_POINTER_REGNUM);
7747       if (from == ARG_POINTER_REGNUM)
7748 	return frame.stack_pointer_offset;
7750       gcc_assert (from == FRAME_POINTER_REGNUM);
7751       return frame.stack_pointer_offset - frame.frame_pointer_offset;
7755 /* In a dynamically-aligned function, we can't know the offset from
7756    stack pointer to frame pointer, so we must ensure that setjmp
7757    eliminates fp against the hard fp (%ebp) rather than trying to
7758    index from %esp up to the top of the frame across a gap that is
7759    of unknown (at compile-time) size.  */
/* Target hook: value __builtin_setjmp should record as the frame base.  */
7761 ix86_builtin_setjmp_frame_value (void)
7763   return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7766 /* Fill structure ix86_frame about frame of currently computed function.  */
/* Computes the complete stack-frame layout: saved GP/SSE register counts,
   the alignment paddings, the varargs save area, local-variable space,
   outgoing-args space, the red zone, and the resulting frame-pointer /
   stack-pointer offsets plus the amount the prologue must allocate.
   The layout accumulates downward into OFFSET in the order the areas
   appear below -- do not reorder these steps.  NOTE(review): fragmentary
   listing -- braces, some conditionals (e.g. the debug-dump guard around
   the fprintf block) and a few lines are elided.  */
7769 ix86_compute_frame_layout (struct ix86_frame *frame)
7771   HOST_WIDE_INT total_size;
7772   unsigned int stack_alignment_needed;
7773   HOST_WIDE_INT offset;
7774   unsigned int preferred_alignment;
7775   HOST_WIDE_INT size = get_frame_size ();
7777   frame->nregs = ix86_nsaved_regs ();
7778   frame->nsseregs = ix86_nsaved_sseregs ();
7781   stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7782   preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7784   /* MS ABI seem to require stack alignment to be always 16 except for function
7786   if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7788       preferred_alignment = 16;
7789       stack_alignment_needed = 16;
7790       crtl->preferred_stack_boundary = 128;
7791       crtl->stack_alignment_needed = 128;
7794   gcc_assert (!size || stack_alignment_needed);
7795   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7796   gcc_assert (preferred_alignment <= stack_alignment_needed);
7798   /* During reload iteration the amount of registers saved can change.
7799      Recompute the value as needed.  Do not recompute when amount of registers
7800      didn't change as reload does multiple calls to the function and does not
7801      expect the decision to change within single iteration.  */
7802   if (!optimize_function_for_size_p (cfun)
7803       && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7805       int count = frame->nregs;
7807       cfun->machine->use_fast_prologue_epilogue_nregs = count;
7808       /* The fast prologue uses move instead of push to save registers.  This
7809          is significantly longer, but also executes faster as modern hardware
7810          can execute the moves in parallel, but can't do that for push/pop.
7812 	 Be careful about choosing what prologue to emit:  When function takes
7813 	 many instructions to execute we may use slow version as well as in
7814 	 case function is known to be outside hot spot (this is known with
7815 	 feedback only).  Weight the size of function by number of registers
7816 	 to save as it is cheap to use one or two push instructions but very
7817 	 slow to use many of them.  */
7819 	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7820       if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7821 	  || (flag_branch_probabilities
7822 	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7823         cfun->machine->use_fast_prologue_epilogue = false;
7825 	cfun->machine->use_fast_prologue_epilogue
7826 	   = !expensive_function_p (count);
7828   if (TARGET_PROLOGUE_USING_MOVE
7829       && cfun->machine->use_fast_prologue_epilogue)
7830     frame->save_regs_using_mov = true;
7832     frame->save_regs_using_mov = false;
7835   /* Skip return address and saved base pointer.  */
7836   offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7838   frame->hard_frame_pointer_offset = offset;
7840   /* Set offset to aligned because the realigned frame starts from
7842   if (stack_realign_fp)
7843     offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7845   /* Register save area */
7846   offset += frame->nregs * UNITS_PER_WORD;
7848   /* Align SSE reg save area.  */
7849   if (frame->nsseregs)
7850     frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7852     frame->padding0 = 0;
7854   /* SSE register save area.  */
7855   offset += frame->padding0 + frame->nsseregs * 16;
/* Space for unnamed register arguments saved for va_arg.  */
7858   frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7859   offset += frame->va_arg_size;
7861   /* Align start of frame for local function.  */
7862   frame->padding1 = ((offset + stack_alignment_needed - 1)
7863 		     & -stack_alignment_needed) - offset;
7865   offset += frame->padding1;
7867   /* Frame pointer points here.  */
7868   frame->frame_pointer_offset = offset;
7872   /* Add outgoing arguments area.  Can be skipped if we eliminated
7873      all the function calls as dead code.
7874      Skipping is however impossible when function calls alloca.  Alloca
7875      expander assumes that last crtl->outgoing_args_size
7876      of stack frame are unused.  */
7877   if (ACCUMULATE_OUTGOING_ARGS
7878       && (!current_function_is_leaf || cfun->calls_alloca
7879 	  || ix86_current_function_calls_tls_descriptor))
7881       offset += crtl->outgoing_args_size;
7882       frame->outgoing_arguments_size = crtl->outgoing_args_size;
7885     frame->outgoing_arguments_size = 0;
7887   /* Align stack boundary.  Only needed if we're calling another function
7889   if (!current_function_is_leaf || cfun->calls_alloca
7890       || ix86_current_function_calls_tls_descriptor)
7891     frame->padding2 = ((offset + preferred_alignment - 1)
7892 		       & -preferred_alignment) - offset;
7894     frame->padding2 = 0;
7896   offset += frame->padding2;
7898   /* We've reached end of stack frame.  */
7899   frame->stack_pointer_offset = offset;
7901   /* Size prologue needs to allocate.  */
7902   frame->to_allocate =
7903     (size + frame->padding1 + frame->padding2
7904      + frame->outgoing_arguments_size + frame->va_arg_size);
/* Small frames: pushes beat moves; huge 64-bit frames cannot use the
   mov strategy's 32-bit displacements.  */
7906   if ((!frame->to_allocate && frame->nregs <= 1)
7907       || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7908     frame->save_regs_using_mov = false;
/* Use the red zone (area below %rsp) for leaf functions that never move
   the stack pointer, capping at the ABI-defined usable size.  */
7910   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7911       && current_function_is_leaf
7912       && !ix86_current_function_calls_tls_descriptor)
7914       frame->red_zone_size = frame->to_allocate;
7915       if (frame->save_regs_using_mov)
7916 	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7917       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7918 	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7921     frame->red_zone_size = 0;
7922   frame->to_allocate -= frame->red_zone_size;
7923   frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout.  NOTE(review): the guard that makes
   this conditional (presumably #if 0 or a dump flag) is elided in this
   listing.  */
7925   fprintf (stderr, "\n");
7926   fprintf (stderr, "size: %ld\n", (long)size);
7927   fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7928   fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7929   fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7930   fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7931   fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7932   fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7933   fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7934   fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7935   fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7936   fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7937   fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7938 	   (long)frame->hard_frame_pointer_offset);
7939   fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7940   fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7941   fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7942   fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7946 /* Emit code to save registers in the prologue.  */
/* Pushes each to-be-saved general-purpose register, highest regno first,
   marking every push frame-related for DWARF CFI.  */
7949 ix86_emit_save_regs (void)
7954   for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7955     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7957 	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7958 	RTX_FRAME_RELATED_P (insn) = 1;
7962 /* Emit code to save registers using MOV insns.  First register
7963    is restored from POINTER + OFFSET.  */
/* Stores each to-be-saved GP register to consecutive word-sized slots at
   POINTER + OFFSET; each store is marked frame-related for CFI.  */
7965 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7970   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7971     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7973 	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7975 			       gen_rtx_REG (Pmode, regno));
7976 	RTX_FRAME_RELATED_P (insn) = 1;
7977 	offset += UNITS_PER_WORD;
7981 /* Emit code to save registers using MOV insns.  First register
7982    is restored from POINTER + OFFSET.  */
/* SSE counterpart of ix86_emit_save_regs_using_mov: stores each to-be-
   saved SSE register as a 128-bit-aligned TImode slot at POINTER + OFFSET.
   NOTE(review): the per-iteration offset increment is elided in this
   listing.  */
7984 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7990   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7991     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7993 	mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
7994 	set_mem_align (mem, 128);
7995 	insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
7996 	RTX_FRAME_RELATED_P (insn) = 1;
8001 /* Expand prologue or epilogue stack adjustment.
8002 The pattern exists to put a dependency on all ebp-based memory accesses.
8003 STYLE should be negative if instructions should be marked as frame related,
8004 zero if %r11 register is live and cannot be freely used and positive
8008 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
8013 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
8014 else if (x86_64_immediate_operand (offset, DImode))
8015 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
/* Offset does not fit a sign-extended 32-bit immediate: materialize
   it in %r11 first, then adjust the stack through the register.  */
8019 /* r11 is used by indirect sibcall return as well, set before the
8020 epilogue and used after the epilogue. ATM indirect sibcall
8021 shouldn't be used together with huge frame sizes in one
8022 function because of the frame_size check in sibcall.c. */
8024 r11 = gen_rtx_REG (DImode, R11_REG);
8025 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8027 RTX_FRAME_RELATED_P (insn) = 1;
8028 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8032 RTX_FRAME_RELATED_P (insn) = 1;
8035 /* Find an available register to be used as dynamic realign argument
8036 pointer register. Such a register will be written in prologue and
8037 used at the beginning of the body, so it must not be
8038 1. parameter passing register.
8040 We reuse static-chain register if it is available. Otherwise, we
8041 use DI for i386 and R13 for x86-64. We chose R13 since it has
8044 Return: the regno of chosen register. */
8047 find_drap_reg (void)
8049 tree decl = cfun->decl;
/* 64-bit branch.  */
8053 /* Use R13 for nested functions or functions that need a static chain.
8054 Since a function with a tail call may use any caller-saved
8055 registers in its epilogue, DRAP must not use a caller-saved
8056 register in such a case. */
8057 if ((decl_function_context (decl)
8058 && !DECL_NO_STATIC_CHAIN (decl))
8059 || crtl->tail_call_emit)
/* 32-bit branch.  */
8066 /* Use DI for nested functions or functions that need a static chain.
8067 Since a function with a tail call may use any caller-saved
8068 registers in its epilogue, DRAP must not use a caller-saved
8069 register in such a case. */
8070 if ((decl_function_context (decl)
8071 && !DECL_NO_STATIC_CHAIN (decl))
8072 || crtl->tail_call_emit)
8075 /* Reuse static chain register if it isn't used for parameter
8077 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8078 && !lookup_attribute ("fastcall",
8079 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8086 /* Update incoming stack boundary and estimated stack alignment.
   Successive clauses may only lower (never raise) the incoming
   boundary, except for the final parm_stack_boundary floor.  */
8089 ix86_update_stack_boundary (void)
8091 /* Prefer the one specified at command line. */
8092 ix86_incoming_stack_boundary
8093 = (ix86_user_incoming_stack_boundary
8094 ? ix86_user_incoming_stack_boundary
8095 : ix86_default_incoming_stack_boundary);
8097 /* Incoming stack alignment can be changed on individual functions
8098 via force_align_arg_pointer attribute. We use the smallest
8099 incoming stack boundary. */
8100 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8101 && lookup_attribute (ix86_force_align_arg_pointer_string,
8102 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8103 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8105 /* The incoming stack frame has to be aligned at least at
8106 parm_stack_boundary. */
8107 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8108 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8110 /* Stack at entrance of main is aligned by runtime. We use the
8111 smallest incoming stack boundary. */
8112 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8113 && DECL_NAME (current_function_decl)
8114 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8115 && DECL_FILE_SCOPE_P (current_function_decl))
8116 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8118 /* x86_64 vararg needs 16byte stack alignment for register save
8122 && crtl->stack_alignment_estimated < 128)
8123 crtl->stack_alignment_estimated = 128;
8126 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8127 needed or an rtx for DRAP otherwise. */
8130 ix86_get_drap_rtx (void)
8132 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8133 crtl->need_drap = true;
8135 if (stack_realign_drap)
8137 /* Assign DRAP to vDRAP and return vDRAP.  */
8138 unsigned int regno = find_drap_reg ();
/* Record the chosen hard register as the DRAP in crtl, then copy it
   into a pseudo (vDRAP) right after the function entry point.  */
8143 arg_ptr = gen_rtx_REG (Pmode, regno);
8144 crtl->drap_reg = arg_ptr;
8147 drap_vreg = copy_to_reg (arg_ptr);
8151 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8152 RTX_FRAME_RELATED_P (insn) = 1;
8159 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  Returns the RTX used
   to refer to incoming arguments; the virtual register is later
   instantiated to either the arg pointer or the DRAP.  */
8162 ix86_internal_arg_pointer (void)
8164 return virtual_incoming_args_rtx;
8167 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8168 This is called from dwarf2out.c to emit call frame instructions
8169 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs.
   Dispatches on the UNSPEC code found in SET_SRC (PATTERN).  */
8171 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
8173 rtx unspec = SET_SRC (pattern);
8174 gcc_assert (GET_CODE (unspec) == UNSPEC);
8178 case UNSPEC_REG_SAVE:
8179 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8180 SET_DEST (pattern));
8182 case UNSPEC_DEF_CFA:
8183 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8184 INTVAL (XVECEXP (unspec, 0, 0)));
8191 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8192 to be generated in correct form. */
8194 ix86_finalize_stack_realign_flags (void)
8196 /* Check if stack realign is really needed after reload, and
8197 store the result in cfun.  */
8198 unsigned int incoming_stack_boundary
8199 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8200 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary)
8201 unsigned int stack_realign = (incoming_stack_boundary
8202 < (current_function_is_leaf
8203 ? crtl->max_used_stack_slot_alignment
8204 : crtl->stack_alignment_needed));
8206 if (crtl->stack_realign_finalized)
8208 /* After stack_realign_needed is finalized, we can no longer
8210 gcc_assert (crtl->stack_realign_needed == stack_realign);
8214 crtl->stack_realign_needed = stack_realign;
8215 crtl->stack_realign_finalized = true;
8219 /* Expand the prologue into a bunch of separate insns.  Ordering:
   DRAP setup / stack realignment, frame pointer push, register
   saves, stack allocation (with optional probing), PIC register
   setup, then misc fixups (memory blockage, CLD).  */
8222 ix86_expand_prologue (void)
8226 struct ix86_frame frame;
8227 HOST_WIDE_INT allocate;
8229 ix86_finalize_stack_realign_flags ();
8231 /* DRAP should not coexist with stack_realign_fp */
8232 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8234 ix86_compute_frame_layout (&frame);
8236 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8237 DRAP is needed and stack realignment is really needed after reload.  */
8238 if (crtl->drap_reg && crtl->stack_realign_needed)
8241 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8242 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8243 ? 0 : UNITS_PER_WORD);
8245 gcc_assert (stack_realign_drap);
8247 /* Grab the argument pointer. */
8248 x = plus_constant (stack_pointer_rtx,
8249 (UNITS_PER_WORD + param_ptr_offset));
8252 /* Only need to push parameter pointer reg if it is caller
8254 if (!call_used_regs[REGNO (crtl->drap_reg)])
8256 /* Push arg pointer reg */
8257 insn = emit_insn (gen_push (y));
8258 RTX_FRAME_RELATED_P (insn) = 1;
8261 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8262 RTX_FRAME_RELATED_P (insn) = 1;
8264 /* Align the stack. */
8265 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8267 GEN_INT (-align_bytes)));
8268 RTX_FRAME_RELATED_P (insn) = 1;
8270 /* Replicate the return address on the stack so that return
8271 address can be reached via (argp - 1) slot. This is needed
8272 to implement macro RETURN_ADDR_RTX and intrinsic function
8273 expand_builtin_return_addr etc. */
8275 x = gen_frame_mem (Pmode,
8276 plus_constant (x, -UNITS_PER_WORD));
8277 insn = emit_insn (gen_push (x));
8278 RTX_FRAME_RELATED_P (insn) = 1;
8281 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8282 slower on all targets. Also sdb doesn't like it. */
8284 if (frame_pointer_needed)
8286 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8287 RTX_FRAME_RELATED_P (insn) = 1;
8289 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8290 RTX_FRAME_RELATED_P (insn) = 1;
8293 if (stack_realign_fp)
8295 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8296 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8298 /* Align the stack. */
8299 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8301 GEN_INT (-align_bytes)));
8302 RTX_FRAME_RELATED_P (insn) = 1;
8305 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8307 if (!frame.save_regs_using_mov)
8308 ix86_emit_save_regs ();
8310 allocate += frame.nregs * UNITS_PER_WORD;
8312 /* When using red zone we may start register saving before allocating
8313 the stack frame saving one cycle of the prologue. However I will
8314 avoid doing this if I am going to have to probe the stack since
8315 at least on x86_64 the stack probe can turn into a call that clobbers
8316 a red zone location */
8317 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8318 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8319 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8320 && !crtl->stack_realign_needed)
8321 ? hard_frame_pointer_rtx
8322 : stack_pointer_rtx,
8323 -frame.nregs * UNITS_PER_WORD);
8327 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8328 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8329 GEN_INT (-allocate), -1);
/* Large allocation with stack probing: go through the
   allocate_stack_worker patterns using %eax/%rax as scratch.  */
8332 /* Only valid for Win32. */
8333 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8337 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8339 if (cfun->machine->call_abi == MS_ABI)
8342 eax_live = ix86_eax_live_at_start_p ();
8346 emit_insn (gen_push (eax));
8347 allocate -= UNITS_PER_WORD;
8350 emit_move_insn (eax, GEN_INT (allocate));
8353 insn = gen_allocate_stack_worker_64 (eax, eax);
8355 insn = gen_allocate_stack_worker_32 (eax, eax);
8356 insn = emit_insn (insn);
8357 RTX_FRAME_RELATED_P (insn) = 1;
/* Describe the SP adjustment to dwarf2out explicitly, since the
   worker insn's pattern does not expose it.  */
8358 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8359 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8360 add_reg_note (insn, REG_FRAME_RELATED_EXPR, t);
8364 if (frame_pointer_needed)
8365 t = plus_constant (hard_frame_pointer_rtx,
8368 - frame.nregs * UNITS_PER_WORD);
8370 t = plus_constant (stack_pointer_rtx, allocate);
8371 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8375 if (frame.save_regs_using_mov
8376 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8377 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8379 if (!frame_pointer_needed
8380 || !frame.to_allocate
8381 || crtl->stack_realign_needed)
8382 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8384 + frame.nsseregs * 16 + frame.padding0);
8386 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8387 -frame.nregs * UNITS_PER_WORD);
8389 if (!frame_pointer_needed
8390 || !frame.to_allocate
8391 || crtl->stack_realign_needed)
8392 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8395 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8396 - frame.nregs * UNITS_PER_WORD
8397 - frame.nsseregs * 16
8400 pic_reg_used = false;
8401 if (pic_offset_table_rtx
8402 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8405 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8407 if (alt_pic_reg_used != INVALID_REGNUM)
8408 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8410 pic_reg_used = true;
8417 if (ix86_cmodel == CM_LARGE_PIC)
8419 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8420 rtx label = gen_label_rtx ();
8422 LABEL_PRESERVE_P (label) = 1;
8423 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8424 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8425 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8426 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8427 pic_offset_table_rtx, tmp_reg));
8430 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8433 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8436 /* In the pic_reg_used case, make sure that the got load isn't deleted
8437 when mcount needs it. Blockage to avoid call movement across mcount
8438 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8440 if (crtl->profile && pic_reg_used)
8441 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8443 if (crtl->drap_reg && !crtl->stack_realign_needed)
8445 /* vDRAP is set up but after reload it turns out stack realign
8446 isn't necessary; here we emit prologue code to set up DRAP
8447 without the stack realign adjustment.  */
8448 int drap_bp_offset = UNITS_PER_WORD * 2;
8449 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8450 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8453 /* Prevent instructions from being scheduled into register save push
8454 sequence when access to the redzone area is done through frame pointer.
8455 The offset between the frame pointer and the stack pointer is calculated
8456 relative to the value of the stack pointer at the end of the function
8457 prologue, and moving instructions that access redzone area via frame
8458 pointer inside push sequence violates this assumption. */
8459 if (frame_pointer_needed && frame.red_zone_size)
8460 emit_insn (gen_memory_blockage ());
8462 /* Emit cld instruction if stringops are used in the function. */
8463 if (TARGET_CLD && ix86_current_function_needs_cld)
8464 emit_insn (gen_cld ());
8467 /* Emit code to restore saved registers using MOV insns. First register
8468 is restored from POINTER + OFFSET. */
8470 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8471 int maybe_eh_return)
8474 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8476 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8477 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8479 /* Ensure that adjust_address won't be forced to produce pointer
8480 out of range allowed by x86-64 instruction set. */
8481 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset doesn't fit in a sign-extended 32-bit displacement:
   compute POINTER + OFFSET into %r11 and address through it.  */
8485 r11 = gen_rtx_REG (DImode, R11_REG);
8486 emit_move_insn (r11, GEN_INT (offset));
8487 emit_insn (gen_adddi3 (r11, r11, pointer));
8488 base_address = gen_rtx_MEM (Pmode, r11);
8491 emit_move_insn (gen_rtx_REG (Pmode, regno),
8492 adjust_address (base_address, Pmode, offset));
8493 offset += UNITS_PER_WORD;
8497 /* Emit code to restore saved SSE registers using MOV insns. First register
8498 is restored from POINTER + OFFSET.  Slots are TImode (16 bytes) and
   marked 128-bit aligned, matching ix86_emit_save_sse_regs_using_mov.  */
8500 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8501 int maybe_eh_return)
8504 rtx base_address = gen_rtx_MEM (TImode, pointer);
8507 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8508 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8510 /* Ensure that adjust_address won't be forced to produce pointer
8511 out of range allowed by x86-64 instruction set. */
8512 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Large offset: rebase through %r11, as in the integer variant.  */
8516 r11 = gen_rtx_REG (DImode, R11_REG);
8517 emit_move_insn (r11, GEN_INT (offset));
8518 emit_insn (gen_adddi3 (r11, r11, pointer));
8519 base_address = gen_rtx_MEM (TImode, r11);
8522 mem = adjust_address (base_address, TImode, offset);
8523 set_mem_align (mem, 128);
8524 emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8529 /* Restore function stack, frame, and registers.  STYLE selects the
   caller context (style == 2 is the eh_return path; sibcall
   epilogues omit the final return instruction).  */
8532 ix86_expand_epilogue (int style)
8536 struct ix86_frame frame;
8537 HOST_WIDE_INT offset;
8539 ix86_finalize_stack_realign_flags ();
8541 /* When stack is realigned, SP must be valid. */
8542 sp_valid = (!frame_pointer_needed
8543 || current_function_sp_is_unchanging
8544 || stack_realign_fp);
8546 ix86_compute_frame_layout (&frame);
8548 /* See the comment about red zone and frame
8549 pointer usage in ix86_expand_prologue. */
8550 if (frame_pointer_needed && frame.red_zone_size)
8551 emit_insn (gen_memory_blockage ());
8553 /* Calculate start of saved registers relative to ebp. Special care
8554 must be taken for the normal return case of a function using
8555 eh_return: the eax and edx registers are marked as saved, but not
8556 restored along this path. */
8557 offset = frame.nregs;
8558 if (crtl->calls_eh_return && style != 2)
8560 offset *= -UNITS_PER_WORD;
8561 offset -= frame.nsseregs * 16 + frame.padding0;
8563 /* If we're only restoring one register and sp is not valid then
8564 using a move instruction to restore the register since it's
8565 less work than reloading sp and popping the register.
8567 The default code results in stack adjustment using add/lea instruction,
8568 while this code results in LEAVE instruction (or discrete equivalent),
8569 so it is profitable in some other cases as well. Especially when there
8570 are no registers to restore. We also use this code when TARGET_USE_LEAVE
8571 and there is exactly one register to pop. This heuristic may need some
8572 tuning in future. */
8573 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8574 || (TARGET_EPILOGUE_USING_MOVE
8575 && cfun->machine->use_fast_prologue_epilogue
8576 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8577 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
8578 || (frame_pointer_needed && TARGET_USE_LEAVE
8579 && cfun->machine->use_fast_prologue_epilogue
8580 && (frame.nregs + frame.nsseregs) == 1)
8581 || crtl->calls_eh_return)
8583 /* Restore registers. We can use ebp or esp to address the memory
8584 locations. If both are available, default to ebp, since offsets
8585 are known to be small. Only exception is esp pointing directly
8586 to the end of block of saved registers, where we may simplify
8589 If we are realigning stack with bp and sp, regs restore can't
8590 be addressed by bp. sp must be used instead. */
8592 if (!frame_pointer_needed
8593 || (sp_valid && !frame.to_allocate)
8594 || stack_realign_fp)
8596 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8597 frame.to_allocate, style == 2);
8598 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8600 + frame.nsseregs * 16
8601 + frame.padding0, style == 2);
8605 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8606 offset, style == 2);
8607 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8609 + frame.nsseregs * 16
8610 + frame.padding0, style == 2);
8613 /* eh_return epilogues need %ecx added to the stack pointer. */
8616 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8618 /* Stack align doesn't work with eh_return. */
8619 gcc_assert (!crtl->stack_realign_needed);
8621 if (frame_pointer_needed)
8623 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8624 tmp = plus_constant (tmp, UNITS_PER_WORD);
8625 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8627 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8628 emit_move_insn (hard_frame_pointer_rtx, tmp);
8630 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8635 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8636 tmp = plus_constant (tmp, (frame.to_allocate
8637 + frame.nregs * UNITS_PER_WORD
8638 + frame.nsseregs * 16
8640 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8643 else if (!frame_pointer_needed)
8644 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8645 GEN_INT (frame.to_allocate
8646 + frame.nregs * UNITS_PER_WORD
8647 + frame.nsseregs * 16
8650 /* If not an i386, mov & pop is faster than "leave". */
8651 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8652 || !cfun->machine->use_fast_prologue_epilogue)
8653 emit_insn ((*ix86_gen_leave) ());
8656 pro_epilogue_adjust_stack (stack_pointer_rtx,
8657 hard_frame_pointer_rtx,
8660 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Pop-based path: deallocate the frame, then POP each register.  */
8665 /* First step is to deallocate the stack frame so that we can
8668 If we realign stack with frame pointer, then stack pointer
8669 won't be able to recover via lea $offset(%bp), %sp, because
8670 there is a padding area between bp and sp for realign.
8671 "add $to_allocate, %sp" must be used instead. */
8674 gcc_assert (frame_pointer_needed);
8675 gcc_assert (!stack_realign_fp);
8676 pro_epilogue_adjust_stack (stack_pointer_rtx,
8677 hard_frame_pointer_rtx,
8678 GEN_INT (offset), style);
8679 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8680 frame.to_allocate, style == 2);
8681 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8682 GEN_INT (frame.nsseregs * 16), style);
8684 else if (frame.to_allocate || frame.nsseregs)
8686 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8689 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8690 GEN_INT (frame.to_allocate
8691 + frame.nsseregs * 16
8692 + frame.padding0), style);
8695 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8696 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8697 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8698 if (frame_pointer_needed)
8700 /* Leave results in shorter dependency chains on CPUs that are
8701 able to grok it fast. */
8702 if (TARGET_USE_LEAVE)
8703 emit_insn ((*ix86_gen_leave) ());
8706 /* When stack realignment really happens, recovering the stack
8707 pointer from the hard frame pointer is a must, if not using
8709 if (stack_realign_fp)
8710 pro_epilogue_adjust_stack (stack_pointer_rtx,
8711 hard_frame_pointer_rtx,
8713 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Undo the DRAP prologue: pop the saved parameter pointer (if it
   was pushed) after adjusting SP past the replicated return slot.  */
8718 if (crtl->drap_reg && crtl->stack_realign_needed)
8720 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8721 ? 0 : UNITS_PER_WORD);
8722 gcc_assert (stack_realign_drap);
8723 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8725 GEN_INT (-(UNITS_PER_WORD
8726 + param_ptr_offset))));
8727 if (!call_used_regs[REGNO (crtl->drap_reg)])
8728 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8732 /* Sibcall epilogues don't want a return instruction. */
8736 if (crtl->args.pops_args && crtl->args.size)
8738 rtx popc = GEN_INT (crtl->args.pops_args);
8740 /* i386 can only pop 64K bytes. If asked to pop more, pop
8741 return address, do explicit add, and jump indirectly to the
8744 if (crtl->args.pops_args >= 65536)
8746 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8748 /* There is no "pascal" calling convention in any 64bit ABI. */
8749 gcc_assert (!TARGET_64BIT);
8751 emit_insn (gen_popsi1 (ecx));
8752 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8753 emit_jump_insn (gen_return_indirect_internal (ecx));
8756 emit_jump_insn (gen_return_pop_internal (popc));
8759 emit_jump_insn (gen_return_internal ());
8762 /* Reset from the function's potential modifications.  Restores the
   canonical PIC register number (ix86_expand_prologue may have
   switched pic_offset_table_rtx to an alternate regno).  */
8765 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8766 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8768 if (pic_offset_table_rtx)
8769 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8771 /* Mach-O doesn't support labels at the end of objects, so if
8772 it looks like we might want one, insert a NOP. */
8774 rtx insn = get_last_insn ();
8777 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8778 insn = PREV_INSN (insn);
8782 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8783 fputs ("\tnop\n", file);
8789 /* Extract the parts of an RTL expression that is a valid memory address
8790 for an instruction. Return 0 if the structure of the address is
8791 grossly off. Return -1 if the address contains ASHIFT, so it is not
8792 strictly valid, but still used for computing length of lea instruction.
   Fills *OUT with base, index, displacement, scale, and segment.  */
8795 ix86_decompose_address (rtx addr, struct ix86_address *out)
8797 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8798 rtx base_reg, index_reg;
8799 HOST_WIDE_INT scale = 1;
8800 rtx scale_rtx = NULL_RTX;
8802 enum ix86_address_seg seg = SEG_DEFAULT;
8804 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8806 else if (GET_CODE (addr) == PLUS)
/* Flatten the PLUS chain into an addends[] worklist, then classify
   each addend as index*scale, segment unspec, base, or disp.  */
8816 addends[n++] = XEXP (op, 1);
8819 while (GET_CODE (op) == PLUS);
8824 for (i = n; i >= 0; --i)
8827 switch (GET_CODE (op))
8832 index = XEXP (op, 0);
8833 scale_rtx = XEXP (op, 1);
8837 if (XINT (op, 1) == UNSPEC_TP
8838 && TARGET_TLS_DIRECT_SEG_REFS
8839 && seg == SEG_DEFAULT)
8840 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8869 else if (GET_CODE (addr) == MULT)
8871 index = XEXP (addr, 0); /* index*scale */
8872 scale_rtx = XEXP (addr, 1);
8874 else if (GET_CODE (addr) == ASHIFT)
8878 /* We're called for lea too, which implements ashift on occasion. */
8879 index = XEXP (addr, 0);
8880 tmp = XEXP (addr, 1);
8881 if (!CONST_INT_P (tmp))
8883 scale = INTVAL (tmp);
8884 if ((unsigned HOST_WIDE_INT) scale > 3)
8890 disp = addr; /* displacement */
8892 /* Extract the integral value of scale. */
8895 if (!CONST_INT_P (scale_rtx))
8897 scale = INTVAL (scale_rtx);
8900 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8901 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8903 /* Allow arg pointer and stack pointer as index if there is no scaling. */
8904 if (base_reg && index_reg && scale == 1
8905 && (index_reg == arg_pointer_rtx
8906 || index_reg == frame_pointer_rtx
8907 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8910 tmp = base, base = index, index = tmp;
8911 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8914 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8915 if ((base_reg == hard_frame_pointer_rtx
8916 || base_reg == frame_pointer_rtx
8917 || base_reg == arg_pointer_rtx) && !disp
8920 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8921 Avoid this by transforming to [%esi+0].
8922 Reload calls address legitimization without cfun defined, so we need
8923 to test cfun for being non-NULL. */
8924 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8925 && base_reg && !index_reg && !disp
8927 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8930 /* Special case: encode reg+reg instead of reg*2. */
8931 if (!base && index && scale && scale == 2)
8932 base = index, base_reg = index_reg, scale = 1;
8934 /* Special case: scaling cannot be encoded without base or displacement. */
8935 if (!base && !disp && index && scale != 1)
8947 /* Return cost of the memory address x.
8948 For i386, it is better to use a complex address than let gcc copy
8949 the address into a reg and make a new pseudo. But not if the address
8950 requires two regs - that would mean more pseudos with longer
8953 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8955 struct ix86_address parts;
8957 int ok = ix86_decompose_address (x, &parts);
8961 if (parts.base && GET_CODE (parts.base) == SUBREG)
8962 parts.base = SUBREG_REG (parts.base);
8963 if (parts.index && GET_CODE (parts.index) == SUBREG)
8964 parts.index = SUBREG_REG (parts.index);
8966 /* Attempt to minimize number of registers in the address. */
8968 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8970 && (!REG_P (parts.index)
8971 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8975 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8977 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8978 && parts.base != parts.index)
8981 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
8982 since its predecode logic can't detect the length of instructions
8983 and it degenerates to vector decoded. Increase cost of such
8984 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
8985 to split such addresses or even refuse such addresses at all.
8987 Following addressing modes are affected:
8992 The first and last case may be avoidable by explicitly coding the zero in
8993 memory address, but I don't have AMD-K6 machine handy to check this
8997 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8998 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8999 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9005 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9006 this is used to form addresses to local data when -fPIC is in
9010 darwin_local_data_pic (rtx disp)
9012 return (GET_CODE (disp) == UNSPEC
9013 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9016 /* Determine if a given RTX is a valid constant. We already know this
9017 satisfies CONSTANT_P. */
9020 legitimate_constant_p (rtx x)
9022 switch (GET_CODE (x))
9027 if (GET_CODE (x) == PLUS)
9029 if (!CONST_INT_P (XEXP (x, 1)))
9034 if (TARGET_MACHO && darwin_local_data_pic (x))
9037 /* Only some unspecs are valid as "constants". */
9038 if (GET_CODE (x) == UNSPEC)
9039 switch (XINT (x, 1))
9044 return TARGET_64BIT;
/* TPOFF-style unspecs: valid only when wrapping a TLS symbol of
   the matching model.  */
9047 x = XVECEXP (x, 0, 0);
9048 return (GET_CODE (x) == SYMBOL_REF
9049 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9051 x = XVECEXP (x, 0, 0);
9052 return (GET_CODE (x) == SYMBOL_REF
9053 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9058 /* We must have drilled down to a symbol. */
9059 if (GET_CODE (x) == LABEL_REF)
9061 if (GET_CODE (x) != SYMBOL_REF)
9066 /* TLS symbols are never valid. */
9067 if (SYMBOL_REF_TLS_MODEL (x))
9070 /* DLLIMPORT symbols are never valid. */
9071 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9072 && SYMBOL_REF_DLLIMPORT_P (x))
9077 if (GET_MODE (x) == TImode
9078 && x != CONST0_RTX (TImode)
9084 if (!standard_sse_constant_p (x))
9091 /* Otherwise we handle everything else in the move patterns. */
9095 /* Determine if it's legal to put X into the constant pool. This
9096 is not possible for the address of thread-local symbols, which
9097 is checked above. */
9100 ix86_cannot_force_const_mem (rtx x)
9102 /* We can always put integral constants and vectors in memory. */
9103 switch (GET_CODE (x))
9113 return !legitimate_constant_p (x);
9117 /* Nonzero if the constant value X is a legitimate general operand
9118 when generating PIC code. It is given that flag_pic is on and
9119 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
9122 legitimate_pic_operand_p (rtx x)
9126 switch (GET_CODE (x))
/* CONST: strip an optional "+ const_int" wrapper and inspect the
   inner expression.  */
9129 inner = XEXP (x, 0);
9130 if (GET_CODE (inner) == PLUS
9131 && CONST_INT_P (XEXP (inner, 1)))
9132 inner = XEXP (inner, 0);
9134 /* Only some unspecs are valid as "constants". */
9135 if (GET_CODE (inner) == UNSPEC)
9136 switch (XINT (inner, 1))
9141 return TARGET_64BIT;
9143 x = XVECEXP (inner, 0, 0);
9144 return (GET_CODE (x) == SYMBOL_REF
9145 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9146 case UNSPEC_MACHOPIC_OFFSET:
9147 return legitimate_pic_address_disp_p (x);
9155 return legitimate_pic_address_disp_p (x);
9162 /* Determine if a given CONST RTX is a valid memory displacement
9166 legitimate_pic_address_disp_p (rtx disp)
9170 /* In 64bit mode we can allow direct addresses of symbols and labels
9171 when they are not dynamic symbols. */
9174 rtx op0 = disp, op1;
9176 switch (GET_CODE (disp))
9182 if (GET_CODE (XEXP (disp, 0)) != PLUS)
/* symbol + const_int: the constant part must stay within +/-16MB.  */
9184 op0 = XEXP (XEXP (disp, 0), 0);
9185 op1 = XEXP (XEXP (disp, 0), 1);
9186 if (!CONST_INT_P (op1)
9187 || INTVAL (op1) >= 16*1024*1024
9188 || INTVAL (op1) < -16*1024*1024)
9190 if (GET_CODE (op0) == LABEL_REF)
9192 if (GET_CODE (op0) != SYMBOL_REF)
9197 /* TLS references should always be enclosed in UNSPEC. */
9198 if (SYMBOL_REF_TLS_MODEL (op0))
9200 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9201 && ix86_cmodel != CM_LARGE_PIC)
9209 if (GET_CODE (disp) != CONST)
9211 disp = XEXP (disp, 0);
/* 64-bit: beyond the direct-address cases above, only GOT-relative
   unspecs are acceptable.  */
9215 /* It is unsafe to allow PLUS expressions. This limits allowed distance
9216 of GOT tables. We should not need these anyway. */
9217 if (GET_CODE (disp) != UNSPEC
9218 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9219 && XINT (disp, 1) != UNSPEC_GOTOFF
9220 && XINT (disp, 1) != UNSPEC_PLTOFF))
9223 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9224 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit: strip an optional "+ const_int", then classify the unspec.  */
9230 if (GET_CODE (disp) == PLUS)
9232 if (!CONST_INT_P (XEXP (disp, 1)))
9234 disp = XEXP (disp, 0);
9238 if (TARGET_MACHO && darwin_local_data_pic (disp))
9241 if (GET_CODE (disp) != UNSPEC)
9244 switch (XINT (disp, 1))
9249 /* We need to check for both symbols and labels because VxWorks loads
9250 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9252 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9253 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9255 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9256 While ABI specify also 32bit relocation but we don't produce it in
9257 small PIC model at all. */
9258 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9259 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9261 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9263 case UNSPEC_GOTTPOFF:
9264 case UNSPEC_GOTNTPOFF:
9265 case UNSPEC_INDNTPOFF:
/* TLS unspecs: each requires a symbol with the matching TLS model.  */
9268 disp = XVECEXP (disp, 0, 0);
9269 return (GET_CODE (disp) == SYMBOL_REF
9270 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9272 disp = XVECEXP (disp, 0, 0);
9273 return (GET_CODE (disp) == SYMBOL_REF
9274 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9276 disp = XVECEXP (disp, 0, 0);
9277 return (GET_CODE (disp) == SYMBOL_REF
9278 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
/* Target hook: validate ADDR as an x86 memory address of the canonical
   base + index*scale + disp form.  Decomposes ADDR via
   ix86_decompose_address, then checks each part in turn; STRICT selects
   strict vs. non-strict register-class checking (hard regs only vs.
   pseudos allowed).  On rejection, REASON records a human-readable cause
   (presumably consumed by debug output elided from this view).  */
9284 /* Recognizes RTL expressions that are valid memory addresses for an
9285 instruction. The MODE argument is the machine mode for the MEM
9286 expression that wants to use this address.
9288 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9289 convert common non-canonical forms to canonical form so that they will
9293 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9294 rtx addr, bool strict)
9296 struct ix86_address parts;
9297 rtx base, index, disp;
9298 HOST_WIDE_INT scale;
9299 const char *reason = NULL;
9300 rtx reason_rtx = NULL_RTX;
9302 if (ix86_decompose_address (addr, &parts) <= 0)
9304 reason = "decomposition failed";
9309 index = parts.index;
9311 scale = parts.scale;
9313 /* Validate base register.
9315 Don't allow SUBREG's that span more than a word here. It can lead to spill
9316 failures when the base is one word out of a two word structure, which is
9317 represented internally as a DImode int. */
9326 else if (GET_CODE (base) == SUBREG
9327 && REG_P (SUBREG_REG (base))
9328 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9330 reg = SUBREG_REG (base);
9333 reason = "base is not a register";
9337 if (GET_MODE (base) != Pmode)
9339 reason = "base is not in Pmode";
9343 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9344 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9346 reason = "base is not valid";
9351 /* Validate index register.
9353 Don't allow SUBREG's that span more than a word here -- same as above. */
9362 else if (GET_CODE (index) == SUBREG
9363 && REG_P (SUBREG_REG (index))
9364 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9366 reg = SUBREG_REG (index);
9369 reason = "index is not a register";
9373 if (GET_MODE (index) != Pmode)
9375 reason = "index is not in Pmode";
9379 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9380 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9382 reason = "index is not valid";
9387 /* Validate scale factor. */
/* Hardware SIB addressing permits only scales of 1, 2, 4 and 8, and a
   scale is meaningless without an index register.  */
9390 reason_rtx = GEN_INT (scale);
9393 reason = "scale without index";
9397 if (scale != 2 && scale != 4 && scale != 8)
9399 reason = "scale is not a valid multiplier";
9404 /* Validate displacement. */
9409 if (GET_CODE (disp) == CONST
9410 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9411 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9412 switch (XINT (XEXP (disp, 0), 1))
9414 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9415 used. While ABI specify also 32bit relocations, we don't produce
9416 them at all and use IP relative instead. */
9419 gcc_assert (flag_pic);
9421 goto is_legitimate_pic;
9422 reason = "64bit address unspec";
9425 case UNSPEC_GOTPCREL:
9426 gcc_assert (flag_pic);
9427 goto is_legitimate_pic;
9429 case UNSPEC_GOTTPOFF:
9430 case UNSPEC_GOTNTPOFF:
9431 case UNSPEC_INDNTPOFF:
9437 reason = "invalid address unspec";
9441 else if (SYMBOLIC_CONST (disp)
9445 && MACHOPIC_INDIRECT
9446 && !machopic_operand_p (disp)
/* PIC displacement checks: on 64-bit with a base or index register,
   only the @dtpoff/@ntpoff + constant-offset form is permitted.  */
9452 if (TARGET_64BIT && (index || base))
9454 /* foo@dtpoff(%rX) is ok. */
9455 if (GET_CODE (disp) != CONST
9456 || GET_CODE (XEXP (disp, 0)) != PLUS
9457 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9458 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9459 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9460 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9462 reason = "non-constant pic memory reference";
9466 else if (! legitimate_pic_address_disp_p (disp))
9468 reason = "displacement is an invalid pic construct";
9472 /* This code used to verify that a symbolic pic displacement
9473 includes the pic_offset_table_rtx register.
9475 While this is good idea, unfortunately these constructs may
9476 be created by "adds using lea" optimization for incorrect
9485 This code is nonsensical, but results in addressing
9486 GOT table with pic_offset_table_rtx base. We can't
9487 just refuse it easily, since it gets matched by
9488 "addsi3" pattern, that later gets split to lea in the
9489 case output register differs from input. While this
9490 can be handled by separate addsi pattern for this case
9491 that never results in lea, this seems to be easier and
9492 correct fix for crash to disable this test. */
9494 else if (GET_CODE (disp) != LABEL_REF
9495 && !CONST_INT_P (disp)
9496 && (GET_CODE (disp) != CONST
9497 || !legitimate_constant_p (disp))
9498 && (GET_CODE (disp) != SYMBOL_REF
9499 || !legitimate_constant_p (disp)))
9501 reason = "displacement is not constant";
/* 64-bit displacements must fit in a sign-extended 32-bit immediate.  */
9504 else if (TARGET_64BIT
9505 && !x86_64_immediate_operand (disp, VOIDmode))
9507 reason = "displacement is out of range";
9512 /* Everything looks valid. */
/* Return true if X is a constant that is also a valid (strict-checked)
   Pmode memory address.  */
9519 /* Determine if a given RTX is a valid constant address. */
9522 constant_address_p (rtx x)
9524 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
/* Lazily allocate and return the alias set shared by all GOT-slot memory
   references, so GOT loads never conflict-alias ordinary memory.  The set
   is cached in a function-local static across calls.  */
9527 /* Return a unique alias set for the GOT. */
9529 static alias_set_type
9530 ix86_GOT_alias_set (void)
9532 static alias_set_type set = -1;
9534 set = new_alias_set ();
/* Rewrite ORIG, a symbolic address, into a PIC-legitimate form, emitting
   any required insns.  REG, if nonzero, is a register to reuse for
   intermediate values; otherwise new pseudos are created.  Handles, in
   order: Darwin's own machinery, 64-bit direct displacements, @GOTOFF
   (medium-model 64-bit and 32-bit), @GOTPCREL / @GOT loads, dllimport
   symbols, and CONST/PLUS combinations of the above.  */
9538 /* Return a legitimate reference for ORIG (an address) using the
9539 register REG. If REG is 0, a new pseudo is generated.
9541 There are two types of references that must be handled:
9543 1. Global data references must load the address from the GOT, via
9544 the PIC reg. An insn is emitted to do this load, and the reg is
9547 2. Static data references, constant pool addresses, and code labels
9548 compute the address as an offset from the GOT, whose base is in
9549 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9550 differentiate them from global data objects. The returned
9551 address is the PIC reg + an unspec constant.
9553 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
9554 reg also appears in the address. */
9557 legitimize_pic_address (rtx orig, rtx reg)
9564 if (TARGET_MACHO && !TARGET_64BIT)
9567 reg = gen_reg_rtx (Pmode);
9568 /* Use the generic Mach-O PIC machinery. */
9569 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9573 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9575 else if (TARGET_64BIT
9576 && ix86_cmodel != CM_SMALL_PIC
9577 && gotoff_operand (addr, Pmode))
9580 /* This symbol may be referenced via a displacement from the PIC
9581 base address (@GOTOFF). */
/* During reload, note that the PIC register is live since we are about
   to use it implicitly.  */
9583 if (reload_in_progress)
9584 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9585 if (GET_CODE (addr) == CONST)
9586 addr = XEXP (addr, 0);
9587 if (GET_CODE (addr) == PLUS)
9589 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9591 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9594 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9595 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9597 tmpreg = gen_reg_rtx (Pmode);
9600 emit_move_insn (tmpreg, new_rtx);
9604 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9605 tmpreg, 1, OPTAB_DIRECT);
9608 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9610 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9612 /* This symbol may be referenced via a displacement from the PIC
9613 base address (@GOTOFF). */
9615 if (reload_in_progress)
9616 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9617 if (GET_CODE (addr) == CONST)
9618 addr = XEXP (addr, 0);
9619 if (GET_CODE (addr) == PLUS)
9621 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9623 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9626 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9627 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9628 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9632 emit_move_insn (reg, new_rtx);
/* Global (non-TLS) symbols, and VxWorks text labels which must go
   through the GOT rather than @GOTOFF.  */
9636 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9637 /* We can't use @GOTOFF for text labels on VxWorks;
9638 see gotoff_operand. */
9639 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9641 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9643 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9644 return legitimize_dllimport_symbol (addr, true);
9645 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9646 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9647 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9649 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9650 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* 64-bit small/medium PIC: a RIP-relative @GOTPCREL load.  */
9654 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9656 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9657 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9658 new_rtx = gen_const_mem (Pmode, new_rtx);
9659 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9662 reg = gen_reg_rtx (Pmode);
9663 /* Use directly gen_movsi, otherwise the address is loaded
9664 into register for CSE. We don't want to CSE this addresses,
9665 instead we CSE addresses from the GOT table, so skip this. */
9666 emit_insn (gen_movsi (reg, new_rtx));
9671 /* This symbol must be referenced via a load from the
9672 Global Offset Table (@GOT). */
9674 if (reload_in_progress)
9675 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9676 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9677 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9679 new_rtx = force_reg (Pmode, new_rtx);
9680 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9681 new_rtx = gen_const_mem (Pmode, new_rtx);
9682 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9685 reg = gen_reg_rtx (Pmode);
9686 emit_move_insn (reg, new_rtx);
/* Fallback: constants too wide for a 64-bit immediate, or CONST
   expressions combining the cases handled above.  */
9692 if (CONST_INT_P (addr)
9693 && !x86_64_immediate_operand (addr, VOIDmode))
9697 emit_move_insn (reg, addr);
9701 new_rtx = force_reg (Pmode, addr);
9703 else if (GET_CODE (addr) == CONST)
9705 addr = XEXP (addr, 0);
9707 /* We must match stuff we generate before. Assume the only
9708 unspecs that can get here are ours. Not that we could do
9709 anything with them anyway.... */
9710 if (GET_CODE (addr) == UNSPEC
9711 || (GET_CODE (addr) == PLUS
9712 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9714 gcc_assert (GET_CODE (addr) == PLUS);
9716 if (GET_CODE (addr) == PLUS)
9718 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9720 /* Check first to see if this is a constant offset from a @GOTOFF
9721 symbol reference. */
9722 if (gotoff_operand (op0, Pmode)
9723 && CONST_INT_P (op1))
9727 if (reload_in_progress)
9728 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9729 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9731 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9732 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9733 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9737 emit_move_insn (reg, new_rtx);
/* Large offsets (outside +/-16MB) must be materialized in registers.  */
9743 if (INTVAL (op1) < -16*1024*1024
9744 || INTVAL (op1) >= 16*1024*1024)
9746 if (!x86_64_immediate_operand (op1, Pmode))
9747 op1 = force_reg (Pmode, op1);
9748 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* Otherwise legitimize both halves of the PLUS recursively and
   recombine, folding a constant result back into the base.  */
9754 base = legitimize_pic_address (XEXP (addr, 0), reg);
9755 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9756 base == reg ? NULL_RTX : reg);
9758 if (CONST_INT_P (new_rtx))
9759 new_rtx = plus_constant (base, INTVAL (new_rtx));
9762 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9764 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9765 new_rtx = XEXP (new_rtx, 1);
9767 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
/* Build an rtx for the thread pointer (an UNSPEC_TP).  When TO_REG is
   true the value is copied into a fresh pseudo via an explicit SET insn
   and the register is returned instead of the bare unspec.  */
9775 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9778 get_thread_pointer (int to_reg)
9782 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9786 reg = gen_reg_rtx (Pmode);
9787 insn = gen_rtx_SET (VOIDmode, reg, tp);
9788 insn = emit_insn (insn);
/* Expand a TLS symbol reference X under access MODEL into legitimate RTL,
   emitting the insns each model requires (tls_get_addr libcalls for the
   dynamic models, GOT/thread-pointer arithmetic for the exec models).
   FOR_MOV distinguishes a full-register load from a memory-address use,
   which decides whether the thread pointer must be forced to a register
   or can stay a segment-based reference.  */
9793 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
9794 false if we expect this to be used for a memory address and true if
9795 we expect to load the address into a register. */
9798 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9800 rtx dest, base, off, pic, tp;
9805 case TLS_MODEL_GLOBAL_DYNAMIC:
9806 dest = gen_reg_rtx (Pmode);
9807 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9809 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
/* 64-bit GD: the __tls_get_addr call returns in %rax; wrap the call
   sequence in a libcall block so it can be CSEd as a whole.  */
9811 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9814 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9815 insns = get_insns ();
9818 RTL_CONST_CALL_P (insns) = 1;
9819 emit_libcall_block (insns, dest, rax, x);
9821 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9822 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9824 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9826 if (TARGET_GNU2_TLS)
/* GNU2 (TLSDESC) yields an offset; add the thread pointer here.  */
9828 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9830 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9834 case TLS_MODEL_LOCAL_DYNAMIC:
9835 base = gen_reg_rtx (Pmode);
9836 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9838 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9840 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9843 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9844 insns = get_insns ();
9847 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9848 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9849 RTL_CONST_CALL_P (insns) = 1;
9850 emit_libcall_block (insns, base, rax, note);
9852 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9853 emit_insn (gen_tls_local_dynamic_base_64 (base));
9855 emit_insn (gen_tls_local_dynamic_base_32 (base));
9857 if (TARGET_GNU2_TLS)
9859 rtx x = ix86_tls_module_base ();
9861 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9862 gen_rtx_MINUS (Pmode, x, tp));
/* LD: module base plus the symbol's @DTPOFF offset.  */
9865 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9866 off = gen_rtx_CONST (Pmode, off);
9868 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9870 if (TARGET_GNU2_TLS)
9872 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9874 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9879 case TLS_MODEL_INITIAL_EXEC:
/* IE: load the TP offset from the GOT; the unspec flavor depends on
   target word size, PIC mode and GNU vs. Sun TLS dialect.  */
9883 type = UNSPEC_GOTNTPOFF;
9887 if (reload_in_progress)
9888 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9889 pic = pic_offset_table_rtx;
9890 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9892 else if (!TARGET_ANY_GNU_TLS)
9894 pic = gen_reg_rtx (Pmode);
9895 emit_insn (gen_set_got (pic));
9896 type = UNSPEC_GOTTPOFF;
9901 type = UNSPEC_INDNTPOFF;
9904 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9905 off = gen_rtx_CONST (Pmode, off);
9907 off = gen_rtx_PLUS (Pmode, pic, off);
9908 off = gen_const_mem (Pmode, off);
9909 set_mem_alias_set (off, ix86_GOT_alias_set ());
9911 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9913 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9914 off = force_reg (Pmode, off);
9915 return gen_rtx_PLUS (Pmode, base, off);
9919 base = get_thread_pointer (true);
9920 dest = gen_reg_rtx (Pmode);
9921 emit_insn (gen_subsi3 (dest, base, off));
9925 case TLS_MODEL_LOCAL_EXEC:
/* LE: offset is a link-time constant (@NTPOFF or @TPOFF).  */
9926 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9927 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9928 ? UNSPEC_NTPOFF : UNSPEC_TPOFF)
9929 off = gen_rtx_CONST (Pmode, off);
9931 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9933 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9934 return gen_rtx_PLUS (Pmode, base, off);
9938 base = get_thread_pointer (true);
9939 dest = gen_reg_rtx (Pmode);
9940 emit_insn (gen_subsi3 (dest, base, off));
/* Map from a dllimport'd DECL to its synthesized __imp_DECL VAR_DECL,
   cached in a GC-managed hash table so each decl gets exactly one
   import-pointer symbol.  */
9951 /* Create or return the unique __imp_DECL dllimport symbol corresponding
9954 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9955 htab_t dllimport_map;
/* Look up (or lazily create) the artificial variable whose RTL is a
   load through the "__imp_<name>" import-table slot for DECL.  */
9958 get_dllimport_decl (tree decl)
9960 struct tree_map *h, in;
9964 size_t namelen, prefixlen;
9970 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9972 in.hash = htab_hash_pointer (decl);
9973 in.base.from = decl;
9974 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9975 h = (struct tree_map *) *loc;
/* Cache miss: build the artificial extern read-only pointer decl.  */
9979 *loc = h = GGC_NEW (struct tree_map);
9981 h->base.from = decl;
9982 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9983 DECL_ARTIFICIAL (to) = 1;
9984 DECL_IGNORED_P (to) = 1;
9985 DECL_EXTERNAL (to) = 1;
9986 TREE_READONLY (to) = 1;
9988 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9989 name = targetm.strip_name_encoding (name);
/* Fastcall names and targets with no user label prefix get "__imp_";
   otherwise the extra leading underscore form "__imp__" is used.  */
9990 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9991 ? "*__imp_" : "*__imp__";
9992 namelen = strlen (name);
9993 prefixlen = strlen (prefix);
9994 imp_name = (char *) alloca (namelen + prefixlen + 1);
9995 memcpy (imp_name, prefix, prefixlen);
9996 memcpy (imp_name + prefixlen, name, namelen + 1);
9998 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9999 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10000 SET_SYMBOL_REF_DECL (rtl, to);
10001 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
10003 rtl = gen_const_mem (Pmode, rtl);
10004 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10006 SET_DECL_RTL (to, rtl);
10007 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
/* Replace SYMBOL, a dllimport'd SYMBOL_REF, with the RTL of its
   __imp_ slot (a memory load); force it into a register when WANT_REG.
   SYMBOL must carry an associated decl.  */
10012 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
10013 true if we require the result be a register. */
10016 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
10021 gcc_assert (SYMBOL_REF_DECL (symbol));
10022 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10024 x = DECL_RTL (imp_decl);
10026 x = force_reg (Pmode, x);
/* Target hook: try to rewrite an illegitimate address X (mode MODE) into
   a legitimate one.  Dispatches TLS and dllimport symbols to their
   dedicated legitimizers, routes symbolic PIC references through
   legitimize_pic_address, and canonicalizes shift/mult/plus shapes until
   ix86_legitimate_address_p accepts the result.  */
10030 /* Try machine-dependent ways of modifying an illegitimate address
10031 to be legitimate. If we find one, return the new, valid address.
10032 This macro is used in only one place: `memory_address' in explow.c.
10034 OLDX is the address as it was before break_out_memory_refs was called.
10035 In some cases it is useful to look at this to decide what needs to be done.
10037 It is always safe for this macro to do nothing. It exists to recognize
10038 opportunities to optimize the output.
10040 For the 80386, we handle X+REG by loading X into a register R and
10041 using R+REG. R will go in a general reg and indexing will be used.
10042 However, if REG is a broken-out memory address or multiplication,
10043 nothing needs to be done because REG can certainly go in a general reg.
10045 When -fpic is used, special handling is needed for symbolic references.
10046 See comments by legitimize_pic_address in i386.c for details. */
10049 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10050 enum machine_mode mode)
/* TLS symbols, bare or inside CONST (sym + offset).  */
10055 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10057 return legitimize_tls_address (x, (enum tls_model) log, false);
10058 if (GET_CODE (x) == CONST
10059 && GET_CODE (XEXP (x, 0)) == PLUS
10060 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10061 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10063 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10064 (enum tls_model) log, false);
10065 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10068 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10070 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10071 return legitimize_dllimport_symbol (x, true);
10072 if (GET_CODE (x) == CONST
10073 && GET_CODE (XEXP (x, 0)) == PLUS
10074 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10075 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10077 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10078 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10082 if (flag_pic && SYMBOLIC_CONST (x))
10083 return legitimize_pic_address (x, 0);
10085 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10086 if (GET_CODE (x) == ASHIFT
10087 && CONST_INT_P (XEXP (x, 1))
10088 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10091 log = INTVAL (XEXP (x, 1));
10092 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10093 GEN_INT (1 << log));
10096 if (GET_CODE (x) == PLUS)
10098 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10100 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10101 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10102 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10105 log = INTVAL (XEXP (XEXP (x, 0), 1));
10106 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10107 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10108 GEN_INT (1 << log));
10111 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10112 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10113 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10116 log = INTVAL (XEXP (XEXP (x, 1), 1));
10117 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10118 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10119 GEN_INT (1 << log));
10122 /* Put multiply first if it isn't already. */
10123 if (GET_CODE (XEXP (x, 1)) == MULT)
10125 rtx tmp = XEXP (x, 0);
10126 XEXP (x, 0) = XEXP (x, 1);
10131 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10132 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10133 created by virtual register instantiation, register elimination, and
10134 similar optimizations. */
10135 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10138 x = gen_rtx_PLUS (Pmode,
10139 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10140 XEXP (XEXP (x, 1), 0)),
10141 XEXP (XEXP (x, 1), 1));
10145 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10146 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10147 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10148 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10149 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10150 && CONSTANT_P (XEXP (x, 1)))
10153 rtx other = NULL_RTX;
10155 if (CONST_INT_P (XEXP (x, 1)))
10157 constant = XEXP (x, 1);
10158 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10160 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10162 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10163 other = XEXP (x, 1);
10171 x = gen_rtx_PLUS (Pmode,
10172 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10173 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10174 plus_constant (other, INTVAL (constant)));
/* After each canonicalization step, stop as soon as the address
   validates (non-strict check).  */
10178 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10181 if (GET_CODE (XEXP (x, 0)) == MULT)
10184 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10187 if (GET_CODE (XEXP (x, 1)) == MULT)
10190 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10194 && REG_P (XEXP (x, 1))
10195 && REG_P (XEXP (x, 0)))
10198 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10201 x = legitimize_pic_address (x, 0);
10204 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
/* Last resort: force one operand of the PLUS into a fresh register.  */
10207 if (REG_P (XEXP (x, 0)))
10209 rtx temp = gen_reg_rtx (Pmode);
10210 rtx val = force_operand (XEXP (x, 1), temp);
10212 emit_move_insn (temp, val);
10214 XEXP (x, 1) = temp;
10218 else if (REG_P (XEXP (x, 1)))
10220 rtx temp = gen_reg_rtx (Pmode);
10221 rtx val = force_operand (XEXP (x, 0), temp);
10223 emit_move_insn (temp, val);
10225 XEXP (x, 0) = temp;
/* Emit X, a PIC-annotated constant address expression, in assembler
   syntax to FILE.  Recurses over PLUS/MINUS/CONST, prints symbols and
   labels with any @PLT decoration, and translates each PIC/TLS UNSPEC
   into its relocation suffix (@GOT, @GOTOFF, @TPOFF, ...).  CODE is the
   operand print code ('P' requests PLT-style calls).  */
10233 /* Print an integer constant expression in assembler syntax. Addition
10234 and subtraction are the only arithmetic that may appear in these
10235 expressions. FILE is the stdio stream to write to, X is the rtx, and
10236 CODE is the operand print code from the output string. */
10239 output_pic_addr_const (FILE *file, rtx x, int code)
10243 switch (GET_CODE (x))
10246 gcc_assert (flag_pic);
10251 if (! TARGET_MACHO || TARGET_64BIT)
10252 output_addr_const (file, x);
10255 const char *name = XSTR (x, 0);
10257 /* Mark the decl as referenced so that cgraph will
10258 output the function. */
10259 if (SYMBOL_REF_DECL (x))
10260 mark_decl_referenced (SYMBOL_REF_DECL (x));
10263 if (MACHOPIC_INDIRECT
10264 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10265 name = machopic_indirection_name (x, /*stub_p=*/true);
10267 assemble_name (file, name);
/* Non-local symbols called with the 'P' code go through the PLT,
   except on Mach-O and the 64-bit MS ABI.  */
10269 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10270 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10271 fputs ("@PLT", file);
10278 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10279 assemble_name (asm_out_file, buf);
10283 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10287 /* This used to output parentheses around the expression,
10288 but that does not work on the 386 (either ATT or BSD assembler). */
10289 output_pic_addr_const (file, XEXP (x, 0), code);
10293 if (GET_MODE (x) == VOIDmode)
10295 /* We can use %d if the number is <32 bits and positive. */
10296 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10297 fprintf (file, "0x%lx%08lx",
10298 (unsigned long) CONST_DOUBLE_HIGH (x),
10299 (unsigned long) CONST_DOUBLE_LOW (x));
10301 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10304 /* We can't handle floating point constants;
10305 PRINT_OPERAND must handle them. */
10306 output_operand_lossage ("floating constant misused");
10310 /* Some assemblers need integer constants to appear first. */
10311 if (CONST_INT_P (XEXP (x, 0)))
10313 output_pic_addr_const (file, XEXP (x, 0), code);
10315 output_pic_addr_const (file, XEXP (x, 1), code);
10319 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10320 output_pic_addr_const (file, XEXP (x, 1), code);
10322 output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: bracket style differs between AT&T and Intel dialects.  */
10328 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10329 output_pic_addr_const (file, XEXP (x, 0), code);
10331 output_pic_addr_const (file, XEXP (x, 1), code);
10333 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand, then its relocation suffix.  */
10337 gcc_assert (XVECLEN (x, 0) == 1);
10338 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10339 switch (XINT (x, 1))
10342 fputs ("@GOT", file);
10344 case UNSPEC_GOTOFF:
10345 fputs ("@GOTOFF", file);
10347 case UNSPEC_PLTOFF:
10348 fputs ("@PLTOFF", file);
10350 case UNSPEC_GOTPCREL:
10351 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10352 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10354 case UNSPEC_GOTTPOFF:
10355 /* FIXME: This might be @TPOFF in Sun ld too. */
10356 fputs ("@GOTTPOFF", file);
10359 fputs ("@TPOFF", file);
10361 case UNSPEC_NTPOFF:
10363 fputs ("@TPOFF", file);
10365 fputs ("@NTPOFF", file);
10367 case UNSPEC_DTPOFF:
10368 fputs ("@DTPOFF", file);
10370 case UNSPEC_GOTNTPOFF:
10372 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10373 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10375 fputs ("@GOTNTPOFF", file);
10377 case UNSPEC_INDNTPOFF:
10378 fputs ("@INDNTPOFF", file);
10381 case UNSPEC_MACHOPIC_OFFSET:
10383 machopic_output_function_base_name (file);
10387 output_operand_lossage ("invalid UNSPEC as operand");
10393 output_operand_lossage ("invalid expression as operand");
/* Emit a DTP-relative relocation for X into the DWARF debug section:
   a size directive, the constant, the @DTPOFF suffix, and (for sizes
   needing padding — presumably the 8-byte case) a trailing ", 0".  */
10397 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10398 We need to emit DTP-relative relocations. */
10400 static void ATTRIBUTE_UNUSED
10401 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10403 fputs (ASM_LONG, file);
10404 output_addr_const (file, x);
10405 fputs ("@DTPOFF", file);
10411 fputs (", 0", file);
10414 gcc_unreachable ();
/* Test whether X denotes the PIC register, either directly (a REG with
   the PIC register number) or as a cselib VALUE equivalent to it, as can
   happen when called from ix86_find_base_term during alias analysis.  */
10418 /* Return true if X is a representation of the PIC register. This copes
10419 with calls from ix86_find_base_term, where the register might have
10420 been replaced by a cselib value. */
10423 ix86_pic_register_p (rtx x)
10425 if (GET_CODE (x) == VALUE)
10426 return (pic_offset_table_rtx
10427 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx))
10429 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
/* Undo PIC legitimization: given ORIG_X, recognize PIC-reg + GOT/GOTOFF
   (optionally with a register addend and/or constant offset) and return
   the underlying symbol plus those addends; return the input-derived
   value unchanged when no PIC construct is recognized.  */
10432 /* In the name of slightly smaller debug output, and to cater to
10433 general assembler lossage, recognize PIC+GOTOFF and turn it back
10434 into a direct symbol reference.
10436 On Darwin, this is necessary to avoid a crash, because Darwin
10437 has a different PIC label for each routine but the DWARF debugging
10438 information is not associated with any particular routine, so it's
10439 necessary to remove references to the PIC label from RTL stored by
10440 the DWARF output code. */
10443 ix86_delegitimize_address (rtx orig_x)
10446 /* reg_addend is NULL or a multiple of some register. */
10447 rtx reg_addend = NULL_RTX;
10448 /* const_addend is NULL or a const_int. */
10449 rtx const_addend = NULL_RTX;
10450 /* This is the result, or NULL. */
10451 rtx result = NULL_RTX;
/* 64-bit case: a memory load through CONST (UNSPEC_GOTPCREL sym)
   delegitimizes straight to the symbol.  */
10458 if (GET_CODE (x) != CONST
10459 || GET_CODE (XEXP (x, 0)) != UNSPEC
10460 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10461 || !MEM_P (orig_x))
10463 return XVECEXP (XEXP (x, 0), 0, 0);
10466 if (GET_CODE (x) != PLUS
10467 || GET_CODE (XEXP (x, 1)) != CONST)
10470 if (ix86_pic_register_p (XEXP (x, 0)))
10471 /* %ebx + GOT/GOTOFF */
10473 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10475 /* %ebx + %reg * scale + GOT/GOTOFF */
10476 reg_addend = XEXP (x, 0);
10477 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10478 reg_addend = XEXP (reg_addend, 1);
10479 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10480 reg_addend = XEXP (reg_addend, 0);
10483 if (!REG_P (reg_addend)
10484 && GET_CODE (reg_addend) != MULT
10485 && GET_CODE (reg_addend) != ASHIFT
/* Peel a constant offset off the CONST before inspecting the unspec.  */
10491 x = XEXP (XEXP (x, 1), 0);
10492 if (GET_CODE (x) == PLUS
10493 && CONST_INT_P (XEXP (x, 1)))
10495 const_addend = XEXP (x, 1);
/* @GOT only makes sense inside a MEM; @GOTOFF only outside one.  */
10499 if (GET_CODE (x) == UNSPEC
10500 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10501 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10502 result = XVECEXP (x, 0, 0);
10504 if (TARGET_MACHO && darwin_local_data_pic (x)
10505 && !MEM_P (orig_x))
10506 result = XVECEXP (x, 0, 0);
/* Re-attach the stripped constant and register addends.  */
10512 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10514 result = gen_rtx_PLUS (Pmode, reg_addend, result);
/* Alias-analysis hook: return the base term of address X.  For a 64-bit
   CONST (UNSPEC_GOTPCREL sym [+ offset]) reference, the base is the
   wrapped symbol; everything else defers to ix86_delegitimize_address.  */
10518 /* If X is a machine specific address (i.e. a symbol or label being
10519 referenced as a displacement from the GOT implemented using an
10520 UNSPEC), then return the base term. Otherwise return X. */
10523 ix86_find_base_term (rtx x)
10529 if (GET_CODE (x) != CONST)
10531 term = XEXP (x, 0);
/* Allow an integral offset (CONST_INT or CONST_DOUBLE) on top of the
   GOTPCREL unspec.  */
10532 if (GET_CODE (term) == PLUS
10533 && (CONST_INT_P (XEXP (term, 1))
10534 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10535 term = XEXP (term, 0);
10536 if (GET_CODE (term) != UNSPEC
10537 || XINT (term, 1) != UNSPEC_GOTPCREL)
10540 return XVECEXP (term, 0, 0);
10543 return ix86_delegitimize_address (x);
/* Write to FILE the condition-code suffix (e.g. "e", "a", "nbe") for
   comparison CODE under flags mode MODE.  REVERSE inverts the condition;
   FP selects the fcmov-style spellings that some assemblers require.
   FP comparisons are first mapped to integer condition codes.  */
put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10548 int fp, FILE *file)
10550 const char *suffix;
10552 if (mode == CCFPmode || mode == CCFPUmode)
10554 enum rtx_code second_code, bypass_code;
10555 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
/* Only single-insn-representable FP comparisons reach here.  */
10556 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10557 code = ix86_fp_compare_code_to_integer (code);
10561 code = reverse_condition (code);
10612 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10616 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10617 Those same assemblers have the same but opposite lossage on cmov. */
10618 if (mode == CCmode)
10619 suffix = fp ? "nbe" : "a";
10620 else if (mode == CCCmode)
10623 gcc_unreachable ();
10639 gcc_unreachable ();
10643 gcc_assert (mode == CCmode || mode == CCCmode);
10660 gcc_unreachable ();
10664 /* ??? As above. */
10665 gcc_assert (mode == CCmode || mode == CCCmode);
10666 suffix = fp ? "nb" : "ae";
10669 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10673 /* ??? As above. */
10674 if (mode == CCmode)
10676 else if (mode == CCCmode)
10677 suffix = fp ? "nb" : "ae";
10679 gcc_unreachable ();
/* Parity-based unordered/ordered tests.  */
10682 suffix = fp ? "u" : "p";
10685 suffix = fp ? "nu" : "np";
10688 gcc_unreachable ();
10690 fputs (suffix, file);
10693 /* Print the name of register X to FILE based on its machine mode and number.
10694 If CODE is 'w', pretend the mode is HImode.
10695 If CODE is 'b', pretend the mode is QImode.
10696 If CODE is 'k', pretend the mode is SImode.
10697 If CODE is 'q', pretend the mode is DImode.
10698 If CODE is 'x', pretend the mode is V4SFmode.
10699 If CODE is 't', pretend the mode is V8SFmode.
10700 If CODE is 'h', pretend the reg is the 'high' byte register.
10701 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10702 If CODE is 'd', duplicate the operand for AVX instruction.
/* NOTE(review): numbering gaps show this listing elides lines, including
   the size assignments after each CODE test and several switch cases.  */
10706 print_reg (rtx x, int code, FILE *file)
10709 bool duplicated = code == 'd' && TARGET_AVX;
/* Fixed/internal registers must never reach the assembler printer.  */
10711 gcc_assert (x == pc_rtx
10712 || (REGNO (x) != ARG_POINTER_REGNUM
10713 && REGNO (x) != FRAME_POINTER_REGNUM
10714 && REGNO (x) != FLAGS_REG
10715 && REGNO (x) != FPSR_REG
10716 && REGNO (x) != FPCR_REG));
10718 if (ASSEMBLER_DIALECT == ASM_ATT)
/* pc_rtx prints as "rip", which only exists in 64-bit mode.  */
10723 gcc_assert (TARGET_64BIT);
10724 fputs ("rip", file);
/* Translate the override CODE into an operand byte size (elided
   assignments between these tests).  */
10728 if (code == 'w' || MMX_REG_P (x))
10730 else if (code == 'b')
10732 else if (code == 'k')
10734 else if (code == 'q')
10736 else if (code == 'y')
10738 else if (code == 'h')
10740 else if (code == 'x')
10742 else if (code == 't')
10745 code = GET_MODE_SIZE (GET_MODE (x));
10747 /* Irritatingly, AMD extended registers use different naming convention
10748 from the normal registers. */
10749 if (REX_INT_REG_P (x))
10751 gcc_assert (TARGET_64BIT);
/* r8..r15 have no ah/bh-style high-byte aliases.  */
10755 error ("extended registers have no high halves");
10758 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10761 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10764 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10767 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10770 error ("unsupported operand size for extended register");
10780 if (STACK_TOP_P (x))
/* Non-FP registers get an 'r'/'e' size prefix (rax vs eax).  */
10789 if (! ANY_FP_REG_P (x))
10790 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10795 reg = hi_reg_name[REGNO (x)];
/* Bounds-check before indexing the QImode name tables.  */
10798 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10800 reg = qi_reg_name[REGNO (x)];
10803 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10805 reg = qi_high_reg_name[REGNO (x)];
10810 gcc_assert (!duplicated);
/* "+ 1" skips the leading size letter of the hi_reg_name entry.  */
10812 fputs (hi_reg_name[REGNO (x)] + 1, file);
10817 gcc_unreachable ();
/* AVX 'd': emit the register a second time as an extra source operand.  */
10823 if (ASSEMBLER_DIALECT == ASM_ATT)
10824 fprintf (file, ", %%%s", reg);
10826 fprintf (file, ", %s", reg);
10830 /* Locate some local-dynamic symbol still in use by this function
10831 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: records the first local-dynamic TLS SYMBOL_REF
   found into cfun->machine->some_ld_name.  (Return statements elided
   from this listing.)  */
10835 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10839 if (GET_CODE (x) == SYMBOL_REF
10840 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10842 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return the name of some local-dynamic TLS symbol used in the current
   function, caching it in cfun->machine->some_ld_name.  Aborts if the
   function contains no such symbol.  */
10849 static const char *
10850 get_some_local_dynamic_name (void)
/* Fast path: answer already cached by a previous call.  */
10854 if (cfun->machine->some_ld_name)
10855 return cfun->machine->some_ld_name;
/* Walk every insn pattern until the callback finds a candidate.  */
10857 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10859 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10860 return cfun->machine->some_ld_name;
/* Caller only uses '%&' when such a symbol exists, so this is fatal.  */
10862 gcc_unreachable ();
10865 /* Meaning of CODE:
10866 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10867 C -- print opcode suffix for set/cmov insn.
10868 c -- like C, but print reversed condition
10869 E,e -- likewise, but for compare-and-branch fused insn.
10870 F,f -- likewise, but for floating-point.
10871 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10873 R -- print the prefix for register names.
10874 z -- print the opcode suffix for the size of the current operand.
10875 Z -- likewise, with special suffixes for x87 instructions.
10876 * -- print a star (in certain assembler syntax)
10877 A -- print an absolute memory reference.
10878 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10879 s -- print a shift double count, followed by the assemblers argument
10881 b -- print the QImode name of the register for the indicated operand.
10882 %b0 would print %al if operands[0] is reg 0.
10883 w -- likewise, print the HImode name of the register.
10884 k -- likewise, print the SImode name of the register.
10885 q -- likewise, print the DImode name of the register.
10886 x -- likewise, print the V4SFmode name of the register.
10887 t -- likewise, print the V8SFmode name of the register.
10888 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10889 y -- print "st(0)" instead of "st" as a register.
10890 d -- print duplicated register operand for AVX instruction.
10891 D -- print condition for SSE cmp instruction.
10892 P -- if PIC, print an @PLT suffix.
10893 X -- don't print any sort of PIC '@' suffix for a symbol.
10894 & -- print some in-use local-dynamic symbol name.
10895 H -- print a memory address offset by 8; used for sse high-parts
10896 Y -- print condition for SSE5 com* instruction.
10897 + -- print a branch hint as 'cs' or 'ds' prefix
10898 ; -- print a semicolon (after prefixes due to bug in older gas).
/* NOTE(review): large numbering gaps below — this listing elides the
   switch skeleton, case labels and many arms.  Comments mark only the
   visible fragments of each case.  */
10902 print_operand (FILE *file, rtx x, int code)
/* '*': AT&T syntax marks indirect jump/call targets with a star.  */
10909 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': emit a local-dynamic TLS symbol name (see helper above).  */
10914 assemble_name (file, get_some_local_dynamic_name ())
10918 switch (ASSEMBLER_DIALECT)
10925 /* Intel syntax. For absolute addresses, registers should not
10926 be surrounded by braces. */
10930 PRINT_OPERAND (file, x, 0);
10937 gcc_unreachable ();
10940 PRINT_OPERAND (file, x, 0);
/* Size-letter codes ('L','W','B','Q','S','T'): in AT&T syntax these
   print an explicit opcode suffix letter (elided putc calls).  */
10945 if (ASSEMBLER_DIALECT == ASM_ATT)
10950 if (ASSEMBLER_DIALECT == ASM_ATT)
10955 if (ASSEMBLER_DIALECT == ASM_ATT)
10960 if (ASSEMBLER_DIALECT == ASM_ATT)
10965 if (ASSEMBLER_DIALECT == ASM_ATT)
10970 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z': derive the suffix from the operand's own mode.  */
10975 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10977 /* Opcodes don't get size suffixes if using Intel opcodes. */
10978 if (ASSEMBLER_DIALECT == ASM_INTEL)
10981 switch (GET_MODE_SIZE (GET_MODE (x)))
11000 output_operand_lossage
11001 ("invalid operand size for operand code '%c'", code);
11006 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11008 (0, "non-integer operand used with operand code '%c'", code);
11012 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
11013 if (ASSEMBLER_DIALECT == ASM_INTEL)
/* 'Z': x87 variant — integer operands get fild-style suffixes.  */
11016 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11018 switch (GET_MODE_SIZE (GET_MODE (x)))
11021 #ifdef HAVE_AS_IX86_FILDS
11031 #ifdef HAVE_AS_IX86_FILDQ
11034 fputs ("ll", file);
11042 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11044 /* 387 opcodes don't get size suffixes
11045 if the operands are registers. */
11046 if (STACK_REG_P (x))
11049 switch (GET_MODE_SIZE (GET_MODE (x)))
11070 output_operand_lossage
11071 ("invalid operand type used with operand code '%c'", code);
11075 output_operand_lossage
11076 ("invalid operand size for operand code '%c'", code);
/* 's': shift-double count; a constant count is printed inline followed
   by ", " before the next operand.  */
11093 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11095 PRINT_OPERAND (file, x, 0);
11096 fputs (", ", file);
11101 /* Little bit of braindamage here. The SSE compare instructions
11102 does use completely different names for the comparisons that the
11103 fp conditional moves. */
/* 'D': map the comparison rtx code onto SSE cmp predicate names.  */
11106 switch (GET_CODE (x))
11109 fputs ("eq", file);
11112 fputs ("eq_us", file);
11115 fputs ("lt", file);
11118 fputs ("nge", file);
11121 fputs ("le", file);
11124 fputs ("ngt", file);
11127 fputs ("unord", file);
11130 fputs ("neq", file);
11133 fputs ("neq_oq", file);
11136 fputs ("ge", file);
11139 fputs ("nlt", file);
11142 fputs ("gt", file);
11145 fputs ("nle", file);
11148 fputs ("ord", file);
11151 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* Second predicate table (elided case label — presumably the non-AVX
   subset; confirm against full source).  */
11157 switch (GET_CODE (x))
11161 fputs ("eq", file);
11165 fputs ("lt", file);
11169 fputs ("le", file);
11172 fputs ("unord", file);
11176 fputs ("neq", file);
11180 fputs ("nlt", file);
11184 fputs ("nle", file);
11187 fputs ("ord", file);
11190 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* 'O': Sun assembler cmov syntax wants an explicit size letter.  */
11196 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11197 if (ASSEMBLER_DIALECT == ASM_ATT)
11199 switch (GET_MODE (x))
11201 case HImode: putc ('w', file); break;
11203 case SFmode: putc ('l', file); break;
11205 case DFmode: putc ('q', file); break;
11206 default: gcc_unreachable ();
/* 'C': set/cmov condition suffix; operand must be a comparison.  */
11213 if (!COMPARISON_P (x))
11215 output_operand_lossage ("operand is neither a constant nor a "
11216 "condition code, invalid operand code "
11220 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
/* 'F': like 'C' but using fcmov spellings (fp = 1).  */
11223 if (!COMPARISON_P (x))
11225 output_operand_lossage ("operand is neither a constant nor a "
11226 "condition code, invalid operand code "
11230 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11231 if (ASSEMBLER_DIALECT == ASM_ATT)
11234 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11237 /* Like above, but reverse condition */
11239 /* Check to see if argument to %c is really a constant
11240 and not a condition code which needs to be reversed. */
11241 if (!COMPARISON_P (x))
11243 output_operand_lossage ("operand is neither a constant nor a "
11244 "condition code, invalid operand "
11248 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
/* 'f': reversed fcmov condition.  */
11251 if (!COMPARISON_P (x))
11253 output_operand_lossage ("operand is neither a constant nor a "
11254 "condition code, invalid operand "
11258 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11259 if (ASSEMBLER_DIALECT == ASM_ATT)
11262 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'E'/'e': compare-and-branch fused conditions, always CCmode.  */
11266 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11270 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
11274 /* It doesn't actually matter what mode we use here, as we're
11275 only going to use this for printing. */
11276 x = adjust_address_nv (x, DImode, 8);
/* '+': branch hint prefixes, suppressed when optimizing for size or
   hints are disabled.  */
11284 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11287 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11290 int pred_val = INTVAL (XEXP (x, 0));
/* Only hint when the prediction is decisive (outside 45%..55%).  */
11292 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11293 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11295 int taken = pred_val > REG_BR_PROB_BASE / 2;
11296 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11298 /* Emit hints only in the case default branch prediction
11299 heuristics would fail. */
11300 if (taken != cputaken)
11302 /* We use 3e (DS) prefix for taken branches and
11303 2e (CS) prefix for not taken branches. */
11305 fputs ("ds ; ", file);
11307 fputs ("cs ; ", file);
/* 'Y': SSE5 com* predicate names (includes unordered variants).  */
11315 switch (GET_CODE (x))
11318 fputs ("neq", file);
11321 fputs ("eq", file);
11325 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11329 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11333 fputs ("le", file);
11337 fputs ("lt", file);
11340 fputs ("unord", file);
11343 fputs ("ord", file);
11346 fputs ("ueq", file);
11349 fputs ("nlt", file);
11352 fputs ("nle", file);
11355 fputs ("ule", file);
11358 fputs ("ult", file);
11361 fputs ("une", file);
11364 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* ';': separator after prefixes (works around an old gas bug).  */
11371 fputs (" ; ", file);
11378 output_operand_lossage ("invalid operand code '%c'", code);
/* No (or handled) code letter: dispatch on the operand's rtx class.  */
11383 print_reg (x, code, file);
11385 else if (MEM_P (x))
11387 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11388 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11389 && GET_MODE (x) != BLKmode)
11392 switch (GET_MODE_SIZE (GET_MODE (x)))
11394 case 1: size = "BYTE"; break;
11395 case 2: size = "WORD"; break;
11396 case 4: size = "DWORD"; break;
11397 case 8: size = "QWORD"; break;
11398 case 12: size = "XWORD"; break;
11400 if (GET_MODE (x) == XFmode)
11406 gcc_unreachable ();
11409 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11412 else if (code == 'w')
11414 else if (code == 'k')
11417 fputs (size, file);
11418 fputs (" PTR ", file);
11422 /* Avoid (%rip) for call operands. */
11423 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11424 && !CONST_INT_P (x))
11425 output_addr_const (file, x);
11426 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11427 output_operand_lossage ("invalid constraints for operand");
11429 output_address (x);
/* SFmode immediates print as their raw 32-bit image in hex.  */
11432 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11437 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11438 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11440 if (ASSEMBLER_DIALECT == ASM_ATT)
11442 fprintf (file, "0x%08lx", (long unsigned int) l);
11445 /* These float cases don't actually occur as immediate operands. */
11446 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11450 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11451 fprintf (file, "%s", dstr);
11454 else if (GET_CODE (x) == CONST_DOUBLE
11455 && GET_MODE (x) == XFmode)
11459 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11460 fprintf (file, "%s", dstr);
11465 /* We have patterns that allow zero sets of memory, for instance.
11466 In 64-bit mode, we should probably support all 8-byte vectors,
11467 since we can in fact encode that into an immediate. */
11468 if (GET_CODE (x) == CONST_VECTOR)
11470 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediate-operand prefixes: '$' in AT&T, "OFFSET FLAT:" in Intel.  */
11476 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11478 if (ASSEMBLER_DIALECT == ASM_ATT)
11481 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11482 || GET_CODE (x) == LABEL_REF)
11484 if (ASSEMBLER_DIALECT == ASM_ATT)
11487 fputs ("OFFSET FLAT:", file);
11490 if (CONST_INT_P (x))
11491 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
/* PIC symbols need @PLT/@GOT decorations; otherwise plain output.  */
11493 output_pic_addr_const (file, x, code);
11495 output_addr_const (file, x);
11499 /* Print a memory operand whose address is ADDR. */
/* NOTE(review): numbering gaps — the AT&T/Intel formatting skeleton
   (parentheses vs. brackets, ok-assertion) is elided from this listing.  */
11502 print_operand_address (FILE *file, rtx addr)
11504 struct ix86_address parts;
11505 rtx base, index, disp;
/* Decompose ADDR into base + index*scale + disp (+ segment).  */
11507 int ok = ix86_decompose_address (addr, &parts);
11512 index = parts.index;
11514 scale = parts.scale;
/* Explicit %fs:/%gs: segment override.  */
11522 if (ASSEMBLER_DIALECT == ASM_ATT)
11524 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11527 gcc_unreachable ();
11530 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11531 if (TARGET_64BIT && !base && !index)
/* Peel (const (plus sym const_int)) to find the underlying symbol.  */
11535 if (GET_CODE (disp) == CONST
11536 && GET_CODE (XEXP (disp, 0)) == PLUS
11537 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11538 symbol = XEXP (XEXP (disp, 0), 0);
/* Only non-TLS labels/symbols are eligible for RIP-relative form.  */
11540 if (GET_CODE (symbol) == LABEL_REF
11541 || (GET_CODE (symbol) == SYMBOL_REF
11542 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11545 if (!base && !index)
11547 /* Displacement only requires special attention. */
11549 if (CONST_INT_P (disp))
/* Intel syntax needs ds: on a bare absolute address.  */
11551 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11552 fputs ("ds:", file);
11553 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11556 output_pic_addr_const (file, disp, 0);
11558 output_addr_const (file, disp);
/* AT&T form: disp(base,index,scale).  */
11562 if (ASSEMBLER_DIALECT == ASM_ATT)
11567 output_pic_addr_const (file, disp, 0);
11568 else if (GET_CODE (disp) == LABEL_REF)
11569 output_asm_label (disp);
11571 output_addr_const (file, disp);
11576 print_reg (base, 0, file);
11580 print_reg (index, 0, file);
11582 fprintf (file, ",%d", scale);
/* Intel form: [base+index*scale+disp]; offset handled separately so the
   sign can be printed explicitly.  */
11588 rtx offset = NULL_RTX;
11592 /* Pull out the offset of a symbol; print any symbol itself. */
11593 if (GET_CODE (disp) == CONST
11594 && GET_CODE (XEXP (disp, 0)) == PLUS
11595 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11597 offset = XEXP (XEXP (disp, 0), 1);
11598 disp = gen_rtx_CONST (VOIDmode,
11599 XEXP (XEXP (disp, 0), 0));
11603 output_pic_addr_const (file, disp, 0);
11604 else if (GET_CODE (disp) == LABEL_REF)
11605 output_asm_label (disp);
11606 else if (CONST_INT_P (disp))
11609 output_addr_const (file, disp);
11615 print_reg (base, 0, file);
11618 if (INTVAL (offset) >= 0)
11620 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11624 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11631 print_reg (index, 0, file);
11633 fprintf (file, "*%d", scale);
/* Print target-specific constant address pieces: TLS UNSPEC wrappers
   (@GOTTPOFF, @TPOFF, @NTPOFF, @DTPOFF, @GOTNTPOFF, @INDNTPOFF) and the
   Mach-O PIC offset.  (Return-value handling elided from this listing.)  */
11641 output_addr_const_extra (FILE *file, rtx x)
/* Only UNSPEC wrappers are ours to print.  */
11645 if (GET_CODE (x) != UNSPEC)
11648 op = XVECEXP (x, 0, 0);
11649 switch (XINT (x, 1))
11651 case UNSPEC_GOTTPOFF:
11652 output_addr_const (file, op);
11653 /* FIXME: This might be @TPOFF in Sun ld. */
11654 fputs ("@GOTTPOFF", file);
11657 output_addr_const (file, op);
11658 fputs ("@TPOFF", file);
11660 case UNSPEC_NTPOFF:
11661 output_addr_const (file, op);
/* Relocation name differs between 64-bit (@TPOFF) and 32-bit
   (@NTPOFF); the selecting condition is elided here.  */
11663 fputs ("@TPOFF", file);
11665 fputs ("@NTPOFF", file);
11667 case UNSPEC_DTPOFF:
11668 output_addr_const (file, op);
11669 fputs ("@DTPOFF", file);
11671 case UNSPEC_GOTNTPOFF:
11672 output_addr_const (file, op);
11674 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11675 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11677 fputs ("@GOTNTPOFF", file);
11679 case UNSPEC_INDNTPOFF:
11680 output_addr_const (file, op);
11681 fputs ("@INDNTPOFF", file);
11684 case UNSPEC_MACHOPIC_OFFSET:
11685 output_addr_const (file, op);
11687 machopic_output_function_base_name (file);
11698 /* Split one or more DImode RTL references into pairs of SImode
11699 references. The RTL can be REG, offsettable MEM, integer constant, or
11700 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11701 split and "num" is its length. lo_half and hi_half are output arrays
11702 that parallel "operands". */
/* (Loop header and the MEM/volatile test are elided in this listing.)  */
11705 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11709 rtx op = operands[num];
11711 /* simplify_subreg refuse to split volatile memory addresses,
11712 but we still have to handle it. */
/* MEM path: address arithmetic at byte offsets 0 and 4.  */
11715 lo_half[num] = adjust_address (op, SImode, 0);
11716 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM path: subregs; VOIDmode constants are treated as DImode.  */
11720 lo_half[num] = simplify_gen_subreg (SImode, op,
11721 GET_MODE (op) == VOIDmode
11722 ? DImode : GET_MODE (op), 0);
11723 hi_half[num] = simplify_gen_subreg (SImode, op,
11724 GET_MODE (op) == VOIDmode
11725 ? DImode : GET_MODE (op), 4);
11729 /* Split one or more TImode RTL references into pairs of DImode
11730 references. The RTL can be REG, offsettable MEM, integer constant, or
11731 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11732 split and "num" is its length. lo_half and hi_half are output arrays
11733 that parallel "operands". */
/* Same structure as split_di, with 8-byte halves instead of 4-byte.  */
11736 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11740 rtx op = operands[num];
11742 /* simplify_subreg refuse to split volatile memory addresses, but we
11743 still have to handle it. */
11746 lo_half[num] = adjust_address (op, DImode, 0);
11747 hi_half[num] = adjust_address (op, DImode, 8);
11751 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11752 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11757 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11758 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11759 is the expression of the binary operation. The output may either be
11760 emitted here, or returned to the caller, like all output_* functions.
11762 There is no guarantee that the operands are the same mode, as they
11763 might be within FLOAT or FLOAT_EXTEND expressions. */
11765 #ifndef SYSV386_COMPAT
11766 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11767 wants to fix the assemblers because that causes incompatibility
11768 with gcc. No-one wants to fix gcc because that causes
11769 incompatibility with assemblers... You can use the option of
11770 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11771 #define SYSV386_COMPAT 1
/* NOTE(review): numbering gaps — the opcode-selection assignments
   (p/ssep strings per rtx code) and several braces are elided.  */
11775 output_387_binary_op (rtx insn, rtx *operands)
11777 static char buf[40];
11780 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11782 #ifdef ENABLE_CHECKING
11783 /* Even if we do not want to check the inputs, this documents input
11784 constraints. Which helps in understanding the following code. */
11785 if (STACK_REG_P (operands[0])
11786 && ((REG_P (operands[1])
11787 && REGNO (operands[0]) == REGNO (operands[1])
11788 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11789 || (REG_P (operands[2])
11790 && REGNO (operands[0]) == REGNO (operands[2])
11791 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11792 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11795 gcc_assert (is_sse);
/* Pick mnemonic roots by operation; MODE_INT operands select the
   fi* (integer-operand) forms (assignments elided between tests).  */
11798 switch (GET_CODE (operands[3]))
11801 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11802 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11810 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11811 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11819 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11820 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11828 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11829 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11837 gcc_unreachable ();
/* SSE path: AVX three-operand form uses the full ssep mnemonic...  */
11844 strcpy (buf, ssep);
11845 if (GET_MODE (operands[0]) == SFmode)
11846 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11848 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
/* ...while legacy SSE skips the leading 'v' (ssep + 1).  */
11852 strcpy (buf, ssep + 1);
11853 if (GET_MODE (operands[0]) == SFmode)
11854 strcat (buf, "ss\t{%2, %0|%0, %2}");
11856 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: choose the operand/pop suffix per operation.  */
11862 switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
11866 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11868 rtx temp = operands[2];
11869 operands[2] = operands[1];
11870 operands[1] = temp;
11873 /* know operands[0] == operands[1]. */
11875 if (MEM_P (operands[2]))
11881 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11883 if (STACK_TOP_P (operands[0]))
11884 /* How is it that we are storing to a dead operand[2]?
11885 Well, presumably operands[1] is dead too. We can't
11886 store the result to st(0) as st(0) gets popped on this
11887 instruction. Instead store to operands[2] (which I
11888 think has to be st(1)). st(1) will be popped later.
11889 gcc <= 2.8.1 didn't have this check and generated
11890 assembly code that the Unixware assembler rejected. */
11891 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11893 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11897 if (STACK_TOP_P (operands[0]))
11898 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11900 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): operand order matters.  */
11905 if (MEM_P (operands[1]))
11911 if (MEM_P (operands[2]))
11917 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11920 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11921 derived assemblers, confusingly reverse the direction of
11922 the operation for fsub{r} and fdiv{r} when the
11923 destination register is not st(0). The Intel assembler
11924 doesn't have this brain damage. Read !SYSV386_COMPAT to
11925 figure out what the hardware really does. */
11926 if (STACK_TOP_P (operands[0]))
11927 p = "{p\t%0, %2|rp\t%2, %0}";
11929 p = "{rp\t%2, %0|p\t%0, %2}";
11931 if (STACK_TOP_P (operands[0]))
11932 /* As above for fmul/fadd, we can't store to st(0). */
11933 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11935 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11940 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11943 if (STACK_TOP_P (operands[0]))
11944 p = "{rp\t%0, %1|p\t%1, %0}";
11946 p = "{p\t%1, %0|rp\t%0, %1}";
11948 if (STACK_TOP_P (operands[0]))
11949 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11951 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11956 if (STACK_TOP_P (operands[0]))
11958 if (STACK_TOP_P (operands[1]))
11959 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11961 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11964 else if (STACK_TOP_P (operands[1]))
11967 p = "{\t%1, %0|r\t%0, %1}";
11969 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11975 p = "{r\t%2, %0|\t%0, %2}";
11977 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11983 gcc_unreachable ();
11990 /* Return needed mode for entity in optimize_mode_switching pass. */
/* Determines which x87 control-word setting INSN requires.  */
11993 ix86_mode_needed (int entity, rtx insn)
11995 enum attr_i387_cw mode;
11997 /* The mode UNINITIALIZED is used to store control word after a
11998 function call or ASM pattern. The mode ANY specify that function
11999 has no requirements on the control word and make no changes in the
12000 bits we are interested in. */
/* Calls and asm statements may clobber the control word.  */
12003 || (NONJUMP_INSN_P (insn)
12004 && (asm_noperands (PATTERN (insn)) >= 0
12005 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12006 return I387_CW_UNINITIALIZED;
/* Unrecognizable insns impose no control-word requirement.  */
12008 if (recog_memoized (insn) < 0)
12009 return I387_CW_ANY;
12011 mode = get_attr_i387_cw (insn);
/* Each rounding mode is gated by an elided target test (numbering
   gaps); an unsupported mode falls through to gcc_unreachable.  */
12016 if (mode == I387_CW_TRUNC)
12021 if (mode == I387_CW_FLOOR)
12026 if (mode == I387_CW_CEIL)
12031 if (mode == I387_CW_MASK_PM)
12036 gcc_unreachable ();
12039 return I387_CW_ANY;
12042 /* Output code to initialize control word copies used by trunc?f?i and
12043 rounding patterns. CURRENT_MODE is set to current control word,
12044 while NEW_MODE is set to new control word. */
12047 emit_i387_cw_initialization (int mode)
/* Stack slot holding the control word as stored by fnstcw.  */
12049 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED)
12052 enum ix86_stack_slot slot;
12054 rtx reg = gen_reg_rtx (HImode);
/* Read the current control word and copy it into a pseudo.  */
12056 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12057 emit_move_insn (reg, copy_rtx (stored_mode));
/* Two code sequences: this branch uses plain and/or on the HImode reg
   (avoids partial-register tricks that stall on some CPUs).  */
12059 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12060 || optimize_function_for_size_p (cfun))
12064 case I387_CW_TRUNC:
12065 /* round toward zero (truncate) */
/* Bits 10-11 = 0b11 selects round-toward-zero.  */
12066 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12067 slot = SLOT_CW_TRUNC;
12070 case I387_CW_FLOOR:
12071 /* round down toward -oo */
12072 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12073 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12074 slot = SLOT_CW_FLOOR;
12078 /* round up toward +oo */
12079 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12080 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12081 slot = SLOT_CW_CEIL;
12084 case I387_CW_MASK_PM:
12085 /* mask precision exception for nearbyint() */
12086 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12087 slot = SLOT_CW_MASK_PM;
12091 gcc_unreachable ();
/* Alternate branch: insert the 2-bit rounding field directly with
   movsi_insv_1 (fewer instructions where partial regs are cheap).  */
12098 case I387_CW_TRUNC:
12099 /* round toward zero (truncate) */
12100 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12101 slot = SLOT_CW_TRUNC;
12104 case I387_CW_FLOOR:
12105 /* round down toward -oo */
12106 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12107 slot = SLOT_CW_FLOOR;
12111 /* round up toward +oo */
12112 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12113 slot = SLOT_CW_CEIL;
12116 case I387_CW_MASK_PM:
12117 /* mask precision exception for nearbyint() */
12118 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12119 slot = SLOT_CW_MASK_PM;
12123 gcc_unreachable ();
12127 gcc_assert (slot < MAX_386_STACK_LOCALS);
/* Store the adjusted control word to its dedicated slot so fldcw can
   load it at the use site.  */
12129 new_mode = assign_386_stack_local (HImode, slot);
12130 emit_move_insn (new_mode, reg);
12133 /* Output code for INSN to convert a float to a signed int. OPERANDS
12134 are the insn operands. The output may be [HSD]Imode and the input
12135 operand may be [SDX]Fmode. */
12138 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
/* Whether st(0) dies here decides between fist (keep) and fistp (pop).  */
12140 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12141 int dimode_p = GET_MODE (operands[0]) == DImode;
12142 int round_mode = get_attr_i387_cw (insn);
12144 /* Jump through a hoop or two for DImode, since the hardware has no
12145 non-popping instruction. We used to do this a different way, but
12146 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate st(0) so the mandatory pop leaves the value available.  */
12147 if ((dimode_p || fisttp) && !stack_top_dies)
12148 output_asm_insn ("fld\t%y1", operands);
12150 gcc_assert (STACK_TOP_P (operands[1]));
12151 gcc_assert (MEM_P (operands[0]));
12152 gcc_assert (GET_MODE (operands[1]) != TFmode);
/* SSE3 fisttp truncates regardless of the control word.  */
12155 output_asm_insn ("fisttp%Z0\t%0", operands);
/* Legacy path: swap in the truncation control word around the store.  */
12158 if (round_mode != I387_CW_ANY)
12159 output_asm_insn ("fldcw\t%3", operands);
12160 if (stack_top_dies || dimode_p)
12161 output_asm_insn ("fistp%Z0\t%0", operands);
12163 output_asm_insn ("fist%Z0\t%0", operands);
12164 if (round_mode != I387_CW_ANY)
12165 output_asm_insn ("fldcw\t%2", operands);
12171 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12172 have the values zero or one, indicates the ffreep insn's operand
12173 from the OPERANDS array. */
12175 static const char *
12176 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12178 if (TARGET_USE_FFREEP)
12179 #if HAVE_AS_IX86_FFREEP
12180 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit its raw encoding via .word, patching
   the placeholder digit with the stack-register number.  */
12183 static char retval[] = ".word\t0xc_df";
12184 int regno = REGNO (operands[opno]);
12186 gcc_assert (FP_REGNO_P (regno));
/* Index 9 is the '_' in "0xc_df"; df c0+r is the ffreep st(r) opcode.  */
12188 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not wanted: plain fstp pops the register.  */
12193 return opno ? "fstp\t%y1" : "fstp\t%y0";
12197 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12198 should be used. UNORDERED_P is true when fucom should be used. */
12201 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12203 int stack_top_dies;
12204 rtx cmp_op0, cmp_op1;
12205 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Operand positions differ between the two insn patterns that reach
   here (selecting condition elided).  */
12209 cmp_op0 = operands[0];
12210 cmp_op1 = operands[1];
12214 cmp_op0 = operands[1];
12215 cmp_op1 = operands[2];
/* SSE path: pick [v]ucomis[sd] / [v]comis[sd]; "+ 1" skips the AVX 'v'
   prefix for legacy SSE.  */
12220 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12221 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12222 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12223 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12225 if (GET_MODE (operands[0]) == SFmode)
12227 return &ucomiss[TARGET_AVX ? 0 : 1];
12229 return &comiss[TARGET_AVX ? 0 : 1];
12232 return &ucomisd[TARGET_AVX ? 0 : 1];
12234 return &comisd[TARGET_AVX ? 0 : 1];
/* x87 path: first operand must be st(0).  */
12237 gcc_assert (STACK_TOP_P (cmp_op0));
12239 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, freeing st(0) afterwards if it dies.  */
12241 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12243 if (stack_top_dies)
12245 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12246 return output_387_ffreep (operands, 1);
12249 return "ftst\n\tfnstsw\t%0";
12252 if (STACK_REG_P (cmp_op1)
12254 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12255 && REGNO (cmp_op1) != FIRST_STACK_REG)
12257 /* If both the top of the 387 stack dies, and the other operand
12258 is also a stack register that dies, then this must be a
12259 `fcompp' float compare */
12263 /* There is no double popping fcomi variant. Fortunately,
12264 eflags is immune from the fstp's cc clobbering. */
12266 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12268 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12269 return output_387_ffreep (operands, 0);
12274 return "fucompp\n\tfnstsw\t%0";
12276 return "fcompp\n\tfnstsw\t%0";
12281 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12283 static const char * const alt[16] =
12285 "fcom%Z2\t%y2\n\tfnstsw\t%0",
12286 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12287 "fucom%Z2\t%y2\n\tfnstsw\t%0",
12288 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12290 "ficom%Z2\t%y2\n\tfnstsw\t%0",
12291 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12295 "fcomi\t{%y1, %0|%0, %y1}",
12296 "fcomip\t{%y1, %0|%0, %y1}",
12297 "fucomi\t{%y1, %0|%0, %y1}",
12298 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index matching the table layout documented above.  */
12309 mask = eflags_p << 3;
12310 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12311 mask |= unordered_p << 1;
12312 mask |= stack_top_dies;
12314 gcc_assert (mask < 16);
/* Emit one element of a jump-table address vector: ".long L<value>"
   (or ".quad" when CASE_VECTOR_MODE is DImode — gate elided).  */
12323 ix86_output_addr_vec_elt (FILE *file, int value)
12325 const char *directive = ASM_LONG;
12329 directive = ASM_QUAD;
12331 gcc_assert (!TARGET_64BIT);
12334 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a PIC jump table as a difference of labels
   (or a @GOTOFF / GOT-relative expression, per target).  */
12338 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12340 const char *directive = ASM_LONG;
12343 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12344 directive = ASM_QUAD;
12346 gcc_assert (!TARGET_64BIT);
12348 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12349 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12350 fprintf (file, "%s%s%d-%s%d\n",
12351 directive, LPREFIX, value, LPREFIX, rel);
12352 else if (HAVE_AS_GOTOFF_IN_DATA)
12353 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12355 else if (TARGET_MACHO)
/* Mach-O: difference against the PIC base function label.  */
12357 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12358 machopic_output_function_base_name (file);
12359 fprintf(file, "\n");
/* Fallback: GOT-relative expression "_GLOBAL_OFFSET_TABLE_+[.-Ln]".  */
12363 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12364 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12367 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Clear DEST.  Prefers "xor reg,reg" (smaller, breaks dependencies)
   unless the target tunes for mov0; xor requires an explicit FLAGS
   clobber in the RTL since it modifies EFLAGS.  */
12371 ix86_expand_clear (rtx dest)
12375 /* We play register width games, which are only valid after reload. */
12376 gcc_assert (reload_completed);
12378 /* Avoid HImode and its attendant prefix byte. */
12379 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
/* Widen QI/HI destinations to SImode — same register, full clear.  */
12380 dest = gen_rtx_REG (SImode, REGNO (dest));
12381 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12383 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12384 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
/* The xor form clobbers EFLAGS; attach the clobber in a PARALLEL.  */
12386 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12387 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12393 /* X is an unchanging MEM. If it is a constant pool reference, return
12394 the constant pool rtx, else NULL. */
12397 maybe_get_pool_constant (rtx x)
/* Strip PIC/GOT decoration from the address first, so pool symbols
   hidden behind UNSPECs are still recognized.  */
12399 x = ix86_delegitimize_address (XEXP (x, 0));
12401 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12402 return get_pool_constant (x);
/* Expander for scalar moves in MODE.  Legitimizes the source operand
   (TLS symbols, dllimport symbols, PIC references, oversized or FP
   constants) before emitting the final SET of operands[0] from
   operands[1].  */
12408 ix86_expand_move (enum machine_mode mode, rtx operands[])
12411 enum tls_model model;
/* Case 1: a bare SYMBOL_REF source.  */
12416 if (GET_CODE (op1) == SYMBOL_REF)
12418 model = SYMBOL_REF_TLS_MODEL (op1);
/* TLS symbols must be rewritten into the model-specific access
   sequence before they can be moved.  */
12421 op1 = legitimize_tls_address (op1, model, true);
12422 op1 = force_operand (op1, op0);
12426 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12427 && SYMBOL_REF_DLLIMPORT_P (op1))
/* dllimport symbols are accessed through an import stub.  */
12428 op1 = legitimize_dllimport_symbol (op1, false);
/* Case 2: (const (plus SYMBOL_REF addend)) — legitimize the symbol
   part, then re-add the displacement.  */
12430 else if (GET_CODE (op1) == CONST
12431 && GET_CODE (XEXP (op1, 0)) == PLUS
12432 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12434 rtx addend = XEXP (XEXP (op1, 0), 1);
12435 rtx symbol = XEXP (XEXP (op1, 0), 0);
12438 model = SYMBOL_REF_TLS_MODEL (symbol);
12440 tmp = legitimize_tls_address (symbol, model, true);
12441 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12442 && SYMBOL_REF_DLLIMPORT_P (symbol))
12443 tmp = legitimize_dllimport_symbol (symbol, true);
12447 tmp = force_operand (tmp, NULL);
12448 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12449 op0, 1, OPTAB_DIRECT);
/* PIC: symbolic addresses are not legitimate immediates.  */
12455 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12457 if (TARGET_MACHO && !TARGET_64BIT)
/* Darwin 32-bit PIC goes through the machopic machinery; reuse OP0
   as scratch when it is a register (or during reload, when no new
   pseudo may be created).  */
12462 rtx temp = ((reload_in_progress
12463 || ((op0 && REG_P (op0))
12465 ? op0 : gen_reg_rtx (Pmode));
12466 op1 = machopic_indirect_data_reference (op1, temp);
12467 op1 = machopic_legitimize_pic_address (op1, mode,
12468 temp == op1 ? 0 : temp);
12470 else if (MACHOPIC_INDIRECT)
12471 op1 = machopic_indirect_data_reference (op1, 0);
12479 op1 = force_reg (Pmode, op1);
12480 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
/* During reload no fresh pseudo is available, so hand OP0 to the
   legitimizer as the scratch register.  */
12482 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12483 op1 = legitimize_pic_address (op1, reg);
/* NOTE(review): the enclosing condition for this clause is not
   visible in this excerpt; it appears to guard non-PIC sources that
   still cannot be pushed/moved directly.  */
12492 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12493 || !push_operand (op0, mode))
12495 op1 = force_reg (mode, op1);
/* Pushes of operands that an eliminable-register replacement could
   invalidate must go through a register.  */
12497 if (push_operand (op0, mode)
12498 && ! general_no_elim_operand (op1, mode))
12499 op1 = copy_to_mode_reg (mode, op1);
12501 /* Force large constants in 64bit compilation into register
12502 to get them CSEed. */
12503 if (can_create_pseudo_p ()
12504 && (mode == DImode) && TARGET_64BIT
12505 && immediate_operand (op1, mode)
12506 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12507 && !register_operand (op0, mode)
12509 op1 = copy_to_mode_reg (mode, op1);
12511 if (can_create_pseudo_p ()
12512 && FLOAT_MODE_P (mode)
12513 && GET_CODE (op1) == CONST_DOUBLE)
12515 /* If we are loading a floating point constant to a register,
12516 force the value to memory now, since we'll get better code
12517 out the back end. */
12519 op1 = validize_mem (force_const_mem (mode, op1));
12520 if (!register_operand (op0, mode))
/* mem-to-mem FP move: stage through a fresh register.  */
12522 rtx temp = gen_reg_rtx (mode);
12523 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12524 emit_move_insn (op0, temp);
/* Finally emit the (possibly rewritten) move.  */
12530 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expander for vector moves in MODE.  Forces non-trivial constants to
   the constant pool and routes under-aligned SSE memory operands
   through the misaligned-move expander.  */
12534 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12536 rtx op0 = operands[0], op1 = operands[1];
12537 unsigned int align = GET_MODE_ALIGNMENT (mode);
12539 /* Force constants other than zero into memory. We do not know how
12540 the instructions used to build constants modify the upper 64 bits
12541 of the register, once we have that information we may be able
12542 to handle some of them more efficiently. */
12543 if (can_create_pseudo_p ()
12544 && register_operand (op0, mode)
12545 && (CONSTANT_P (op1)
12546 || (GET_CODE (op1) == SUBREG
12547 && CONSTANT_P (SUBREG_REG (op1))))
/* standard_sse_constant_p > 0 means the constant (all-zeros/ones)
   can be materialized directly and need not go to memory.  */
12548 && standard_sse_constant_p (op1) <= 0)
12549 op1 = validize_mem (force_const_mem (mode, op1));
12551 /* We need to check memory alignment for SSE mode since attribute
12552 can make operands unaligned. */
12553 if (can_create_pseudo_p ()
12554 && SSE_REG_MODE_P (mode)
12555 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12556 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12560 /* ix86_expand_vector_move_misalign() does not like constants ... */
12561 if (CONSTANT_P (op1)
12562 || (GET_CODE (op1) == SUBREG
12563 && CONSTANT_P (SUBREG_REG (op1))))
12564 op1 = validize_mem (force_const_mem (mode, op1));
12566 /* ... nor both arguments in memory. */
12567 if (!register_operand (op0, mode)
12568 && !register_operand (op1, mode))
12569 op1 = force_reg (mode, op1);
12571 tmp[0] = op0; tmp[1] = op1;
12572 ix86_expand_vector_move_misalign (mode, tmp);
12576 /* Make operand1 a register if it isn't already. */
12577 if (can_create_pseudo_p ()
12578 && !register_operand (op0, mode)
12579 && !register_operand (op1, mode))
/* Avoid mem-to-mem: stage the source through a register.  */
12581 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12585 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12588 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12589 straight to ix86_expand_vector_move. */
12590 /* Code generation for scalar reg-reg moves of single and double precision data:
12591 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12595 if (x86_sse_partial_reg_dependency == true)
12600 Code generation for scalar loads of double precision data:
12601 if (x86_sse_split_regs == true)
12602 movlpd mem, reg (gas syntax)
12606 Code generation for unaligned packed loads of single precision data
12607 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12608 if (x86_sse_unaligned_move_optimal)
12611 if (x86_sse_partial_reg_dependency == true)
12623 Code generation for unaligned packed loads of double precision data
12624 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12625 if (x86_sse_unaligned_move_optimal)
12628 if (x86_sse_split_regs == true)
/* Emit an unaligned vector move of MODE, picking the instruction
   sequence by target tuning (see the decision table above).  */
12641 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* AVX path (the guard is sampled out of this excerpt): one vmovdqu /
   vmovup[sd] handles any alignment.  */
12650 switch (GET_MODE_CLASS (mode))
12652 case MODE_VECTOR_INT:
12654 switch (GET_MODE_SIZE (mode))
/* 16-byte integer vectors: view as V16QI for vmovdqu.  */
12657 op0 = gen_lowpart (V16QImode, op0);
12658 op1 = gen_lowpart (V16QImode, op1);
12659 emit_insn (gen_avx_movdqu (op0, op1));
/* 32-byte integer vectors: V32QI / 256-bit vmovdqu.  */
12662 op0 = gen_lowpart (V32QImode, op0);
12663 op1 = gen_lowpart (V32QImode, op1);
12664 emit_insn (gen_avx_movdqu256 (op0, op1));
12667 gcc_unreachable ();
12670 case MODE_VECTOR_FLOAT:
12671 op0 = gen_lowpart (mode, op0);
12672 op1 = gen_lowpart (mode, op1);
/* Per-mode unaligned FP moves (cases sampled out of this excerpt).  */
12677 emit_insn (gen_avx_movups (op0, op1));
12680 emit_insn (gen_avx_movups256 (op0, op1));
12683 emit_insn (gen_avx_movupd (op0, op1));
12686 emit_insn (gen_avx_movupd256 (op0, op1));
12689 gcc_unreachable ();
12694 gcc_unreachable ();
/* Non-AVX SSE path: register destination (memory source).  */
12702 /* If we're optimizing for size, movups is the smallest. */
12703 if (optimize_insn_for_size_p ())
12705 op0 = gen_lowpart (V4SFmode, op0);
12706 op1 = gen_lowpart (V4SFmode, op1);
12707 emit_insn (gen_sse_movups (op0, op1));
12711 /* ??? If we have typed data, then it would appear that using
12712 movdqu is the only way to get unaligned data loaded with
12714 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12716 op0 = gen_lowpart (V16QImode, op0);
12717 op1 = gen_lowpart (V16QImode, op1);
12718 emit_insn (gen_sse2_movdqu (op0, op1));
12722 if (TARGET_SSE2 && mode == V2DFmode)
/* Tuning says a single movupd is cheapest on this CPU.  */
12726 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12728 op0 = gen_lowpart (V2DFmode, op0);
12729 op1 = gen_lowpart (V2DFmode, op1);
12730 emit_insn (gen_sse2_movupd (op0, op1));
12734 /* When SSE registers are split into halves, we can avoid
12735 writing to the top half twice. */
12736 if (TARGET_SSE_SPLIT_REGS)
12738 emit_clobber (op0);
12743 /* ??? Not sure about the best option for the Intel chips.
12744 The following would seem to satisfy; the register is
12745 entirely cleared, breaking the dependency chain. We
12746 then store to the upper half, with a dependency depth
12747 of one. A rumor has it that Intel recommends two movsd
12748 followed by an unpacklpd, but this is unconfirmed. And
12749 given that the dependency depth of the unpacklpd would
12750 still be one, I'm not sure why this would be better. */
12751 zero = CONST0_RTX (V2DFmode);
/* Load the two halves with movlpd/movhpd equivalents.  */
12754 m = adjust_address (op1, DFmode, 0);
12755 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12756 m = adjust_address (op1, DFmode, 8);
12757 emit_insn (gen_sse2_loadhpd (op0, op0, m));
/* Single-precision (or pre-SSE2) unaligned load.  */
12761 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12763 op0 = gen_lowpart (V4SFmode, op0);
12764 op1 = gen_lowpart (V4SFmode, op1);
12765 emit_insn (gen_sse_movups (op0, op1));
/* Break the false dependency on the destination: either zero it or
   just clobber it, per tuning.  */
12769 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12770 emit_move_insn (op0, CONST0_RTX (mode));
12772 emit_clobber (op0);
12774 if (mode != V4SFmode)
12775 op0 = gen_lowpart (V4SFmode, op0);
12776 m = adjust_address (op1, V2SFmode, 0);
12777 emit_insn (gen_sse_loadlps (op0, op0, m));
12778 m = adjust_address (op1, V2SFmode, 8);
12779 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Non-AVX SSE path: memory destination (register source).  */
12782 else if (MEM_P (op0))
12784 /* If we're optimizing for size, movups is the smallest. */
12785 if (optimize_insn_for_size_p ())
12787 op0 = gen_lowpart (V4SFmode, op0);
12788 op1 = gen_lowpart (V4SFmode, op1);
12789 emit_insn (gen_sse_movups (op0, op1));
12793 /* ??? Similar to above, only less clear because of quote
12794 typeless stores unquote. */
12795 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12796 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12798 op0 = gen_lowpart (V16QImode, op0);
12799 op1 = gen_lowpart (V16QImode, op1);
12800 emit_insn (gen_sse2_movdqu (op0, op1));
12804 if (TARGET_SSE2 && mode == V2DFmode)
/* Store the two DF halves separately.  */
12806 m = adjust_address (op0, DFmode, 0);
12807 emit_insn (gen_sse2_storelpd (m, op1));
12808 m = adjust_address (op0, DFmode, 8);
12809 emit_insn (gen_sse2_storehpd (m, op1));
12813 if (mode != V4SFmode)
12814 op1 = gen_lowpart (V4SFmode, op1);
12815 m = adjust_address (op0, V2SFmode, 0);
12816 emit_insn (gen_sse_storelps (m, op1));
12817 m = adjust_address (op0, V2SFmode, 8);
12818 emit_insn (gen_sse_storehps (m, op1));
12822 gcc_unreachable ();
12825 /* Expand a push in MODE. This is some mode for which we do not support
12826 proper push instructions, at least from the registers that we expect
12827 the value to live in. */
12830 ix86_expand_push (enum machine_mode mode, rtx x)
/* Manually decrement the stack pointer by the operand size...  */
12834 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12835 GEN_INT (-GET_MODE_SIZE (mode)),
12836 stack_pointer_rtx, 1, OPTAB_DIRECT);
12837 if (tmp != stack_pointer_rtx)
12838 emit_move_insn (stack_pointer_rtx, tmp);
/* ...then store X at the new top of stack.  */
12840 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12842 /* When we push an operand onto stack, it has to be aligned at least
12843 at the function argument boundary. However since we don't have
12844 the argument type, we can't determine the actual argument
12846 emit_move_insn (tmp, x);
12849 /* Helper function of ix86_fixup_binary_operands to canonicalize
12850 operand order. Returns true if the operands should be swapped. */
12853 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12856 rtx dst = operands[0];
12857 rtx src1 = operands[1];
12858 rtx src2 = operands[2];
12860 /* If the operation is not commutative, we can't do anything. */
12861 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12864 /* Highest priority is that src1 should match dst. */
12865 if (rtx_equal_p (dst, src1))
12867 if (rtx_equal_p (dst, src2))
12870 /* Next highest priority is that immediate constants come second. */
12871 if (immediate_operand (src2, mode))
12873 if (immediate_operand (src1, mode))
12876 /* Lowest priority is that memory references should come second. */
12886 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12887 destination to use for the operation. If different from the true
12888 destination in operands[0], a copy operation will be required. */
12891 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12894 rtx dst = operands[0];
12895 rtx src1 = operands[1];
12896 rtx src2 = operands[2];
12898 /* Canonicalize operand order. */
12899 if (ix86_swap_binary_operands_p (code, mode, operands))
12903 /* It is invalid to swap operands of different modes. */
12904 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12911 /* Both source operands cannot be in memory. */
12912 if (MEM_P (src1) && MEM_P (src2))
12914 /* Optimization: Only read from memory once. */
12915 if (rtx_equal_p (src1, src2))
/* Load the shared memory operand once and use the register twice.  */
12917 src2 = force_reg (mode, src2);
12921 src2 = force_reg (mode, src2);
12924 /* If the destination is memory, and we do not have matching source
12925 operands, do things in registers. */
12926 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
/* Caller must copy this fresh register back to operands[0].  */
12927 dst = gen_reg_rtx (mode);
12929 /* Source 1 cannot be a constant. */
12930 if (CONSTANT_P (src1))
12931 src1 = force_reg (mode, src1);
12933 /* Source 1 cannot be a non-matching memory. */
12934 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12935 src1 = force_reg (mode, src1);
12937 operands[1] = src1;
12938 operands[2] = src2;
12942 /* Similarly, but assume that the destination has already been
12943 set up properly. */
12946 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12947 enum machine_mode mode, rtx operands[])
12949 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
/* No copy allowed here, so the fixup must not have replaced dst.  */
12950 gcc_assert (dst == operands[0]);
12953 /* Attempt to expand a binary operator. Make the expansion closer to the
12954 actual machine, then just general_operand, which will allow 3 separate
12955 memory references (one output, two input) in a single insn. */
12958 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12961 rtx src1, src2, dst, op, clob;
12963 dst = ix86_fixup_binary_operands (code, mode, operands);
12964 src1 = operands[1];
12965 src2 = operands[2];
12967 /* Emit the instruction. */
12969 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12970 if (reload_in_progress)
12972 /* Reload doesn't know about the flags register, and doesn't know that
12973 it doesn't want to clobber it. We can only do this with PLUS. */
12974 gcc_assert (code == PLUS);
/* Normal case: arithmetic insns clobber EFLAGS; say so in the RTL.  */
12979 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12980 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12983 /* Fix up the destination if needed. */
12984 if (dst != operands[0])
12985 emit_move_insn (operands[0], dst);
12988 /* Return TRUE or FALSE depending on whether the binary operator meets the
12989 appropriate constraints. */
12992 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12995 rtx dst = operands[0];
12996 rtx src1 = operands[1];
12997 rtx src2 = operands[2];
12999 /* Both source operands cannot be in memory. */
13000 if (MEM_P (src1) && MEM_P (src2))
13003 /* Canonicalize operand order for commutative operators. */
/* Mirrors the swap done by ix86_fixup_binary_operands so the checks
   below see the canonical ordering.  */
13004 if (ix86_swap_binary_operands_p (code, mode, operands))
13011 /* If the destination is memory, we must have a matching source operand. */
13012 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13015 /* Source 1 cannot be a constant. */
13016 if (CONSTANT_P (src1))
13019 /* Source 1 cannot be a non-matching memory. */
13020 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13026 /* Attempt to expand a unary operator. Make the expansion closer to the
13027 actual machine, then just general_operand, which will allow 2 separate
13028 memory references (one output, one input) in a single insn. */
13031 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13034 int matching_memory;
13035 rtx src, dst, op, clob;
13040 /* If the destination is memory, and we do not have matching source
13041 operands, do things in registers. */
13042 matching_memory = 0;
13045 if (rtx_equal_p (dst, src))
13046 matching_memory = 1;
/* Non-matching memory destination: compute into a fresh register
   and copy back at the end.  */
13048 dst = gen_reg_rtx (mode);
13051 /* When source operand is memory, destination must match. */
13052 if (MEM_P (src) && !matching_memory)
13053 src = force_reg (mode, src);
13055 /* Emit the instruction. */
13057 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13058 if (reload_in_progress || code == NOT)
13060 /* Reload doesn't know about the flags register, and doesn't know that
13061 it doesn't want to clobber it. */
13062 gcc_assert (code == NOT);
/* Other unary ops (e.g. NEG) clobber EFLAGS; attach the clobber.  */
13067 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13068 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13071 /* Fix up the destination if needed. */
13072 if (dst != operands[0])
13073 emit_move_insn (operands[0], dst);
/* How many instructions back/forward to scan for AGU/non-AGU uses.  */
13076 #define LEA_SEARCH_THRESHOLD 12
13078 /* Search backward for non-agu definition of register number REGNO1
13079 or register number REGNO2 in INSN's basic block until
13080 1. Pass LEA_SEARCH_THRESHOLD instructions, or
13081 2. Reach BB boundary, or
13082 3. Reach agu definition.
13083 Returns the distance between the non-agu definition point and INSN.
13084 If no definition point, returns -1. */
13087 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13090 basic_block bb = BLOCK_FOR_INSN (insn);
13093 enum attr_type insn_type;
/* Phase 1: scan backwards inside INSN's own basic block.  */
13095 if (insn != BB_HEAD (bb))
13097 rtx prev = PREV_INSN (insn);
13098 while (prev && distance < LEA_SEARCH_THRESHOLD)
/* Use the dataflow framework to see which regs PREV defines.  */
13103 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13104 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13105 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13106 && (regno1 == DF_REF_REGNO (*def_rec)
13107 || regno2 == DF_REF_REGNO (*def_rec)))
/* Found a definition; only a non-LEA definition counts.  */
13109 insn_type = get_attr_type (prev);
13110 if (insn_type != TYPE_LEA)
13114 if (prev == BB_HEAD (bb))
13116 prev = PREV_INSN (prev);
/* Phase 2: if the threshold was not exhausted, and the block is the
   target of a simple self-loop, continue scanning from the block end
   (i.e. across the back edge).  */
13120 if (distance < LEA_SEARCH_THRESHOLD)
13124 bool simple_loop = false;
13126 FOR_EACH_EDGE (e, ei, bb->preds)
13129 simple_loop = true;
13135 rtx prev = BB_END (bb);
13138 && distance < LEA_SEARCH_THRESHOLD)
13143 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13144 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13145 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13146 && (regno1 == DF_REF_REGNO (*def_rec)
13147 || regno2 == DF_REF_REGNO (*def_rec)))
13149 insn_type = get_attr_type (prev);
13150 if (insn_type != TYPE_LEA)
13154 prev = PREV_INSN (prev);
13162 /* get_attr_type may modify recog data. We want to make sure
13163 that recog data is valid for instruction INSN, on which
13164 distance_non_agu_define is called. INSN is unchanged here. */
13165 extract_insn_cached (insn);
13169 /* Return the distance between INSN and the next insn that uses
13170 register number REGNO0 in memory address. Return -1 if no such
13171 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
13174 distance_agu_use (unsigned int regno0, rtx insn)
13176 basic_block bb = BLOCK_FOR_INSN (insn);
/* Phase 1: scan forward inside INSN's own basic block.  */
13181 if (insn != BB_END (bb))
13183 rtx next = NEXT_INSN (insn);
13184 while (next && distance < LEA_SEARCH_THRESHOLD)
/* A memory-address use of REGNO0 means it feeds the AGU.  */
13190 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13191 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13192 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13193 && regno0 == DF_REF_REGNO (*use_rec))
13195 /* Return DISTANCE if OP0 is used in memory
13196 address in NEXT. */
/* A redefinition of REGNO0 kills the search.  */
13200 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13201 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13202 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13203 && regno0 == DF_REF_REGNO (*def_rec))
13205 /* Return -1 if OP0 is set in NEXT. */
13209 if (next == BB_END (bb))
13211 next = NEXT_INSN (next);
/* Phase 2: mirror of distance_non_agu_define — follow a simple
   self-loop edge and continue scanning from the block head.  */
13215 if (distance < LEA_SEARCH_THRESHOLD)
13219 bool simple_loop = false;
13221 FOR_EACH_EDGE (e, ei, bb->succs)
13224 simple_loop = true;
13230 rtx next = BB_HEAD (bb);
13233 && distance < LEA_SEARCH_THRESHOLD)
13239 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13240 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13241 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13242 && regno0 == DF_REF_REGNO (*use_rec))
13244 /* Return DISTANCE if OP0 is used in memory
13245 address in NEXT. */
13249 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13250 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13251 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13252 && regno0 == DF_REF_REGNO (*def_rec))
13254 /* Return -1 if OP0 is set in NEXT. */
13259 next = NEXT_INSN (next);
13267 /* Define this macro to tune LEA priority vs ADD, it take effect when
13268 there is a dilemma of choicing LEA or ADD
13269 Negative value: ADD is more preferred than LEA
13271 Positive value: LEA is more preferred than ADD*/
13272 #define IX86_LEA_PRIORITY 2
13274 /* Return true if it is ok to optimize an ADD operation to LEA
13275 operation to avoid flag register consumation. For the processors
13276 like ATOM, if the destination register of LEA holds an actual
13277 address which will be used soon, LEA is better and otherwise ADD
13281 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13282 rtx insn, rtx operands[])
13284 unsigned int regno0 = true_regnum (operands[0]);
13285 unsigned int regno1 = true_regnum (operands[1]);
13286 unsigned int regno2;
/* Without AGU-aware tuning (or when optimizing for size) LEA is only
   needed when the destination differs from the first source.  */
13288 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13289 return regno0 != regno1;
13291 regno2 = true_regnum (operands[2]);
13293 /* If a = b + c, (a!=b && a!=c), must use lea form. */
13294 if (regno0 != regno1 && regno0 != regno2)
/* Otherwise weigh the distance to the nearest non-AGU definition of
   the sources against the distance to the next AGU use of the
   result, biased by IX86_LEA_PRIORITY.  */
13298 int dist_define, dist_use;
13299 dist_define = distance_non_agu_define (regno1, regno2, insn);
13300 if (dist_define <= 0)
13303 /* If this insn has both backward non-agu dependence and forward
13304 agu dependence, the one with short distance take effect. */
13305 dist_use = distance_agu_use (regno0, insn);
13307 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13314 /* Return true if destination reg of SET_BODY is shift count of
/* Recursive worker: handles SET and PARALLEL bodies on both sides.  */
13318 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13324 /* Retrieve destination of SET_BODY. */
13325 switch (GET_CODE (set_body))
13328 set_dest = SET_DEST (set_body);
13329 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: recurse over each element of the setter.  */
13333 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13334 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13342 /* Retrieve shift count of USE_BODY. */
13343 switch (GET_CODE (use_body))
13346 shift_rtx = XEXP (use_body, 1);
/* PARALLEL: recurse over each element of the user.  */
13349 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13350 if (ix86_dep_by_shift_count_body (set_body,
13351 XVECEXP (use_body, 0, i)))
/* Only shift/rotate codes have a count operand to compare.  */
13359 && (GET_CODE (shift_rtx) == ASHIFT
13360 || GET_CODE (shift_rtx) == LSHIFTRT
13361 || GET_CODE (shift_rtx) == ASHIFTRT
13362 || GET_CODE (shift_rtx) == ROTATE
13363 || GET_CODE (shift_rtx) == ROTATERT))
13365 rtx shift_count = XEXP (shift_rtx, 1);
13367 /* Return true if shift count is dest of SET_BODY. */
13368 if (REG_P (shift_count)
13369 && true_regnum (set_dest) == true_regnum (shift_count))
13376 /* Return true if destination reg of SET_INSN is shift count of
/* Thin wrapper: compare the two insn PATTERNs via the body helper.  */
13380 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13382 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13383 PATTERN (use_insn));
13386 /* Return TRUE or FALSE depending on whether the unary operator meets the
13387 appropriate constraints. */
13390 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13391 enum machine_mode mode ATTRIBUTE_UNUSED,
13392 rtx operands[2] ATTRIBUTE_UNUSED)
13394 /* If one of operands is memory, source and destination must match. */
13395 if ((MEM_P (operands[0])
13396 || MEM_P (operands[1]))
13397 && ! rtx_equal_p (operands[0], operands[1]))
13402 /* Post-reload splitter for converting an SF or DFmode value in an
13403 SSE register into an unsigned SImode. */
13406 ix86_split_convert_uns_si_sse (rtx operands[])
13408 enum machine_mode vecmode;
13409 rtx value, large, zero_or_two31, input, two31, x;
13411 large = operands[1];
13412 zero_or_two31 = operands[2];
13413 input = operands[3];
13414 two31 = operands[4];
13415 vecmode = GET_MODE (large);
13416 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13418 /* Load up the value into the low element. We must ensure that the other
13419 elements are valid floats -- zero is the easiest such value. */
/* Memory input: merge into a zeroed vector.  */
13422 if (vecmode == V4SFmode)
13423 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13425 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Register input: zero the vector, then movss/movsd the scalar in.  */
13429 input = gen_rtx_REG (vecmode, REGNO (input));
13430 emit_move_insn (value, CONST0_RTX (vecmode));
13431 if (vecmode == V4SFmode)
13432 emit_insn (gen_sse_movss (value, value, input));
13434 emit_insn (gen_sse2_movsd (value, value, input));
13437 emit_move_insn (large, two31);
13438 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (2**31 <= value) as an all-ones/all-zeros mask.  */
13440 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13441 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* zero_or_two31 = mask ? 2**31 : 0.  */
13443 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13444 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
/* value -= (value >= 2**31 ? 2**31 : 0) — bring into signed range.  */
13446 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13447 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the compare mask into the 0x80000000 correction bit.  */
13449 large = gen_rtx_REG (V4SImode, REGNO (large));
13450 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
/* Truncating signed conversion, ...  */
13452 x = gen_rtx_REG (V4SImode, REGNO (value));
13453 if (vecmode == V4SFmode)
13454 emit_insn (gen_sse2_cvttps2dq (x, value));
13456 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* ...then re-add the high bit for values that were >= 2**31.  */
13459 emit_insn (gen_xorv4si3 (value, value, large));
13462 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13463 Expects the 64-bit DImode to be supplied in a pair of integral
13464 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13465 -mfpmath=sse, !optimize_size only. */
13468 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13470 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13471 rtx int_xmm, fp_xmm;
13472 rtx biases, exponents;
/* Get the 64-bit integer into the low half of an XMM register, by
   whichever path the tuning prefers.  */
13475 int_xmm = gen_reg_rtx (V4SImode);
13476 if (TARGET_INTER_UNIT_MOVES)
13477 emit_insn (gen_movdi_to_sse (int_xmm, input));
13478 else if (TARGET_SSE_SPLIT_REGS)
13480 emit_clobber (int_xmm);
13481 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13485 x = gen_reg_rtx (V2DImode);
13486 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13487 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Constant vector of the two double exponents used as biases.  */
13490 x = gen_rtx_CONST_VECTOR (V4SImode,
13491 gen_rtvec (4, GEN_INT (0x43300000UL),
13492 GEN_INT (0x45300000UL),
13493 const0_rtx, const0_rtx));
13494 exponents = validize_mem (force_const_mem (V4SImode, x));
13496 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13497 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13499 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13500 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13501 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13502 (0x1.0p84 + double(fp_value_hi_xmm)).
13503 Note these exponents differ by 32. */
13505 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13507 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13508 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13509 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13510 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13511 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13512 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13513 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13514 biases = validize_mem (force_const_mem (V2DFmode, biases));
13515 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13517 /* Add the upper and lower DFmode values together. */
/* SSE3 haddpd does it in one insn; otherwise unpack-high and add.  */
13519 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13522 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13523 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13524 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
/* The scalar result lives in element 0.  */
13527 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13530 /* Not used, but eases macroization of patterns. */
/* Stub referenced by macroized expander patterns; must never run.  */
13532 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13533 rtx input ATTRIBUTE_UNUSED)
13535 gcc_unreachable ();
13538 /* Convert an unsigned SImode value into a DFmode. Only currently used
13539 for SSE, but applicable anywhere. */
13542 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13544 REAL_VALUE_TYPE TWO31r;
/* x = input - 2**31 (as a signed value): flips the sign bit so the
   signed int->double conversion can be used.  */
13547 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13548 NULL, 1, OPTAB_DIRECT);
13550 fp = gen_reg_rtx (DFmode);
13551 emit_insn (gen_floatsidf2 (fp, x));
/* Add 2**31 back in the double domain (exact in DFmode).  */
13553 real_ldexp (&TWO31r, &dconst1, 31);
13554 x = const_double_from_real_value (TWO31r, DFmode);
13556 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13558 emit_move_insn (target, x);
13561 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13562 32-bit mode; otherwise we have a direct convert instruction. */
13565 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13567 REAL_VALUE_TYPE TWO32r;
13568 rtx fp_lo, fp_hi, x;
13570 fp_lo = gen_reg_rtx (DFmode);
13571 fp_hi = gen_reg_rtx (DFmode);
/* High half is signed: convert directly.  */
13573 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
/* Scale the high half by 2**32.  */
13575 real_ldexp (&TWO32r, &dconst1, 32);
13576 x = const_double_from_real_value (TWO32r, DFmode);
13577 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* Low half is unsigned: use the unsigned SI->DF helper.  */
13579 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
/* result = hi * 2**32 + lo.  */
13581 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13584 emit_move_insn (target, x);
13587 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13588 For x86_32, -mfpmath=sse, !optimize_size only. */
13590 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13592 REAL_VALUE_TYPE ONE16r;
13593 rtx fp_hi, fp_lo, int_hi, int_lo, x;
13595 real_ldexp (&ONE16r, &dconst1, 16);
13596 x = const_double_from_real_value (ONE16r, SFmode);
/* Split the 32-bit value into two non-negative 16-bit halves, each
   of which converts exactly via the signed path.  */
13597 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13598 NULL, 0, OPTAB_DIRECT);
13599 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13600 NULL, 0, OPTAB_DIRECT);
13601 fp_hi = gen_reg_rtx (SFmode);
13602 fp_lo = gen_reg_rtx (SFmode);
13603 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13604 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
/* result = hi * 2**16 + lo.  */
13605 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13607 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13609 if (!rtx_equal_p (target, fp_hi))
13610 emit_move_insn (target, fp_hi);
13613 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
13614 then replicate the value for all elements of the vector
/* Build a CONST_VECTOR in MODE whose element 0 is VALUE; remaining
   elements are VALUE (VECT) or zero (!VECT).  The dispatch on MODE is
   sampled out of this excerpt — branches below are per element mode.  */
13618 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
/* SImode elements (integer vector is always fully replicated).  */
13625 v = gen_rtvec (4, value, value, value, value);
13626 return gen_rtx_CONST_VECTOR (V4SImode, v);
/* DImode elements.  */
13630 v = gen_rtvec (2, value, value);
13631 return gen_rtx_CONST_VECTOR (V2DImode, v);
/* SFmode elements: replicate or pad with zeros.  */
13635 v = gen_rtvec (4, value, value, value, value);
13637 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13638 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13639 return gen_rtx_CONST_VECTOR (V4SFmode, v);
/* DFmode elements: replicate or pad with zero.  */
13643 v = gen_rtvec (2, value, value);
13645 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13646 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13649 gcc_unreachable ();
13653 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13654 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13655 for an SSE register. If VECT is true, then replicate the mask for
13656 all elements of the vector register. If INVERT is true, then create
13657 a mask excluding the sign bit. */
13660 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13662 enum machine_mode vec_mode, imode;
13663 HOST_WIDE_INT hi, lo;
13668 /* Find the sign bit, sign extended to 2*HWI. */
/* SF/SI case: sign bit is bit 31.  */
13674 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13675 lo = 0x80000000, hi = lo < 0;
/* DF/DI case: sign bit is bit 63; build it in (lo,hi) HWI pieces,
   handling 32-bit hosts where HOST_WIDE_INT is narrower than 64.  */
13681 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13682 if (HOST_BITS_PER_WIDE_INT >= 64)
13683 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13685 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* TF/extended case (per the visible TImode handling below): there is
   no vector mode; the mask is used as a scalar.  */
13690 vec_mode = VOIDmode;
13691 if (HOST_BITS_PER_WIDE_INT >= 64)
13694 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13701 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* INVERT: mask excludes the sign bit instead.  */
13705 lo = ~lo, hi = ~hi;
13711 mask = immed_double_const (lo, hi, imode);
13713 vec = gen_rtvec (2, v, mask);
13714 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13715 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13722 gcc_unreachable ();
/* (Second inversion site for the non-TImode path.)  */
13726 lo = ~lo, hi = ~hi;
13728 /* Force this value into the low part of a fp vector constant. */
13729 mask = immed_double_const (lo, hi, imode);
13730 mask = gen_lowpart (mode, mask);
13732 if (vec_mode == VOIDmode)
13733 return force_reg (mode, mask);
/* Vector case: splat (or zero-extend) via ix86_build_const_vector.  */
13735 v = ix86_build_const_vector (mode, vect, mask);
13736 return force_reg (vec_mode, v);
13739 /* Generate code for floating point ABS or NEG. */
/* NOTE(review): elided view -- the operand array parameter, the
   dst/src assignments, and the branch structure between the SSE and
   x87 paths are missing from this chunk.  */
13742 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13745 rtx mask, set, use, clob, dst, src;
13746 bool use_sse = false;
13747 bool vector_mode = VECTOR_MODE_P (mode);
13748 enum machine_mode elt_mode = mode;
/* For vector modes, build the mask per element.  */
13752 elt_mode = GET_MODE_INNER (mode);
13755 else if (mode == TFmode)
13757 else if (TARGET_SSE_MATH)
13758 use_sse = SSE_FLOAT_MODE_P (mode);
13760 /* NEG and ABS performed with SSE use bitwise mask operations.
13761 Create the appropriate mask now. */
/* code == ABS means invert: AND with a mask that clears only the
   sign bit; NEG XORs with the sign-bit mask.  */
13763 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13772 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13773 set = gen_rtx_SET (VOIDmode, dst, set);
/* Non-SSE path (presumably x87 -- confirm against full source):
   emit the plain unary NEG/ABS rtx...  */
13778 set = gen_rtx_fmt_e (code, mode, src);
13779 set = gen_rtx_SET (VOIDmode, dst, set);
/* ...wrapped in a PARALLEL with a USE of the mask and a flags
   clobber.  */
13782 use = gen_rtx_USE (VOIDmode, mask);
13783 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13784 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13785 gen_rtvec (3, set, use, clob)));
13792 /* Expand a copysign operation. Special case operand 0 being a constant. */
/* NOTE(review): elided view -- operand loads for op0/op1 and some
   branch scaffolding are missing from this chunk.  */
13795 ix86_expand_copysign (rtx operands[])
13797 enum machine_mode mode;
13798 rtx dest, op0, op1, mask, nmask;
13800 dest = operands[0];
13804 mode = GET_MODE (dest);
/* Constant magnitude operand: strip its sign bit up front, since
   the sign will come entirely from op1.  */
13806 if (GET_CODE (op0) == CONST_DOUBLE)
13808 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
13810 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13811 op0 = simplify_unary_operation (ABS, mode, op0, mode);
/* SF/DF: widen the constant into the low element of an SSE vector.  */
13813 if (mode == SFmode || mode == DFmode)
13815 enum machine_mode vmode;
13817 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13819 if (op0 == CONST0_RTX (mode))
13820 op0 = CONST0_RTX (vmode);
13825 if (mode == SFmode)
13826 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13827 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13829 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13831 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13834 else if (op0 != CONST0_RTX (mode))
13835 op0 = force_reg (mode, op0);
/* Single (non-replicated, non-inverted) sign-bit mask.  */
13837 mask = ix86_build_signbit_mask (mode, 0, 0);
13839 if (mode == SFmode)
13840 copysign_insn = gen_copysignsf3_const;
13841 else if (mode == DFmode)
13842 copysign_insn = gen_copysigndf3_const;
13844 copysign_insn = gen_copysigntf3_const;
13846 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable-magnitude path: needs both the mask and its complement;
   the _var patterns take an extra scratch (passed as NULL_RTX here).  */
13850 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13852 nmask = ix86_build_signbit_mask (mode, 0, 1);
13853 mask = ix86_build_signbit_mask (mode, 0, 0);
13855 if (mode == SFmode)
13856 copysign_insn = gen_copysignsf3_var;
13857 else if (mode == DFmode)
13858 copysign_insn = gen_copysigndf3_var;
13860 copysign_insn = gen_copysigntf3_var;
13862 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13866 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13867 be a constant, and so has already been expanded into a vector constant. */
/* NOTE(review): elided view -- the op0/op1 loads are missing from
   this chunk.  dest = (op1 & sign-mask) | op0, all in the vector
   mode of the mask.  */
13870 ix86_split_copysign_const (rtx operands[])
13872 enum machine_mode mode, vmode;
13873 rtx dest, op0, op1, mask, x;
13875 dest = operands[0];
13878 mask = operands[3];
13880 mode = GET_MODE (dest);
13881 vmode = GET_MODE (mask);
/* Work on dest viewed in the vector mode; keep only op1's sign bit.  */
13883 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13884 x = gen_rtx_AND (vmode, dest, mask);
13885 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* OR in the (sign-stripped) constant magnitude unless it is zero.  */
13887 if (op0 != CONST0_RTX (vmode))
13889 x = gen_rtx_IOR (vmode, dest, op0);
13890 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13894 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13895 so we have to do two masks. */
/* NOTE(review): elided view -- op0/op1 loads and some branch/brace
   lines are missing.  The register-alternative assertions below
   mirror the constraint alternatives of the copysign*_var patterns.  */
13898 ix86_split_copysign_var (rtx operands[])
13900 enum machine_mode mode, vmode;
13901 rtx dest, scratch, op0, op1, mask, nmask, x;
13903 dest = operands[0];
13904 scratch = operands[1];
13907 nmask = operands[4];
13908 mask = operands[5];
13910 mode = GET_MODE (dest);
13911 vmode = GET_MODE (mask);
13913 if (rtx_equal_p (op0, op1))
13915 /* Shouldn't happen often (it's useless, obviously), but when it does
13916 we'd generate incorrect code if we continue below. */
13917 emit_move_insn (dest, op0);
13921 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13923 gcc_assert (REGNO (op1) == REGNO (scratch));
/* scratch = op1 & mask (sign bit of op1).  */
13925 x = gen_rtx_AND (vmode, scratch, mask);
13926 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest = ~dest & op0 (magnitude of op0; dest currently holds mask).  */
13929 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13930 x = gen_rtx_NOT (vmode, dest);
13931 x = gen_rtx_AND (vmode, x, op0);
13932 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13936 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13938 x = gen_rtx_AND (vmode, scratch, mask);
13940 else /* alternative 2,4 */
13942 gcc_assert (REGNO (mask) == REGNO (scratch));
13943 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13944 x = gen_rtx_AND (vmode, scratch, op1);
13946 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13948 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13950 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13951 x = gen_rtx_AND (vmode, dest, nmask);
13953 else /* alternative 3,4 */
13955 gcc_assert (REGNO (nmask) == REGNO (dest));
13957 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13958 x = gen_rtx_AND (vmode, dest, op0);
13960 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine magnitude (dest) with sign (scratch).  */
13963 x = gen_rtx_IOR (vmode, dest, scratch);
13964 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13967 /* Return TRUE or FALSE depending on whether the first SET in INSN
13968 has source and destination with matching CC modes, and that the
13969 CC mode is at least as constrained as REQ_MODE. */
/* NOTE(review): elided view -- the switch on set_mode and the early
   `return 0;' statements inside each case are missing from this
   chunk.  */
13972 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13975 enum machine_mode set_mode;
13977 set = PATTERN (insn);
13978 if (GET_CODE (set) == PARALLEL)
13979 set = XVECEXP (set, 0, 0);
13980 gcc_assert (GET_CODE (set) == SET);
13981 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13983 set_mode = GET_MODE (SET_DEST (set));
/* CCmode is acceptable in place of CCNOmode only when comparing
   against zero.  */
13987 if (req_mode != CCNOmode
13988 && (req_mode != CCmode
13989 || XEXP (SET_SRC (set), 1) != const0_rtx))
13993 if (req_mode == CCGCmode)
13997 if (req_mode == CCGOCmode || req_mode == CCNOmode)
14001 if (req_mode == CCZmode)
14012 gcc_unreachable ();
/* The SET source and destination must agree on the CC mode.  */
14015 return (GET_MODE (SET_SRC (set)) == set_mode);
14018 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emits FLAGS_REG = compare (op0, op1) in the CC mode selected by
   SELECT_CC_MODE, and returns the (code flags 0) rtx for the flags
   consumer.  NOTE(review): elided view -- local declarations for
   `flags' and `tmp' are among the missing lines.  */
14021 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14023 enum machine_mode cmpmode;
14026 cmpmode = SELECT_CC_MODE (code, op0, op1);
14027 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14029 /* This is very simple, but making the interface the same as in the
14030 FP case makes the rest of the code easier. */
14031 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14032 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14034 /* Return the test that should be put into the flags user, i.e.
14035 the bcc, scc, or cmov instruction. */
14036 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
14039 /* Figure out whether to use ordered or unordered fp comparisons.
14040 Return the appropriate mode to use. */
14043 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14045 /* ??? In order to make all comparisons reversible, we do all comparisons
14046 non-trapping when compiling for IEEE. Once gcc is able to distinguish
14047 all forms trapping and nontrapping comparisons, we can make inequality
14048 comparisons trapping again, since it results in better code when using
14049 FCOM based compares. */
/* CCFPUmode = unordered (non-trapping on NaN), CCFPmode = ordered.  */
14050 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode needed to represent comparison CODE of OP0 and
   OP1, i.e. the minimal set of flag bits the consumer must see.
   NOTE(review): elided view -- the switch head and the `return
   CC*mode;' line of each case are missing from this chunk; only the
   case labels and their guard tests are visible.  */
14054 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14056 enum machine_mode mode = GET_MODE (op0);
14058 if (SCALAR_FLOAT_MODE_P (mode))
14060 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14061 return ix86_fp_compare_mode (code);
14066 /* Only zero flag is needed. */
14067 case EQ: /* ZF=0 */
14068 case NE: /* ZF!=0 */
14070 /* Codes needing carry flag. */
14071 case GEU: /* CF=0 */
14072 case LTU: /* CF=1 */
14073 /* Detect overflow checks. They need just the carry flag. */
14074 if (GET_CODE (op0) == PLUS
14075 && rtx_equal_p (op1, XEXP (op0, 0)))
14079 case GTU: /* CF=0 & ZF=0 */
14080 case LEU: /* CF=1 | ZF=1 */
14081 /* Detect overflow checks. They need just the carry flag. */
14082 if (GET_CODE (op0) == MINUS
14083 && rtx_equal_p (op1, XEXP (op0, 0)))
14087 /* Codes possibly doable only with sign flag when
14088 comparing against zero. */
14089 case GE: /* SF=OF or SF=0 */
14090 case LT: /* SF<>OF or SF=1 */
14091 if (op1 == const0_rtx)
14094 /* For other cases Carry flag is not required. */
14096 /* Codes doable only with sign flag when comparing
14097 against zero, but we miss jump instruction for it
14098 so we need to use relational tests against overflow
14099 that thus needs to be zero. */
14100 case GT: /* ZF=0 & SF=OF */
14101 case LE: /* ZF=1 | SF<>OF */
14102 if (op1 == const0_rtx)
14106 /* strcmp pattern do (use flags) and combine may ask us for proper
14111 gcc_unreachable ();
14115 /* Return the fixed registers used for condition codes. */
/* NOTE(review): body entirely elided in this chunk -- presumably
   sets *P1/*P2 to the flags register(s) and returns true; confirm
   against the full source.  */
14118 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14125 /* If two condition code modes are compatible, return a condition code
14126 mode which is compatible with both. Otherwise, return
/* NOTE(review): elided view -- the m1 == m2 shortcut, the switch
   head, and most returns are missing from this chunk.  */
14129 static enum machine_mode
14130 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Non-CC modes cannot be merged.  */
14135 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGCmode and CCGOCmode are mutually compatible.  */
14138 if ((m1 == CCGCmode && m2 == CCGOCmode)
14139 || (m1 == CCGOCmode && m2 == CCGCmode))
14145 gcc_unreachable ();
14175 /* These are only compatible with themselves, which we already
14181 /* Split comparison code CODE into comparisons we can do using branch
14182 instructions. BYPASS_CODE is comparison code for branch that will
14183 branch around FIRST_CODE and SECOND_CODE. If some of branches
14184 is not required, set value to UNKNOWN.
14185 We never require more than two branches. */
/* NOTE(review): elided view -- the switch head and the `break;'
   lines between cases are missing from this chunk.  */
14188 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
14189 enum rtx_code *first_code,
14190 enum rtx_code *second_code)
14192 *first_code = code;
14193 *bypass_code = UNKNOWN;
14194 *second_code = UNKNOWN;
14196 /* The fcomi comparison sets flags as follows:
/* These codes map 1:1 onto a single flags test after fcomi; no
   second or bypass branch needed.  */
14206 case GT: /* GTU - CF=0 & ZF=0 */
14207 case GE: /* GEU - CF=0 */
14208 case ORDERED: /* PF=0 */
14209 case UNORDERED: /* PF=1 */
14210 case UNEQ: /* EQ - ZF=1 */
14211 case UNLT: /* LTU - CF=1 */
14212 case UNLE: /* LEU - CF=1 | ZF=1 */
14213 case LTGT: /* EQ - ZF=0 */
/* Ordered codes that misfire on NaN: branch around with UNORDERED.  */
14215 case LT: /* LTU - CF=1 - fails on unordered */
14216 *first_code = UNLT;
14217 *bypass_code = UNORDERED;
14219 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
14220 *first_code = UNLE;
14221 *bypass_code = UNORDERED;
14223 case EQ: /* EQ - ZF=1 - fails on unordered */
14224 *first_code = UNEQ;
14225 *bypass_code = UNORDERED;
/* Codes where unordered must also be taken: add a SECOND branch.  */
14227 case NE: /* NE - ZF=0 - fails on unordered */
14228 *first_code = LTGT;
14229 *second_code = UNORDERED;
14231 case UNGE: /* GEU - CF=0 - fails on unordered */
14233 *second_code = UNORDERED;
14235 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
14237 *second_code = UNORDERED;
14240 gcc_unreachable ();
/* Without strict IEEE semantics NaNs can be ignored: a single
   branch always suffices.  */
14242 if (!TARGET_IEEE_FP)
14244 *second_code = UNKNOWN;
14245 *bypass_code = UNKNOWN;
14249 /* Return cost of comparison done fcom + arithmetics operations on AX.
14250 All following functions do use number of instructions as a cost metrics.
14251 In future this should be tweaked to compute bytes for optimize_size and
14252 take into account performance of various instructions on various CPUs. */
/* NOTE(review): the per-code switch returning the actual costs
   (lines 14257..14281) is elided from this chunk.  */
14254 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
14256 if (!TARGET_IEEE_FP)
14258 /* The cost of code output by ix86_expand_fp_compare. */
14282 gcc_unreachable ();
14286 /* Return cost of comparison done using fcomi operation.
14287 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14289 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
14291 enum rtx_code bypass_code, first_code, second_code;
14292 /* Return arbitrarily high cost when instruction is not supported - this
14293 prevents gcc from using it. */
/* NOTE(review): the TARGET_CMOVE guard/return on the elided lines.  */
14296 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* fcomi + one jump, plus one extra jump if a bypass or second
   branch is required.  */
14297 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
14300 /* Return cost of comparison done using sahf operation.
14301 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14303 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14305 enum rtx_code bypass_code, first_code, second_code;
14306 /* Return arbitrarily high cost when instruction is not preferred - this
14307 avoids gcc from using it. */
/* SAHF must exist and either be tuned for, or we optimize for size.  */
14308 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
14310 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* fnstsw + sahf + jump, plus one extra jump when needed.  */
14311 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
14314 /* Compute cost of the comparison done using any method.
14315 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Minimum of the arithmetic, sahf and fcomi strategies.
   NOTE(review): the `min' declaration, the min-updating assignments
   and the final return are on elided lines.  */
14317 ix86_fp_comparison_cost (enum rtx_code code)
14319 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
14322 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14323 sahf_cost = ix86_fp_comparison_sahf_cost (code);
14325 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14326 if (min > sahf_cost)
14328 if (min > fcomi_cost)
14333 /* Return true if we should use an FCOMI instruction for this
14333 /* Return true if we should use an FCOMI instruction for this
14337 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14339 enum rtx_code swapped_code = swap_condition (code);
14341 return ((ix86_fp_comparison_cost (code)
14342 == ix86_fp_comparison_fcomi_cost (code))
14343 || (ix86_fp_comparison_cost (swapped_code)
14344 == ix86_fp_comparison_fcomi_cost (swapped_code)));
14347 /* Swap, force into registers, or otherwise massage the two operands
14348 to a fp comparison. The operands are updated in place; the new
14349 comparison code is returned. */
/* NOTE(review): elided view -- several condition lines, the final
   *pop0/*pop1 stores and the return are missing from this chunk.  */
14351 static enum rtx_code
14352 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14354 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14355 rtx op0 = *pop0, op1 = *pop1;
14356 enum machine_mode op_mode = GET_MODE (op0);
14357 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14359 /* All of the unordered compare instructions only work on registers.
14360 The same is true of the fcomi compare instructions. The XFmode
14361 compare instructions require registers except when comparing
14362 against zero or when converting operand 1 from fixed point to
14366 && (fpcmp_mode == CCFPUmode
14367 || (op_mode == XFmode
14368 && ! (standard_80387_constant_p (op0) == 1
14369 || standard_80387_constant_p (op1) == 1)
14370 && GET_CODE (op1) != FLOAT)
14371 || ix86_use_fcomi_compare (code)))
14373 op0 = force_reg (op_mode, op0);
14374 op1 = force_reg (op_mode, op1);
14378 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14379 things around if they appear profitable, otherwise force op0
14380 into a register. */
/* standard_80387_constant_p == 0 means op0 is not a free x87
   constant (fldz/fld1 etc.); swapping may then be profitable.  */
14382 if (standard_80387_constant_p (op0) == 0
14384 && ! (standard_80387_constant_p (op1) == 0
14388 tmp = op0, op0 = op1, op1 = tmp;
14389 code = swap_condition (code);
14393 op0 = force_reg (op_mode, op0);
14395 if (CONSTANT_P (op1))
14397 int tmp = standard_80387_constant_p (op1);
/* Non-loadable constant: spill it to the constant pool.  */
14399 op1 = validize_mem (force_const_mem (op_mode, op1));
14403 op1 = force_reg (op_mode, op1);
14406 op1 = force_reg (op_mode, op1);
14410 /* Try to rearrange the comparison to make it cheaper. */
14411 if (ix86_fp_comparison_cost (code)
14412 > ix86_fp_comparison_cost (swap_condition (code))
14413 && (REG_P (op1) || can_create_pseudo_p ()))
14416 tmp = op0, op0 = op1, op1 = tmp;
14417 code = swap_condition (code);
14419 op0 = force_reg (op_mode, op0);
14427 /* Convert comparison codes we use to represent FP comparison to integer
14428 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the mapping switch (lines 14433..14460) is entirely
   elided from this chunk.  */
14432 ix86_fp_compare_code_to_integer (enum rtx_code code)
14461 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* NOTE(review): elided view -- declarations of tmp/tmp2, the branch
   scaffolding between the fcomi/sahf path and the fnstsw path, the
   switch head on CODE, and the per-case `code = ...' rewrites are
   missing from this chunk.  */
14464 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14465 rtx *second_test, rtx *bypass_test)
14467 enum machine_mode fpcmp_mode, intcmp_mode;
14469 int cost = ix86_fp_comparison_cost (code);
14470 enum rtx_code bypass_code, first_code, second_code;
14472 fpcmp_mode = ix86_fp_compare_mode (code);
14473 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14476 *second_test = NULL_RTX;
14478 *bypass_test = NULL_RTX;
14480 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14482 /* Do fcomi/sahf based test when profitable. */
14483 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14484 && (bypass_code == UNKNOWN || bypass_test)
14485 && (second_code == UNKNOWN || second_test))
/* Set the flags directly: FLAGS_REG = compare (op0, op1).  */
14487 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14488 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14494 gcc_assert (TARGET_SAHF);
/* sahf variant needs a HImode scratch for the fnstsw result.  */
14497 scratch = gen_reg_rtx (HImode);
14498 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14500 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14503 /* The FP codes work out to act like unsigned. */
14504 intcmp_mode = fpcmp_mode;
14506 if (bypass_code != UNKNOWN)
14507 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14508 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14510 if (second_code != UNKNOWN)
14511 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14512 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14517 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14518 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14519 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14521 scratch = gen_reg_rtx (HImode);
14522 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2))#;
14524 /* In the unordered case, we have to check C2 for NaN's, which
14525 doesn't happen to work out to anything nice combination-wise.
14526 So do some bit twiddling on the value we've got in AH to come
14527 up with an appropriate set of condition codes. */
/* The magic constants below are bit masks over the x87 status-word
   flags as they land in AH: C0=0x01, C2=0x04, C3=0x40 (0x45 = all
   three, 0x05 = C0|C2).  */
14529 intcmp_mode = CCNOmode;
14534 if (code == GT || !TARGET_IEEE_FP)
14536 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14541 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14542 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14543 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14544 intcmp_mode = CCmode;
14550 if (code == LT && TARGET_IEEE_FP)
14552 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14553 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14554 intcmp_mode = CCmode;
14559 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14565 if (code == GE || !TARGET_IEEE_FP)
14567 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14572 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14573 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14580 if (code == LE && TARGET_IEEE_FP)
14582 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14583 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14584 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14585 intcmp_mode = CCmode;
14590 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14596 if (code == EQ && TARGET_IEEE_FP)
14598 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14599 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14600 intcmp_mode = CCmode;
14605 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14612 if (code == NE && TARGET_IEEE_FP)
14614 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14615 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14621 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14627 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14631 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14636 gcc_unreachable ();
14640 /* Return the test that should be put into the flags user, i.e.
14641 the bcc, scc, or cmov instruction. */
14642 return gen_rtx_fmt_ee (code, VOIDmode,
14643 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the global ix86_compare_op0/op1 pair,
   dispatching to the CC-mode passthrough, FP, or integer expander.
   Returns the flags-consumer rtx.  NOTE(review): declarations of
   op0/op1/ret and the final return are on elided lines.  */
14648 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14651 op0 = ix86_compare_op0;
14652 op1 = ix86_compare_op1;
14655 *second_test = NULL_RTX;
14657 *bypass_test = NULL_RTX;
/* Flags already set: just wrap the existing CC register.  */
14659 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14660 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14662 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14664 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14665 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14666 second_test, bypass_test);
14669 ret = ix86_expand_int_compare (code, op0, op1);
14674 /* Return true if the CODE will result in nontrivial jump sequence. */
14676 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14678 enum rtx_code bypass_code, first_code, second_code;
/* NOTE(review): a TARGET_CMOVE early return sits on an elided line.  */
14681 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Nontrivial = needs a bypass or second branch beyond the first.  */
14682 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Emit a conditional branch to LABEL for comparison CODE of the
   global ix86_compare_op0/op1 pair.  Scalar modes go through
   ix86_expand_compare; FP modes may need compound sequences;
   DImode (or TImode on 64-bit) is split into hi/lo word compares.
   NOTE(review): elided view -- the switch on the mode, case labels,
   braces and several `return'/`break' lines are missing from this
   chunk.  */
14686 ix86_expand_branch (enum rtx_code code, rtx label)
14690 switch (GET_MODE (ix86_compare_op0))
/* Simple scalar case: one compare + one conditional jump.  */
14696 tmp = ix86_expand_compare (code, NULL, NULL);
14697 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14698 gen_rtx_LABEL_REF (VOIDmode, label),
14700 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14709 enum rtx_code bypass_code, first_code, second_code;
14711 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14712 &ix86_compare_op1);
14714 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14716 /* Check whether we will use the natural sequence with one jump. If
14717 so, we can expand jump early. Otherwise delay expansion by
14718 creating compound insn to not confuse optimizers. */
14719 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14721 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14722 gen_rtx_LABEL_REF (VOIDmode, label),
14723 pc_rtx, NULL_RTX, NULL_RTX);
/* Compound FP branch: keep compare + jump together in one PARALLEL
   (with FPSR/flags clobbers, plus a scratch unless fcomi is used)
   so the optimizers cannot separate them before splitting.  */
14727 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14728 ix86_compare_op0, ix86_compare_op1);
14729 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14730 gen_rtx_LABEL_REF (VOIDmode, label),
14732 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14734 use_fcomi = ix86_use_fcomi_compare (code);
14735 vec = rtvec_alloc (3 + !use_fcomi);
14736 RTVEC_ELT (vec, 0) = tmp;
14738 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14740 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14743 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14745 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14754 /* Expand DImode branch into multiple compare+branch. */
14756 rtx lo[2], hi[2], label2;
14757 enum rtx_code code1, code2, code3;
14758 enum machine_mode submode;
/* Canonicalize: constant operand second.  */
14760 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14762 tmp = ix86_compare_op0;
14763 ix86_compare_op0 = ix86_compare_op1;
14764 ix86_compare_op1 = tmp;
14765 code = swap_condition (code);
14767 if (GET_MODE (ix86_compare_op0) == DImode)
14769 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14770 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14775 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14776 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14780 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14781 avoid two branches. This costs one extra insn, so disable when
14782 optimizing for size. */
14784 if ((code == EQ || code == NE)
14785 && (!optimize_insn_for_size_p ()
14786 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14791 if (hi[1] != const0_rtx)
14792 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14793 NULL_RTX, 0, OPTAB_WIDEN)#;
14796 if (lo[1] != const0_rtx)
14797 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14798 NULL_RTX, 0, OPTAB_WIDEN);
14800 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14801 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse on the single-word OR-of-XORs against zero.  */
14803 ix86_compare_op0 = tmp;
14804 ix86_compare_op1 = const0_rtx;
14805 ix86_expand_branch (code, label);
14809 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14810 op1 is a constant and the low word is zero, then we can just
14811 examine the high word. Similarly for low word -1 and
14812 less-or-equal-than or greater-than. */
14814 if (CONST_INT_P (hi[1]))
14817 case LT: case LTU: case GE: case GEU:
14818 if (lo[1] == const0_rtx)
14820 ix86_compare_op0 = hi[0];
14821 ix86_compare_op1 = hi[1];
14822 ix86_expand_branch (code, label);
14826 case LE: case LEU: case GT: case GTU:
14827 if (lo[1] == constm1_rtx)
14829 ix86_compare_op0 = hi[0];
14830 ix86_compare_op1 = hi[1];
14831 ix86_expand_branch (code, label);
14839 /* Otherwise, we need two or three jumps. */
14841 label2 = gen_label_rtx ();
14844 code2 = swap_condition (code);
14845 code3 = unsigned_condition (code);
14849 case LT: case GT: case LTU: case GTU:
14852 case LE: code1 = LT; code2 = GT; break;
14853 case GE: code1 = GT; code2 = LT; break;
14854 case LEU: code1 = LTU; code2 = GTU; break;
14855 case GEU: code1 = GTU; code2 = LTU; break;
14857 case EQ: code1 = UNKNOWN; code2 = NE; break;
14858 case NE: code2 = UNKNOWN; break;
14861 gcc_unreachable ();
14866 * if (hi(a) < hi(b)) goto true;
14867 * if (hi(a) > hi(b)) goto false;
14868 * if (lo(a) < lo(b)) goto true;
14872 ix86_compare_op0 = hi[0];
14873 ix86_compare_op1 = hi[1];
14875 if (code1 != UNKNOWN)
14876 ix86_expand_branch (code1, label);
14877 if (code2 != UNKNOWN)
14878 ix86_expand_branch (code2, label2);
/* Low words decide with the unsigned version of the condition.  */
14880 ix86_compare_op0 = lo[0];
14881 ix86_compare_op1 = lo[1];
14882 ix86_expand_branch (code3, label);
14884 if (code2 != UNKNOWN)
14885 emit_label (label2);
14890 /* If we have already emitted a compare insn, go straight to simple.
14891 ix86_expand_compare won't emit anything if ix86_compare_emitted
14893 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
14898 /* Split branch based on floating point condition. */
/* Emits up to three jumps: an optional BYPASS jump (around the main
   test, for unordered operands), the main CONDITION jump, and an
   optional SECOND jump.  Branch probabilities from
   split_branch_probability are attached as REG_BR_PROB notes.
   NOTE(review): elided view -- declarations of condition/i and some
   condition lines are missing from this chunk.  */
14900 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14901 rtx target1, rtx target2, rtx tmp, rtx pushed)
14903 rtx second, bypass;
14904 rtx label = NULL_RTX;
14906 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so target2 is the fallthrough (pc_rtx).  */
14909 if (target2 != pc_rtx)
14912 code = reverse_condition_maybe_unordered (code);
14917 condition = ix86_expand_fp_compare (code, op1, op2,
14918 tmp, &second, &bypass);
14920 /* Remove pushed operand from stack. */
14922 ix86_free_from_memory (GET_MODE (pushed));
14924 if (split_branch_probability >= 0)
14926 /* Distribute the probabilities across the jumps.
14927 Assume the BYPASS and SECOND to be always test
14929 probability = split_branch_probability;
14931 /* Value of 1 is low enough to make no need for probability
14932 to be updated. Later we may run some experiments and see
14933 if unordered values are more frequent in practice. */
14935 bypass_probability = 1;
14937 second_probability = 1;
/* Bypass jump skips the main test when the bypass condition holds.  */
14939 if (bypass != NULL_RTX)
14941 label = gen_label_rtx ();
14942 i = emit_jump_insn (gen_rtx_SET
14944 gen_rtx_IF_THEN_ELSE (VOIDmode,
14946 gen_rtx_LABEL_REF (VOIDmode,
14949 if (bypass_probability >= 0)
14950 add_reg_note (i, REG_BR_PROB, GEN_INT (bypass_probability));
/* Main conditional jump.  */
14952 i = emit_jump_insn (gen_rtx_SET
14954 gen_rtx_IF_THEN_ELSE (VOIDmode,
14955 condition, target1, target2)));
14956 if (probability >= 0)
14957 add_reg_note (i, REG_BR_PROB, GEN_INT (probability));
14958 if (second != NULL_RTX)
14960 i = emit_jump_insn (gen_rtx_SET
14962 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14964 if (second_probability >= 0)
14965 add_reg_note (i, REG_BR_PROB, GEN_INT (second_probability));
14967 if (label != NULL_RTX)
14968 emit_label (label);
/* Expand a setcc of comparison CODE into the QImode register DEST.
   When the FP compare needs a second or bypass test, combine the two
   QImode results with AND (bypass) or IOR (second).
   NOTE(review): elided view -- tmp/tmpreg setup lines and part of
   the branch scaffolding are missing from this chunk.  */
14972 ix86_expand_setcc (enum rtx_code code, rtx dest)
14974 rtx ret, tmp, tmpreg, equiv;
14975 rtx second_test, bypass_test;
14977 gcc_assert (GET_MODE (dest) == QImode);
14979 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14980 PUT_MODE (ret, QImode);
14985 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
14986 if (bypass_test || second_test)
14988 rtx test = second_test;
14990 rtx tmp2 = gen_reg_rtx (QImode);
14993 gcc_assert (!second_test);
14994 test = bypass_test;
/* Bypass tests are combined with AND after being reversed.  */
14996 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14998 PUT_MODE (test, QImode);
14999 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
15002 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
15004 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
15007 /* Attach a REG_EQUAL note describing the comparison result. */
15008 if (ix86_compare_op0 && ix86_compare_op1)
15010 equiv = simplify_gen_relational (code, QImode,
15011 GET_MODE (ix86_compare_op0),
15012 ix86_compare_op0, ix86_compare_op1);
15013 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
15017 /* Expand comparison setting or clearing carry flag. Return true when
15018 successful and set pop for the operation. */
/* NOTE(review): elided view -- the switch on CODE, several `return
   false;'/`break;' lines, and an operand swap are missing from this
   chunk.  */
15020 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
15022 enum machine_mode mode =
15023 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15025 /* Do not handle DImode compares that go through special path. */
15026 if (mode == (TARGET_64BIT ? TImode : DImode))
15029 if (SCALAR_FLOAT_MODE_P (mode))
15031 rtx second_test = NULL, bypass_test = NULL;
15032 rtx compare_op, compare_seq;
15034 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15036 /* Shortcut: following common codes never translate
15037 into carry flag compares. */
15038 if (code == EQ || code == NE || code == UNEQ || code == LTGT
15039 || code == ORDERED || code == UNORDERED)
15042 /* These comparisons require zero flag; swap operands so they won't. */
15043 if ((code == GT || code == UNLE || code == LE || code == UNGT)
15044 && !TARGET_IEEE_FP)
15049 code = swap_condition (code);
15052 /* Try to expand the comparison and verify that we end up with
15053 carry flag based comparison. This fails to be true only when
15054 we decide to expand comparison using arithmetic that is not
15055 too common scenario. */
15057 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
15058 &second_test, &bypass_test);
15059 compare_seq = get_insns ();
/* Multi-test sequences cannot be reduced to the carry flag alone.  */
15062 if (second_test || bypass_test)
15065 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15066 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15067 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15069 code = GET_CODE (compare_op);
/* Only LTU/GEU read exactly the carry flag.  */
15071 if (code != LTU && code != GEU)
15074 emit_insn (compare_seq);
15079 if (!INTEGRAL_MODE_P (mode))
15088 /* Convert a==0 into (unsigned)a<1. */
15091 if (op1 != const0_rtx)
15094 code = (code == EQ ? LTU : GEU);
15097 /* Convert a>b into b<a or a>=b-1. */
15100 if (CONST_INT_P (op1))
15102 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15103 /* Bail out on overflow. We still can swap operands but that
15104 would force loading of the constant into register. */
15105 if (op1 == const0_rtx
15106 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15108 code = (code == GTU ? GEU : LTU);
15115 code = (code == GTU ? LTU : GEU);
15119 /* Convert a>=0 into (unsigned)a<0x80000000. */
15122 if (mode == DImode || op1 != const0_rtx)
15124 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15125 code = (code == LT ? GEU : LTU);
15129 if (mode == DImode || op1 != constm1_rtx)
15131 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15132 code = (code == LE ? GEU : LTU);
15138 /* Swapping operands may cause constant to appear as first operand. */
15139 if (!nonimmediate_operand (op0, VOIDmode))
15141 if (!can_create_pseudo_p ())
15143 op0 = force_reg (mode, op0);
15145 ix86_compare_op0 = op0;
15146 ix86_compare_op1 = op1;
15147 *pop = ix86_expand_compare (code, NULL, NULL);
15148 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move.
   operands[0] = destination, operands[1] = comparison rtx (its XEXPs become
   ix86_compare_op0/op1), operands[2]/operands[3] = values for the true/false
   arms.  Returns 1 when a sequence was emitted ("DONE"), 0 on failure
   ("FAIL", caller falls back to generic expansion).
   NOTE(review): this excerpt is elided (gaps in the embedded line numbers);
   the comments below describe only the code that is visible here.  */
15153 ix86_expand_int_movcc (rtx operands[])
15155 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15156 rtx compare_seq, compare_op;
15157 rtx second_test, bypass_test;
15158 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): stray double semicolon ("false;;") below — harmless,
   but should be cleaned up.  */
15159 bool sign_bit_compare_p = false;;
15162 ix86_compare_op0 = XEXP (operands[1], 0);
15163 ix86_compare_op1 = XEXP (operands[1], 1);
15164 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15165 compare_seq = get_insns ();
15168 compare_code = GET_CODE (compare_op);
/* Comparisons against 0 / -1 with GE/LT (resp. GT/LE) only test the
   sign bit; remember that so a shift-based expansion can be used.  */
15170 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15171 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15172 sign_bit_compare_p = true;
15174 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15175 HImode insns, we'd be swallowed in word prefix ops. */
/* Constant/constant arms: try branchless sbb/lea/setcc tricks first.  */
15177 if ((mode != HImode || TARGET_FAST_PREFIX)
15178 && (mode != (TARGET_64BIT ? TImode : DImode))
15179 && CONST_INT_P (operands[2])
15180 && CONST_INT_P (operands[3]))
15182 rtx out = operands[0];
15183 HOST_WIDE_INT ct = INTVAL (operands[2]);
15184 HOST_WIDE_INT cf = INTVAL (operands[3]);
15185 HOST_WIDE_INT diff;
15188 /* Sign bit compares are better done using shifts than we do by using
15190 if (sign_bit_compare_p
15191 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15192 ix86_compare_op1, &compare_op))
15194 /* Detect overlap between destination and compare sources. */
15197 if (!sign_bit_compare_p)
15199 bool fpcmp = false;
15201 compare_code = GET_CODE (compare_op);
15203 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15204 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15207 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15210 /* To simplify rest of code, restrict to the GEU case. */
15211 if (compare_code == LTU)
15213 HOST_WIDE_INT tmp = ct;
15216 compare_code = reverse_condition (compare_code);
15217 code = reverse_condition (code);
/* FP compares must be reversed with the maybe-unordered variant so
   unordered results stay on the intended arm.  */
15222 PUT_CODE (compare_op,
15223 reverse_condition_maybe_unordered
15224 (GET_CODE (compare_op)));
15226 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15230 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15231 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15232 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb-style pattern).  */
15234 if (mode == DImode)
15235 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15237 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15241 if (code == GT || code == GE)
15242 code = reverse_condition (code);
15245 HOST_WIDE_INT tmp = ct;
15250 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15251 ix86_compare_op1, VOIDmode, 0, -1);
15264 tmp = expand_simple_binop (mode, PLUS,
15266 copy_rtx (tmp), 1, OPTAB_DIRECT);
15277 tmp = expand_simple_binop (mode, IOR,
15279 copy_rtx (tmp), 1, OPTAB_DIRECT);
15281 else if (diff == -1 && ct)
15291 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15293 tmp = expand_simple_binop (mode, PLUS,
15294 copy_rtx (tmp), GEN_INT (cf),
15295 copy_rtx (tmp), 1, OPTAB_DIRECT);
15303 * andl cf - ct, dest
15313 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
/* General constant/constant case: mask the 0/-1 flag value with
   (cf - ct) and then add ct back.  */
15316 tmp = expand_simple_binop (mode, AND,
15318 gen_int_mode (cf - ct, mode),
15319 copy_rtx (tmp), 1, OPTAB_DIRECT);
15321 tmp = expand_simple_binop (mode, PLUS,
15322 copy_rtx (tmp), GEN_INT (ct),
15323 copy_rtx (tmp), 1, OPTAB_DIRECT);
15326 if (!rtx_equal_p (tmp, out))
15327 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15329 return 1; /* DONE */
15334 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15337 tmp = ct, ct = cf, cf = tmp;
15340 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15342 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15344 /* We may be reversing unordered compare to normal compare, that
15345 is not valid in general (we may convert non-trapping condition
15346 to trapping one), however on i386 we currently emit all
15347 comparisons unordered. */
15348 compare_code = reverse_condition_maybe_unordered (compare_code);
15349 code = reverse_condition_maybe_unordered (code);
15353 compare_code = reverse_condition (compare_code);
15354 code = reverse_condition (code);
15358 compare_code = UNKNOWN;
15359 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15360 && CONST_INT_P (ix86_compare_op1))
15362 if (ix86_compare_op1 == const0_rtx
15363 && (code == LT || code == GE))
15364 compare_code = code;
15365 else if (ix86_compare_op1 == constm1_rtx)
15369 else if (code == GT)
15374 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15375 if (compare_code != UNKNOWN
15376 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15377 && (cf == -1 || ct == -1))
15379 /* If lea code below could be used, only optimize
15380 if it results in a 2 insn sequence. */
15382 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15383 || diff == 3 || diff == 5 || diff == 9)
15384 || (compare_code == LT && ct == -1)
15385 || (compare_code == GE && cf == -1))
15388 * notl op1 (if necessary)
15396 code = reverse_condition (code);
15399 out = emit_store_flag (out, code, ix86_compare_op0,
15400 ix86_compare_op1, VOIDmode, 0, -1);
15402 out = expand_simple_binop (mode, IOR,
15404 out, 1, OPTAB_DIRECT);
15405 if (out != operands[0])
15406 emit_move_insn (operands[0], out);
15408 return 1; /* DONE */
/* diff in {1,2,3,4,5,8,9}: the whole select can be done with setcc
   followed by a single lea (scale plus optional base/displacement).  */
15413 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15414 || diff == 3 || diff == 5 || diff == 9)
15415 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15417 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15423 * lea cf(dest*(ct-cf)),dest
15427 * This also catches the degenerate setcc-only case.
15433 out = emit_store_flag (out, code, ix86_compare_op0,
15434 ix86_compare_op1, VOIDmode, 0, 1);
15437 /* On x86_64 the lea instruction operates on Pmode, so we need
15438 to get arithmetics done in proper mode to match. */
15440 tmp = copy_rtx (out);
15444 out1 = copy_rtx (out);
15445 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15449 tmp = gen_rtx_PLUS (mode, tmp, out1);
15455 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15458 if (!rtx_equal_p (tmp, out))
15461 out = force_operand (tmp, copy_rtx (out));
15463 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15465 if (!rtx_equal_p (out, operands[0]))
15466 emit_move_insn (operands[0], copy_rtx (out));
15468 return 1; /* DONE */
15472 * General case: Jumpful:
15473 * xorl dest,dest cmpl op1, op2
15474 * cmpl op1, op2 movl ct, dest
15475 * setcc dest jcc 1f
15476 * decl dest movl cf, dest
15477 * andl (cf-ct),dest 1:
15480 * Size 20. Size 14.
15482 * This is reasonably steep, but branch mispredict costs are
15483 * high on modern cpus, so consider failing only if optimizing
15487 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15488 && BRANCH_COST (optimize_insn_for_speed_p (),
15493 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15498 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15500 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15502 /* We may be reversing unordered compare to normal compare,
15503 that is not valid in general (we may convert non-trapping
15504 condition to trapping one), however on i386 we currently
15505 emit all comparisons unordered. */
15506 code = reverse_condition_maybe_unordered (code);
15510 code = reverse_condition (code);
15511 if (compare_code != UNKNOWN)
15512 compare_code = reverse_condition (compare_code);
15516 if (compare_code != UNKNOWN)
15518 /* notl op1 (if needed)
15523 For x < 0 (resp. x <= -1) there will be no notl,
15524 so if possible swap the constants to get rid of the
15526 True/false will be -1/0 while code below (store flag
15527 followed by decrement) is 0/-1, so the constants need
15528 to be exchanged once more. */
15530 if (compare_code == GE || !cf)
15532 code = reverse_condition (code);
15537 HOST_WIDE_INT tmp = cf;
15542 out = emit_store_flag (out, code, ix86_compare_op0,
15543 ix86_compare_op1, VOIDmode, 0, -1);
15547 out = emit_store_flag (out, code, ix86_compare_op0,
15548 ix86_compare_op1, VOIDmode, 0, 1);
15550 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15551 copy_rtx (out), 1, OPTAB_DIRECT);
15554 out = expand_simple_binop (mode, AND, copy_rtx (out),
15555 gen_int_mode (cf - ct, mode),
15556 copy_rtx (out), 1, OPTAB_DIRECT);
15558 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15559 copy_rtx (out), 1, OPTAB_DIRECT);
15560 if (!rtx_equal_p (out, operands[0]))
15561 emit_move_insn (operands[0], copy_rtx (out));
15563 return 1; /* DONE */
15567 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15569 /* Try a few things more with specific constants and a variable. */
15572 rtx var, orig_out, out, tmp;
15574 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15575 return 0; /* FAIL */
15577 /* If one of the two operands is an interesting constant, load a
15578 constant with the above and mask it in with a logical operation. */
15580 if (CONST_INT_P (operands[2]))
15583 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15584 operands[3] = constm1_rtx, op = and_optab;
15585 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15586 operands[3] = const0_rtx, op = ior_optab;
15588 return 0; /* FAIL */
15590 else if (CONST_INT_P (operands[3]))
15593 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15594 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): the guard below tests operands[3] != const0_rtx, but it
   is operands[2] that gets replaced by const0_rtx — the parallel branch
   above tests the operand being replaced.  Looks like a typo for
   operands[2]; confirm against upstream GCC before changing.  */
15595 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
15596 operands[2] = const0_rtx, op = ior_optab;
15598 return 0; /* FAIL */
15601 return 0; /* FAIL */
15603 orig_out = operands[0];
15604 tmp = gen_reg_rtx (mode);
15607 /* Recurse to get the constant loaded. */
15608 if (ix86_expand_int_movcc (operands) == 0)
15609 return 0; /* FAIL */
15611 /* Mask in the interesting variable. */
15612 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15614 if (!rtx_equal_p (out, orig_out))
15615 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15617 return 1; /* DONE */
15621 * For comparison with above,
/* Final path: emit a real cmov (IF_THEN_ELSE) after forcing the arms
   into acceptable operands and resolving overlap with the destination.  */
15631 if (! nonimmediate_operand (operands[2], mode))
15632 operands[2] = force_reg (mode, operands[2]);
15633 if (! nonimmediate_operand (operands[3], mode))
15634 operands[3] = force_reg (mode, operands[3]);
15636 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15638 rtx tmp = gen_reg_rtx (mode);
15639 emit_move_insn (tmp, operands[3]);
15642 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15644 rtx tmp = gen_reg_rtx (mode);
15645 emit_move_insn (tmp, operands[2]);
15649 if (! register_operand (operands[2], VOIDmode)
15651 || ! register_operand (operands[3], VOIDmode)))
15652 operands[2] = force_reg (mode, operands[2]);
15655 && ! register_operand (operands[3], VOIDmode))
15656 operands[3] = force_reg (mode, operands[3]);
15658 emit_insn (compare_seq);
15659 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15660 gen_rtx_IF_THEN_ELSE (mode,
15661 compare_op, operands[2],
/* Extra cmovs for the bypass/second tests produced by the FP compare.  */
15664 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15665 gen_rtx_IF_THEN_ELSE (mode,
15667 copy_rtx (operands[3]),
15668 copy_rtx (operands[0]))));
15670 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15671 gen_rtx_IF_THEN_ELSE (mode,
15673 copy_rtx (operands[2]),
15674 copy_rtx (operands[0]))));
15676 return 1; /* DONE */
15679 /* Swap, force into registers, or otherwise massage the two operands
15680 to an sse comparison with a mask result. Thus we differ a bit from
15681 ix86_prepare_fp_compare_args which expects to produce a flags result.
15683 The DEST operand exists to help determine whether to commute commutative
15684 operators. The POP0/POP1 operands are updated in place. The new
15685 comparison code is returned, or UNKNOWN if not implementable. */
/* NOTE(review): the switch body of this function is elided in this
   excerpt; only representative case fragments are visible.  */
15687 static enum rtx_code
15688 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15689 rtx *pop0, rtx *pop1)
15697 /* We have no LTGT as an operator. We could implement it with
15698 NE & ORDERED, but this requires an extra temporary. It's
15699 not clear that it's worth it. */
15706 /* These are supported directly. */
15713 /* For commutative operators, try to canonicalize the destination
15714 operand to be first in the comparison - this helps reload to
15715 avoid extra moves. */
15716 if (!dest || !rtx_equal_p (dest, *pop1))
15724 /* These are not supported directly. Swap the comparison operands
15725 to transform into something that is supported. */
15729 code = swap_condition (code);
15733 gcc_unreachable ();
15739 /* Detect conditional moves that exactly match min/max operational
15740 semantics. Note that this is IEEE safe, as long as we don't
15741 interchange the operands.
15743 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15744 and TRUE if the operation is successful and instructions are emitted. */
15747 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15748 rtx cmp_op1, rtx if_true, rtx if_false)
15750 enum machine_mode mode;
/* Canonicalize UNGE (and, per elided branches, related codes) by
   swapping the arms so the test below only has to match one shape.  */
15756 else if (code == UNGE)
15759 if_true = if_false;
/* The cmov matches min/max only when its arms are exactly the compare
   operands (in either order).  */
15765 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15767 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15772 mode = GET_MODE (dest);
15774 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15775 but MODE may be a vector mode and thus not appropriate. */
15776 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Strict-IEEE case: emit an UNSPEC so the operand order (and thus
   NaN/signed-zero behavior) cannot be changed by later passes.  */
15778 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15781 if_true = force_reg (mode, if_true);
15782 v = gen_rtvec (2, if_true, if_false);
15783 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Relaxed-math case: a plain SMIN/SMAX rtx is enough.  */
15787 code = is_min ? SMIN : SMAX;
15788 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15791 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15795 /* Expand an sse vector comparison. Return the register with the result. */
15798 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15799 rtx op_true, rtx op_false)
15801 enum machine_mode mode = GET_MODE (dest);
/* First operand must be a register; second may be a memory operand.  */
15804 cmp_op0 = force_reg (mode, cmp_op0);
15805 if (!nonimmediate_operand (cmp_op1, mode))
15806 cmp_op1 = force_reg (mode, cmp_op1);
/* If DEST overlaps any input of the later select, compute the mask in
   a fresh register so the inputs are not clobbered.  */
15809 || reg_overlap_mentioned_p (dest, op_true)
15810 || reg_overlap_mentioned_p (dest, op_false))
15811 dest = gen_reg_rtx (mode);
15813 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15814 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15819 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15820 operations. This is used for both scalar and vector conditional moves. */
15823 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15825 enum machine_mode mode = GET_MODE (dest);
/* Special case: false arm is zero — a single AND with the mask does it.  */
15828 if (op_false == CONST0_RTX (mode))
15830 op_true = force_reg (mode, op_true);
15831 x = gen_rtx_AND (mode, cmp, op_true);
15832 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Special case: true arm is zero — ANDN (NOT mask AND false).  */
15834 else if (op_true == CONST0_RTX (mode))
15836 op_false = force_reg (mode, op_false);
15837 x = gen_rtx_NOT (mode, cmp);
15838 x = gen_rtx_AND (mode, x, op_false);
15839 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a real vector cmov (pcmov) — emit it directly.  */
15841 else if (TARGET_SSE5)
15843 rtx pcmov = gen_rtx_SET (mode, dest,
15844 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* General case: (op_true & cmp) | (op_false & ~cmp).  */
15851 op_true = force_reg (mode, op_true);
15852 op_false = force_reg (mode, op_false);
15854 t2 = gen_reg_rtx (mode);
15856 t3 = gen_reg_rtx (mode);
15860 x = gen_rtx_AND (mode, op_true, cmp);
15861 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15863 x = gen_rtx_NOT (mode, cmp);
15864 x = gen_rtx_AND (mode, x, op_false);
15865 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15867 x = gen_rtx_IOR (mode, t3, t2);
15868 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15872 /* Expand a floating-point conditional move. Return true if successful. */
15875 ix86_expand_fp_movcc (rtx operands[])
15877 enum machine_mode mode = GET_MODE (operands[0]);
15878 enum rtx_code code = GET_CODE (operands[1]);
15879 rtx tmp, compare_op, second_test, bypass_test;
15881 ix86_compare_op0 = XEXP (operands[1], 0);
15882 ix86_compare_op1 = XEXP (operands[1], 1);
/* SSE scalar math path: turn the cmov into a mask-based select.  */
15883 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15885 enum machine_mode cmode;
15887 /* Since we've no cmove for sse registers, don't force bad register
15888 allocation just to gain access to it. Deny movcc when the
15889 comparison mode doesn't match the move mode. */
15890 cmode = GET_MODE (ix86_compare_op0);
15891 if (cmode == VOIDmode)
15892 cmode = GET_MODE (ix86_compare_op1);
15896 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15898 &ix86_compare_op1);
15899 if (code == UNKNOWN)
/* Try the min/max shortcut before the generic cmp+select sequence.  */
15902 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15903 ix86_compare_op1, operands[2],
15907 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15908 ix86_compare_op1, operands[2], operands[3]);
15909 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15913 /* The floating point conditional move instructions don't directly
15914 support conditions resulting from a signed integer comparison. */
15916 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15918 /* The floating point conditional move instructions don't directly
15919 support signed integer comparisons. */
15921 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Reduce an unsupported condition to setcc + compare-against-zero.  */
15923 gcc_assert (!second_test && !bypass_test);
15924 tmp = gen_reg_rtx (QImode);
15925 ix86_expand_setcc (code, tmp);
15927 ix86_compare_op0 = tmp;
15928 ix86_compare_op1 = const0_rtx;
15929 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy arms that overlap the destination before emitting extra cmovs.  */
15931 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15933 tmp = gen_reg_rtx (mode);
15934 emit_move_insn (tmp, operands[3]);
15937 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15939 tmp = gen_reg_rtx (mode);
15940 emit_move_insn (tmp, operands[2]);
15944 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15945 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15946 operands[2], operands[3])));
15948 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15949 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15950 operands[3], operands[0])));
15952 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15953 gen_rtx_IF_THEN_ELSE (mode, second_test,
15954 operands[2], operands[0])));
15959 /* Expand a floating-point vector conditional move; a vcond operation
15960 rather than a movcc operation. */
15963 ix86_expand_fp_vcond (rtx operands[])
15965 enum rtx_code code = GET_CODE (operands[3]);
/* operands[4]/[5] are the compare inputs; [1]/[2] the select arms.  */
15968 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15969 &operands[4], &operands[5]);
15970 if (code == UNKNOWN)
/* Min/max shortcut first, then the generic mask-and-select sequence.  */
15973 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15974 operands[5], operands[1], operands[2]))
15977 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15978 operands[1], operands[2]);
15979 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15983 /* Expand a signed/unsigned integral vector conditional move. */
15986 ix86_expand_int_vcond (rtx operands[])
15988 enum machine_mode mode = GET_MODE (operands[0]);
15989 enum rtx_code code = GET_CODE (operands[3]);
15990 bool negate = false;
15993 cop0 = operands[4];
15994 cop1 = operands[5];
15996 /* SSE5 supports all of the comparisons on all vector int types. */
15999 /* Canonicalize the comparison to EQ, GT, GTU. */
16010 code = reverse_condition (code);
16016 code = reverse_condition (code);
16022 code = swap_condition (code);
16023 x = cop0, cop0 = cop1, cop1 = x;
16027 gcc_unreachable ();
16030 /* Only SSE4.1/SSE4.2 supports V2DImode. */
16031 if (mode == V2DImode)
16036 /* SSE4.1 supports EQ. */
16037 if (!TARGET_SSE4_1)
16043 /* SSE4.2 supports GT/GTU. */
16044 if (!TARGET_SSE4_2)
16049 gcc_unreachable ();
16053 /* Unsigned parallel compare is not supported by the hardware. Play some
16054 tricks to turn this into a signed comparison against 0. */
16057 cop0 = force_reg (mode, cop0);
16066 /* Perform a parallel modulo subtraction. */
16067 t1 = gen_reg_rtx (mode);
16068 emit_insn ((mode == V4SImode
16070 : gen_subv2di3) (t1, cop0, cop1));
16072 /* Extract the original sign bit of op0. */
16073 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16075 t2 = gen_reg_rtx (mode);
16076 emit_insn ((mode == V4SImode
16078 : gen_andv2di3) (t2, cop0, mask));
16080 /* XOR it back into the result of the subtraction. This results
16081 in the sign bit set iff we saw unsigned underflow. */
16082 x = gen_reg_rtx (mode);
16083 emit_insn ((mode == V4SImode
16085 : gen_xorv2di3) (x, t1, t2));
16093 /* Perform a parallel unsigned saturating subtraction. */
16094 x = gen_reg_rtx (mode);
16095 emit_insn (gen_rtx_SET (VOIDmode, x,
16096 gen_rtx_US_MINUS (mode, cop0, cop1)));
16103 gcc_unreachable ();
16107 cop1 = CONST0_RTX (mode);
/* NEGATE records that the canonicalization inverted the predicate, so
   the select arms are swapped via the 1+negate / 2-negate indexing.  */
16111 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16112 operands[1+negate], operands[2-negate]);
16114 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16115 operands[2-negate]);
16119 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16120 true if we should do zero extension, else sign extension. HIGH_P is
16121 true if we want the N/2 high elements, else the low elements. */
16124 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16126 enum machine_mode imode = GET_MODE (operands[1]);
16127 rtx (*unpack)(rtx, rtx, rtx);
/* Pick the interleave insn by source element width and high/low half.  */
16134 unpack = gen_vec_interleave_highv16qi;
16136 unpack = gen_vec_interleave_lowv16qi;
16140 unpack = gen_vec_interleave_highv8hi;
16142 unpack = gen_vec_interleave_lowv8hi;
16146 unpack = gen_vec_interleave_highv4si;
16148 unpack = gen_vec_interleave_lowv4si;
16151 gcc_unreachable ();
16154 dest = gen_lowpart (imode, operands[0]);
/* Zero-extend interleaves with zero; sign-extend interleaves with a
   mask of the source's sign bits ((0 > x) per-element compare).  */
16157 se = force_reg (imode, CONST0_RTX (imode));
16159 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16160 operands[1], pc_rtx, pc_rtx);
16162 emit_insn (unpack (dest, operands[1], se));
16165 /* This function performs the same task as ix86_expand_sse_unpack,
16166 but with SSE4.1 instructions. */
16169 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16171 enum machine_mode imode = GET_MODE (operands[1]);
16172 rtx (*unpack)(rtx, rtx);
/* SSE4.1 pmovzx/pmovsx extend the low half directly.  */
16179 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16181 unpack = gen_sse4_1_extendv8qiv8hi2;
16185 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16187 unpack = gen_sse4_1_extendv4hiv4si2;
16191 unpack = gen_sse4_1_zero_extendv2siv2di2;
16193 unpack = gen_sse4_1_extendv2siv2di2;
16196 gcc_unreachable ();
16199 dest = operands[0];
16202 /* Shift higher 8 bytes to lower 8 bytes. */
/* For the high half, shift it down into the low half first, since the
   extend insns only read the low elements.  */
16203 src = gen_reg_rtx (imode);
16204 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16205 gen_lowpart (TImode, operands[1]),
16211 emit_insn (unpack (dest, src));
16214 /* This function performs the same task as ix86_expand_sse_unpack,
16215 but with sse5 instructions. */
/* Builds a 16-entry PPERM byte-selector constant vector (V) plus a
   parallel of element indices (VS), then emits the sign/zero variant of
   the sse5 pperm pattern for the relevant width.  */
16218 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16220 enum machine_mode imode = GET_MODE (operands[1]);
16221 int pperm_bytes[16];
/* H selects the high (8) or low (0) half's byte offset.  */
16223 int h = (high_p) ? 8 : 0;
16226 rtvec v = rtvec_alloc (16);
16229 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: each result halfword = source byte + sign/zero byte.  */
16234 vs = rtvec_alloc (8);
16235 h2 = (high_p) ? 8 : 0;
16236 for (i = 0; i < 8; i++)
16238 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16239 pperm_bytes[2*i+1] = ((unsigned_p)
16241 : PPERM_SIGN | PPERM_SRC2 | i | h);
16244 for (i = 0; i < 16; i++)
16245 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16247 for (i = 0; i < 8; i++)
16248 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16250 p = gen_rtx_PARALLEL (VOIDmode, vs);
16251 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16253 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16255 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two source bytes plus two sign/zero bytes per word.  */
16259 vs = rtvec_alloc (4);
16260 h2 = (high_p) ? 4 : 0;
16261 for (i = 0; i < 4; i++)
16263 sign_extend = ((unsigned_p)
16265 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16266 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16267 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16268 pperm_bytes[4*i+2] = sign_extend;
16269 pperm_bytes[4*i+3] = sign_extend;
16272 for (i = 0; i < 16; i++)
16273 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16275 for (i = 0; i < 4; i++)
16276 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16278 p = gen_rtx_PARALLEL (VOIDmode, vs);
16279 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16281 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16283 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four source bytes plus four sign/zero bytes per dword.  */
16287 vs = rtvec_alloc (2);
16288 h2 = (high_p) ? 2 : 0;
16289 for (i = 0; i < 2; i++)
16291 sign_extend = ((unsigned_p)
16293 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16294 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16295 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16296 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16297 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16298 pperm_bytes[8*i+4] = sign_extend;
16299 pperm_bytes[8*i+5] = sign_extend;
16300 pperm_bytes[8*i+6] = sign_extend;
16301 pperm_bytes[8*i+7] = sign_extend;
16304 for (i = 0; i < 16; i++)
16305 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16307 for (i = 0; i < 2; i++)
16308 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16310 p = gen_rtx_PARALLEL (VOIDmode, vs);
16311 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16313 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16315 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16319 gcc_unreachable ();
16325 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16326 next narrower integer vector type */
/* Uses the sse5 pperm pack patterns with a byte-selector constant that
   picks the even (low) bytes of each element from both sources.  */
16328 ix86_expand_sse5_pack (rtx operands[3])
16330 enum machine_mode imode = GET_MODE (operands[0]);
16331 int pperm_bytes[16];
16333 rtvec v = rtvec_alloc (16);
16335 rtx op0 = operands[0];
16336 rtx op1 = operands[1];
16337 rtx op2 = operands[2];
/* V8HI pair -> V16QI: take byte 0 of each halfword from each source.  */
16342 for (i = 0; i < 8; i++)
16344 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16345 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16348 for (i = 0; i < 16; i++)
16349 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16351 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16352 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V4SI pair -> V8HI: take bytes 0-1 of each word from each source.  */
16356 for (i = 0; i < 4; i++)
16358 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16359 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16360 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16361 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16364 for (i = 0; i < 16; i++)
16365 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16367 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16368 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V2DI pair -> V4SI: take bytes 0-3 of each quadword from each source.  */
16372 for (i = 0; i < 2; i++)
16374 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16375 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16376 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16377 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16378 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16379 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16380 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16381 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16384 for (i = 0; i < 16; i++)
16385 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16387 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16388 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16392 gcc_unreachable ();
16398 /* Expand conditional increment or decrement using adb/sbb instructions.
16399 The default case using setcc followed by the conditional move can be
16400 done by generic code. */
/* operands[0] = dest, operands[1] = comparison, operands[2] = source,
   operands[3] = +1 or -1 increment.  Returns 1 ("DONE") or 0 ("FAIL").  */
16402 ix86_expand_int_addcc (rtx operands[])
16404 enum rtx_code code = GET_CODE (operands[1]);
16406 rtx val = const0_rtx;
16407 bool fpcmp = false;
16408 enum machine_mode mode = GET_MODE (operands[0]);
16410 ix86_compare_op0 = XEXP (operands[1], 0);
16411 ix86_compare_op1 = XEXP (operands[1], 1);
/* Only increments of exactly +/-1 can use the carry trick.  */
16412 if (operands[3] != const1_rtx
16413 && operands[3] != constm1_rtx)
/* The comparison must be expressible as a carry-flag test (LTU/GEU).  */
16415 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16416 ix86_compare_op1, &compare_op))
16418 code = GET_CODE (compare_op);
16420 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16421 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16424 code = ix86_fp_compare_code_to_integer (code);
/* FP compares need the maybe-unordered reversal; integer the plain one.  */
16431 PUT_CODE (compare_op,
16432 reverse_condition_maybe_unordered
16433 (GET_CODE (compare_op)));
16435 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16437 PUT_MODE (compare_op, mode);
16439 /* Construct either adc or sbb insn. */
16440 if ((code == LTU) == (operands[3] == constm1_rtx))
16442 switch (GET_MODE (operands[0]))
16445 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16448 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16451 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16454 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16457 gcc_unreachable ();
16462 switch (GET_MODE (operands[0]))
16465 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16468 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16471 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16474 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16477 gcc_unreachable ();
16480 return 1; /* DONE */
16484 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16485 works for floating pointer parameters and nonoffsetable memories.
16486 For pushes, it returns just stack offsets; the values will be saved
16487 in the right order. Maximally three parts are generated. */
/* NOTE(review): this excerpt is elided; the 32-bit and 64-bit branches
   below are both incomplete in places.  Returns the part count (SIZE).  */
16490 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* XFmode is 12 bytes on ia32 but still takes 3 SImode parts.  */
16495 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16497 size = (GET_MODE_SIZE (mode) + 4) / 8;
16499 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16500 gcc_assert (size >= 2 && size <= 4);
16502 /* Optimize constant pool reference to immediates. This is used by fp
16503 moves, that force all constants to memory to allow combining. */
16504 if (MEM_P (operand) && MEM_READONLY_P (operand))
16506 rtx tmp = maybe_get_pool_constant (operand);
16511 if (MEM_P (operand) && !offsettable_memref_p (operand))
16513 /* The only non-offsetable memories we handle are pushes. */
16514 int ok = push_operand (operand, VOIDmode);
16518 operand = copy_rtx (operand);
16519 PUT_MODE (operand, Pmode);
16520 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16524 if (GET_CODE (operand) == CONST_VECTOR)
16526 enum machine_mode imode = int_mode_for_mode (mode);
16527 /* Caution: if we looked through a constant pool memory above,
16528 the operand may actually have a different mode now. That's
16529 ok, since we want to pun this all the way back to an integer. */
16530 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16531 gcc_assert (operand != NULL);
/* 32-bit target: split into SImode words.  */
16537 if (mode == DImode)
16538 split_di (&operand, 1, &parts[0], &parts[1]);
16543 if (REG_P (operand))
16545 gcc_assert (reload_completed);
16546 for (i = 0; i < size; i++)
16547 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16549 else if (offsettable_memref_p (operand))
16551 operand = adjust_address (operand, SImode, 0);
16552 parts[0] = operand;
16553 for (i = 1; i < size; i++)
16554 parts[i] = adjust_address (operand, SImode, 4 * i);
16556 else if (GET_CODE (operand) == CONST_DOUBLE)
16561 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* Decompose FP constants into their target-format 32-bit words.  */
16565 real_to_target (l, &r, mode);
16566 parts[3] = gen_int_mode (l[3], SImode);
16567 parts[2] = gen_int_mode (l[2], SImode);
16570 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16571 parts[2] = gen_int_mode (l[2], SImode);
16574 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16577 gcc_unreachable ();
16579 parts[1] = gen_int_mode (l[1], SImode);
16580 parts[0] = gen_int_mode (l[0], SImode);
16583 gcc_unreachable ();
/* 64-bit target: split into DImode (plus SImode/DImode upper) parts.  */
16588 if (mode == TImode)
16589 split_ti (&operand, 1, &parts[0], &parts[1]);
16590 if (mode == XFmode || mode == TFmode)
16592 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16593 if (REG_P (operand))
16595 gcc_assert (reload_completed);
16596 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16597 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16599 else if (offsettable_memref_p (operand))
16601 operand = adjust_address (operand, DImode, 0);
16602 parts[0] = operand;
16603 parts[1] = adjust_address (operand, upper_mode, 8);
16605 else if (GET_CODE (operand) == CONST_DOUBLE)
16610 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16611 real_to_target (l, &r, mode);
16613 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16614 if (HOST_BITS_PER_WIDE_INT >= 64)
16617 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16618 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16621 parts[0] = immed_double_const (l[0], l[1], DImode);
16623 if (upper_mode == SImode)
16624 parts[1] = gen_int_mode (l[2], SImode);
16625 else if (HOST_BITS_PER_WIDE_INT >= 64)
16628 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16629 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16632 parts[1] = immed_double_const (l[2], l[3], DImode);
16635 gcc_unreachable ();
16642 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16643 Return false when normal moves are needed; true when all required
16644 insns have been emitted. Operands 2-4 contain the input values
16645 int the correct order; operands 5-7 contain the output values. */
/* NOTE(review): elided view — some statements/braces between visible
   lines are missing from this chunk.  */
16648 ix86_split_long_move (rtx operands[])
16653 int collisions = 0;
16654 enum machine_mode mode = GET_MODE (operands[0]);
16655 bool collisionparts[4];
16657 /* The DFmode expanders may ask us to move double.
16658 For 64bit target this is single move. By hiding the fact
16659 here we simplify i386.md splitters. */
16660 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16662 /* Optimize constant pool reference to immediates. This is used by
16663 fp moves, that force all constants to memory to allow combining. */
16665 if (MEM_P (operands[1])
16666 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16667 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16668 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16669 if (push_operand (operands[0], VOIDmode))
16671 operands[0] = copy_rtx (operands[0]);
16672 PUT_MODE (operands[0], Pmode);
16675 operands[0] = gen_lowpart (DImode, operands[0]);
16676 operands[1] = gen_lowpart (DImode, operands[1]);
16677 emit_move_insn (operands[0], operands[1]);
16681 /* The only non-offsettable memory we handle is push. */
16682 if (push_operand (operands[0], VOIDmode))
16685 gcc_assert (!MEM_P (operands[0])
16686 || offsettable_memref_p (operands[0]));
/* Decompose both operands into word-sized parts (see
   ix86_split_to_parts above).  */
16688 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16689 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16691 /* When emitting push, take care for source operands on the stack. */
16692 if (push && MEM_P (operands[1])
16693 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16694 for (i = 0; i < nparts - 1; i++)
16695 part[1][i] = change_address (part[1][i],
16696 GET_MODE (part[1][i]),
16697 XEXP (part[1][i + 1], 0));
16699 /* We need to do copy in the right order in case an address register
16700 of the source overlaps the destination. */
16701 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
/* Record, per part, whether the destination register appears in the
   source address; COLLISIONS counts them (increment elided here).  */
16705 for (i = 0; i < nparts; i++)
16708 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16709 if (collisionparts[i])
16713 /* Collision in the middle part can be handled by reordering. */
16714 if (collisions == 1 && nparts == 3 && collisionparts [1])
16716 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16717 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16719 else if (collisions == 1
16721 && (collisionparts [1] || collisionparts [2]))
16723 if (collisionparts [1])
16725 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16726 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16730 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16731 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16735 /* If there are more collisions, we can't handle it by reordering.
16736 Do an lea to the last part and use only one colliding move. */
16737 else if (collisions > 1)
16743 base = part[0][nparts - 1];
16745 /* Handle the case when the last part isn't valid for lea.
16746 Happens in 64-bit mode storing the 12-byte XFmode. */
16747 if (GET_MODE (base) != Pmode)
16748 base = gen_rtx_REG (Pmode, REGNO (base));
16750 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16751 part[1][0] = replace_equiv_address (part[1][0], base);
16752 for (i = 1; i < nparts; i++)
16754 tmp = plus_constant (base, UNITS_PER_WORD * i);
16755 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path (surrounding context elided): for 3- or 4-part pushes the
   high parts are moved first; XFmode on 128-bit-long-double targets
   pre-adjusts the stack pointer by -4 for padding.  */
16766 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16767 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16768 emit_move_insn (part[0][2], part[1][2]);
16770 else if (nparts == 4)
16772 emit_move_insn (part[0][3], part[1][3]);
16773 emit_move_insn (part[0][2], part[1][2]);
16778 /* In 64bit mode we don't have 32bit push available. In case this is
16779 register, it is OK - we will just use larger counterpart. We also
16780 retype memory - these comes from attempt to avoid REX prefix on
16781 moving of second half of TFmode value. */
16782 if (GET_MODE (part[1][1]) == SImode)
16784 switch (GET_CODE (part[1][1]))
16787 part[1][1] = adjust_address (part[1][1], DImode, 0);
16791 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16795 gcc_unreachable ();
16798 if (GET_MODE (part[1][0]) == SImode)
16799 part[1][0] = part[1][1];
16802 emit_move_insn (part[0][1], part[1][1]);
16803 emit_move_insn (part[0][0], part[1][0]);
16807 /* Choose correct order to not overwrite the source before it is copied. */
16808 if ((REG_P (part[0][0])
16809 && REG_P (part[1][1])
16810 && (REGNO (part[0][0]) == REGNO (part[1][1])
16812 && REGNO (part[0][0]) == REGNO (part[1][2]))
16814 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16816 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Overlap detected: emit the parts in reverse order.  */
16818 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16820 operands[2 + i] = part[0][j];
16821 operands[6 + i] = part[1][j];
16826 for (i = 0; i < nparts; i++)
16828 operands[2 + i] = part[0][i];
16829 operands[6 + i] = part[1][i];
16833 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16834 if (optimize_insn_for_size_p ())
16836 for (j = 0; j < nparts - 1; j++)
16837 if (CONST_INT_P (operands[6 + j])
16838 && operands[6 + j] != const0_rtx
16839 && REG_P (operands[2 + j]))
16840 for (i = j; i < nparts - 1; i++)
16841 if (CONST_INT_P (operands[7 + i])
16842 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
/* Reuse the register already holding this constant instead of
   re-materializing the immediate.  */
16843 operands[7 + i] = operands[2 + j];
16846 for (i = 0; i < nparts; i++)
16847 emit_move_insn (operands[2 + i], operands[6 + i]);
16852 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16853 left shift by a constant, either using a single shift or
16854 a sequence of add instructions. */
/* NOTE(review): elided view — the count==1 condition and some branch
   structure are not visible here.  */
16857 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* Shift by doubling: x << 1 == x + x.  */
16861 emit_insn ((mode == DImode
16863 : gen_adddi3) (operand, operand, operand));
/* Small counts: a run of adds can be cheaper than one shift-by-constant
   when not optimizing for size (cost tables decide).  */
16865 else if (!optimize_insn_for_size_p ()
16866 && count * ix86_cost->add <= ix86_cost->shift_const)
16869 for (i=0; i<count; i++)
16871 emit_insn ((mode == DImode
16873 : gen_adddi3) (operand, operand, operand));
/* Otherwise emit the plain shift-by-immediate.  */
16877 emit_insn ((mode == DImode
16879 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into single-word operations.  SCRATCH, when available with CMOV,
   avoids the branchy shift-adjust sequence.
   NOTE(review): elided view — some branch/brace structure between
   visible lines is missing.  */
16883 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16885 rtx low[2], high[2];
16887 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count: fully resolved at expand time.  */
16889 if (CONST_INT_P (operands[2]))
16891 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16892 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* count >= word size: low word becomes 0, high word is the old low
   word shifted by the remainder.  */
16894 if (count >= single_width)
16896 emit_move_insn (high[0], low[1]);
16897 emit_move_insn (low[0], const0_rtx);
16899 if (count > single_width)
16900 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* count < word size: shld propagates low bits into high, then shift
   the low word.  */
16904 if (!rtx_equal_p (operands[0], operands[1]))
16905 emit_move_insn (operands[0], operands[1]);
16906 emit_insn ((mode == DImode
16908 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16909 ix86_expand_ashl_const (low[0], count, mode);
16914 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
/* Special case 1 << N (variable N).  */
16916 if (operands[1] == const1_rtx)
16918 /* Assuming we've chosen a QImode capable registers, then 1 << N
16919 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16920 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16922 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16924 ix86_expand_clear (low[0]);
16925 ix86_expand_clear (high[0]);
/* Test the word-select bit of N; setcc materializes 0/1 into the
   low byte of the chosen half.  */
16926 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16928 d = gen_lowpart (QImode, low[0]);
16929 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16930 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16931 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16933 d = gen_lowpart (QImode, high[0]);
16934 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16935 s = gen_rtx_NE (QImode, flags, const0_rtx);
16936 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16939 /* Otherwise, we can get the same results by manually performing
16940 a bit extract operation on bit 5/6, and then performing the two
16941 shifts. The two methods of getting 0/1 into low/high are exactly
16942 the same size. Avoiding the shift in the bit extract case helps
16943 pentium4 a bit; no one else seems to care much either way. */
16948 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16949 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16951 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16952 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) / 6 (TImode) of the count — the word-select
   bit — into high[0]; low[0] becomes its complement.  */
16954 emit_insn ((mode == DImode
16956 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16957 emit_insn ((mode == DImode
16959 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16960 emit_move_insn (low[0], high[0]);
16961 emit_insn ((mode == DImode
16963 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16966 emit_insn ((mode == DImode
16968 : gen_ashldi3) (low[0], low[0], operands[2]));
16969 emit_insn ((mode == DImode
16971 : gen_ashldi3) (high[0], high[0], operands[2]));
/* Special case -1 << N: shifting ones into a word of ones needs no
   shld.  */
16975 if (operands[1] == constm1_rtx)
16977 /* For -1 << N, we can avoid the shld instruction, because we
16978 know that we're shifting 0...31/63 ones into a -1. */
16979 emit_move_insn (low[0], constm1_rtx)
16980 if (optimize_insn_for_size_p ())
16981 emit_move_insn (high[0], low[0]);
16983 emit_move_insn (high[0], constm1_rtx);
/* General variable-count path: shld + shl, then fix up the case
   count >= word size either with cmov (scratch) or a branch.  */
16987 if (!rtx_equal_p (operands[0], operands[1]))
16988 emit_move_insn (operands[0], operands[1]);
16990 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16991 emit_insn ((mode == DImode
16993 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16996 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
16998 if (TARGET_CMOVE && scratch)
17000 ix86_expand_clear (scratch);
17001 emit_insn ((mode == DImode
17002 ? gen_x86_shift_adj_1
17003 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
17007 emit_insn ((mode == DImode
17008 ? gen_x86_shift_adj_2
17009 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into single-word ops.
   Mirrors ix86_split_ashl; the sign must be propagated into the high
   word.  NOTE(review): elided view — some structure is not visible.  */
17013 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
17015 rtx low[2], high[2];
17017 const int single_width = mode == DImode ? 32 : 64;
17019 if (CONST_INT_P (operands[2]))
17021 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17022 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximum count: both words collapse to the sign bit.  */
17024 if (count == single_width * 2 - 1)
17026 emit_move_insn (high[0], high[1]);
17027 emit_insn ((mode == DImode
17029 : gen_ashrdi3) (high[0], high[0],
17030 GEN_INT (single_width - 1)));
17031 emit_move_insn (low[0], high[0]);
/* count >= word size: low word is old high word shifted; high word is
   pure sign extension.  */
17034 else if (count >= single_width)
17036 emit_move_insn (low[0], high[1]);
17037 emit_move_insn (high[0], low[0]);
17038 emit_insn ((mode == DImode
17040 : gen_ashrdi3) (high[0], high[0],
17041 GEN_INT (single_width - 1)));
17042 if (count > single_width)
17043 emit_insn ((mode == DImode
17045 : gen_ashrdi3) (low[0], low[0],
17046 GEN_INT (count - single_width)));
/* count < word size: shrd feeds high bits into low, then sar high.  */
17050 if (!rtx_equal_p (operands[0], operands[1]))
17051 emit_move_insn (operands[0], operands[1]);
17052 emit_insn ((mode == DImode
17054 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17055 emit_insn ((mode == DImode
17057 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + sar, then adjust for count >= word size;
   SCRATCH holds the sign word for the cmov variant.  */
17062 if (!rtx_equal_p (operands[0], operands[1]))
17063 emit_move_insn (operands[0], operands[1]);
17065 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17067 emit_insn ((mode == DImode
17069 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17070 emit_insn ((mode == DImode
17072 : gen_ashrdi3) (high[0], high[0], operands[2]));
17074 if (TARGET_CMOVE && scratch)
17076 emit_move_insn (scratch, high[0]);
17077 emit_insn ((mode == DImode
17079 : gen_ashrdi3) (scratch, scratch,
17080 GEN_INT (single_width - 1)));
17081 emit_insn ((mode == DImode
17082 ? gen_x86_shift_adj_1
17083 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17087 emit_insn ((mode == DImode
17088 ? gen_x86_shift_adj_3
17089 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into single-word ops.
   Like ix86_split_ashr but zero-fills the high word instead of
   sign-extending.  NOTE(review): elided view.  */
17094 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17096 rtx low[2], high[2];
17098 const int single_width = mode == DImode ? 32 : 64;
17100 if (CONST_INT_P (operands[2]))
17102 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17103 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* count >= word size: low = old high (shifted), high = 0.  */
17105 if (count >= single_width)
17107 emit_move_insn (low[0], high[1]);
17108 ix86_expand_clear (high[0]);
17110 if (count > single_width)
17111 emit_insn ((mode == DImode
17113 : gen_lshrdi3) (low[0], low[0],
17114 GEN_INT (count - single_width)));
/* count < word size: shrd + shr.  */
17118 if (!rtx_equal_p (operands[0], operands[1]))
17119 emit_move_insn (operands[0], operands[1]);
17120 emit_insn ((mode == DImode
17122 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17123 emit_insn ((mode == DImode
17125 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
17130 if (!rtx_equal_p (operands[0], operands[1]))
17131 emit_move_insn (operands[0], operands[1]);
17133 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17135 emit_insn ((mode == DImode
17137 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17138 emit_insn ((mode == DImode
17140 : gen_lshrdi3) (high[0], high[0], operands[2]));
17142 /* Heh. By reversing the arguments, we can reuse this pattern. */
17143 if (TARGET_CMOVE && scratch)
17145 ix86_expand_clear (scratch);
17146 emit_insn ((mode == DImode
17147 ? gen_x86_shift_adj_1
17148 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17152 emit_insn ((mode == DImode
17153 ? gen_x86_shift_adj_2
17154 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17158 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* PROB is scaled by REG_BR_PROB_BASE; the note is attached to the last
   emitted insn, which must be a jump.  */
17160 predict_jump (int prob)
17162 rtx insn = get_last_insn ();
17163 gcc_assert (JUMP_P (insn));
17164 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17167 /* Helper function for the string operations below. Dest VARIABLE whether
17168 it is aligned to VALUE bytes. If true, jump to the label. */
/* Returns the label to be emitted at the jump target (return statement
   elided in this view).  EPILOGUE selects the branch-probability hint:
   50% for epilogue tests, 90% for prologue tests.  */
17170 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17172 rtx label = gen_label_rtx ();
17173 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17174 if (GET_MODE (variable) == DImode)
17175 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17177 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
/* Branch to LABEL when (variable & value) == 0, i.e. already aligned.  */
17178 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17181 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17183 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17187 /* Adjust COUNTER by the VALUE. */
/* Subtracts VALUE from COUNTREG in whichever of SImode/DImode the
   register has.  */
17189 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17191 if (GET_MODE (countreg) == DImode)
17192 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17194 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17197 /* Zero extend possibly SImode EXP to Pmode register. */
/* VOIDmode (constants) are forced into a fresh Pmode register; values
   already in Pmode are copied; otherwise a zero-extend to DImode is
   emitted (only reachable on 64-bit targets where Pmode is DImode).  */
17199 ix86_zero_extend_to_Pmode (rtx exp)
17202 if (GET_MODE (exp) == VOIDmode)
17203 return force_reg (Pmode, exp);
17204 if (GET_MODE (exp) == Pmode)
17205 return copy_to_mode_reg (Pmode, exp);
17206 r = gen_reg_rtx (Pmode);
17207 emit_insn (gen_zero_extendsidi2 (r, exp));
17211 /* Divide COUNTREG by SCALE. */
/* SCALE must be a power of two (exact_log2 below).  A constant count is
   divided at compile time; otherwise a logical right shift is emitted.
   Return of the shifted value is elided in this view.  */
17213 scale_counter (rtx countreg, int scale)
17216 rtx piece_size_mask;
17220 if (CONST_INT_P (countreg))
17221 return GEN_INT (INTVAL (countreg) / scale);
17222 gcc_assert (REG_P (countreg));
17224 piece_size_mask = GEN_INT (scale - 1);
17225 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17226 GEN_INT (exact_log2 (scale)),
17227 NULL, 1, OPTAB_DIRECT);
17231 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17232 DImode for constant loop counts. */
/* Non-constant counts keep their own mode; constant counts get SImode
   unless the value needs more than 32 bits on a 64-bit target (the
   return statements for those two cases are elided in this view).  */
17234 static enum machine_mode
17235 counter_mode (rtx count_exp)
17237 if (GET_MODE (count_exp) != VOIDmode)
17238 return GET_MODE (count_exp);
17239 if (!CONST_INT_P (count_exp))
17241 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17246 /* When SRCPTR is non-NULL, output simple loop to move memory
17247 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
17248 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
17249 equivalent loop to set memory by VALUE (supposed to be in MODE).
17251 The size is rounded down to whole number of chunk size moved at once.
17252 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* NOTE(review): elided view — loop/branch braces and some statements
   are not visible between the lines shown.  */
17256 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17257 rtx destptr, rtx srcptr, rtx value,
17258 rtx count, enum machine_mode mode, int unroll,
17261 rtx out_label, top_label, iter, tmp;
17262 enum machine_mode iter_mode = counter_mode (count);
17263 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17264 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17270 top_label = gen_label_rtx ();
17271 out_label = gen_label_rtx ();
17272 iter = gen_reg_rtx (iter_mode);
/* SIZE = count rounded down to a whole number of pieces.  */
17274 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17275 NULL, 1, OPTAB_DIRECT);
17276 /* Those two should combine. */
17277 if (piece_size == const1_rtx)
17279 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17281 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17283 emit_move_insn (iter, const0_rtx);
17285 emit_label (top_label);
/* Loop body: address = ptr + iter for dest (and src, if copying).  */
17287 tmp = convert_modes (Pmode, iter_mode, iter, true);
17288 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17289 destmem = change_address (destmem, mode, x_addr);
17293 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17294 srcmem = change_address (srcmem, mode, y_addr);
17296 /* When unrolling for chips that reorder memory reads and writes,
17297 we can save registers by using single temporary.
17298 Also using 4 temporaries is overkill in 32bit mode. */
/* "&& 0": this alternative (move-per-iteration without temporaries)
   is deliberately disabled; the read-all-then-write-all scheme below
   is used instead.  */
17299 if (!TARGET_64BIT && 0)
17301 for (i = 0; i < unroll; i++)
17306 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17308 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17310 emit_move_insn (destmem, srcmem);
/* Enabled scheme: load up to 4 chunks into temporaries, then store
   them — decouples loads from stores for OoO chips.  */
17316 gcc_assert (unroll <= 4);
17317 for (i = 0; i < unroll; i++)
17319 tmpreg[i] = gen_reg_rtx (mode);
17323 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17325 emit_move_insn (tmpreg[i], srcmem);
17327 for (i = 0; i < unroll; i++)
17332 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17334 emit_move_insn (destmem, tmpreg[i]);
/* Set-memory variant: store VALUE to each chunk.  */
17339 for (i = 0; i < unroll; i++)
17343 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17344 emit_move_insn (destmem, value);
/* Advance the iterator and loop while iter < size.  */
17347 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17348 true, OPTAB_LIB_WIDEN);
17350 emit_move_insn (iter, tmp);
17352 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive a branch-probability hint from the caller's expected size.  */
17354 if (expected_size != -1)
17356 expected_size /= GET_MODE_SIZE (mode) * unroll;
17357 if (expected_size == 0)
17359 else if (expected_size > REG_BR_PROB_BASE)
17360 predict_jump (REG_BR_PROB_BASE - 1);
17362 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17365 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* After the loop, bump the pointers past the copied region so callers
   can handle the tail.  */
17366 iter = ix86_zero_extend_to_Pmode (iter);
17367 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17368 true, OPTAB_LIB_WIDEN);
17369 if (tmp != destptr)
17370 emit_move_insn (destptr, tmp);
17373 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17374 true, OPTAB_LIB_WIDEN);
17376 emit_move_insn (srcptr, tmp);
17378 emit_label (out_label);
17381 /* Output "rep; mov" instruction.
17382 Arguments have same meaning as for previous function */
/* NOTE(review): elided view — some statements between visible lines
   are missing.  */
17384 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17385 rtx destptr, rtx srcptr,
17387 enum machine_mode mode)
17393 /* If the size is known, it is shorter to use rep movs. */
17394 if (mode == QImode && CONST_INT_P (count)
17395 && !(INTVAL (count) & 3)
/* Normalize MEMs to BLKmode keyed on the pointer registers so the
   rep-mov pattern gets consistent operands.  */
17398 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17399 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17400 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17401 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17402 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* DESTEXP/SRCEXP describe the final pointer values: ptr + count
   scaled back up by the chunk size for wide modes.  */
17403 if (mode != QImode)
17405 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17406 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17407 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17408 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17409 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17410 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17414 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17415 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
/* Keep alias info honest: record the exact byte size when known,
   otherwise clear any stale MEM_SIZE.  */
17417 if (CONST_INT_P (count))
17419 count = GEN_INT (INTVAL (count)
17420 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17421 destmem = shallow_copy_rtx (destmem);
17422 srcmem = shallow_copy_rtx (srcmem);
17423 set_mem_size (destmem, count);
17424 set_mem_size (srcmem, count);
17428 if (MEM_SIZE (destmem))
17429 set_mem_size (destmem, NULL_RTX);
17431 if (MEM_SIZE (srcmem))
17431 set_mem_size (srcmem, NULL_RTX);
17433 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17437 /* Output "rep; stos" instruction.
17438 Arguments have same meaning as for previous function */
/* NOTE(review): elided view.  ORIG_VALUE (visible below) appears to be
   a parameter carrying the pre-widened fill value — confirm against
   the full signature.  */
17440 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17441 rtx count, enum machine_mode mode,
17447 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17448 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17449 value = force_reg (mode, gen_lowpart (mode, value));
17450 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* DESTEXP describes the final destination pointer value.  */
17451 if (mode != QImode)
17453 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17454 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17455 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17458 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Only a zero fill with constant count lets us assert an exact
   MEM_SIZE; otherwise drop stale size info.  */
17459 if (orig_value == const0_rtx && CONST_INT_P (count))
17461 count = GEN_INT (INTVAL (count)
17462 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17463 destmem = shallow_copy_rtx (destmem);
17464 set_mem_size (destmem, count);
17466 else if (MEM_SIZE (destmem))
17467 set_mem_size (destmem, NULL_RTX);
17468 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single string-move of MODE at byte OFFSET from SRCPTR to
   DESTPTR; the strmov pattern also advances both pointer registers.  */
17472 emit_strmov (rtx destmem, rtx srcmem,
17473 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17475 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17476 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17477 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17480 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
/* NOTE(review): elided view — some branch structure between visible
   lines is missing.  */
17482 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17483 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant count: emit straight-line moves for each set bit of the
   residual count (16, 8, 4, 2, 1 bytes).  */
17486 if (CONST_INT_P (count))
17488 HOST_WIDE_INT countval = INTVAL (count);
17491 if ((countval & 0x10) && max_size > 16)
17495 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17496 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17499 gcc_unreachable ();
17502 if ((countval & 0x08) && max_size > 8)
17505 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17508 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17509 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17513 if ((countval & 0x04) && max_size > 4)
17515 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17518 if ((countval & 0x02) && max_size > 2)
17520 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17523 if ((countval & 0x01) && max_size > 1)
17525 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large variable residue: fall back to a byte loop.  */
17532 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17533 count, 1, OPTAB_DIRECT);
17534 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17535 count, QImode, 1, 4);
17539 /* When there are stringops, we can cheaply increase dest and src pointers.
17540 Otherwise we save code size by maintaining offset (zero is readily
17541 available from preceding rep operation) and using x86 addressing modes.
/* Variable count, TARGET_SINGLE_STRINGOP: test each alignment bit and
   conditionally emit one movs of the matching width.  */
17543 if (TARGET_SINGLE_STRINGOP)
17547 rtx label = ix86_expand_aligntest (count, 4, true);
17548 src = change_address (srcmem, SImode, srcptr);
17549 dest = change_address (destmem, SImode, destptr);
17550 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17551 emit_label (label);
17552 LABEL_NUSES (label) = 1;
17556 rtx label = ix86_expand_aligntest (count, 2, true);
17557 src = change_address (srcmem, HImode, srcptr);
17558 dest = change_address (destmem, HImode, destptr);
17559 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17560 emit_label (label);
17561 LABEL_NUSES (label) = 1;
17565 rtx label = ix86_expand_aligntest (count, 1, true);
17566 src = change_address (srcmem, QImode, srcptr);
17567 dest = change_address (destmem, QImode, destptr);
17568 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17569 emit_label (label);
17570 LABEL_NUSES (label) = 1;
/* No single stringop: keep a running OFFSET register and use plain
   moves with reg+reg addressing.  */
17575 rtx offset = force_reg (Pmode, const0_rtx);
17580 rtx label = ix86_expand_aligntest (count, 4, true);
17581 src = change_address (srcmem, SImode, srcptr);
17582 dest = change_address (destmem, SImode, destptr);
17583 emit_move_insn (dest, src);
17584 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17585 true, OPTAB_LIB_WIDEN);
17587 emit_move_insn (offset, tmp);
17588 emit_label (label);
17589 LABEL_NUSES (label) = 1;
17593 rtx label = ix86_expand_aligntest (count, 2, true);
17594 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17595 src = change_address (srcmem, HImode, tmp);
17596 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17597 dest = change_address (destmem, HImode, tmp);
17598 emit_move_insn (dest, src);
17599 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17600 true, OPTAB_LIB_WIDEN);
17602 emit_move_insn (offset, tmp);
17603 emit_label (label);
17604 LABEL_NUSES (label) = 1;
17608 rtx label = ix86_expand_aligntest (count, 1, true);
17609 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17610 src = change_address (srcmem, QImode, tmp);
17611 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17612 dest = change_address (destmem, QImode, tmp);
17613 emit_move_insn (dest, src);
17614 emit_label (label);
17615 LABEL_NUSES (label) = 1;
17620 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Fallback epilogue: mask COUNT to the residue and run a QImode
   store loop.  */
17622 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17623 rtx count, int max_size)
17626 expand_simple_binop (counter_mode (count), AND, count,
17627 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17628 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17629 gen_lowpart (QImode, value), count, QImode,
17633 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* NOTE(review): elided view — VALUE is assumed pre-widened; the branch
   structure between visible lines is partially missing.  */
17635 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
/* Constant residue: straight-line strset per set bit (16/8/4/2/1).  */
17639 if (CONST_INT_P (count))
17641 HOST_WIDE_INT countval = INTVAL (count);
17644 if ((countval & 0x10) && max_size > 16)
17648 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17649 emit_insn (gen_strset (destptr, dest, value));
17650 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17651 emit_insn (gen_strset (destptr, dest, value));
17654 gcc_unreachable ();
17657 if ((countval & 0x08) && max_size > 8)
17661 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17662 emit_insn (gen_strset (destptr, dest, value));
17666 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17667 emit_insn (gen_strset (destptr, dest, value));
17668 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17669 emit_insn (gen_strset (destptr, dest, value));
17673 if ((countval & 0x04) && max_size > 4)
17675 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17676 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17679 if ((countval & 0x02) && max_size > 2)
17681 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17682 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17685 if ((countval & 0x01) && max_size > 1)
17687 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17688 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Large variable residue: defer to the loop-based epilogue.  */
17695 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable residue: alignment-test each power of two and emit the
   matching conditional stores.  */
17700 rtx label = ix86_expand_aligntest (count, 16, true);
17703 dest = change_address (destmem, DImode, destptr);
17704 emit_insn (gen_strset (destptr, dest, value));
17705 emit_insn (gen_strset (destptr, dest, value));
17709 dest = change_address (destmem, SImode, destptr);
17710 emit_insn (gen_strset (destptr, dest, value));
17711 emit_insn (gen_strset (destptr, dest, value));
17712 emit_insn (gen_strset (destptr, dest, value));
17713 emit_insn (gen_strset (destptr, dest, value));
17715 emit_label (label);
17716 LABEL_NUSES (label) = 1;
17720 rtx label = ix86_expand_aligntest (count, 8, true);
17723 dest = change_address (destmem, DImode, destptr);
17724 emit_insn (gen_strset (destptr, dest, value));
17728 dest = change_address (destmem, SImode, destptr);
17729 emit_insn (gen_strset (destptr, dest, value));
17730 emit_insn (gen_strset (destptr, dest, value));
17732 emit_label (label);
17733 LABEL_NUSES (label) = 1;
17737 rtx label = ix86_expand_aligntest (count, 4, true);
17738 dest = change_address (destmem, SImode, destptr);
17739 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17740 emit_label (label);
17741 LABEL_NUSES (label) = 1;
17745 rtx label = ix86_expand_aligntest (count, 2, true);
17746 dest = change_address (destmem, HImode, destptr);
17747 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17748 emit_label (label);
17749 LABEL_NUSES (label) = 1;
17753 rtx label = ix86_expand_aligntest (count, 1, true);
17754 dest = change_address (destmem, QImode, destptr);
17755 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17756 emit_label (label);
17757 LABEL_NUSES (label) = 1;
17761 /* Copy enough bytes from SRC to DEST to raise the alignment of DEST, known
17762    to be aligned to ALIGN, up to DESIRED_ALIGNMENT.  */
/* Emit a jump tree that copies 1, 2 and/or 4 bytes from SRCMEM to DESTMEM
   until DESTPTR reaches DESIRED_ALIGNMENT, decrementing COUNT as it goes.
   Each ix86_expand_aligntest emits a conditional branch skipping the move
   when the corresponding low bit of DESTPTR is already clear.  */
17764 expand_movmem_prologue (rtx destmem, rtx srcmem,
17765 rtx destptr, rtx srcptr, rtx count,
17766 int align, int desired_alignment)
/* Copy a single byte when the destination address may be odd.  */
17768 if (align <= 1 && desired_alignment > 1)
17770 rtx label = ix86_expand_aligntest (destptr, 1, false);
17771 srcmem = change_address (srcmem, QImode, srcptr);
17772 destmem = change_address (destmem, QImode, destptr);
17773 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17774 ix86_adjust_counter (count, 1);
17775 emit_label (label);
17776 LABEL_NUSES (label) = 1;
/* Copy a halfword to reach 4-byte alignment.  */
17778 if (align <= 2 && desired_alignment > 2)
17780 rtx label = ix86_expand_aligntest (destptr, 2, false);
17781 srcmem = change_address (srcmem, HImode, srcptr);
17782 destmem = change_address (destmem, HImode, destptr);
17783 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17784 ix86_adjust_counter (count, 2);
17785 emit_label (label);
17786 LABEL_NUSES (label) = 1;
/* Copy a word to reach 8-byte alignment.  */
17788 if (align <= 4 && desired_alignment > 4)
17790 rtx label = ix86_expand_aligntest (destptr, 4, false);
17791 srcmem = change_address (srcmem, SImode, srcptr);
17792 destmem = change_address (destmem, SImode, destptr);
17793 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17794 ix86_adjust_counter (count, 4);
17795 emit_label (label);
17796 LABEL_NUSES (label) = 1;
/* Alignments above 8 would need further steps; the jump tree stops here.  */
17798 gcc_assert (desired_alignment <= 8);
17801 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
17802 ALIGN_BYTES is how many bytes need to be copied. */
17804 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17805 int desired_align, int align_bytes)
17808 rtx src_size, dst_size;
17810 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17811 if (src_align_bytes >= 0)
17812 src_align_bytes = desired_align - src_align_bytes;
17813 src_size = MEM_SIZE (src);
17814 dst_size = MEM_SIZE (dst);
17815 if (align_bytes & 1)
17817 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17818 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17820 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17822 if (align_bytes & 2)
17824 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17825 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17826 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17827 set_mem_align (dst, 2 * BITS_PER_UNIT);
17828 if (src_align_bytes >= 0
17829 && (src_align_bytes & 1) == (align_bytes & 1)
17830 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17831 set_mem_align (src, 2 * BITS_PER_UNIT);
17833 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17835 if (align_bytes & 4)
17837 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17838 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17839 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17840 set_mem_align (dst, 4 * BITS_PER_UNIT);
17841 if (src_align_bytes >= 0)
17843 unsigned int src_align = 0;
17844 if ((src_align_bytes & 3) == (align_bytes & 3))
17846 else if ((src_align_bytes & 1) == (align_bytes & 1))
17848 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17849 set_mem_align (src, src_align * BITS_PER_UNIT);
17852 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17854 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17855 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17856 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17857 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17858 if (src_align_bytes >= 0)
17860 unsigned int src_align = 0;
17861 if ((src_align_bytes & 7) == (align_bytes & 7))
17863 else if ((src_align_bytes & 3) == (align_bytes & 3))
17865 else if ((src_align_bytes & 1) == (align_bytes & 1))
17867 if (src_align > (unsigned int) desired_align)
17868 src_align = desired_align;
17869 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17870 set_mem_align (src, src_align * BITS_PER_UNIT);
17873 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17875 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
17880 /* Store enough bytes to DEST to raise the alignment of DEST, known to be
17881    aligned to ALIGN, up to DESIRED_ALIGNMENT.  */
/* Emit a jump tree that stores 1, 2 and/or 4 low-order bytes of VALUE to
   DESTMEM until DESTPTR reaches DESIRED_ALIGNMENT, decrementing COUNT as it
   goes.  Mirrors expand_movmem_prologue, but uses gen_strset stores.  */
17883 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17884 int align, int desired_alignment)
/* Store one byte when the destination address may be odd.  */
17886 if (align <= 1 && desired_alignment > 1)
17888 rtx label = ix86_expand_aligntest (destptr, 1, false);
17889 destmem = change_address (destmem, QImode, destptr);
17890 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17891 ix86_adjust_counter (count, 1);
17892 emit_label (label);
17893 LABEL_NUSES (label) = 1;
/* Store a halfword to reach 4-byte alignment.  */
17895 if (align <= 2 && desired_alignment > 2)
17897 rtx label = ix86_expand_aligntest (destptr, 2, false);
17898 destmem = change_address (destmem, HImode, destptr);
17899 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17900 ix86_adjust_counter (count, 2);
17901 emit_label (label);
17902 LABEL_NUSES (label) = 1;
/* Store a word to reach 8-byte alignment.  */
17904 if (align <= 4 && desired_alignment > 4)
17906 rtx label = ix86_expand_aligntest (destptr, 4, false);
17907 destmem = change_address (destmem, SImode, destptr);
17908 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17909 ix86_adjust_counter (count, 4);
17910 emit_label (label);
17911 LABEL_NUSES (label) = 1;
/* Alignments above 8 would need further steps; the jump tree stops here.  */
17913 gcc_assert (desired_alignment <= 8);
17916 /* Store enough bytes to DST to raise its alignment up to DESIRED_ALIGN.
17917    ALIGN_BYTES is how many bytes need to be stored.  */
/* Constant-count variant of expand_setmem_prologue: ALIGN_BYTES low-order
   bytes of VALUE are stored unconditionally (no runtime alignment tests),
   and DST's MEM attributes (alignment, size) are kept accurate for alias
   analysis.  Returns the adjusted DST.
   NOTE(review): the lines updating the running offset OFF between stores
   are not visible in this listing -- confirm against the full source.  */
17919 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17920 int desired_align, int align_bytes)
17923 rtx dst_size = MEM_SIZE (dst);
17924 if (align_bytes & 1)
17926 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17928 emit_insn (gen_strset (destreg, dst,
17929 gen_lowpart (QImode, value)));
17931 if (align_bytes & 2)
17933 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17934 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17935 set_mem_align (dst, 2 * BITS_PER_UNIT);
17937 emit_insn (gen_strset (destreg, dst,
17938 gen_lowpart (HImode, value)));
17940 if (align_bytes & 4)
17942 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17943 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17944 set_mem_align (dst, 4 * BITS_PER_UNIT);
17946 emit_insn (gen_strset (destreg, dst,
17947 gen_lowpart (SImode, value)));
/* Switch the remaining tail back to BLKmode and record the alignment and
   reduced size now known for DST.  */
17949 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17950 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17951 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17953 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17957 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
17958 static enum stringop_alg
17959 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17960 int *dynamic_check)
17962 const struct stringop_algs * algs;
17963 bool optimize_for_speed;
17964 /* Algorithms using the rep prefix want at least edi and ecx;
17965 additionally, memset wants eax and memcpy wants esi. Don't
17966 consider such algorithms if the user has appropriated those
17967 registers for their own purposes. */
17968 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17970 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17972 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17973 || (alg != rep_prefix_1_byte \
17974 && alg != rep_prefix_4_byte \
17975 && alg != rep_prefix_8_byte))
17976 const struct processor_costs *cost;
17978 /* Even if the string operation call is cold, we still might spend a lot
17979 of time processing large blocks. */
17980 if (optimize_function_for_size_p (cfun)
17981 || (optimize_insn_for_size_p ()
17982 && expected_size != -1 && expected_size < 256))
17983 optimize_for_speed = false;
17985 optimize_for_speed = true;
17987 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17989 *dynamic_check = -1;
17991 algs = &cost->memset[TARGET_64BIT != 0];
17993 algs = &cost->memcpy[TARGET_64BIT != 0];
17994 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17995 return stringop_alg;
17996 /* rep; movq or rep; movl is the smallest variant. */
17997 else if (!optimize_for_speed)
17999 if (!count || (count & 3))
18000 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
18002 return rep_prefix_usable ? rep_prefix_4_byte : loop;
18004 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
18006 else if (expected_size != -1 && expected_size < 4)
18007 return loop_1_byte;
18008 else if (expected_size != -1)
18011 enum stringop_alg alg = libcall;
18012 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
18014 /* We get here if the algorithms that were not libcall-based
18015 were rep-prefix based and we are unable to use rep prefixes
18016 based on global register usage. Break out of the loop and
18017 use the heuristic below. */
18018 if (algs->size[i].max == 0)
18020 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18022 enum stringop_alg candidate = algs->size[i].alg;
18024 if (candidate != libcall && ALG_USABLE_P (candidate))
18026 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18027 last non-libcall inline algorithm. */
18028 if (TARGET_INLINE_ALL_STRINGOPS)
18030 /* When the current size is best to be copied by a libcall,
18031 but we are still forced to inline, run the heuristic below
18032 that will pick code for medium sized blocks. */
18033 if (alg != libcall)
18037 else if (ALG_USABLE_P (candidate))
18041 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18043 /* When asked to inline the call anyway, try to pick meaningful choice.
18044 We look for maximal size of block that is faster to copy by hand and
18045 take blocks of at most of that size guessing that average size will
18046 be roughly half of the block.
18048 If this turns out to be bad, we might simply specify the preferred
18049 choice in ix86_costs. */
18050 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18051 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18054 enum stringop_alg alg;
18056 bool any_alg_usable_p = true;
18058 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
18060 enum stringop_alg candidate = algs->size[i].alg;
18061 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18063 if (candidate != libcall && candidate
18064 && ALG_USABLE_P (candidate))
18065 max = algs->size[i].max;
18067 /* If there aren't any usable algorithms, then recursing on
18068 smaller sizes isn't going to find anything. Just return the
18069 simple byte-at-a-time copy loop. */
18070 if (!any_alg_usable_p)
18072 /* Pick something reasonable. */
18073 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18074 *dynamic_check = 128;
18075 return loop_1_byte;
18079 alg = decide_alg (count, max / 2, memset, dynamic_check);
18080 gcc_assert (*dynamic_check == -1);
18081 gcc_assert (alg != libcall);
18082 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18083 *dynamic_check = max;
18086 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18087 #undef ALG_USABLE_P
18090 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18091 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* Returns the alignment the main loop of algorithm ALG wants the
   destination raised to; at least ALIGN, and left at ALIGN for very small
   EXPECTED_SIZE where an alignment prologue cannot pay off.
   NOTE(review): several case bodies (and the desired_align assignments for
   the rep-prefix cases) are not visible in this listing -- sampling gaps.  */
18093 decide_alignment (int align,
18094 enum stringop_alg alg,
18097 int desired_align = 0;
18101 gcc_unreachable ();
18103 case unrolled_loop:
/* Word-sized loops want word alignment.  */
18104 desired_align = GET_MODE_SIZE (Pmode);
18106 case rep_prefix_8_byte:
18109 case rep_prefix_4_byte:
18110 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18111 copying whole cacheline at once. */
18112 if (TARGET_PENTIUMPRO)
18117 case rep_prefix_1_byte:
18118 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18119 copying whole cacheline at once. */
18120 if (TARGET_PENTIUMPRO)
/* Never decrease the known alignment.  */
18134 if (desired_align < align)
18135 desired_align = align;
/* For tiny expected blocks an alignment prologue is not worth it.  */
18136 if (expected_size != -1 && expected_size < 4)
18137 desired_align = align;
18138 return desired_align;
/* Return the smallest power of 2 strictly greater than VAL.
   (The body of this function was lost in the listing; this restores the
   canonical doubling loop: 0 -> 1, 1 -> 2, 4 -> 8, 7 -> 8, ...)  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
18151 /* Expand string move (memcpy) operation. Use i386 string operations when
18152 profitable. expand_setmem contains similar code. The code depends upon
18153 architecture, block size and alignment, but always has the same
18156 1) Prologue guard: Conditional that jumps up to epilogues for small
18157 blocks that can be handled by epilogue alone. This is faster but
18158 also needed for correctness, since the prologue assumes the block is larger
18159 than the desired alignment.
18161 Optional dynamic check for size and libcall for large
18162 blocks is emitted here too, with -minline-stringops-dynamically.
18164 2) Prologue: copy first few bytes in order to get destination aligned
18165 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18166 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18167 We emit either a jump tree on power of two sized blocks, or a byte loop.
18169 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18170 with specified algorithm.
18172 4) Epilogue: code copying tail of the block that is too small to be
18173 handled by main body (or up to size guarded by prologue guard). */
/* Expand a memcpy-style block move of COUNT_EXP bytes from SRC to DST,
   following the four-step structure described in the comment above:
   algorithm choice, prologue guard, alignment prologue, main body,
   epilogue.  ALIGN_EXP / EXPECTED_ALIGN_EXP / EXPECTED_SIZE_EXP are
   optional constant hints.  (The return statements fall outside the
   sampled lines of this listing.)  */
18176 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18177 rtx expected_align_exp, rtx expected_size_exp)
18183 rtx jump_around_label = NULL;
18184 HOST_WIDE_INT align = 1;
18185 unsigned HOST_WIDE_INT count = 0;
18186 HOST_WIDE_INT expected_size = -1;
18187 int size_needed = 0, epilogue_size_needed;
18188 int desired_align = 0, align_bytes = 0;
18189 enum stringop_alg alg;
18191 bool need_zero_guard = false;
18193 if (CONST_INT_P (align_exp))
18194 align = INTVAL (align_exp);
18195 /* i386 can do misaligned access on reasonably increased cost. */
18196 if (CONST_INT_P (expected_align_exp)
18197 && INTVAL (expected_align_exp) > align)
18198 align = INTVAL (expected_align_exp);
18199 /* ALIGN is the minimum of destination and source alignment, but we care here
18200 just about destination alignment. */
18201 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18202 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18204 if (CONST_INT_P (count_exp))
18205 count = expected_size = INTVAL (count_exp);
18206 if (CONST_INT_P (expected_size_exp) && count == 0)
18207 expected_size = INTVAL (expected_size_exp);
18209 /* Make sure we don't need to care about overflow later on. */
18210 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18213 /* Step 0: Decide on preferred algorithm, desired alignment and
18214 size of chunks to be copied by main loop. */
18216 alg = decide_alg (count, expected_size, false, &dynamic_check);
18217 desired_align = decide_alignment (align, alg, expected_size);
18219 if (!TARGET_ALIGN_STRINGOPS)
18220 align = desired_align;
18222 if (alg == libcall)
18224 gcc_assert (alg != no_stringop);
/* Put the count and both addresses into registers for the emitted code.  */
18226 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18227 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18228 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* SIZE_NEEDED is the chunk size of the main loop; loops also need a guard
   against a zero iteration count (NEED_ZERO_GUARD).  */
18233 gcc_unreachable ();
18235 need_zero_guard = true;
18236 size_needed = GET_MODE_SIZE (Pmode);
18238 case unrolled_loop:
18239 need_zero_guard = true;
18240 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18242 case rep_prefix_8_byte:
18245 case rep_prefix_4_byte:
18248 case rep_prefix_1_byte:
18252 need_zero_guard = true;
18257 epilogue_size_needed = size_needed;
18259 /* Step 1: Prologue guard. */
18261 /* Alignment code needs count to be in register. */
18262 if (CONST_INT_P (count_exp) && desired_align > align)
18264 if (INTVAL (count_exp) > desired_align
18265 && INTVAL (count_exp) > size_needed)
18268 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18269 if (align_bytes <= 0)
18272 align_bytes = desired_align - align_bytes;
18274 if (align_bytes == 0)
18275 count_exp = force_reg (counter_mode (count_exp), count_exp);
18277 gcc_assert (desired_align >= 1 && align >= 1);
18279 /* Ensure that alignment prologue won't copy past end of block. */
18280 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18282 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18283 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
18284 Make sure it is power of 2. */
18285 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18289 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18291 /* If main algorithm works on QImode, no epilogue is needed.
18292 For small sizes just don't align anything. */
18293 if (size_needed == 1)
18294 desired_align = align;
/* Runtime guard: small blocks jump straight to the epilogue.  */
18301 label = gen_label_rtx ();
18302 emit_cmp_and_jump_insns (count_exp,
18303 GEN_INT (epilogue_size_needed),
18304 LTU, 0, counter_mode (count_exp), 1, label);
18305 if (expected_size == -1 || expected_size < epilogue_size_needed)
18306 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18308 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18312 /* Emit code to decide on runtime whether library call or inline should be
18314 if (dynamic_check != -1)
18316 if (CONST_INT_P (count_exp))
18318 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18320 emit_block_move_via_libcall (dst, src, count_exp, false);
18321 count_exp = const0_rtx;
/* Non-constant count: branch to libcall for blocks above the threshold.  */
18327 rtx hot_label = gen_label_rtx ();
18328 jump_around_label = gen_label_rtx ();
18329 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18330 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18331 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18332 emit_block_move_via_libcall (dst, src, count_exp, false);
18333 emit_jump (jump_around_label);
18334 emit_label (hot_label);
18338 /* Step 2: Alignment prologue. */
18340 if (desired_align > align)
18342 if (align_bytes == 0)
18344 /* Except for the first move in epilogue, we no longer know
18345 constant offset in aliasing info. It doesn't seem worth
18346 the pain to maintain it for the first move, so throw away
18348 src = change_address (src, BLKmode, srcreg);
18349 dst = change_address (dst, BLKmode, destreg);
18350 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18355 /* If we know how many bytes need to be stored before dst is
18356 sufficiently aligned, maintain aliasing info accurately. */
18357 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18358 desired_align, align_bytes);
18359 count_exp = plus_constant (count_exp, -align_bytes);
18360 count -= align_bytes;
18362 if (need_zero_guard
18363 && (count < (unsigned HOST_WIDE_INT) size_needed
18364 || (align_bytes == 0
18365 && count < ((unsigned HOST_WIDE_INT) size_needed
18366 + desired_align - align))))
18368 /* It is possible that we copied enough so the main loop will not
18370 gcc_assert (size_needed > 1);
18371 if (label == NULL_RTX)
18372 label = gen_label_rtx ();
18373 emit_cmp_and_jump_insns (count_exp,
18374 GEN_INT (size_needed),
18375 LTU, 0, counter_mode (count_exp), 1, label);
18376 if (expected_size == -1
18377 || expected_size < (desired_align - align) / 2 + size_needed)
18378 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18380 predict_jump (REG_BR_PROB_BASE * 60 / 100);
/* With a byte-granular main body the small-block label can be bound here
   and no separate epilogue is needed.  */
18383 if (label && size_needed == 1)
18385 emit_label (label);
18386 LABEL_NUSES (label) = 1;
18388 epilogue_size_needed = 1;
18390 else if (label == NULL_RTX)
18391 epilogue_size_needed = size_needed;
18393 /* Step 3: Main loop. */
18399 gcc_unreachable ();
18401 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18402 count_exp, QImode, 1, expected_size);
18405 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18406 count_exp, Pmode, 1, expected_size);
18408 case unrolled_loop:
18409 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18410 registers for 4 temporaries anyway. */
18411 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18412 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18415 case rep_prefix_8_byte:
18416 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18419 case rep_prefix_4_byte:
18420 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18423 case rep_prefix_1_byte:
18424 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18428 /* Adjust properly the offset of src and dest memory for aliasing. */
18429 if (CONST_INT_P (count_exp))
18431 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18432 (count / size_needed) * size_needed);
18433 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18434 (count / size_needed) * size_needed);
18438 src = change_address (src, BLKmode, srcreg);
18439 dst = change_address (dst, BLKmode, destreg);
18442 /* Step 4: Epilogue to copy the remaining bytes. */
18446 /* When the main loop is done, COUNT_EXP might hold original count,
18447 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18448 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18449 bytes. Compensate if needed. */
18451 if (size_needed < epilogue_size_needed)
18454 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18455 GEN_INT (size_needed - 1), count_exp, 1,
18457 if (tmp != count_exp)
18458 emit_move_insn (count_exp, tmp);
18460 emit_label (label);
18461 LABEL_NUSES (label) = 1;
18464 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18465 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18466 epilogue_size_needed);
18467 if (jump_around_label)
18468 emit_label (jump_around_label);
18472 /* Helper function for memset expansion (used via
18473    promote_duplicated_reg_to_size from ix86_expand_setmem).  For QImode
18474    value 0xXY produce 0xXYXYXYXY of the width specified by MODE.  This is
18475    essentially a * 0x10101010, but we can do slightly better than
       synth_mult by unwinding the sequence by hand on CPUs with
/* Return a register of width MODE holding the QImode VAL replicated into
   every byte.  Uses a plain constant when VAL is constant, a multiply by
   the replicated-ones constant when the cost tables say that is cheaper,
   and otherwise a shift/or (or insv) sequence.  */
18478 promote_duplicated_reg (enum machine_mode mode, rtx val)
18480 enum machine_mode valmode = GET_MODE (val);
/* Number of shift/or rounds needed to fill MODE from one byte.  */
18482 int nops = mode == DImode ? 3 : 2;
18484 gcc_assert (mode == SImode || mode == DImode);
18485 if (val == const0_rtx)
18486 return copy_to_mode_reg (mode, const0_rtx);
/* Constant byte: build the replicated constant at compile time.  */
18487 if (CONST_INT_P (val))
18489 HOST_WIDE_INT v = INTVAL (val) & 255;
18493 if (mode == DImode)
18494 v |= (v << 16) << 16;
18495 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18498 if (valmode == VOIDmode)
18500 if (valmode != QImode)
18501 val = gen_lowpart (QImode, val);
18502 if (mode == QImode)
/* Decide from the cost tables whether a multiply by the replicated-ones
   constant beats the shift/or sequence below.  */
18504 if (!TARGET_PARTIAL_REG_STALL)
18506 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18507 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18508 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18509 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18511 rtx reg = convert_modes (mode, QImode, val, true);
18512 tmp = promote_duplicated_reg (mode, const1_rtx);
18513 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
/* Fallback: widen, duplicate the low byte into the second byte (insv when
   partial register stalls are not a concern), then double the filled width
   with shift-by-8/16/32 and IOR.  */
18518 rtx reg = convert_modes (mode, QImode, val, true);
18520 if (!TARGET_PARTIAL_REG_STALL)
18521 if (mode == SImode)
18522 emit_insn (gen_movsi_insv_1 (reg, reg));
18524 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18527 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18528 NULL, 1, OPTAB_DIRECT);
18530 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18532 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18533 NULL, 1, OPTAB_DIRECT);
18534 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* For DImode one more doubling (shift by 32) fills all eight bytes.  */
18535 if (mode == SImode)
18537 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18538 NULL, 1, OPTAB_DIRECT);
18539 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18544 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18545 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18546 alignment from ALIGN to DESIRED_ALIGN. */
/* Picks the widest mode (DI/SI/HI) actually touched by either the main
   loop chunk size or the alignment prologue; returns VAL unchanged when
   only single bytes are ever stored.  */
18548 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18553 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18554 promoted_val = promote_duplicated_reg (DImode, val);
18555 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18556 promoted_val = promote_duplicated_reg (SImode, val);
18557 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18558 promoted_val = promote_duplicated_reg (HImode, val);
18560 promoted_val = val;
18562 return promoted_val;
18565 /* Expand string set operation (memset; the filler byte VAL_EXP need not be
18566    zero).  Use i386 string operations when profitable.  See expand_movmem
18567    comment for explanation of individual steps performed.  */
/* Expand a memset-style block store of COUNT_EXP copies of byte VAL_EXP to
   DST, mirroring ix86_expand_movmem's four-step structure.  The extra
   complication is VAL promotion: the byte is widened (duplicated) into a
   register once, cheaply when constant and after the small-block guard
   otherwise; FORCE_LOOPY_EPILOGUE falls back to a byte loop when the
   promoted value is not available to the epilogue.  (The return statements
   fall outside the sampled lines of this listing.)  */
18569 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18570 rtx expected_align_exp, rtx expected_size_exp)
18575 rtx jump_around_label = NULL;
18576 HOST_WIDE_INT align = 1;
18577 unsigned HOST_WIDE_INT count = 0;
18578 HOST_WIDE_INT expected_size = -1;
18579 int size_needed = 0, epilogue_size_needed;
18580 int desired_align = 0, align_bytes = 0;
18581 enum stringop_alg alg;
18582 rtx promoted_val = NULL;
18583 bool force_loopy_epilogue = false;
18585 bool need_zero_guard = false;
18587 if (CONST_INT_P (align_exp))
18588 align = INTVAL (align_exp);
18589 /* i386 can do misaligned access on reasonably increased cost. */
18590 if (CONST_INT_P (expected_align_exp)
18591 && INTVAL (expected_align_exp) > align)
18592 align = INTVAL (expected_align_exp);
18593 if (CONST_INT_P (count_exp))
18594 count = expected_size = INTVAL (count_exp);
18595 if (CONST_INT_P (expected_size_exp) && count == 0)
18596 expected_size = INTVAL (expected_size_exp);
18598 /* Make sure we don't need to care about overflow later on. */
18599 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18602 /* Step 0: Decide on preferred algorithm, desired alignment and
18603 size of chunks to be copied by main loop. */
18605 alg = decide_alg (count, expected_size, true, &dynamic_check);
18606 desired_align = decide_alignment (align, alg, expected_size);
18608 if (!TARGET_ALIGN_STRINGOPS)
18609 align = desired_align;
18611 if (alg == libcall)
18613 gcc_assert (alg != no_stringop);
18615 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18616 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* SIZE_NEEDED is the chunk size of the main loop; loops also need a guard
   against a zero iteration count (NEED_ZERO_GUARD).  */
18621 gcc_unreachable ();
18623 need_zero_guard = true;
18624 size_needed = GET_MODE_SIZE (Pmode);
18626 case unrolled_loop:
18627 need_zero_guard = true;
18628 size_needed = GET_MODE_SIZE (Pmode) * 4;
18630 case rep_prefix_8_byte:
18633 case rep_prefix_4_byte:
18636 case rep_prefix_1_byte:
18640 need_zero_guard = true;
18644 epilogue_size_needed = size_needed;
18646 /* Step 1: Prologue guard. */
18648 /* Alignment code needs count to be in register. */
18649 if (CONST_INT_P (count_exp) && desired_align > align)
18651 if (INTVAL (count_exp) > desired_align
18652 && INTVAL (count_exp) > size_needed)
18655 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18656 if (align_bytes <= 0)
18659 align_bytes = desired_align - align_bytes;
18661 if (align_bytes == 0)
/* A narrow counter mode suffices unless the constant count needs 64 bits.  */
18663 enum machine_mode mode = SImode;
18664 if (TARGET_64BIT && (count & ~0xffffffff))
18666 count_exp = force_reg (mode, count_exp);
18669 /* Do the cheap promotion to allow better CSE across the
18670 main loop and epilogue (i.e. one load of the big constant in
18671 front of all code). */
18672 if (CONST_INT_P (val_exp))
18673 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18674 desired_align, align);
18675 /* Ensure that alignment prologue won't copy past end of block. */
18676 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18678 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18679 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18680 Make sure it is power of 2. */
18681 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18683 /* To improve performance of small blocks, we jump around the VAL
18684 promoting mode. This means that if the promoted VAL is not constant,
18685 we might not use it in the epilogue and have to use byte
18687 if (epilogue_size_needed > 2 && !promoted_val)
18688 force_loopy_epilogue = true;
18691 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18693 /* If main algorithm works on QImode, no epilogue is needed.
18694 For small sizes just don't align anything. */
18695 if (size_needed == 1)
18696 desired_align = align;
/* Runtime guard: small blocks jump straight to the epilogue.  */
18703 label = gen_label_rtx ();
18704 emit_cmp_and_jump_insns (count_exp,
18705 GEN_INT (epilogue_size_needed),
18706 LTU, 0, counter_mode (count_exp), 1, label);
18707 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18708 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18710 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Runtime dispatch to the libcall for blocks above DYNAMIC_CHECK.  */
18713 if (dynamic_check != -1)
18715 rtx hot_label = gen_label_rtx ();
18716 jump_around_label = gen_label_rtx ();
18717 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18718 LEU, 0, counter_mode (count_exp), 1, hot_label);
18719 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18720 set_storage_via_libcall (dst, count_exp, val_exp, false);
18721 emit_jump (jump_around_label);
18722 emit_label (hot_label);
18725 /* Step 2: Alignment prologue. */
18727 /* Do the expensive promotion once we branched off the small blocks. */
18729 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18730 desired_align, align);
18731 gcc_assert (desired_align >= 1 && align >= 1);
18733 if (desired_align > align)
18735 if (align_bytes == 0)
18737 /* Except for the first move in epilogue, we no longer know
18738 constant offset in aliasing info. It doesn't seem worth
18739 the pain to maintain it for the first move, so throw away
18741 dst = change_address (dst, BLKmode, destreg);
18742 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18747 /* If we know how many bytes need to be stored before dst is
18748 sufficiently aligned, maintain aliasing info accurately. */
18749 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18750 desired_align, align_bytes);
18751 count_exp = plus_constant (count_exp, -align_bytes);
18752 count -= align_bytes;
18754 if (need_zero_guard
18755 && (count < (unsigned HOST_WIDE_INT) size_needed
18756 || (align_bytes == 0
18757 && count < ((unsigned HOST_WIDE_INT) size_needed
18758 + desired_align - align))))
18760 /* It is possible that we copied enough so the main loop will not
18762 gcc_assert (size_needed > 1);
18763 if (label == NULL_RTX)
18764 label = gen_label_rtx ();
18765 emit_cmp_and_jump_insns (count_exp,
18766 GEN_INT (size_needed),
18767 LTU, 0, counter_mode (count_exp), 1, label);
18768 if (expected_size == -1
18769 || expected_size < (desired_align - align) / 2 + size_needed)
18770 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18772 predict_jump (REG_BR_PROB_BASE * 60 / 100);
/* With a byte-granular main body the small-block label can be bound here;
   the unpromoted value is then used directly by the byte stores.  */
18775 if (label && size_needed == 1)
18777 emit_label (label);
18778 LABEL_NUSES (label) = 1;
18780 promoted_val = val_exp;
18781 epilogue_size_needed = 1;
18783 else if (label == NULL_RTX)
18784 epilogue_size_needed = size_needed;
18786 /* Step 3: Main loop. */
18792 gcc_unreachable ();
18794 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18795 count_exp, QImode, 1, expected_size);
18798 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18799 count_exp, Pmode, 1, expected_size);
18801 case unrolled_loop:
18802 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18803 count_exp, Pmode, 4, expected_size);
18805 case rep_prefix_8_byte:
18806 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18809 case rep_prefix_4_byte:
18810 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18813 case rep_prefix_1_byte:
18814 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18818 /* Adjust properly the offset of src and dest memory for aliasing. */
18819 if (CONST_INT_P (count_exp))
18820 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18821 (count / size_needed) * size_needed);
18823 dst = change_address (dst, BLKmode, destreg);
18825 /* Step 4: Epilogue to copy the remaining bytes. */
18829 /* When the main loop is done, COUNT_EXP might hold original count,
18830 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18831 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18832 bytes. Compensate if needed. */
18834 if (size_needed < epilogue_size_needed)
18837 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18838 GEN_INT (size_needed - 1), count_exp, 1,
18840 if (tmp != count_exp)
18841 emit_move_insn (count_exp, tmp);
18843 emit_label (label);
18844 LABEL_NUSES (label) = 1;
18847 if (count_exp != const0_rtx && epilogue_size_needed > 1)
/* Use the byte loop when no promoted value reaches the epilogue.  */
18849 if (force_loopy_epilogue)
18850 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18851 epilogue_size_needed);
18853 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18854 epilogue_size_needed);
18856 if (jump_around_label)
18857 emit_label (jump_around_label);
18861 /* Expand the appropriate insns for doing strlen if not just doing
18864    out = result, initialized with the start address
18865 align_rtx = alignment of the address.
18866 scratch = scratch register, initialized with the startaddress when
18867 not aligned, otherwise undefined
18869 This is just the body. It needs the initializations mentioned above and
18870 some address computing at the end. These things are done in i386.md. */
18873 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18877 rtx align_2_label = NULL_RTX;
18878 rtx align_3_label = NULL_RTX;
18879 rtx align_4_label = gen_label_rtx ();
18880 rtx end_0_label = gen_label_rtx ();
18882 rtx tmpreg = gen_reg_rtx (SImode);
18883 rtx scratch = gen_reg_rtx (SImode);
  /* A compile-time constant alignment lets us skip some of the
     byte-wise prologue checks below.  */
18887 if (CONST_INT_P (align_rtx))
18888 align = INTVAL (align_rtx);
18890 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18892 /* Is there a known alignment and is it less than 4? */
18895 rtx scratch1 = gen_reg_rtx (Pmode);
18896 emit_move_insn (scratch1, out);
18897 /* Is there a known alignment and is it not 2? */
18900 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18901 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18903 /* Leave just the 3 lower bits. */
18904 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18905 NULL_RTX, 0, OPTAB_WIDEN)
  /* Dispatch on (address & 3): 0 -> already word aligned, 2 -> check
     two bytes, > 2 -> check one byte; otherwise fall through and scan
     up to three bytes.  */
18907 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18908 Pmode, 1, align_4_label);
18909 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18910 Pmode, 1, align_2_label);
18911 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18912 Pmode, 1, align_3_label);
18916 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18917 check if is aligned to 4 - byte. */
18919 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18920 NULL_RTX, 0, OPTAB_WIDEN);
18922 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18923 Pmode, 1, align_4_label);
18926 mem = change_address (src, QImode, out);
18928 /* Now compare the bytes. */
18930 /* Compare the first n unaligned byte on a byte per byte basis. */
18931 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18932 QImode, 1, end_0_label);
18934 /* Increment the address. */
18935 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18937 /* Not needed with an alignment of 2 */
18940 emit_label (align_2_label);
18942 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18945 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18947 emit_label (align_3_label);
18950 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18953 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18956 /* Generate loop to check 4 bytes at a time. It is not a good idea to
18957 align this loop. It gives only huge programs, but does not help to
18959 emit_label (align_4_label);
18961 mem = change_address (src, SImode, out);
18962 emit_move_insn (scratch, mem);
18963 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18965 /* This formula yields a nonzero result iff one of the bytes is zero.
18966 This saves three branches inside loop and many cycles. */
  /* tmpreg = (word - 0x01010101) & ~word & 0x80808080: a byte's 0x80
     marker bit survives only if that byte of WORD was zero.  */
18968 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18969 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18970 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18971 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18972 gen_int_mode (0x80808080, SImode)));
18973 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
18978 rtx reg = gen_reg_rtx (SImode);
18979 rtx reg2 = gen_reg_rtx (Pmode);
18980 emit_move_insn (reg, tmpreg);
18981 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18983 /* If zero is not in the first two bytes, move two bytes forward. */
18984 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18985 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18986 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18987 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18988 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18991 /* Emit lea manually to avoid clobbering of flags. */
18992 emit_insn (gen_rtx_SET (SImode, reg2,
18993 gen_rtx_PLUS (Pmode, out, const2_rtx)));
18995 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18996 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18997 emit_insn (gen_rtx_SET (VOIDmode, out,
18998 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
19005 rtx end_2_label = gen_label_rtx ();
19006 /* Is zero in the first two bytes? */
19008 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19009 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19010 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19011 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19012 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
19014 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19015 JUMP_LABEL (tmp) = end_2_label;
19017 /* Not in the first two. Move two bytes forward. */
19018 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19019 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
19021 emit_label (end_2_label);
19025 /* Avoid branch in fixing the byte. */
  /* Doubling the low marker byte shifts its 0x80 bit into the carry
     flag; the sbb then subtracts 3 or 2 without a branch, leaving OUT
     pointing at the terminating zero byte.  */
19026 tmpreg = gen_lowpart (QImode, tmpreg);
19027 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19028 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
19029 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
19031 emit_label (end_0_label);
19034 /* Expand strlen. */
19037 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19039 rtx addr, scratch1, scratch2, scratch3, scratch4;
19041 /* The generic case of strlen expander is long. Avoid its
19042 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
  /* Unrolled variant: only for NUL terminator, -O2+, not size-opt,
     and only when the alignment is unknown or below 4.  */
19044 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19045 && !TARGET_INLINE_ALL_STRINGOPS
19046 && !optimize_insn_for_size_p ()
19047 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19050 addr = force_reg (Pmode, XEXP (src, 0));
19051 scratch1 = gen_reg_rtx (Pmode);
19053 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19054 && !optimize_insn_for_size_p ())
19056 /* Well it seems that some optimizer does not combine a call like
19057 foo(strlen(bar), strlen(bar));
19058 when the move and the subtraction is done here. It does calculate
19059 the length just once when these instructions are done inside of
19060 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
19061 often used and I use one fewer register for the lifetime of
19062 output_strlen_unroll() this is better. */
19064 emit_move_insn (out, addr);
19066 ix86_expand_strlensi_unroll_1 (out, src, align);
19068 /* strlensi_unroll_1 returns the address of the zero at the end of
19069 the string, like memchr(), so compute the length by subtracting
19070 the start address. */
19071 emit_insn ((*ix86_gen_sub3) (out, out, addr));
19077 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19078 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19081 scratch2 = gen_reg_rtx (Pmode);
19082 scratch3 = gen_reg_rtx (Pmode);
  /* repnz scasb counts down from -1 in the count register.  */
19083 scratch4 = force_reg (Pmode, constm1_rtx);
19085 emit_move_insn (scratch3, addr);
19086 eoschar = force_reg (QImode, eoschar);
19088 src = replace_equiv_address_nv (src, scratch3);
19090 /* If .md starts supporting :P, this can be done in .md. */
19091 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19092 scratch4), UNSPEC_SCAS);
19093 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
  /* OUT = ~SCRATCH1 + (-1) = -SCRATCH1 - 2, converting the scas
     residue into the string length.  */
19094 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19095 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19100 /* For given symbol (function) construct code to compute address of its PLT
19101 entry in large x86-64 PIC model. */
19103 construct_plt_address (rtx symbol)
19105 rtx tmp = gen_reg_rtx (Pmode);
19106 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
  /* Only meaningful for SYMBOL_REFs under -mcmodel=large -fpic.  */
19108 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19109 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
  /* TMP = @PLTOFF(symbol) + PIC register -> absolute PLT entry address.  */
19111 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19112 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
  /* Emit RTL for a call.  RETVAL receives the result (or is null for a
     void call), FNADDR is a MEM wrapping the callee address, CALLARG1 is
     the argument-bytes operand, POP the bytes the callee pops, and
     SIBCALL nonzero for a sibling (tail) call.  NOTE(review): CALLARG2
     appears to encode SSE-register usage / a special -2 marker for
     ms->sysv thunk suppression — confirm against callers.  */
19117 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19119 rtx pop, int sibcall)
19121 rtx use = NULL, call;
19123 if (pop == const0_rtx)
19125 gcc_assert (!TARGET_64BIT || !pop);
19127 if (TARGET_MACHO && !TARGET_64BIT)
19130 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19131 fnaddr = machopic_indirect_call_target (fnaddr);
19136 /* Static functions and indirect calls don't need the pic register. */
19137 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19138 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19139 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19140 use_reg (&use, pic_offset_table_rtx);
  /* x86-64 varargs: AL carries the number of SSE registers used.  */
19143 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19145 rtx al = gen_rtx_REG (QImode, AX_REG);
19146 emit_move_insn (al, callarg2);
19147 use_reg (&use, al);
  /* Large PIC model: call through the computed PLT entry address.  */
19150 if (ix86_cmodel == CM_LARGE_PIC
19152 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19153 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19154 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19155 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
19157 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19158 fnaddr = gen_rtx_MEM (QImode, fnaddr);
  /* 64-bit indirect sibcalls go through R11 (a call-clobbered register
     not used for argument passing).  */
19160 if (sibcall && TARGET_64BIT
19161 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
19164 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19165 fnaddr = gen_rtx_REG (Pmode, R11_REG);
19166 emit_move_insn (fnaddr, addr);
19167 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19170 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19172 call = gen_rtx_SET (VOIDmode, retval, call);
  /* Represent the callee's stack pop as a parallel SP adjustment.  */
19175 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19176 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19177 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19180 && ix86_cfun_abi () == MS_ABI
19181 && (!callarg2 || INTVAL (callarg2) != -2))
19183 /* We need to represent that SI and DI registers are clobbered
19185 static int clobbered_registers[] = {
19186 XMM6_REG, XMM7_REG, XMM8_REG,
19187 XMM9_REG, XMM10_REG, XMM11_REG,
19188 XMM12_REG, XMM13_REG, XMM14_REG,
19189 XMM15_REG, SI_REG, DI_REG
19192 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19193 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19194 UNSPEC_MS_TO_SYSV_CALL);
  /* Registers MS ABI treats as callee-saved but SysV clobbers.  */
19198 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19199 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19202 (SSE_REGNO_P (clobbered_registers[i])
19204 clobbered_registers[i]));
19206 call = gen_rtx_PARALLEL (VOIDmode,
19207 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19211 call = emit_call_insn (call);
19213 CALL_INSN_FUNCTION_USAGE (call) = use;
19217 /* Clear stack slot assignments remembered from previous functions.
19218 This is called from INIT_EXPANDERS once before RTL is emitted for each
19221 static struct machine_function *
19222 ix86_init_machine_status (void)
19224 struct machine_function *f;
  /* GGC_CNEW presumably zero-fills (cleared GC allocation), so only
     non-zero defaults are set explicitly below.  */
19226 f = GGC_CNEW (struct machine_function);
19227 f->use_fast_prologue_epilogue_nregs = -1;
19228 f->tls_descriptor_call_expanded_p = 0;
  /* Record the ABI in force for this function.  */
19229 f->call_abi = ix86_abi;
19234 /* Return a MEM corresponding to a stack slot with mode MODE.
19235 Allocate a new slot if necessary.
19237 The RTL for a function can have several slots available: N is
19238 which slot to use. */
19241 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19243 struct stack_local_entry *s;
19245 gcc_assert (n < MAX_386_STACK_LOCALS);
19247 /* Virtual slot is valid only before vregs are instantiated. */
19248 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
  /* Reuse a previously created slot for this (mode, n) pair; copy_rtx
     gives each caller an independent MEM rtx.  */
19250 for (s = ix86_stack_locals; s; s = s->next)
19251 if (s->mode == mode && s->n == n)
19252 return copy_rtx (s->rtl);
19254 s = (struct stack_local_entry *)
19255 ggc_alloc (sizeof (struct stack_local_entry));
19258 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
  /* Prepend the new entry to the per-function cache list.  */
19260 s->next = ix86_stack_locals;
19261 ix86_stack_locals = s;
19265 /* Construct the SYMBOL_REF for the tls_get_addr function. */
19267 static GTY(()) rtx ix86_tls_symbol;
19269 ix86_tls_get_addr (void)
  /* Lazily create and cache the SYMBOL_REF; GTY(()) keeps the cached
     rtx alive across garbage collections.  The extra leading underscore
     distinguishes the GNU-TLS entry point.  */
19272 if (!ix86_tls_symbol)
19274 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19275 (TARGET_ANY_GNU_TLS
19277 ? "___tls_get_addr"
19278 : "__tls_get_addr");
19281 return ix86_tls_symbol;
19284 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19286 static GTY(()) rtx ix86_tls_module_base_symbol;
19288 ix86_tls_module_base (void)
  /* Lazily create and cache the symbol, marking it with the
     global-dynamic TLS model so later code treats it as TLS.  */
19291 if (!ix86_tls_module_base_symbol)
19293 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19294 "_TLS_MODULE_BASE_");
19295 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19296 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19299 return ix86_tls_module_base_symbol;
19302 /* Calculate the length of the memory address in the instruction
19303 encoding. Does not include the one-byte modrm, opcode, or prefix. */
19306 memory_address_length (rtx addr)
19308 struct ix86_address parts;
19309 rtx base, index, disp;
  /* Autoincrement forms (push/pop style addresses) need no extra
     address bytes beyond the modrm already counted by the caller.  */
19313 if (GET_CODE (addr) == PRE_DEC
19314 || GET_CODE (addr) == POST_INC
19315 || GET_CODE (addr) == PRE_MODIFY
19316 || GET_CODE (addr) == POST_MODIFY)
19319 ok = ix86_decompose_address (addr, &parts);
  /* Strip SUBREGs so the register-number checks below see hard regs.  */
19322 if (parts.base && GET_CODE (parts.base) == SUBREG)
19323 parts.base = SUBREG_REG (parts.base);
19324 if (parts.index && GET_CODE (parts.index) == SUBREG)
19325 parts.index = SUBREG_REG (parts.index);
19328 index = parts.index;
19333 - esp as the base always wants an index,
19334 - ebp as the base always wants a displacement,
19335 - r12 as the base always wants an index,
19336 - r13 as the base always wants a displacement. */
19338 /* Register Indirect. */
19339 if (base && !index && !disp)
19341 /* esp (for its index) and ebp (for its displacement) need
19342 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
19345 && (addr == arg_pointer_rtx
19346 || addr == frame_pointer_rtx
19347 || REGNO (addr) == SP_REG
19348 || REGNO (addr) == BP_REG
19349 || REGNO (addr) == R12_REG
19350 || REGNO (addr) == R13_REG))
19354 /* Direct Addressing. */
19355 else if (disp && !base && !index)
19360 /* Find the length of the displacement constant. */
  /* Constraint K is a signed 8-bit immediate, i.e. the disp8 form.  */
19363 if (base && satisfies_constraint_K (disp))
19368 /* ebp always wants a displacement. Similarly r13. */
19369 else if (REG_P (base)
19370 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
19373 /* An index requires the two-byte modrm form.... */
19375 /* ...like esp (or r12), which always wants an index. */
19376 || base == arg_pointer_rtx
19377 || base == frame_pointer_rtx
19379 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
19386 /* Compute default value for "length_immediate" attribute. When SHORTFORM
19387 is set, expect that the insn has an 8bit immediate alternative. */
19389 ix86_attr_length_immediate_default (rtx insn, int shortform)
19393 extract_insn_cached (insn);
  /* Find the (single) constant operand, if any.  */
19394 for (i = recog_data.n_operands - 1; i >= 0; --i)
19395 if (CONSTANT_P (recog_data.operand[i]))
  /* Signed 8-bit immediates (constraint K) use the short encoding.  */
19398 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
19402 switch (get_attr_mode (insn))
19413 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
19418 fatal_insn ("unknown insn mode", insn);
19424 /* Compute default value for "length_address" attribute. */
19426 ix86_attr_length_address_default (rtx insn)
  /* LEA encodes its "memory" operand in SET_SRC rather than a MEM.  */
19430 if (get_attr_type (insn) == TYPE_LEA)
19432 rtx set = PATTERN (insn), addr;
19434 if (GET_CODE (set) == PARALLEL)
19435 set = XVECEXP (set, 0, 0);
19437 gcc_assert (GET_CODE (set) == SET);
19439 addr = SET_SRC (set);
  /* 32-bit LEA in 64-bit mode may be wrapped in zero_extend/subreg;
     strip those to reach the address expression.  */
19440 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
19442 if (GET_CODE (addr) == ZERO_EXTEND)
19443 addr = XEXP (addr, 0);
19444 if (GET_CODE (addr) == SUBREG)
19445 addr = SUBREG_REG (addr);
19448 return memory_address_length (addr);
  /* Otherwise measure the first MEM operand's address encoding.  */
19451 extract_insn_cached (insn);
19452 for (i = recog_data.n_operands - 1; i >= 0; --i)
19453 if (MEM_P (recog_data.operand[i]))
19455 return memory_address_length (XEXP (recog_data.operand[i], 0));
19461 /* Compute default value for "length_vex" attribute. It includes
19462 2 or 3 byte VEX prefix and 1 opcode byte. */
19465 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19470 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
19471 byte VEX prefix. */
19472 if (!has_0f_opcode || has_vex_w)
19475 /* We can always use 2 byte VEX prefix in 32bit. */
  /* In 64-bit mode, scan operands: anything requiring a REX.W/X/B bit
     forces the 3-byte VEX form.  */
19479 extract_insn_cached (insn);
19481 for (i = recog_data.n_operands - 1; i >= 0; --i)
19482 if (REG_P (recog_data.operand[i]))
19484 /* REX.W bit uses 3 byte VEX prefix. */
19485 if (GET_MODE (recog_data.operand[i]) == DImode)
19490 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19491 if (MEM_P (recog_data.operand[i])
19492 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19499 /* Return the maximum number of instructions a cpu can issue. */
19502 ix86_issue_rate (void)
  /* Per-CPU issue widths for the scheduler; grouped cases share a
     return value (bodies between the case labels are elsewhere).  */
19506 case PROCESSOR_PENTIUM:
19507 case PROCESSOR_ATOM:
19511 case PROCESSOR_PENTIUMPRO:
19512 case PROCESSOR_PENTIUM4:
19513 case PROCESSOR_ATHLON:
19515 case PROCESSOR_AMDFAM10:
19516 case PROCESSOR_NOCONA:
19517 case PROCESSOR_GENERIC32:
19518 case PROCESSOR_GENERIC64:
19521 case PROCESSOR_CORE2:
19529 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19530 by DEP_INSN and nothing set by DEP_INSN. */
19533 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19537 /* Simplify the test for uninteresting insns. */
19538 if (insn_type != TYPE_SETCC
19539 && insn_type != TYPE_ICMOV
19540 && insn_type != TYPE_FCMOV
19541 && insn_type != TYPE_IBR)
  /* Find what DEP_INSN sets: either a single SET, or a two-SET
     PARALLEL (e.g. an arithmetic insn that also sets the flags).  */
19544 if ((set = single_set (dep_insn)) != 0)
19546 set = SET_DEST (set);
19549 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19550 && XVECLEN (PATTERN (dep_insn), 0) == 2
19551 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19552 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19554 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
  /* BUG FIX: fetch the SECOND set's destination (element 1).  The
     previous code re-read element 0, so SET2 merely duplicated SET and
     the "nothing else set by DEP_INSN" check below could never fire
     for two-SET parallels.  */
19555 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
  /* The flags must actually be among DEP_INSN's destinations.  */
19560 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19563 /* This test is true if the dependent insn reads the flags but
19564 not any other potentially set register. */
19565 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19568 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19574 /* Return true iff USE_INSN has a memory address with operands set by
19578 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19581 extract_insn_cached (use_insn);
  /* Only the first MEM operand found is checked; its address must not
     depend on anything SET_INSN modifies.  */
19582 for (i = recog_data.n_operands - 1; i >= 0; --i)
19583 if (MEM_P (recog_data.operand[i]))
19585 rtx addr = XEXP (recog_data.operand[i], 0);
19586 return modified_in_p (addr, set_insn) != 0;
  /* Scheduler hook: adjust the latency COST of the dependence LINK
     between DEP_INSN (producer) and INSN (consumer) for the current
     CPU model.  Returns the adjusted cost.  */
19592 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19594 enum attr_type insn_type, dep_insn_type;
19595 enum attr_memory memory;
19597 int dep_insn_code_number;
19599 /* Anti and output dependencies have zero cost on all CPUs. */
19600 if (REG_NOTE_KIND (link) != 0)
19603 dep_insn_code_number = recog_memoized (dep_insn);
19605 /* If we can't recognize the insns, we can't really do anything. */
19606 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19609 insn_type = get_attr_type (insn);
19610 dep_insn_type = get_attr_type (dep_insn);
19614 case PROCESSOR_PENTIUM:
19615 /* Address Generation Interlock adds a cycle of latency. */
19616 if (insn_type == TYPE_LEA)
19618 rtx addr = PATTERN (insn);
19620 if (GET_CODE (addr) == PARALLEL)
19621 addr = XVECEXP (addr, 0, 0);
19623 gcc_assert (GET_CODE (addr) == SET);
19625 addr = SET_SRC (addr);
19626 if (modified_in_p (addr, dep_insn))
19629 else if (ix86_agi_dependent (dep_insn, insn))
19632 /* ??? Compares pair with jump/setcc. */
19633 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19636 /* Floating point stores require value to be ready one cycle earlier. */
19637 if (insn_type == TYPE_FMOV
19638 && get_attr_memory (insn) == MEMORY_STORE
19639 && !ix86_agi_dependent (dep_insn, insn))
19643 case PROCESSOR_PENTIUMPRO:
19644 memory = get_attr_memory (insn);
19646 /* INT->FP conversion is expensive. */
19647 if (get_attr_fp_int_src (dep_insn))
19650 /* There is one cycle extra latency between an FP op and a store. */
19651 if (insn_type == TYPE_FMOV
19652 && (set = single_set (dep_insn)) != NULL_RTX
19653 && (set2 = single_set (insn)) != NULL_RTX
19654 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19655 && MEM_P (SET_DEST (set2)))
19658 /* Show ability of reorder buffer to hide latency of load by executing
19659 in parallel with previous instruction in case
19660 previous instruction is not needed to compute the address. */
19661 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19662 && !ix86_agi_dependent (dep_insn, insn))
19664 /* Claim moves to take one cycle, as core can issue one load
19665 at time and the next load can start cycle later. */
19666 if (dep_insn_type == TYPE_IMOV
19667 || dep_insn_type == TYPE_FMOV)
19675 memory = get_attr_memory (insn);
19677 /* The esp dependency is resolved before the instruction is really
19679 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19680 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19683 /* INT->FP conversion is expensive. */
19684 if (get_attr_fp_int_src (dep_insn))
19687 /* Show ability of reorder buffer to hide latency of load by executing
19688 in parallel with previous instruction in case
19689 previous instruction is not needed to compute the address. */
19690 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19691 && !ix86_agi_dependent (dep_insn, insn))
19693 /* Claim moves to take one cycle, as core can issue one load
19694 at time and the next load can start cycle later. */
19695 if (dep_insn_type == TYPE_IMOV
19696 || dep_insn_type == TYPE_FMOV)
19705 case PROCESSOR_ATHLON:
19707 case PROCESSOR_AMDFAM10:
19708 case PROCESSOR_ATOM:
19709 case PROCESSOR_GENERIC32:
19710 case PROCESSOR_GENERIC64:
19711 memory = get_attr_memory (insn);
19713 /* Show ability of reorder buffer to hide latency of load by executing
19714 in parallel with previous instruction in case
19715 previous instruction is not needed to compute the address. */
19716 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19717 && !ix86_agi_dependent (dep_insn, insn))
19719 enum attr_unit unit = get_attr_unit (insn);
19722 /* Because of the difference between the length of integer and
19723 floating unit pipeline preparation stages, the memory operands
19724 for floating point are cheaper.
19726 ??? For Athlon the difference is most probably 2. */
19727 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19730 loadcost = TARGET_ATHLON ? 2 : 0;
  /* Hide up to LOADCOST cycles of the load latency.  */
19732 if (cost >= loadcost)
19745 /* How many alternative schedules to try. This should be as wide as the
19746 scheduling freedom in the DFA, but no wider. Making this value too
19747 large results in extra work for the scheduler. */
19750 ia32_multipass_dfa_lookahead (void)
  /* Per-CPU lookahead depth; remaining CPUs use the fall-through value.  */
19754 case PROCESSOR_PENTIUM:
19757 case PROCESSOR_PENTIUMPRO:
19767 /* Compute the alignment given to a constant that is being placed in memory.
19768 EXP is the constant and ALIGN is the alignment that the object would
19770 The value of this function is used instead of that alignment to align
  /* Returns the (possibly increased) alignment in bits.  */
19774 ix86_constant_alignment (tree exp, int align)
  /* Scalar/vector constants: bump doubles to 64 bits and 128-bit-mode
     values to 128 bits for efficient SSE/FP access.  */
19776 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19777 || TREE_CODE (exp) == INTEGER_CST)
19779 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19781 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
  /* Long strings get word alignment (helps block string operations),
     unless optimizing for size.  */
19784 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19785 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19786 return BITS_PER_WORD;
19791 /* Compute the alignment for a static variable.
19792 TYPE is the data type, and ALIGN is the alignment that
19793 the object would ordinarily have. The value of this function is used
19794 instead of that alignment to align the object. */
19797 ix86_data_alignment (tree type, int align)
  /* Cap the boost at 256 bits (or a word when optimizing for size).  */
19799 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
  /* Large aggregates of known size get the maximum boost.  */
19801 if (AGGREGATE_TYPE_P (type)
19802 && TYPE_SIZE (type)
19803 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19804 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19805 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19806 && align < max_align)
19809 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19810 to 16byte boundary. */
19813 if (AGGREGATE_TYPE_P (type)
19814 && TYPE_SIZE (type)
19815 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19816 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19817 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
  /* Element-type driven boosts: doubles -> 64, 128-bit modes -> 128.  */
19821 if (TREE_CODE (type) == ARRAY_TYPE)
19823 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19825 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19828 else if (TREE_CODE (type) == COMPLEX_TYPE)
19831 if (TYPE_MODE (type) == DCmode && align < 64)
19833 if ((TYPE_MODE (type) == XCmode
19834 || TYPE_MODE (type) == TCmode) && align < 128)
  /* For records/unions, key off the first field's mode.  */
19837 else if ((TREE_CODE (type) == RECORD_TYPE
19838 || TREE_CODE (type) == UNION_TYPE
19839 || TREE_CODE (type) == QUAL_UNION_TYPE)
19840 && TYPE_FIELDS (type))
19842 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19844 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19847 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19848 || TREE_CODE (type) == INTEGER_TYPE)
19850 if (TYPE_MODE (type) == DFmode && align < 64)
19852 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19859 /* Compute the alignment for a local variable or a stack slot. EXP is
19860 the data type or decl itself, MODE is the widest mode available and
19861 ALIGN is the alignment that the object would ordinarily have. The
19862 value of this macro is used instead of that alignment to align the
19866 ix86_local_alignment (tree exp, enum machine_mode mode,
19867 unsigned int align)
  /* EXP may be a decl or a type; extract the type when given a decl.  */
19871 if (exp && DECL_P (exp))
19873 type = TREE_TYPE (exp);
19882 /* Don't do dynamic stack realignment for long long objects with
19883 -mpreferred-stack-boundary=2. */
19886 && ix86_preferred_stack_boundary < 64
19887 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19888 && (!type || !TYPE_USER_ALIGN (type))
19889 && (!decl || !DECL_USER_ALIGN (decl)))
19892 /* If TYPE is NULL, we are allocating a stack slot for caller-save
19893 register in MODE. We will return the largest alignment of XF
19897 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19898 align = GET_MODE_ALIGNMENT (DFmode);
19902 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19903 to 16byte boundary. */
19906 if (AGGREGATE_TYPE_P (type)
19907 && TYPE_SIZE (type)
19908 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19909 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19910 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
  /* Element-type driven boosts, mirroring ix86_data_alignment.  */
19913 if (TREE_CODE (type) == ARRAY_TYPE)
19915 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19917 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19920 else if (TREE_CODE (type) == COMPLEX_TYPE)
19922 if (TYPE_MODE (type) == DCmode && align < 64)
19924 if ((TYPE_MODE (type) == XCmode
19925 || TYPE_MODE (type) == TCmode) && align < 128)
19928 else if ((TREE_CODE (type) == RECORD_TYPE
19929 || TREE_CODE (type) == UNION_TYPE
19930 || TREE_CODE (type) == QUAL_UNION_TYPE)
19931 && TYPE_FIELDS (type))
19933 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19935 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19938 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19939 || TREE_CODE (type) == INTEGER_TYPE)
19942 if (TYPE_MODE (type) == DFmode && align < 64)
19944 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19950 /* Emit RTL insns to initialize the variable parts of a trampoline.
19951 FNADDR is an RTX for the address of the function's pure code.
19952 CXT is an RTX for the static chain value for the function. */
19954 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
19958 /* Compute offset from the end of the jmp to the target function. */
19959 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19960 plus_constant (tramp, 10),
19961 NULL_RTX, 1, OPTAB_DIRECT);
  /* 32-bit trampoline: 0xb9 = movl $CXT, %ecx; 0xe9 = jmp rel32.  */
19962 emit_move_insn (gen_rtx_MEM (QImode, tramp),
19963 gen_int_mode (0xb9, QImode));
19964 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19965 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19966 gen_int_mode (0xe9, QImode));
19967 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
19972 /* Try to load address using shorter movl instead of movabs.
19973 We may want to support movq for kernel mode, but kernel does not use
19974 trampolines at the moment. */
19975 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19977 fnaddr = copy_to_mode_reg (DImode, fnaddr);
  /* 0xbb41 little-endian = "41 bb": movl $imm32, %r11d.  */
19978 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19979 gen_int_mode (0xbb41, HImode));
19980 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19981 gen_lowpart (SImode, fnaddr));
  /* 0xbb49 = "49 bb": movabs $imm64, %r11.  */
19986 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19987 gen_int_mode (0xbb49, HImode));
19988 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19992 /* Load static chain using movabs to r10. */
19993 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19994 gen_int_mode (0xba49, HImode));
19995 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19998 /* Jump to the r11 */
  /* "49 ff e3": rex.WB jmp *%r11.  */
19999 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20000 gen_int_mode (0xff49, HImode));
20001 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
20002 gen_int_mode (0xe3, QImode));
20004 gcc_assert (offset <= TRAMPOLINE_SIZE);
  /* On targets that need it, mark the trampoline's stack page
     executable at runtime.  */
20007 #ifdef ENABLE_EXECUTE_STACK
20008 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
20009 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
20013 /* Codes for all the SSE/MMX builtins. */
20016 IX86_BUILTIN_ADDPS,
20017 IX86_BUILTIN_ADDSS,
20018 IX86_BUILTIN_DIVPS,
20019 IX86_BUILTIN_DIVSS,
20020 IX86_BUILTIN_MULPS,
20021 IX86_BUILTIN_MULSS,
20022 IX86_BUILTIN_SUBPS,
20023 IX86_BUILTIN_SUBSS,
20025 IX86_BUILTIN_CMPEQPS,
20026 IX86_BUILTIN_CMPLTPS,
20027 IX86_BUILTIN_CMPLEPS,
20028 IX86_BUILTIN_CMPGTPS,
20029 IX86_BUILTIN_CMPGEPS,
20030 IX86_BUILTIN_CMPNEQPS,
20031 IX86_BUILTIN_CMPNLTPS,
20032 IX86_BUILTIN_CMPNLEPS,
20033 IX86_BUILTIN_CMPNGTPS,
20034 IX86_BUILTIN_CMPNGEPS,
20035 IX86_BUILTIN_CMPORDPS,
20036 IX86_BUILTIN_CMPUNORDPS,
20037 IX86_BUILTIN_CMPEQSS,
20038 IX86_BUILTIN_CMPLTSS,
20039 IX86_BUILTIN_CMPLESS,
20040 IX86_BUILTIN_CMPNEQSS,
20041 IX86_BUILTIN_CMPNLTSS,
20042 IX86_BUILTIN_CMPNLESS,
20043 IX86_BUILTIN_CMPNGTSS,
20044 IX86_BUILTIN_CMPNGESS,
20045 IX86_BUILTIN_CMPORDSS,
20046 IX86_BUILTIN_CMPUNORDSS,
20048 IX86_BUILTIN_COMIEQSS,
20049 IX86_BUILTIN_COMILTSS,
20050 IX86_BUILTIN_COMILESS,
20051 IX86_BUILTIN_COMIGTSS,
20052 IX86_BUILTIN_COMIGESS,
20053 IX86_BUILTIN_COMINEQSS,
20054 IX86_BUILTIN_UCOMIEQSS,
20055 IX86_BUILTIN_UCOMILTSS,
20056 IX86_BUILTIN_UCOMILESS,
20057 IX86_BUILTIN_UCOMIGTSS,
20058 IX86_BUILTIN_UCOMIGESS,
20059 IX86_BUILTIN_UCOMINEQSS,
20061 IX86_BUILTIN_CVTPI2PS,
20062 IX86_BUILTIN_CVTPS2PI,
20063 IX86_BUILTIN_CVTSI2SS,
20064 IX86_BUILTIN_CVTSI642SS,
20065 IX86_BUILTIN_CVTSS2SI,
20066 IX86_BUILTIN_CVTSS2SI64,
20067 IX86_BUILTIN_CVTTPS2PI,
20068 IX86_BUILTIN_CVTTSS2SI,
20069 IX86_BUILTIN_CVTTSS2SI64,
20071 IX86_BUILTIN_MAXPS,
20072 IX86_BUILTIN_MAXSS,
20073 IX86_BUILTIN_MINPS,
20074 IX86_BUILTIN_MINSS,
20076 IX86_BUILTIN_LOADUPS,
20077 IX86_BUILTIN_STOREUPS,
20078 IX86_BUILTIN_MOVSS,
20080 IX86_BUILTIN_MOVHLPS,
20081 IX86_BUILTIN_MOVLHPS,
20082 IX86_BUILTIN_LOADHPS,
20083 IX86_BUILTIN_LOADLPS,
20084 IX86_BUILTIN_STOREHPS,
20085 IX86_BUILTIN_STORELPS,
20087 IX86_BUILTIN_MASKMOVQ,
20088 IX86_BUILTIN_MOVMSKPS,
20089 IX86_BUILTIN_PMOVMSKB,
20091 IX86_BUILTIN_MOVNTPS,
20092 IX86_BUILTIN_MOVNTQ,
20094 IX86_BUILTIN_LOADDQU,
20095 IX86_BUILTIN_STOREDQU,
20097 IX86_BUILTIN_PACKSSWB,
20098 IX86_BUILTIN_PACKSSDW,
20099 IX86_BUILTIN_PACKUSWB,
20101 IX86_BUILTIN_PADDB,
20102 IX86_BUILTIN_PADDW,
20103 IX86_BUILTIN_PADDD,
20104 IX86_BUILTIN_PADDQ,
20105 IX86_BUILTIN_PADDSB,
20106 IX86_BUILTIN_PADDSW,
20107 IX86_BUILTIN_PADDUSB,
20108 IX86_BUILTIN_PADDUSW,
20109 IX86_BUILTIN_PSUBB,
20110 IX86_BUILTIN_PSUBW,
20111 IX86_BUILTIN_PSUBD,
20112 IX86_BUILTIN_PSUBQ,
20113 IX86_BUILTIN_PSUBSB,
20114 IX86_BUILTIN_PSUBSW,
20115 IX86_BUILTIN_PSUBUSB,
20116 IX86_BUILTIN_PSUBUSW,
20119 IX86_BUILTIN_PANDN,
20123 IX86_BUILTIN_PAVGB,
20124 IX86_BUILTIN_PAVGW,
20126 IX86_BUILTIN_PCMPEQB,
20127 IX86_BUILTIN_PCMPEQW,
20128 IX86_BUILTIN_PCMPEQD,
20129 IX86_BUILTIN_PCMPGTB,
20130 IX86_BUILTIN_PCMPGTW,
20131 IX86_BUILTIN_PCMPGTD,
20133 IX86_BUILTIN_PMADDWD,
20135 IX86_BUILTIN_PMAXSW,
20136 IX86_BUILTIN_PMAXUB,
20137 IX86_BUILTIN_PMINSW,
20138 IX86_BUILTIN_PMINUB,
20140 IX86_BUILTIN_PMULHUW,
20141 IX86_BUILTIN_PMULHW,
20142 IX86_BUILTIN_PMULLW,
20144 IX86_BUILTIN_PSADBW,
20145 IX86_BUILTIN_PSHUFW,
20147 IX86_BUILTIN_PSLLW,
20148 IX86_BUILTIN_PSLLD,
20149 IX86_BUILTIN_PSLLQ,
20150 IX86_BUILTIN_PSRAW,
20151 IX86_BUILTIN_PSRAD,
20152 IX86_BUILTIN_PSRLW,
20153 IX86_BUILTIN_PSRLD,
20154 IX86_BUILTIN_PSRLQ,
20155 IX86_BUILTIN_PSLLWI,
20156 IX86_BUILTIN_PSLLDI,
20157 IX86_BUILTIN_PSLLQI,
20158 IX86_BUILTIN_PSRAWI,
20159 IX86_BUILTIN_PSRADI,
20160 IX86_BUILTIN_PSRLWI,
20161 IX86_BUILTIN_PSRLDI,
20162 IX86_BUILTIN_PSRLQI,
20164 IX86_BUILTIN_PUNPCKHBW,
20165 IX86_BUILTIN_PUNPCKHWD,
20166 IX86_BUILTIN_PUNPCKHDQ,
20167 IX86_BUILTIN_PUNPCKLBW,
20168 IX86_BUILTIN_PUNPCKLWD,
20169 IX86_BUILTIN_PUNPCKLDQ,
20171 IX86_BUILTIN_SHUFPS,
20173 IX86_BUILTIN_RCPPS,
20174 IX86_BUILTIN_RCPSS,
20175 IX86_BUILTIN_RSQRTPS,
20176 IX86_BUILTIN_RSQRTPS_NR,
20177 IX86_BUILTIN_RSQRTSS,
20178 IX86_BUILTIN_RSQRTF,
20179 IX86_BUILTIN_SQRTPS,
20180 IX86_BUILTIN_SQRTPS_NR,
20181 IX86_BUILTIN_SQRTSS,
20183 IX86_BUILTIN_UNPCKHPS,
20184 IX86_BUILTIN_UNPCKLPS,
20186 IX86_BUILTIN_ANDPS,
20187 IX86_BUILTIN_ANDNPS,
20189 IX86_BUILTIN_XORPS,
20192 IX86_BUILTIN_LDMXCSR,
20193 IX86_BUILTIN_STMXCSR,
20194 IX86_BUILTIN_SFENCE,
20196 /* 3DNow! Original */
20197 IX86_BUILTIN_FEMMS,
20198 IX86_BUILTIN_PAVGUSB,
20199 IX86_BUILTIN_PF2ID,
20200 IX86_BUILTIN_PFACC,
20201 IX86_BUILTIN_PFADD,
20202 IX86_BUILTIN_PFCMPEQ,
20203 IX86_BUILTIN_PFCMPGE,
20204 IX86_BUILTIN_PFCMPGT,
20205 IX86_BUILTIN_PFMAX,
20206 IX86_BUILTIN_PFMIN,
20207 IX86_BUILTIN_PFMUL,
20208 IX86_BUILTIN_PFRCP,
20209 IX86_BUILTIN_PFRCPIT1,
20210 IX86_BUILTIN_PFRCPIT2,
20211 IX86_BUILTIN_PFRSQIT1,
20212 IX86_BUILTIN_PFRSQRT,
20213 IX86_BUILTIN_PFSUB,
20214 IX86_BUILTIN_PFSUBR,
20215 IX86_BUILTIN_PI2FD,
20216 IX86_BUILTIN_PMULHRW,
20218 /* 3DNow! Athlon Extensions */
20219 IX86_BUILTIN_PF2IW,
20220 IX86_BUILTIN_PFNACC,
20221 IX86_BUILTIN_PFPNACC,
20222 IX86_BUILTIN_PI2FW,
20223 IX86_BUILTIN_PSWAPDSI,
20224 IX86_BUILTIN_PSWAPDSF,
20227 IX86_BUILTIN_ADDPD,
20228 IX86_BUILTIN_ADDSD,
20229 IX86_BUILTIN_DIVPD,
20230 IX86_BUILTIN_DIVSD,
20231 IX86_BUILTIN_MULPD,
20232 IX86_BUILTIN_MULSD,
20233 IX86_BUILTIN_SUBPD,
20234 IX86_BUILTIN_SUBSD,
20236 IX86_BUILTIN_CMPEQPD,
20237 IX86_BUILTIN_CMPLTPD,
20238 IX86_BUILTIN_CMPLEPD,
20239 IX86_BUILTIN_CMPGTPD,
20240 IX86_BUILTIN_CMPGEPD,
20241 IX86_BUILTIN_CMPNEQPD,
20242 IX86_BUILTIN_CMPNLTPD,
20243 IX86_BUILTIN_CMPNLEPD,
20244 IX86_BUILTIN_CMPNGTPD,
20245 IX86_BUILTIN_CMPNGEPD,
20246 IX86_BUILTIN_CMPORDPD,
20247 IX86_BUILTIN_CMPUNORDPD,
20248 IX86_BUILTIN_CMPEQSD,
20249 IX86_BUILTIN_CMPLTSD,
20250 IX86_BUILTIN_CMPLESD,
20251 IX86_BUILTIN_CMPNEQSD,
20252 IX86_BUILTIN_CMPNLTSD,
20253 IX86_BUILTIN_CMPNLESD,
20254 IX86_BUILTIN_CMPORDSD,
20255 IX86_BUILTIN_CMPUNORDSD,
20257 IX86_BUILTIN_COMIEQSD,
20258 IX86_BUILTIN_COMILTSD,
20259 IX86_BUILTIN_COMILESD,
20260 IX86_BUILTIN_COMIGTSD,
20261 IX86_BUILTIN_COMIGESD,
20262 IX86_BUILTIN_COMINEQSD,
20263 IX86_BUILTIN_UCOMIEQSD,
20264 IX86_BUILTIN_UCOMILTSD,
20265 IX86_BUILTIN_UCOMILESD,
20266 IX86_BUILTIN_UCOMIGTSD,
20267 IX86_BUILTIN_UCOMIGESD,
20268 IX86_BUILTIN_UCOMINEQSD,
20270 IX86_BUILTIN_MAXPD,
20271 IX86_BUILTIN_MAXSD,
20272 IX86_BUILTIN_MINPD,
20273 IX86_BUILTIN_MINSD,
20275 IX86_BUILTIN_ANDPD,
20276 IX86_BUILTIN_ANDNPD,
20278 IX86_BUILTIN_XORPD,
20280 IX86_BUILTIN_SQRTPD,
20281 IX86_BUILTIN_SQRTSD,
20283 IX86_BUILTIN_UNPCKHPD,
20284 IX86_BUILTIN_UNPCKLPD,
20286 IX86_BUILTIN_SHUFPD,
20288 IX86_BUILTIN_LOADUPD,
20289 IX86_BUILTIN_STOREUPD,
20290 IX86_BUILTIN_MOVSD,
20292 IX86_BUILTIN_LOADHPD,
20293 IX86_BUILTIN_LOADLPD,
20295 IX86_BUILTIN_CVTDQ2PD,
20296 IX86_BUILTIN_CVTDQ2PS,
20298 IX86_BUILTIN_CVTPD2DQ,
20299 IX86_BUILTIN_CVTPD2PI,
20300 IX86_BUILTIN_CVTPD2PS,
20301 IX86_BUILTIN_CVTTPD2DQ,
20302 IX86_BUILTIN_CVTTPD2PI,
20304 IX86_BUILTIN_CVTPI2PD,
20305 IX86_BUILTIN_CVTSI2SD,
20306 IX86_BUILTIN_CVTSI642SD,
20308 IX86_BUILTIN_CVTSD2SI,
20309 IX86_BUILTIN_CVTSD2SI64,
20310 IX86_BUILTIN_CVTSD2SS,
20311 IX86_BUILTIN_CVTSS2SD,
20312 IX86_BUILTIN_CVTTSD2SI,
20313 IX86_BUILTIN_CVTTSD2SI64,
20315 IX86_BUILTIN_CVTPS2DQ,
20316 IX86_BUILTIN_CVTPS2PD,
20317 IX86_BUILTIN_CVTTPS2DQ,
20319 IX86_BUILTIN_MOVNTI,
20320 IX86_BUILTIN_MOVNTPD,
20321 IX86_BUILTIN_MOVNTDQ,
20323 IX86_BUILTIN_MOVQ128,
20326 IX86_BUILTIN_MASKMOVDQU,
20327 IX86_BUILTIN_MOVMSKPD,
20328 IX86_BUILTIN_PMOVMSKB128,
20330 IX86_BUILTIN_PACKSSWB128,
20331 IX86_BUILTIN_PACKSSDW128,
20332 IX86_BUILTIN_PACKUSWB128,
20334 IX86_BUILTIN_PADDB128,
20335 IX86_BUILTIN_PADDW128,
20336 IX86_BUILTIN_PADDD128,
20337 IX86_BUILTIN_PADDQ128,
20338 IX86_BUILTIN_PADDSB128,
20339 IX86_BUILTIN_PADDSW128,
20340 IX86_BUILTIN_PADDUSB128,
20341 IX86_BUILTIN_PADDUSW128,
20342 IX86_BUILTIN_PSUBB128,
20343 IX86_BUILTIN_PSUBW128,
20344 IX86_BUILTIN_PSUBD128,
20345 IX86_BUILTIN_PSUBQ128,
20346 IX86_BUILTIN_PSUBSB128,
20347 IX86_BUILTIN_PSUBSW128,
20348 IX86_BUILTIN_PSUBUSB128,
20349 IX86_BUILTIN_PSUBUSW128,
20351 IX86_BUILTIN_PAND128,
20352 IX86_BUILTIN_PANDN128,
20353 IX86_BUILTIN_POR128,
20354 IX86_BUILTIN_PXOR128,
20356 IX86_BUILTIN_PAVGB128,
20357 IX86_BUILTIN_PAVGW128,
20359 IX86_BUILTIN_PCMPEQB128,
20360 IX86_BUILTIN_PCMPEQW128,
20361 IX86_BUILTIN_PCMPEQD128,
20362 IX86_BUILTIN_PCMPGTB128,
20363 IX86_BUILTIN_PCMPGTW128,
20364 IX86_BUILTIN_PCMPGTD128,
20366 IX86_BUILTIN_PMADDWD128,
20368 IX86_BUILTIN_PMAXSW128,
20369 IX86_BUILTIN_PMAXUB128,
20370 IX86_BUILTIN_PMINSW128,
20371 IX86_BUILTIN_PMINUB128,
20373 IX86_BUILTIN_PMULUDQ,
20374 IX86_BUILTIN_PMULUDQ128,
20375 IX86_BUILTIN_PMULHUW128,
20376 IX86_BUILTIN_PMULHW128,
20377 IX86_BUILTIN_PMULLW128,
20379 IX86_BUILTIN_PSADBW128,
20380 IX86_BUILTIN_PSHUFHW,
20381 IX86_BUILTIN_PSHUFLW,
20382 IX86_BUILTIN_PSHUFD,
20384 IX86_BUILTIN_PSLLDQI128,
20385 IX86_BUILTIN_PSLLWI128,
20386 IX86_BUILTIN_PSLLDI128,
20387 IX86_BUILTIN_PSLLQI128,
20388 IX86_BUILTIN_PSRAWI128,
20389 IX86_BUILTIN_PSRADI128,
20390 IX86_BUILTIN_PSRLDQI128,
20391 IX86_BUILTIN_PSRLWI128,
20392 IX86_BUILTIN_PSRLDI128,
20393 IX86_BUILTIN_PSRLQI128,
20395 IX86_BUILTIN_PSLLDQ128,
20396 IX86_BUILTIN_PSLLW128,
20397 IX86_BUILTIN_PSLLD128,
20398 IX86_BUILTIN_PSLLQ128,
20399 IX86_BUILTIN_PSRAW128,
20400 IX86_BUILTIN_PSRAD128,
20401 IX86_BUILTIN_PSRLW128,
20402 IX86_BUILTIN_PSRLD128,
20403 IX86_BUILTIN_PSRLQ128,
20405 IX86_BUILTIN_PUNPCKHBW128,
20406 IX86_BUILTIN_PUNPCKHWD128,
20407 IX86_BUILTIN_PUNPCKHDQ128,
20408 IX86_BUILTIN_PUNPCKHQDQ128,
20409 IX86_BUILTIN_PUNPCKLBW128,
20410 IX86_BUILTIN_PUNPCKLWD128,
20411 IX86_BUILTIN_PUNPCKLDQ128,
20412 IX86_BUILTIN_PUNPCKLQDQ128,
20414 IX86_BUILTIN_CLFLUSH,
20415 IX86_BUILTIN_MFENCE,
20416 IX86_BUILTIN_LFENCE,
20419 IX86_BUILTIN_ADDSUBPS,
20420 IX86_BUILTIN_HADDPS,
20421 IX86_BUILTIN_HSUBPS,
20422 IX86_BUILTIN_MOVSHDUP,
20423 IX86_BUILTIN_MOVSLDUP,
20424 IX86_BUILTIN_ADDSUBPD,
20425 IX86_BUILTIN_HADDPD,
20426 IX86_BUILTIN_HSUBPD,
20427 IX86_BUILTIN_LDDQU,
20429 IX86_BUILTIN_MONITOR,
20430 IX86_BUILTIN_MWAIT,
20433 IX86_BUILTIN_PHADDW,
20434 IX86_BUILTIN_PHADDD,
20435 IX86_BUILTIN_PHADDSW,
20436 IX86_BUILTIN_PHSUBW,
20437 IX86_BUILTIN_PHSUBD,
20438 IX86_BUILTIN_PHSUBSW,
20439 IX86_BUILTIN_PMADDUBSW,
20440 IX86_BUILTIN_PMULHRSW,
20441 IX86_BUILTIN_PSHUFB,
20442 IX86_BUILTIN_PSIGNB,
20443 IX86_BUILTIN_PSIGNW,
20444 IX86_BUILTIN_PSIGND,
20445 IX86_BUILTIN_PALIGNR,
20446 IX86_BUILTIN_PABSB,
20447 IX86_BUILTIN_PABSW,
20448 IX86_BUILTIN_PABSD,
20450 IX86_BUILTIN_PHADDW128,
20451 IX86_BUILTIN_PHADDD128,
20452 IX86_BUILTIN_PHADDSW128,
20453 IX86_BUILTIN_PHSUBW128,
20454 IX86_BUILTIN_PHSUBD128,
20455 IX86_BUILTIN_PHSUBSW128,
20456 IX86_BUILTIN_PMADDUBSW128,
20457 IX86_BUILTIN_PMULHRSW128,
20458 IX86_BUILTIN_PSHUFB128,
20459 IX86_BUILTIN_PSIGNB128,
20460 IX86_BUILTIN_PSIGNW128,
20461 IX86_BUILTIN_PSIGND128,
20462 IX86_BUILTIN_PALIGNR128,
20463 IX86_BUILTIN_PABSB128,
20464 IX86_BUILTIN_PABSW128,
20465 IX86_BUILTIN_PABSD128,
20467 /* AMDFAM10 - SSE4A New Instructions. */
20468 IX86_BUILTIN_MOVNTSD,
20469 IX86_BUILTIN_MOVNTSS,
20470 IX86_BUILTIN_EXTRQI,
20471 IX86_BUILTIN_EXTRQ,
20472 IX86_BUILTIN_INSERTQI,
20473 IX86_BUILTIN_INSERTQ,
20476 IX86_BUILTIN_BLENDPD,
20477 IX86_BUILTIN_BLENDPS,
20478 IX86_BUILTIN_BLENDVPD,
20479 IX86_BUILTIN_BLENDVPS,
20480 IX86_BUILTIN_PBLENDVB128,
20481 IX86_BUILTIN_PBLENDW128,
20486 IX86_BUILTIN_INSERTPS128,
20488 IX86_BUILTIN_MOVNTDQA,
20489 IX86_BUILTIN_MPSADBW128,
20490 IX86_BUILTIN_PACKUSDW128,
20491 IX86_BUILTIN_PCMPEQQ,
20492 IX86_BUILTIN_PHMINPOSUW128,
20494 IX86_BUILTIN_PMAXSB128,
20495 IX86_BUILTIN_PMAXSD128,
20496 IX86_BUILTIN_PMAXUD128,
20497 IX86_BUILTIN_PMAXUW128,
20499 IX86_BUILTIN_PMINSB128,
20500 IX86_BUILTIN_PMINSD128,
20501 IX86_BUILTIN_PMINUD128,
20502 IX86_BUILTIN_PMINUW128,
20504 IX86_BUILTIN_PMOVSXBW128,
20505 IX86_BUILTIN_PMOVSXBD128,
20506 IX86_BUILTIN_PMOVSXBQ128,
20507 IX86_BUILTIN_PMOVSXWD128,
20508 IX86_BUILTIN_PMOVSXWQ128,
20509 IX86_BUILTIN_PMOVSXDQ128,
20511 IX86_BUILTIN_PMOVZXBW128,
20512 IX86_BUILTIN_PMOVZXBD128,
20513 IX86_BUILTIN_PMOVZXBQ128,
20514 IX86_BUILTIN_PMOVZXWD128,
20515 IX86_BUILTIN_PMOVZXWQ128,
20516 IX86_BUILTIN_PMOVZXDQ128,
20518 IX86_BUILTIN_PMULDQ128,
20519 IX86_BUILTIN_PMULLD128,
20521 IX86_BUILTIN_ROUNDPD,
20522 IX86_BUILTIN_ROUNDPS,
20523 IX86_BUILTIN_ROUNDSD,
20524 IX86_BUILTIN_ROUNDSS,
20526 IX86_BUILTIN_PTESTZ,
20527 IX86_BUILTIN_PTESTC,
20528 IX86_BUILTIN_PTESTNZC,
20530 IX86_BUILTIN_VEC_INIT_V2SI,
20531 IX86_BUILTIN_VEC_INIT_V4HI,
20532 IX86_BUILTIN_VEC_INIT_V8QI,
20533 IX86_BUILTIN_VEC_EXT_V2DF,
20534 IX86_BUILTIN_VEC_EXT_V2DI,
20535 IX86_BUILTIN_VEC_EXT_V4SF,
20536 IX86_BUILTIN_VEC_EXT_V4SI,
20537 IX86_BUILTIN_VEC_EXT_V8HI,
20538 IX86_BUILTIN_VEC_EXT_V2SI,
20539 IX86_BUILTIN_VEC_EXT_V4HI,
20540 IX86_BUILTIN_VEC_EXT_V16QI,
20541 IX86_BUILTIN_VEC_SET_V2DI,
20542 IX86_BUILTIN_VEC_SET_V4SF,
20543 IX86_BUILTIN_VEC_SET_V4SI,
20544 IX86_BUILTIN_VEC_SET_V8HI,
20545 IX86_BUILTIN_VEC_SET_V4HI,
20546 IX86_BUILTIN_VEC_SET_V16QI,
20548 IX86_BUILTIN_VEC_PACK_SFIX,
20551 IX86_BUILTIN_CRC32QI,
20552 IX86_BUILTIN_CRC32HI,
20553 IX86_BUILTIN_CRC32SI,
20554 IX86_BUILTIN_CRC32DI,
20556 IX86_BUILTIN_PCMPESTRI128,
20557 IX86_BUILTIN_PCMPESTRM128,
20558 IX86_BUILTIN_PCMPESTRA128,
20559 IX86_BUILTIN_PCMPESTRC128,
20560 IX86_BUILTIN_PCMPESTRO128,
20561 IX86_BUILTIN_PCMPESTRS128,
20562 IX86_BUILTIN_PCMPESTRZ128,
20563 IX86_BUILTIN_PCMPISTRI128,
20564 IX86_BUILTIN_PCMPISTRM128,
20565 IX86_BUILTIN_PCMPISTRA128,
20566 IX86_BUILTIN_PCMPISTRC128,
20567 IX86_BUILTIN_PCMPISTRO128,
20568 IX86_BUILTIN_PCMPISTRS128,
20569 IX86_BUILTIN_PCMPISTRZ128,
20571 IX86_BUILTIN_PCMPGTQ,
20573 /* AES instructions */
20574 IX86_BUILTIN_AESENC128,
20575 IX86_BUILTIN_AESENCLAST128,
20576 IX86_BUILTIN_AESDEC128,
20577 IX86_BUILTIN_AESDECLAST128,
20578 IX86_BUILTIN_AESIMC128,
20579 IX86_BUILTIN_AESKEYGENASSIST128,
20581 /* PCLMUL instruction */
20582 IX86_BUILTIN_PCLMULQDQ128,
20585 IX86_BUILTIN_ADDPD256,
20586 IX86_BUILTIN_ADDPS256,
20587 IX86_BUILTIN_ADDSUBPD256,
20588 IX86_BUILTIN_ADDSUBPS256,
20589 IX86_BUILTIN_ANDPD256,
20590 IX86_BUILTIN_ANDPS256,
20591 IX86_BUILTIN_ANDNPD256,
20592 IX86_BUILTIN_ANDNPS256,
20593 IX86_BUILTIN_BLENDPD256,
20594 IX86_BUILTIN_BLENDPS256,
20595 IX86_BUILTIN_BLENDVPD256,
20596 IX86_BUILTIN_BLENDVPS256,
20597 IX86_BUILTIN_DIVPD256,
20598 IX86_BUILTIN_DIVPS256,
20599 IX86_BUILTIN_DPPS256,
20600 IX86_BUILTIN_HADDPD256,
20601 IX86_BUILTIN_HADDPS256,
20602 IX86_BUILTIN_HSUBPD256,
20603 IX86_BUILTIN_HSUBPS256,
20604 IX86_BUILTIN_MAXPD256,
20605 IX86_BUILTIN_MAXPS256,
20606 IX86_BUILTIN_MINPD256,
20607 IX86_BUILTIN_MINPS256,
20608 IX86_BUILTIN_MULPD256,
20609 IX86_BUILTIN_MULPS256,
20610 IX86_BUILTIN_ORPD256,
20611 IX86_BUILTIN_ORPS256,
20612 IX86_BUILTIN_SHUFPD256,
20613 IX86_BUILTIN_SHUFPS256,
20614 IX86_BUILTIN_SUBPD256,
20615 IX86_BUILTIN_SUBPS256,
20616 IX86_BUILTIN_XORPD256,
20617 IX86_BUILTIN_XORPS256,
20618 IX86_BUILTIN_CMPSD,
20619 IX86_BUILTIN_CMPSS,
20620 IX86_BUILTIN_CMPPD,
20621 IX86_BUILTIN_CMPPS,
20622 IX86_BUILTIN_CMPPD256,
20623 IX86_BUILTIN_CMPPS256,
20624 IX86_BUILTIN_CVTDQ2PD256,
20625 IX86_BUILTIN_CVTDQ2PS256,
20626 IX86_BUILTIN_CVTPD2PS256,
20627 IX86_BUILTIN_CVTPS2DQ256,
20628 IX86_BUILTIN_CVTPS2PD256,
20629 IX86_BUILTIN_CVTTPD2DQ256,
20630 IX86_BUILTIN_CVTPD2DQ256,
20631 IX86_BUILTIN_CVTTPS2DQ256,
20632 IX86_BUILTIN_EXTRACTF128PD256,
20633 IX86_BUILTIN_EXTRACTF128PS256,
20634 IX86_BUILTIN_EXTRACTF128SI256,
20635 IX86_BUILTIN_VZEROALL,
20636 IX86_BUILTIN_VZEROUPPER,
20637 IX86_BUILTIN_VZEROUPPER_REX64,
20638 IX86_BUILTIN_VPERMILVARPD,
20639 IX86_BUILTIN_VPERMILVARPS,
20640 IX86_BUILTIN_VPERMILVARPD256,
20641 IX86_BUILTIN_VPERMILVARPS256,
20642 IX86_BUILTIN_VPERMILPD,
20643 IX86_BUILTIN_VPERMILPS,
20644 IX86_BUILTIN_VPERMILPD256,
20645 IX86_BUILTIN_VPERMILPS256,
20646 IX86_BUILTIN_VPERM2F128PD256,
20647 IX86_BUILTIN_VPERM2F128PS256,
20648 IX86_BUILTIN_VPERM2F128SI256,
20649 IX86_BUILTIN_VBROADCASTSS,
20650 IX86_BUILTIN_VBROADCASTSD256,
20651 IX86_BUILTIN_VBROADCASTSS256,
20652 IX86_BUILTIN_VBROADCASTPD256,
20653 IX86_BUILTIN_VBROADCASTPS256,
20654 IX86_BUILTIN_VINSERTF128PD256,
20655 IX86_BUILTIN_VINSERTF128PS256,
20656 IX86_BUILTIN_VINSERTF128SI256,
20657 IX86_BUILTIN_LOADUPD256,
20658 IX86_BUILTIN_LOADUPS256,
20659 IX86_BUILTIN_STOREUPD256,
20660 IX86_BUILTIN_STOREUPS256,
20661 IX86_BUILTIN_LDDQU256,
20662 IX86_BUILTIN_MOVNTDQ256,
20663 IX86_BUILTIN_MOVNTPD256,
20664 IX86_BUILTIN_MOVNTPS256,
20665 IX86_BUILTIN_LOADDQU256,
20666 IX86_BUILTIN_STOREDQU256,
20667 IX86_BUILTIN_MASKLOADPD,
20668 IX86_BUILTIN_MASKLOADPS,
20669 IX86_BUILTIN_MASKSTOREPD,
20670 IX86_BUILTIN_MASKSTOREPS,
20671 IX86_BUILTIN_MASKLOADPD256,
20672 IX86_BUILTIN_MASKLOADPS256,
20673 IX86_BUILTIN_MASKSTOREPD256,
20674 IX86_BUILTIN_MASKSTOREPS256,
20675 IX86_BUILTIN_MOVSHDUP256,
20676 IX86_BUILTIN_MOVSLDUP256,
20677 IX86_BUILTIN_MOVDDUP256,
20679 IX86_BUILTIN_SQRTPD256,
20680 IX86_BUILTIN_SQRTPS256,
20681 IX86_BUILTIN_SQRTPS_NR256,
20682 IX86_BUILTIN_RSQRTPS256,
20683 IX86_BUILTIN_RSQRTPS_NR256,
20685 IX86_BUILTIN_RCPPS256,
20687 IX86_BUILTIN_ROUNDPD256,
20688 IX86_BUILTIN_ROUNDPS256,
20690 IX86_BUILTIN_UNPCKHPD256,
20691 IX86_BUILTIN_UNPCKLPD256,
20692 IX86_BUILTIN_UNPCKHPS256,
20693 IX86_BUILTIN_UNPCKLPS256,
20695 IX86_BUILTIN_SI256_SI,
20696 IX86_BUILTIN_PS256_PS,
20697 IX86_BUILTIN_PD256_PD,
20698 IX86_BUILTIN_SI_SI256,
20699 IX86_BUILTIN_PS_PS256,
20700 IX86_BUILTIN_PD_PD256,
20702 IX86_BUILTIN_VTESTZPD,
20703 IX86_BUILTIN_VTESTCPD,
20704 IX86_BUILTIN_VTESTNZCPD,
20705 IX86_BUILTIN_VTESTZPS,
20706 IX86_BUILTIN_VTESTCPS,
20707 IX86_BUILTIN_VTESTNZCPS,
20708 IX86_BUILTIN_VTESTZPD256,
20709 IX86_BUILTIN_VTESTCPD256,
20710 IX86_BUILTIN_VTESTNZCPD256,
20711 IX86_BUILTIN_VTESTZPS256,
20712 IX86_BUILTIN_VTESTCPS256,
20713 IX86_BUILTIN_VTESTNZCPS256,
20714 IX86_BUILTIN_PTESTZ256,
20715 IX86_BUILTIN_PTESTC256,
20716 IX86_BUILTIN_PTESTNZC256,
20718 IX86_BUILTIN_MOVMSKPD256,
20719 IX86_BUILTIN_MOVMSKPS256,
20721 /* TFmode support builtins. */
20723 IX86_BUILTIN_HUGE_VALQ,
20724 IX86_BUILTIN_FABSQ,
20725 IX86_BUILTIN_COPYSIGNQ,
20727 /* SSE5 instructions */
20728 IX86_BUILTIN_FMADDSS,
20729 IX86_BUILTIN_FMADDSD,
20730 IX86_BUILTIN_FMADDPS,
20731 IX86_BUILTIN_FMADDPD,
20732 IX86_BUILTIN_FMSUBSS,
20733 IX86_BUILTIN_FMSUBSD,
20734 IX86_BUILTIN_FMSUBPS,
20735 IX86_BUILTIN_FMSUBPD,
20736 IX86_BUILTIN_FNMADDSS,
20737 IX86_BUILTIN_FNMADDSD,
20738 IX86_BUILTIN_FNMADDPS,
20739 IX86_BUILTIN_FNMADDPD,
20740 IX86_BUILTIN_FNMSUBSS,
20741 IX86_BUILTIN_FNMSUBSD,
20742 IX86_BUILTIN_FNMSUBPS,
20743 IX86_BUILTIN_FNMSUBPD,
20744 IX86_BUILTIN_PCMOV,
20745 IX86_BUILTIN_PCMOV_V2DI,
20746 IX86_BUILTIN_PCMOV_V4SI,
20747 IX86_BUILTIN_PCMOV_V8HI,
20748 IX86_BUILTIN_PCMOV_V16QI,
20749 IX86_BUILTIN_PCMOV_V4SF,
20750 IX86_BUILTIN_PCMOV_V2DF,
20751 IX86_BUILTIN_PPERM,
20752 IX86_BUILTIN_PERMPS,
20753 IX86_BUILTIN_PERMPD,
20754 IX86_BUILTIN_PMACSSWW,
20755 IX86_BUILTIN_PMACSWW,
20756 IX86_BUILTIN_PMACSSWD,
20757 IX86_BUILTIN_PMACSWD,
20758 IX86_BUILTIN_PMACSSDD,
20759 IX86_BUILTIN_PMACSDD,
20760 IX86_BUILTIN_PMACSSDQL,
20761 IX86_BUILTIN_PMACSSDQH,
20762 IX86_BUILTIN_PMACSDQL,
20763 IX86_BUILTIN_PMACSDQH,
20764 IX86_BUILTIN_PMADCSSWD,
20765 IX86_BUILTIN_PMADCSWD,
20766 IX86_BUILTIN_PHADDBW,
20767 IX86_BUILTIN_PHADDBD,
20768 IX86_BUILTIN_PHADDBQ,
20769 IX86_BUILTIN_PHADDWD,
20770 IX86_BUILTIN_PHADDWQ,
20771 IX86_BUILTIN_PHADDDQ,
20772 IX86_BUILTIN_PHADDUBW,
20773 IX86_BUILTIN_PHADDUBD,
20774 IX86_BUILTIN_PHADDUBQ,
20775 IX86_BUILTIN_PHADDUWD,
20776 IX86_BUILTIN_PHADDUWQ,
20777 IX86_BUILTIN_PHADDUDQ,
20778 IX86_BUILTIN_PHSUBBW,
20779 IX86_BUILTIN_PHSUBWD,
20780 IX86_BUILTIN_PHSUBDQ,
20781 IX86_BUILTIN_PROTB,
20782 IX86_BUILTIN_PROTW,
20783 IX86_BUILTIN_PROTD,
20784 IX86_BUILTIN_PROTQ,
20785 IX86_BUILTIN_PROTB_IMM,
20786 IX86_BUILTIN_PROTW_IMM,
20787 IX86_BUILTIN_PROTD_IMM,
20788 IX86_BUILTIN_PROTQ_IMM,
20789 IX86_BUILTIN_PSHLB,
20790 IX86_BUILTIN_PSHLW,
20791 IX86_BUILTIN_PSHLD,
20792 IX86_BUILTIN_PSHLQ,
20793 IX86_BUILTIN_PSHAB,
20794 IX86_BUILTIN_PSHAW,
20795 IX86_BUILTIN_PSHAD,
20796 IX86_BUILTIN_PSHAQ,
20797 IX86_BUILTIN_FRCZSS,
20798 IX86_BUILTIN_FRCZSD,
20799 IX86_BUILTIN_FRCZPS,
20800 IX86_BUILTIN_FRCZPD,
20801 IX86_BUILTIN_CVTPH2PS,
20802 IX86_BUILTIN_CVTPS2PH,
20804 IX86_BUILTIN_COMEQSS,
20805 IX86_BUILTIN_COMNESS,
20806 IX86_BUILTIN_COMLTSS,
20807 IX86_BUILTIN_COMLESS,
20808 IX86_BUILTIN_COMGTSS,
20809 IX86_BUILTIN_COMGESS,
20810 IX86_BUILTIN_COMUEQSS,
20811 IX86_BUILTIN_COMUNESS,
20812 IX86_BUILTIN_COMULTSS,
20813 IX86_BUILTIN_COMULESS,
20814 IX86_BUILTIN_COMUGTSS,
20815 IX86_BUILTIN_COMUGESS,
20816 IX86_BUILTIN_COMORDSS,
20817 IX86_BUILTIN_COMUNORDSS,
20818 IX86_BUILTIN_COMFALSESS,
20819 IX86_BUILTIN_COMTRUESS,
20821 IX86_BUILTIN_COMEQSD,
20822 IX86_BUILTIN_COMNESD,
20823 IX86_BUILTIN_COMLTSD,
20824 IX86_BUILTIN_COMLESD,
20825 IX86_BUILTIN_COMGTSD,
20826 IX86_BUILTIN_COMGESD,
20827 IX86_BUILTIN_COMUEQSD,
20828 IX86_BUILTIN_COMUNESD,
20829 IX86_BUILTIN_COMULTSD,
20830 IX86_BUILTIN_COMULESD,
20831 IX86_BUILTIN_COMUGTSD,
20832 IX86_BUILTIN_COMUGESD,
20833 IX86_BUILTIN_COMORDSD,
20834 IX86_BUILTIN_COMUNORDSD,
20835 IX86_BUILTIN_COMFALSESD,
20836 IX86_BUILTIN_COMTRUESD,
20838 IX86_BUILTIN_COMEQPS,
20839 IX86_BUILTIN_COMNEPS,
20840 IX86_BUILTIN_COMLTPS,
20841 IX86_BUILTIN_COMLEPS,
20842 IX86_BUILTIN_COMGTPS,
20843 IX86_BUILTIN_COMGEPS,
20844 IX86_BUILTIN_COMUEQPS,
20845 IX86_BUILTIN_COMUNEPS,
20846 IX86_BUILTIN_COMULTPS,
20847 IX86_BUILTIN_COMULEPS,
20848 IX86_BUILTIN_COMUGTPS,
20849 IX86_BUILTIN_COMUGEPS,
20850 IX86_BUILTIN_COMORDPS,
20851 IX86_BUILTIN_COMUNORDPS,
20852 IX86_BUILTIN_COMFALSEPS,
20853 IX86_BUILTIN_COMTRUEPS,
20855 IX86_BUILTIN_COMEQPD,
20856 IX86_BUILTIN_COMNEPD,
20857 IX86_BUILTIN_COMLTPD,
20858 IX86_BUILTIN_COMLEPD,
20859 IX86_BUILTIN_COMGTPD,
20860 IX86_BUILTIN_COMGEPD,
20861 IX86_BUILTIN_COMUEQPD,
20862 IX86_BUILTIN_COMUNEPD,
20863 IX86_BUILTIN_COMULTPD,
20864 IX86_BUILTIN_COMULEPD,
20865 IX86_BUILTIN_COMUGTPD,
20866 IX86_BUILTIN_COMUGEPD,
20867 IX86_BUILTIN_COMORDPD,
20868 IX86_BUILTIN_COMUNORDPD,
20869 IX86_BUILTIN_COMFALSEPD,
20870 IX86_BUILTIN_COMTRUEPD,
20872 IX86_BUILTIN_PCOMEQUB,
20873 IX86_BUILTIN_PCOMNEUB,
20874 IX86_BUILTIN_PCOMLTUB,
20875 IX86_BUILTIN_PCOMLEUB,
20876 IX86_BUILTIN_PCOMGTUB,
20877 IX86_BUILTIN_PCOMGEUB,
20878 IX86_BUILTIN_PCOMFALSEUB,
20879 IX86_BUILTIN_PCOMTRUEUB,
20880 IX86_BUILTIN_PCOMEQUW,
20881 IX86_BUILTIN_PCOMNEUW,
20882 IX86_BUILTIN_PCOMLTUW,
20883 IX86_BUILTIN_PCOMLEUW,
20884 IX86_BUILTIN_PCOMGTUW,
20885 IX86_BUILTIN_PCOMGEUW,
20886 IX86_BUILTIN_PCOMFALSEUW,
20887 IX86_BUILTIN_PCOMTRUEUW,
20888 IX86_BUILTIN_PCOMEQUD,
20889 IX86_BUILTIN_PCOMNEUD,
20890 IX86_BUILTIN_PCOMLTUD,
20891 IX86_BUILTIN_PCOMLEUD,
20892 IX86_BUILTIN_PCOMGTUD,
20893 IX86_BUILTIN_PCOMGEUD,
20894 IX86_BUILTIN_PCOMFALSEUD,
20895 IX86_BUILTIN_PCOMTRUEUD,
20896 IX86_BUILTIN_PCOMEQUQ,
20897 IX86_BUILTIN_PCOMNEUQ,
20898 IX86_BUILTIN_PCOMLTUQ,
20899 IX86_BUILTIN_PCOMLEUQ,
20900 IX86_BUILTIN_PCOMGTUQ,
20901 IX86_BUILTIN_PCOMGEUQ,
20902 IX86_BUILTIN_PCOMFALSEUQ,
20903 IX86_BUILTIN_PCOMTRUEUQ,
20905 IX86_BUILTIN_PCOMEQB,
20906 IX86_BUILTIN_PCOMNEB,
20907 IX86_BUILTIN_PCOMLTB,
20908 IX86_BUILTIN_PCOMLEB,
20909 IX86_BUILTIN_PCOMGTB,
20910 IX86_BUILTIN_PCOMGEB,
20911 IX86_BUILTIN_PCOMFALSEB,
20912 IX86_BUILTIN_PCOMTRUEB,
20913 IX86_BUILTIN_PCOMEQW,
20914 IX86_BUILTIN_PCOMNEW,
20915 IX86_BUILTIN_PCOMLTW,
20916 IX86_BUILTIN_PCOMLEW,
20917 IX86_BUILTIN_PCOMGTW,
20918 IX86_BUILTIN_PCOMGEW,
20919 IX86_BUILTIN_PCOMFALSEW,
20920 IX86_BUILTIN_PCOMTRUEW,
20921 IX86_BUILTIN_PCOMEQD,
20922 IX86_BUILTIN_PCOMNED,
20923 IX86_BUILTIN_PCOMLTD,
20924 IX86_BUILTIN_PCOMLED,
20925 IX86_BUILTIN_PCOMGTD,
20926 IX86_BUILTIN_PCOMGED,
20927 IX86_BUILTIN_PCOMFALSED,
20928 IX86_BUILTIN_PCOMTRUED,
20929 IX86_BUILTIN_PCOMEQQ,
20930 IX86_BUILTIN_PCOMNEQ,
20931 IX86_BUILTIN_PCOMLTQ,
20932 IX86_BUILTIN_PCOMLEQ,
20933 IX86_BUILTIN_PCOMGTQ,
20934 IX86_BUILTIN_PCOMGEQ,
20935 IX86_BUILTIN_PCOMFALSEQ,
20936 IX86_BUILTIN_PCOMTRUEQ,
20941 /* Table for the ix86 builtin decls. */
20942 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
20947 struct GTY(()) builtin_isa {
20948 tree type; /* builtin type to use in the declaration */
20949 const char *name; /* function name */
20950 int isa; /* isa_flags this builtin is defined for */
20951 bool const_p; /* true if the declaration is constant */
20954 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20957 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
20958 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
20959 * function decl in the ix86_builtins array. Returns the function decl or
20960 * NULL_TREE, if the builtin was not added.
20962 * If the front end has a special hook for builtin functions, delay adding
20963 * builtin functions that aren't in the current ISA until the ISA is changed
20964 * with function specific optimization. Doing so, can save about 300K for the
 * default compiler.  When the builtin is expanded, check at that time whether
 * it is valid.
 *
 * If the front end doesn't have a special hook, record all builtins, even if
20969 * it isn't an instruction set in the current ISA in case the user uses
20970 * function specific options for a different ISA, so that we don't get scope
20971 * errors if a builtin is added in the middle of a function scope. */
20974 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
20976 tree decl = NULL_TREE;
20978 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
20980 ix86_builtins_isa[(int) code].isa = mask;
20982 if ((mask & ix86_isa_flags) != 0
20983 || (lang_hooks.builtin_function
20984 == lang_hooks.builtin_function_ext_scope))
20987 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20989 ix86_builtins[(int) code] = decl;
20990 ix86_builtins_isa[(int) code].type = NULL_TREE;
20994 ix86_builtins[(int) code] = NULL_TREE;
20995 ix86_builtins_isa[(int) code].const_p = false;
20996 ix86_builtins_isa[(int) code].type = type;
20997 ix86_builtins_isa[(int) code].name = name;
21004 /* Like def_builtin, but also marks the function decl "const". */
21007 def_builtin_const (int mask, const char *name, tree type,
21008 enum ix86_builtins code)
21010 tree decl = def_builtin (mask, name, type, code);
21012 TREE_READONLY (decl) = 1;
21014 ix86_builtins_isa[(int) code].const_p = true;
21019 /* Add any new builtin functions for a given ISA that may not have been
21020 declared. This saves a bit of space compared to adding all of the
21021 declarations to the tree, even if we didn't use them. */
21024 ix86_add_new_builtins (int isa)
21029 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
21031 if ((ix86_builtins_isa[i].isa & isa) != 0
21032 && ix86_builtins_isa[i].type != NULL_TREE)
21034 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
21035 ix86_builtins_isa[i].type,
21036 i, BUILT_IN_MD, NULL,
21039 ix86_builtins[i] = decl;
21040 ix86_builtins_isa[i].type = NULL_TREE;
21041 if (ix86_builtins_isa[i].const_p)
21042 TREE_READONLY (decl) = 1;
21047 /* Bits for builtin_description.flag. */
21049 /* Set when we don't support the comparison natively, and should
21050 swap_comparison in order to support it. */
21051 #define BUILTIN_DESC_SWAP_OPERANDS 1
21053 struct builtin_description
21055 const unsigned int mask;
21056 const enum insn_code icode;
21057 const char *const name;
21058 const enum ix86_builtins code;
21059 const enum rtx_code comparison;
21063 static const struct builtin_description bdesc_comi[] =
21065 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21066 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21069 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21070 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21071 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21072 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21073 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21074 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21075 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21076 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
21077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21084 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21085 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21086 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21087 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21088 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
21091 static const struct builtin_description bdesc_pcmpestr[] =
21094 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21095 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21096 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21097 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21098 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21099 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21100 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
21103 static const struct builtin_description bdesc_pcmpistr[] =
21106 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21107 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21108 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21109 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21110 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21111 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21112 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21115 /* Special builtin types */
/* Function-type codes for the "special" builtins in bdesc_special_args
   below (loads, stores, fences and other pointer-taking operations).
   Names read RETURN_FTYPE_ARGS; a leading P on an argument means
   pointer-to and PC means pointer-to-const, e.g. V4SF_FTYPE_PCFLOAT is
   "returns V4SF, takes const float *" and VOID_FTYPE_PFLOAT_V4SF is a
   store through a float *.  NOTE(review): the mapping from these codes
   to actual tree function types is presumably done where the builtins
   are registered elsewhere in this file -- keep the two in sync when
   adding entries.  */
21116 enum ix86_special_builtin_type
21118 SPECIAL_FTYPE_UNKNOWN,
21120 V32QI_FTYPE_PCCHAR,
21121 V16QI_FTYPE_PCCHAR,
21123 V8SF_FTYPE_PCFLOAT,
21125 V4DF_FTYPE_PCDOUBLE,
21126 V4SF_FTYPE_PCFLOAT,
21127 V2DF_FTYPE_PCDOUBLE,
21128 V8SF_FTYPE_PCV8SF_V8SF,
21129 V4DF_FTYPE_PCV4DF_V4DF,
21130 V4SF_FTYPE_V4SF_PCV2SF,
21131 V4SF_FTYPE_PCV4SF_V4SF,
21132 V2DF_FTYPE_V2DF_PCDOUBLE,
21133 V2DF_FTYPE_PCV2DF_V2DF,
21135 VOID_FTYPE_PV2SF_V4SF,
21136 VOID_FTYPE_PV4DI_V4DI,
21137 VOID_FTYPE_PV2DI_V2DI,
21138 VOID_FTYPE_PCHAR_V32QI,
21139 VOID_FTYPE_PCHAR_V16QI,
21140 VOID_FTYPE_PFLOAT_V8SF,
21141 VOID_FTYPE_PFLOAT_V4SF,
21142 VOID_FTYPE_PDOUBLE_V4DF,
21143 VOID_FTYPE_PDOUBLE_V2DF,
21145 VOID_FTYPE_PINT_INT,
21146 VOID_FTYPE_PV8SF_V8SF_V8SF,
21147 VOID_FTYPE_PV4DF_V4DF_V4DF,
21148 VOID_FTYPE_PV4SF_V4SF_V4SF,
21149 VOID_FTYPE_PV2DF_V2DF_V2DF
21152 /* Builtin types */
/* Function-type codes for the general builtins in bdesc_args below,
   same RETURN_FTYPE_ARGS naming as ix86_special_builtin_type.  Some
   entries carry an extra trailing tag that modifies how the builtin is
   expanded rather than its C signature:
     _SWAP      -- operands are exchanged before expansion; used by the
                   cmpgt/cmpge table entries below, which reuse the
                   LT/LE (or UNGE/UNGT) comparison with swapped args.
     _COUNT     -- second operand is a shift count (the psll/psrl/psra
                   entries below use these).
     _PTEST     -- NOTE(review): presumably marks ptest-style expansion
                   returning an int flag; confirm at the expander.
     _VEC_MERGE -- NOTE(review): presumably a scalar op merged into the
                   low element of the destination vector; confirm.
   V2DI2TI / V1DI2DI similarly look like mode-punning variants --
   verify against the expansion code before relying on them.  */
21153 enum ix86_builtin_type
21156 FLOAT128_FTYPE_FLOAT128,
21158 FLOAT128_FTYPE_FLOAT128_FLOAT128,
21159 INT_FTYPE_V8SF_V8SF_PTEST,
21160 INT_FTYPE_V4DI_V4DI_PTEST,
21161 INT_FTYPE_V4DF_V4DF_PTEST,
21162 INT_FTYPE_V4SF_V4SF_PTEST,
21163 INT_FTYPE_V2DI_V2DI_PTEST,
21164 INT_FTYPE_V2DF_V2DF_PTEST,
21196 V4SF_FTYPE_V4SF_VEC_MERGE,
21205 V2DF_FTYPE_V2DF_VEC_MERGE,
21216 V16QI_FTYPE_V16QI_V16QI,
21217 V16QI_FTYPE_V8HI_V8HI,
21218 V8QI_FTYPE_V8QI_V8QI,
21219 V8QI_FTYPE_V4HI_V4HI,
21220 V8HI_FTYPE_V8HI_V8HI,
21221 V8HI_FTYPE_V8HI_V8HI_COUNT,
21222 V8HI_FTYPE_V16QI_V16QI,
21223 V8HI_FTYPE_V4SI_V4SI,
21224 V8HI_FTYPE_V8HI_SI_COUNT,
21225 V8SF_FTYPE_V8SF_V8SF,
21226 V8SF_FTYPE_V8SF_V8SI,
21227 V4SI_FTYPE_V4SI_V4SI,
21228 V4SI_FTYPE_V4SI_V4SI_COUNT,
21229 V4SI_FTYPE_V8HI_V8HI,
21230 V4SI_FTYPE_V4SF_V4SF,
21231 V4SI_FTYPE_V2DF_V2DF,
21232 V4SI_FTYPE_V4SI_SI_COUNT,
21233 V4HI_FTYPE_V4HI_V4HI,
21234 V4HI_FTYPE_V4HI_V4HI_COUNT,
21235 V4HI_FTYPE_V8QI_V8QI,
21236 V4HI_FTYPE_V2SI_V2SI,
21237 V4HI_FTYPE_V4HI_SI_COUNT,
21238 V4DF_FTYPE_V4DF_V4DF,
21239 V4DF_FTYPE_V4DF_V4DI,
21240 V4SF_FTYPE_V4SF_V4SF,
21241 V4SF_FTYPE_V4SF_V4SF_SWAP,
21242 V4SF_FTYPE_V4SF_V4SI,
21243 V4SF_FTYPE_V4SF_V2SI,
21244 V4SF_FTYPE_V4SF_V2DF,
21245 V4SF_FTYPE_V4SF_DI,
21246 V4SF_FTYPE_V4SF_SI,
21247 V2DI_FTYPE_V2DI_V2DI,
21248 V2DI_FTYPE_V2DI_V2DI_COUNT,
21249 V2DI_FTYPE_V16QI_V16QI,
21250 V2DI_FTYPE_V4SI_V4SI,
21251 V2DI_FTYPE_V2DI_V16QI,
21252 V2DI_FTYPE_V2DF_V2DF,
21253 V2DI_FTYPE_V2DI_SI_COUNT,
21254 V2SI_FTYPE_V2SI_V2SI,
21255 V2SI_FTYPE_V2SI_V2SI_COUNT,
21256 V2SI_FTYPE_V4HI_V4HI,
21257 V2SI_FTYPE_V2SF_V2SF,
21258 V2SI_FTYPE_V2SI_SI_COUNT,
21259 V2DF_FTYPE_V2DF_V2DF,
21260 V2DF_FTYPE_V2DF_V2DF_SWAP,
21261 V2DF_FTYPE_V2DF_V4SF,
21262 V2DF_FTYPE_V2DF_V2DI,
21263 V2DF_FTYPE_V2DF_DI,
21264 V2DF_FTYPE_V2DF_SI,
21265 V2SF_FTYPE_V2SF_V2SF,
21266 V1DI_FTYPE_V1DI_V1DI,
21267 V1DI_FTYPE_V1DI_V1DI_COUNT,
21268 V1DI_FTYPE_V8QI_V8QI,
21269 V1DI_FTYPE_V2SI_V2SI,
21270 V1DI_FTYPE_V1DI_SI_COUNT,
21271 UINT64_FTYPE_UINT64_UINT64,
21272 UINT_FTYPE_UINT_UINT,
21273 UINT_FTYPE_UINT_USHORT,
21274 UINT_FTYPE_UINT_UCHAR,
21275 V8HI_FTYPE_V8HI_INT,
21276 V4SI_FTYPE_V4SI_INT,
21277 V4HI_FTYPE_V4HI_INT,
21278 V8SF_FTYPE_V8SF_INT,
21279 V4SI_FTYPE_V8SI_INT,
21280 V4SF_FTYPE_V8SF_INT,
21281 V2DF_FTYPE_V4DF_INT,
21282 V4DF_FTYPE_V4DF_INT,
21283 V4SF_FTYPE_V4SF_INT,
21284 V2DI_FTYPE_V2DI_INT,
21285 V2DI2TI_FTYPE_V2DI_INT,
21286 V2DF_FTYPE_V2DF_INT,
21287 V16QI_FTYPE_V16QI_V16QI_V16QI,
21288 V8SF_FTYPE_V8SF_V8SF_V8SF,
21289 V4DF_FTYPE_V4DF_V4DF_V4DF,
21290 V4SF_FTYPE_V4SF_V4SF_V4SF,
21291 V2DF_FTYPE_V2DF_V2DF_V2DF,
21292 V16QI_FTYPE_V16QI_V16QI_INT,
21293 V8SI_FTYPE_V8SI_V8SI_INT,
21294 V8SI_FTYPE_V8SI_V4SI_INT,
21295 V8HI_FTYPE_V8HI_V8HI_INT,
21296 V8SF_FTYPE_V8SF_V8SF_INT,
21297 V8SF_FTYPE_V8SF_V4SF_INT,
21298 V4SI_FTYPE_V4SI_V4SI_INT,
21299 V4DF_FTYPE_V4DF_V4DF_INT,
21300 V4DF_FTYPE_V4DF_V2DF_INT,
21301 V4SF_FTYPE_V4SF_V4SF_INT,
21302 V2DI_FTYPE_V2DI_V2DI_INT,
21303 V2DI2TI_FTYPE_V2DI_V2DI_INT,
21304 V1DI2DI_FTYPE_V1DI_V1DI_INT,
21305 V2DF_FTYPE_V2DF_V2DF_INT,
21306 V2DI_FTYPE_V2DI_UINT_UINT,
21307 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21310 /* Special builtins with variable number of arguments. */
/* Each record: ISA option mask gating the builtin, the insn pattern
   used to expand it, the user-visible __builtin name, the
   IX86_BUILTIN_* code, a comparison code (always UNKNOWN here), and
   the ix86_special_builtin_type function-type code cast to int.
   NOTE(review): entries whose name field is 0 (mfence, vzeroupper)
   presumably get no name registered from this table and are handled
   specially elsewhere -- confirm at the registration loop.  */
21311 static const struct builtin_description bdesc_special_args[] =
21314 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21317 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21327 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21329 /* SSE or 3DNow!A */
21330 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21331 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
21334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21344 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21348 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21351 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
21354 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21355 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21358 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21359 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21360 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21362 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21363 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21364 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21365 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21366 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21368 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21369 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21370 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21371 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21372 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21373 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21374 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21376 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21377 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21378 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21380 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21381 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21382 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21383 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21384 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21385 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21386 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21387 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21390 /* Builtins with variable number of arguments. */
/* General builtin table (continues past this chunk).  Each record:
   ISA option mask, insn pattern, __builtin name, IX86_BUILTIN_* code,
   an rtx comparison code (UNKNOWN when the pattern needs none; EQ/LT/
   UNGE/... for the cmp* entries, where the cmpgt/cmpge variants reuse
   LT/LE with a _SWAP function type that exchanges the operands), and
   the ix86_builtin_type function-type code cast to int.  */
21391 static const struct builtin_description bdesc_args[] =
21394 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21395 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21396 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21397 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21398 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21399 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21401 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21402 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21403 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21404 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21405 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21406 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21407 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21408 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21410 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21411 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21413 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21414 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21415 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21416 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21418 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21419 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21420 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21421 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21422 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21423 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21425 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21426 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21427 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21428 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21429 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21430 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21432 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21433 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21434 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21436 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21438 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21439 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21440 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21441 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21442 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21443 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21445 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21446 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21447 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21448 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21449 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21450 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21452 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21453 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21454 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21455 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21458 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21459 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21460 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21461 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21463 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21464 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21465 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21466 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21467 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21468 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21469 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21470 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21471 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21472 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21473 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21474 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21475 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21476 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21477 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21480 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21481 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21482 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21483 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21484 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21485 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21488 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21489 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21490 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21491 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21492 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21493 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21494 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21495 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21496 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21497 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21498 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21499 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21501 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21503 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21504 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21505 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21506 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21507 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21508 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21509 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21510 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21512 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21513 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21514 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21515 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21516 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21517 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21518 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21519 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21520 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21521 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21522 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21523 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21524 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21525 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21526 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21527 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21528 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21529 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21530 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21531 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21532 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21533 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21535 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21536 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21537 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21538 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21540 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21541 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21542 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21543 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21545 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21546 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21547 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21548 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21549 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21551 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21552 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
/* NOTE(review): this entry alone omits the (int) cast on the type
   field; harmless in C (enum converts implicitly) but inconsistent
   with every other row -- worth normalizing in a code change.  */
21553 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
21555 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21557 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21558 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21559 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21561 /* SSE or 3DNow!A */
21562 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21563 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21564 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21566 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21567 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21568 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21569 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21571 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21572 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21574 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21591 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21595 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21596 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21602 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21603 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21604 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21605 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21617 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21618 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21619 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21623 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21624 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21625 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21626 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21627 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21628 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21629 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21630 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21632 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21633 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21637 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21639 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21640 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21644 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21646 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21648 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21649 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21650 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21651 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21652 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21653 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21654 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21655 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21659 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21663 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21664 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21666 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21667 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
21669 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21670 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21671 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21672 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21674 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21675 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21677 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21678 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21679 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21680 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21681 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21682 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21684 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21685 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21686 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21687 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21689 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21690 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21691 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21692 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21693 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21694 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21696 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21700 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21702 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21705 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21708 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21710 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21711 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21712 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21713 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21715 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21716 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21717 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21718 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21719 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21720 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21721 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21723 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21724 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21725 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21726 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21727 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21728 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21729 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21731 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21732 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21733 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21734 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21736 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21737 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21738 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21740 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21742 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21743 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21745 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21748 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21749 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21752 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
21753 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21755 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21756 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21757 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21758 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21759 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21760 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21763 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21764 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21765 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21766 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21767 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21768 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21770 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21771 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21772 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21773 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21774 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21775 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21776 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21777 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21778 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21779 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21780 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21781 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21782 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21783 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21784 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21785 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21786 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21787 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21788 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21789 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21790 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21791 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21792 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21793 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21796 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21797 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
21800 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21801 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21802 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21803 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21804 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21805 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21806 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21807 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21808 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21809 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21811 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21812 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21813 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21814 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21815 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21816 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21817 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21818 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21819 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21820 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21821 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21822 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21823 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21825 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21826 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21827 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21828 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21829 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21830 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21831 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21832 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21833 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21834 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21835 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21836 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21838 /* SSE4.1 and SSE5 */
/* Rounding builtins shared between SSE4.1 and SSE5, hence gated on the
   combined OPTION_MASK_ISA_ROUND mask rather than a single ISA bit.  */
21839 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21840 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21841 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21842 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
/* ptest variants reuse one insn pattern; the comparison code (EQ/LTU/GTU)
   selects the testz/testc/testnzc flavor.  NOTE(review): these are gated
   on OPTION_MASK_ISA_ROUND like the round builtins above — confirm this
   is intended rather than OPTION_MASK_ISA_SSE4_1.  */
21844 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21845 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21846 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
/* SSE4.2: 64-bit compare and CRC32 accumulation builtins.  The DImode
   crc32 additionally requires 64-bit mode (OPTION_MASK_ISA_64BIT).  */
21849 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21850 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21851 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21852 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21853 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
/* SSE4A: extract/insert field builtins, in immediate (..qi) and
   register-operand forms.  */
21856 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21857 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21858 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21859 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* AES and PCLMUL entries.  The name field is 0 for these rows —
   presumably the builtin names are registered explicitly elsewhere,
   gated on the AES/PCLMUL option bits (the mask here is only SSE2,
   the baseline these instructions operate on); NOTE(review): confirm
   against the builtin-initialization code.  */
21862 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21863 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21865 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21866 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21867 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21868 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* Carry-less multiply (pclmulqdq).  */
21871 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
/* AVX: 256-bit floating-point arithmetic, logic, horizontal add/sub
   and min/max builtins (V4DF = 4 x double, V8SF = 8 x float).  */
21874 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21875 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21878 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21879 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21881 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21882 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21888 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21889 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21890 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21891 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21892 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21893 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21894 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21895 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21896 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21897 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21898 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21899 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
/* AVX: variable permutes (vpermilvar, control in an integer vector),
   blends, dot product, shuffles and immediate-controlled compares.  */
21901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
21906 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21907 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21910 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
/* VEX-encoded 128-bit compares (predicate passed as immediate) plus the
   256-bit variants.  */
21913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21915 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
/* AVX: 128-bit lane extraction from 256-bit vectors.  */
21919 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21920 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21921 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
/* 256-bit int<->float conversions; cvtt* are the truncating forms.  */
21922 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21923 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21924 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21925 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21926 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21927 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21928 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21929 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
/* Cross-lane permutes, immediate in-lane permutes and 128-bit lane
   insertion into 256-bit vectors.  */
21930 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21931 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21932 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
21933 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21934 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21935 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21936 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21937 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
21938 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
21939 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
/* AVX: element-duplication moves.  */
21941 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21942 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21943 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
/* sqrt/rsqrt/rcp; the *_nr ("no round"?) spellings map to the generic
   patterns rather than the avx_-prefixed ones — NOTE(review): confirm
   the intended distinction between e.g. sqrtps256 and sqrtps_nr256.  */
21945 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21946 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21947 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21948 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21949 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21951 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
/* 256-bit rounding with immediate mode operand.  */
21953 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21954 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
/* 256-bit unpack high/low.  */
21956 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21957 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21958 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21959 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
/* 128 <-> 256 bit vector widening/narrowing casts.  */
21961 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
21962 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
21963 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
21964 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
21965 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
21966 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
/* AVX vtestp{s,d}/ptest flag-setting builtins.  The same insn pattern
   appears three times per data type; the comparison-code field
   (EQ / LTU / GTU) selects the testz / testc / testnzc flavor.  */
21968 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21969 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21970 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21971 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21972 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21973 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21974 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21975 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21976 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21977 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21978 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21979 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21980 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21981 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21982 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
/* Sign-bit mask extraction.  */
21984 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
21985 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
/* Classifies the operand signatures of the SSE5 multi-argument builtins
   in bdesc_multi_arg below: argument count (1/2/3), element mode
   (QI/HI/SI/DI/SF/DF), and whether an immediate (_IMM) or a comparison
   (_CMP) is involved.  (Fragment: additional enumerators and the closing
   brace lie outside this view.)  */
21989 enum multi_arg_type {
21999   MULTI_ARG_3_PERMPS,
22000   MULTI_ARG_3_PERMPD,
22007   MULTI_ARG_2_DI_IMM,
22008   MULTI_ARG_2_SI_IMM,
22009   MULTI_ARG_2_HI_IMM,
22010   MULTI_ARG_2_QI_IMM,
22011   MULTI_ARG_2_SF_CMP,
22012   MULTI_ARG_2_DF_CMP,
22013   MULTI_ARG_2_DI_CMP,
22014   MULTI_ARG_2_SI_CMP,
22015   MULTI_ARG_2_HI_CMP,
22016   MULTI_ARG_2_QI_CMP,
/* SSE5 multi-argument builtin table.  The last field is a multi_arg_type
   classifier (see enum above) instead of a prototype code.  */
22039 static const struct builtin_description bdesc_multi_arg[] =
/* Fused multiply-add/subtract, in scalar (vm*) and packed forms,
   with negated-multiplicand (fnm*) variants.  */
22041   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22042   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22043   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22044   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22045   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22046   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22047   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22048   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22049   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22050   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22051   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22052   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22053   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22054   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22055   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22056   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
/* pcmov: bit-wise conditional move.  The generic "pcmov" builtin shares
   the v2di pattern with the typed pcmov_v2di entry (intentional alias).  */
22057   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
22058   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
22059   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
22060   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
22061   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
22062   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
22063   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
/* Byte and FP permutes.  */
22064   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
22065   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, UNKNOWN, (int)MULTI_ARG_3_PERMPS },
22066   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, UNKNOWN, (int)MULTI_ARG_3_PERMPD },
/* SSE5 integer multiply-accumulate: pmacs* (with 'ss' = signed
   saturation) and pmadcs* (multiply-add with adjacent accumulate).  */
22067   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22068   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22069   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22070   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22071   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22072   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22073   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22074   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22075   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22076   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22077   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22078   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
/* Rotates: variable-count (vrotl patterns) and immediate-count (rotl
   patterns, *_IMM classifiers), then arithmetic (psha*) and logical
   (pshl*) variable shifts.  */
22079   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22080   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
22081   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
22082   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
22083   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
22084   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
22085   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
22086   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
22087   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22088   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
22089   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
22090   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
22091   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22092   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
22093   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
22094   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
/* frcz: extract fraction, scalar (vm*) and packed; half-precision
   conversions; then horizontal widening adds (phadd*, with 'u' =
   unsigned source) and horizontal subtracts (phsub*).  The paired
   mode suffixes in the classifiers (e.g. QI_HI) are source/result
   element modes.  */
22095   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
22096   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
22097   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
22098   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
22099   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)MULTI_ARG_1_PH2PS },
22100   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int)MULTI_ARG_1_PS2PH },
22101   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22102   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22103   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22104   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22105   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22106   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22107   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22108   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22109   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22110   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22111   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22112   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22113   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22114   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22115   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
/* SSE5 scalar single-precision compares (com*ss).  One mask-compare
   pattern is reused; the rtx comparison-code field selects the
   predicate.  NOTE(review): "comneqss"/"comuneqss" deliberately(?)
   reuse the builtin codes of "comness"/"comuness" as alternate
   spellings — confirm these aliases are intended.  */
22117   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22118   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22119   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22120   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22121   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22122   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22123   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22124   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22125   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22126   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22127   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22128   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22129   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22130   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22131   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22132   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
/* SSE5 scalar double-precision compares (com*sd), same structure and
   the same pair of *neq* aliases as the SF group above.  */
22134   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22135   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22136   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22137   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22138   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22139   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22140   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22141   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22142   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22143   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22144   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22145   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22146   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22147   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22148   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22149   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22151 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22152 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22153 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22154 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22157 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22160 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22161 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22162 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22165 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22169 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22170 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22174 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22176 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22178 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22185 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22186 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22187 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22188 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22189 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22190 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22191 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22193 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22194 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22195 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22196 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22197 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22198 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22199 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22201 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22202 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22203 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22204 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22205 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22206 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22207 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22209 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22210 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22211 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22212 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22213 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22214 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22215 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22217 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22218 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22219 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22220 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22221 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22222 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22223 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22225 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22226 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22227 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22228 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22229 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22230 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22231 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22233 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22234 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22235 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22236 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22237 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22238 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22239 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22241 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22242 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22243 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22244 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22245 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22246 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22247 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22249 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22250 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22251 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22252 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22253 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22254 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22255 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22256 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22258 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22259 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22260 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22261 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22262 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22263 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22264 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22265 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22267 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22268 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22269 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22270 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22271 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22272 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22273 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22274 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22277 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
22278    in the current target ISA to allow the user to compile particular modules
22279    with different target specific options that differ from the command line
       options.  */
22282 ix86_init_mmx_sse_builtins (void)
22284 const struct builtin_description * d;
22287 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22288 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22289 tree V1DI_type_node
22290 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22291 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22292 tree V2DI_type_node
22293 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22294 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22295 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22296 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22297 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22298 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22299 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22301 tree pchar_type_node = build_pointer_type (char_type_node);
22302 tree pcchar_type_node
22303 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22304 tree pfloat_type_node = build_pointer_type (float_type_node);
22305 tree pcfloat_type_node
22306 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22307 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22308 tree pcv2sf_type_node
22309 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22310 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22311 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22314 tree int_ftype_v4sf_v4sf
22315 = build_function_type_list (integer_type_node,
22316 V4SF_type_node, V4SF_type_node, NULL_TREE);
22317 tree v4si_ftype_v4sf_v4sf
22318 = build_function_type_list (V4SI_type_node,
22319 V4SF_type_node, V4SF_type_node, NULL_TREE);
22320 /* MMX/SSE/integer conversions. */
22321 tree int_ftype_v4sf
22322 = build_function_type_list (integer_type_node,
22323 V4SF_type_node, NULL_TREE);
22324 tree int64_ftype_v4sf
22325 = build_function_type_list (long_long_integer_type_node,
22326 V4SF_type_node, NULL_TREE);
22327 tree int_ftype_v8qi
22328 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22329 tree v4sf_ftype_v4sf_int
22330 = build_function_type_list (V4SF_type_node,
22331 V4SF_type_node, integer_type_node, NULL_TREE);
22332 tree v4sf_ftype_v4sf_int64
22333 = build_function_type_list (V4SF_type_node,
22334 V4SF_type_node, long_long_integer_type_node,
22336 tree v4sf_ftype_v4sf_v2si
22337 = build_function_type_list (V4SF_type_node,
22338 V4SF_type_node, V2SI_type_node, NULL_TREE);
22340 /* Miscellaneous. */
22341 tree v8qi_ftype_v4hi_v4hi
22342 = build_function_type_list (V8QI_type_node,
22343 V4HI_type_node, V4HI_type_node, NULL_TREE);
22344 tree v4hi_ftype_v2si_v2si
22345 = build_function_type_list (V4HI_type_node,
22346 V2SI_type_node, V2SI_type_node, NULL_TREE);
22347 tree v4sf_ftype_v4sf_v4sf_int
22348 = build_function_type_list (V4SF_type_node,
22349 V4SF_type_node, V4SF_type_node,
22350 integer_type_node, NULL_TREE);
22351 tree v2si_ftype_v4hi_v4hi
22352 = build_function_type_list (V2SI_type_node,
22353 V4HI_type_node, V4HI_type_node, NULL_TREE);
22354 tree v4hi_ftype_v4hi_int
22355 = build_function_type_list (V4HI_type_node,
22356 V4HI_type_node, integer_type_node, NULL_TREE);
22357 tree v2si_ftype_v2si_int
22358 = build_function_type_list (V2SI_type_node,
22359 V2SI_type_node, integer_type_node, NULL_TREE);
22360 tree v1di_ftype_v1di_int
22361 = build_function_type_list (V1DI_type_node,
22362 V1DI_type_node, integer_type_node, NULL_TREE);
22364 tree void_ftype_void
22365 = build_function_type (void_type_node, void_list_node);
22366 tree void_ftype_unsigned
22367 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22368 tree void_ftype_unsigned_unsigned
22369 = build_function_type_list (void_type_node, unsigned_type_node,
22370 unsigned_type_node, NULL_TREE);
22371 tree void_ftype_pcvoid_unsigned_unsigned
22372 = build_function_type_list (void_type_node, const_ptr_type_node,
22373 unsigned_type_node, unsigned_type_node,
22375 tree unsigned_ftype_void
22376 = build_function_type (unsigned_type_node, void_list_node);
22377 tree v2si_ftype_v4sf
22378 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22379 /* Loads/stores. */
22380 tree void_ftype_v8qi_v8qi_pchar
22381 = build_function_type_list (void_type_node,
22382 V8QI_type_node, V8QI_type_node,
22383 pchar_type_node, NULL_TREE);
22384 tree v4sf_ftype_pcfloat
22385 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22386 tree v4sf_ftype_v4sf_pcv2sf
22387 = build_function_type_list (V4SF_type_node,
22388 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22389 tree void_ftype_pv2sf_v4sf
22390 = build_function_type_list (void_type_node,
22391 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22392 tree void_ftype_pfloat_v4sf
22393 = build_function_type_list (void_type_node,
22394 pfloat_type_node, V4SF_type_node, NULL_TREE);
22395 tree void_ftype_pdi_di
22396 = build_function_type_list (void_type_node,
22397 pdi_type_node, long_long_unsigned_type_node,
22399 tree void_ftype_pv2di_v2di
22400 = build_function_type_list (void_type_node,
22401 pv2di_type_node, V2DI_type_node, NULL_TREE);
22402 /* Normal vector unops. */
22403 tree v4sf_ftype_v4sf
22404 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22405 tree v16qi_ftype_v16qi
22406 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22407 tree v8hi_ftype_v8hi
22408 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22409 tree v4si_ftype_v4si
22410 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22411 tree v8qi_ftype_v8qi
22412 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22413 tree v4hi_ftype_v4hi
22414 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22416 /* Normal vector binops. */
22417 tree v4sf_ftype_v4sf_v4sf
22418 = build_function_type_list (V4SF_type_node,
22419 V4SF_type_node, V4SF_type_node, NULL_TREE);
22420 tree v8qi_ftype_v8qi_v8qi
22421 = build_function_type_list (V8QI_type_node,
22422 V8QI_type_node, V8QI_type_node, NULL_TREE);
22423 tree v4hi_ftype_v4hi_v4hi
22424 = build_function_type_list (V4HI_type_node,
22425 V4HI_type_node, V4HI_type_node, NULL_TREE);
22426 tree v2si_ftype_v2si_v2si
22427 = build_function_type_list (V2SI_type_node,
22428 V2SI_type_node, V2SI_type_node, NULL_TREE);
22429 tree v1di_ftype_v1di_v1di
22430 = build_function_type_list (V1DI_type_node,
22431 V1DI_type_node, V1DI_type_node, NULL_TREE);
22432 tree v1di_ftype_v1di_v1di_int
22433 = build_function_type_list (V1DI_type_node,
22434 V1DI_type_node, V1DI_type_node,
22435 integer_type_node, NULL_TREE);
22436 tree v2si_ftype_v2sf
22437 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22438 tree v2sf_ftype_v2si
22439 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22440 tree v2si_ftype_v2si
22441 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22442 tree v2sf_ftype_v2sf
22443 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22444 tree v2sf_ftype_v2sf_v2sf
22445 = build_function_type_list (V2SF_type_node,
22446 V2SF_type_node, V2SF_type_node, NULL_TREE);
22447 tree v2si_ftype_v2sf_v2sf
22448 = build_function_type_list (V2SI_type_node,
22449 V2SF_type_node, V2SF_type_node, NULL_TREE);
22450 tree pint_type_node = build_pointer_type (integer_type_node);
22451 tree pdouble_type_node = build_pointer_type (double_type_node);
22452 tree pcdouble_type_node = build_pointer_type (
22453 build_type_variant (double_type_node, 1, 0));
22454 tree int_ftype_v2df_v2df
22455 = build_function_type_list (integer_type_node,
22456 V2DF_type_node, V2DF_type_node, NULL_TREE);
22458 tree void_ftype_pcvoid
22459 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22460 tree v4sf_ftype_v4si
22461 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22462 tree v4si_ftype_v4sf
22463 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22464 tree v2df_ftype_v4si
22465 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22466 tree v4si_ftype_v2df
22467 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22468 tree v4si_ftype_v2df_v2df
22469 = build_function_type_list (V4SI_type_node,
22470 V2DF_type_node, V2DF_type_node, NULL_TREE);
22471 tree v2si_ftype_v2df
22472 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22473 tree v4sf_ftype_v2df
22474 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22475 tree v2df_ftype_v2si
22476 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22477 tree v2df_ftype_v4sf
22478 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22479 tree int_ftype_v2df
22480 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22481 tree int64_ftype_v2df
22482 = build_function_type_list (long_long_integer_type_node,
22483 V2DF_type_node, NULL_TREE);
22484 tree v2df_ftype_v2df_int
22485 = build_function_type_list (V2DF_type_node,
22486 V2DF_type_node, integer_type_node, NULL_TREE);
22487 tree v2df_ftype_v2df_int64
22488 = build_function_type_list (V2DF_type_node,
22489 V2DF_type_node, long_long_integer_type_node,
22491 tree v4sf_ftype_v4sf_v2df
22492 = build_function_type_list (V4SF_type_node,
22493 V4SF_type_node, V2DF_type_node, NULL_TREE);
22494 tree v2df_ftype_v2df_v4sf
22495 = build_function_type_list (V2DF_type_node,
22496 V2DF_type_node, V4SF_type_node, NULL_TREE);
22497 tree v2df_ftype_v2df_v2df_int
22498 = build_function_type_list (V2DF_type_node,
22499 V2DF_type_node, V2DF_type_node,
22502 tree v2df_ftype_v2df_pcdouble
22503 = build_function_type_list (V2DF_type_node,
22504 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22505 tree void_ftype_pdouble_v2df
22506 = build_function_type_list (void_type_node,
22507 pdouble_type_node, V2DF_type_node, NULL_TREE);
22508 tree void_ftype_pint_int
22509 = build_function_type_list (void_type_node,
22510 pint_type_node, integer_type_node, NULL_TREE);
22511 tree void_ftype_v16qi_v16qi_pchar
22512 = build_function_type_list (void_type_node,
22513 V16QI_type_node, V16QI_type_node,
22514 pchar_type_node, NULL_TREE);
22515 tree v2df_ftype_pcdouble
22516 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22517 tree v2df_ftype_v2df_v2df
22518 = build_function_type_list (V2DF_type_node,
22519 V2DF_type_node, V2DF_type_node, NULL_TREE);
22520 tree v16qi_ftype_v16qi_v16qi
22521 = build_function_type_list (V16QI_type_node,
22522 V16QI_type_node, V16QI_type_node, NULL_TREE);
22523 tree v8hi_ftype_v8hi_v8hi
22524 = build_function_type_list (V8HI_type_node,
22525 V8HI_type_node, V8HI_type_node, NULL_TREE);
22526 tree v4si_ftype_v4si_v4si
22527 = build_function_type_list (V4SI_type_node,
22528 V4SI_type_node, V4SI_type_node, NULL_TREE);
22529 tree v2di_ftype_v2di_v2di
22530 = build_function_type_list (V2DI_type_node,
22531 V2DI_type_node, V2DI_type_node, NULL_TREE);
22532 tree v2di_ftype_v2df_v2df
22533 = build_function_type_list (V2DI_type_node,
22534 V2DF_type_node, V2DF_type_node, NULL_TREE);
22535 tree v2df_ftype_v2df
22536 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22537 tree v2di_ftype_v2di_int
22538 = build_function_type_list (V2DI_type_node,
22539 V2DI_type_node, integer_type_node, NULL_TREE);
22540 tree v2di_ftype_v2di_v2di_int
22541 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22542 V2DI_type_node, integer_type_node, NULL_TREE);
22543 tree v4si_ftype_v4si_int
22544 = build_function_type_list (V4SI_type_node,
22545 V4SI_type_node, integer_type_node, NULL_TREE);
22546 tree v8hi_ftype_v8hi_int
22547 = build_function_type_list (V8HI_type_node,
22548 V8HI_type_node, integer_type_node, NULL_TREE);
22549 tree v4si_ftype_v8hi_v8hi
22550 = build_function_type_list (V4SI_type_node,
22551 V8HI_type_node, V8HI_type_node, NULL_TREE);
22552 tree v1di_ftype_v8qi_v8qi
22553 = build_function_type_list (V1DI_type_node,
22554 V8QI_type_node, V8QI_type_node, NULL_TREE);
22555 tree v1di_ftype_v2si_v2si
22556 = build_function_type_list (V1DI_type_node,
22557 V2SI_type_node, V2SI_type_node, NULL_TREE);
22558 tree v2di_ftype_v16qi_v16qi
22559 = build_function_type_list (V2DI_type_node,
22560 V16QI_type_node, V16QI_type_node, NULL_TREE);
22561 tree v2di_ftype_v4si_v4si
22562 = build_function_type_list (V2DI_type_node,
22563 V4SI_type_node, V4SI_type_node, NULL_TREE);
22564 tree int_ftype_v16qi
22565 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22566 tree v16qi_ftype_pcchar
22567 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22568 tree void_ftype_pchar_v16qi
22569 = build_function_type_list (void_type_node,
22570 pchar_type_node, V16QI_type_node, NULL_TREE);
22572 tree v2di_ftype_v2di_unsigned_unsigned
22573 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22574 unsigned_type_node, unsigned_type_node,
22576 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22577 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22578 unsigned_type_node, unsigned_type_node,
22580 tree v2di_ftype_v2di_v16qi
22581 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22583 tree v2df_ftype_v2df_v2df_v2df
22584 = build_function_type_list (V2DF_type_node,
22585 V2DF_type_node, V2DF_type_node,
22586 V2DF_type_node, NULL_TREE);
22587 tree v4sf_ftype_v4sf_v4sf_v4sf
22588 = build_function_type_list (V4SF_type_node,
22589 V4SF_type_node, V4SF_type_node,
22590 V4SF_type_node, NULL_TREE);
22591 tree v8hi_ftype_v16qi
22592 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22594 tree v4si_ftype_v16qi
22595 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22597 tree v2di_ftype_v16qi
22598 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22600 tree v4si_ftype_v8hi
22601 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22603 tree v2di_ftype_v8hi
22604 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22606 tree v2di_ftype_v4si
22607 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22609 tree v2di_ftype_pv2di
22610 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22612 tree v16qi_ftype_v16qi_v16qi_int
22613 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22614 V16QI_type_node, integer_type_node,
22616 tree v16qi_ftype_v16qi_v16qi_v16qi
22617 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22618 V16QI_type_node, V16QI_type_node,
22620 tree v8hi_ftype_v8hi_v8hi_int
22621 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22622 V8HI_type_node, integer_type_node,
22624 tree v4si_ftype_v4si_v4si_int
22625 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22626 V4SI_type_node, integer_type_node,
22628 tree int_ftype_v2di_v2di
22629 = build_function_type_list (integer_type_node,
22630 V2DI_type_node, V2DI_type_node,
22632 tree int_ftype_v16qi_int_v16qi_int_int
22633 = build_function_type_list (integer_type_node,
22640 tree v16qi_ftype_v16qi_int_v16qi_int_int
22641 = build_function_type_list (V16QI_type_node,
22648 tree int_ftype_v16qi_v16qi_int
22649 = build_function_type_list (integer_type_node,
22655 /* SSE5 instructions */
22656 tree v2di_ftype_v2di_v2di_v2di
22657 = build_function_type_list (V2DI_type_node,
22663 tree v4si_ftype_v4si_v4si_v4si
22664 = build_function_type_list (V4SI_type_node,
22670 tree v4si_ftype_v4si_v4si_v2di
22671 = build_function_type_list (V4SI_type_node,
22677 tree v8hi_ftype_v8hi_v8hi_v8hi
22678 = build_function_type_list (V8HI_type_node,
22684 tree v8hi_ftype_v8hi_v8hi_v4si
22685 = build_function_type_list (V8HI_type_node,
22691 tree v2df_ftype_v2df_v2df_v16qi
22692 = build_function_type_list (V2DF_type_node,
22698 tree v4sf_ftype_v4sf_v4sf_v16qi
22699 = build_function_type_list (V4SF_type_node,
22705 tree v2di_ftype_v2di_si
22706 = build_function_type_list (V2DI_type_node,
22711 tree v4si_ftype_v4si_si
22712 = build_function_type_list (V4SI_type_node,
22717 tree v8hi_ftype_v8hi_si
22718 = build_function_type_list (V8HI_type_node,
22723 tree v16qi_ftype_v16qi_si
22724 = build_function_type_list (V16QI_type_node,
22728 tree v4sf_ftype_v4hi
22729 = build_function_type_list (V4SF_type_node,
22733 tree v4hi_ftype_v4sf
22734 = build_function_type_list (V4HI_type_node,
22738 tree v2di_ftype_v2di
22739 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22741 tree v16qi_ftype_v8hi_v8hi
22742 = build_function_type_list (V16QI_type_node,
22743 V8HI_type_node, V8HI_type_node,
22745 tree v8hi_ftype_v4si_v4si
22746 = build_function_type_list (V8HI_type_node,
22747 V4SI_type_node, V4SI_type_node,
22749 tree v8hi_ftype_v16qi_v16qi
22750 = build_function_type_list (V8HI_type_node,
22751 V16QI_type_node, V16QI_type_node,
22753 tree v4hi_ftype_v8qi_v8qi
22754 = build_function_type_list (V4HI_type_node,
22755 V8QI_type_node, V8QI_type_node,
22757 tree unsigned_ftype_unsigned_uchar
22758 = build_function_type_list (unsigned_type_node,
22759 unsigned_type_node,
22760 unsigned_char_type_node,
22762 tree unsigned_ftype_unsigned_ushort
22763 = build_function_type_list (unsigned_type_node,
22764 unsigned_type_node,
22765 short_unsigned_type_node,
22767 tree unsigned_ftype_unsigned_unsigned
22768 = build_function_type_list (unsigned_type_node,
22769 unsigned_type_node,
22770 unsigned_type_node,
22772 tree uint64_ftype_uint64_uint64
22773 = build_function_type_list (long_long_unsigned_type_node,
22774 long_long_unsigned_type_node,
22775 long_long_unsigned_type_node,
22777 tree float_ftype_float
22778 = build_function_type_list (float_type_node,
22783 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22785 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22787 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22789 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22791 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22793 tree v8sf_ftype_v8sf
22794 = build_function_type_list (V8SF_type_node,
22797 tree v8si_ftype_v8sf
22798 = build_function_type_list (V8SI_type_node,
22801 tree v8sf_ftype_v8si
22802 = build_function_type_list (V8SF_type_node,
22805 tree v4si_ftype_v4df
22806 = build_function_type_list (V4SI_type_node,
22809 tree v4df_ftype_v4df
22810 = build_function_type_list (V4DF_type_node,
22813 tree v4df_ftype_v4si
22814 = build_function_type_list (V4DF_type_node,
22817 tree v4df_ftype_v4sf
22818 = build_function_type_list (V4DF_type_node,
22821 tree v4sf_ftype_v4df
22822 = build_function_type_list (V4SF_type_node,
22825 tree v8sf_ftype_v8sf_v8sf
22826 = build_function_type_list (V8SF_type_node,
22827 V8SF_type_node, V8SF_type_node,
22829 tree v4df_ftype_v4df_v4df
22830 = build_function_type_list (V4DF_type_node,
22831 V4DF_type_node, V4DF_type_node,
22833 tree v8sf_ftype_v8sf_int
22834 = build_function_type_list (V8SF_type_node,
22835 V8SF_type_node, integer_type_node,
22837 tree v4si_ftype_v8si_int
22838 = build_function_type_list (V4SI_type_node,
22839 V8SI_type_node, integer_type_node,
22841 tree v4df_ftype_v4df_int
22842 = build_function_type_list (V4DF_type_node,
22843 V4DF_type_node, integer_type_node,
22845 tree v4sf_ftype_v8sf_int
22846 = build_function_type_list (V4SF_type_node,
22847 V8SF_type_node, integer_type_node,
22849 tree v2df_ftype_v4df_int
22850 = build_function_type_list (V2DF_type_node,
22851 V4DF_type_node, integer_type_node,
22853 tree v8sf_ftype_v8sf_v8sf_int
22854 = build_function_type_list (V8SF_type_node,
22855 V8SF_type_node, V8SF_type_node,
22858 tree v8sf_ftype_v8sf_v8sf_v8sf
22859 = build_function_type_list (V8SF_type_node,
22860 V8SF_type_node, V8SF_type_node,
22863 tree v4df_ftype_v4df_v4df_v4df
22864 = build_function_type_list (V4DF_type_node,
22865 V4DF_type_node, V4DF_type_node,
22868 tree v8si_ftype_v8si_v8si_int
22869 = build_function_type_list (V8SI_type_node,
22870 V8SI_type_node, V8SI_type_node,
22873 tree v4df_ftype_v4df_v4df_int
22874 = build_function_type_list (V4DF_type_node,
22875 V4DF_type_node, V4DF_type_node,
22878 tree v8sf_ftype_pcfloat
22879 = build_function_type_list (V8SF_type_node,
22882 tree v4df_ftype_pcdouble
22883 = build_function_type_list (V4DF_type_node,
22884 pcdouble_type_node,
22886 tree pcv4sf_type_node
22887 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22888 tree pcv2df_type_node
22889 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22890 tree v8sf_ftype_pcv4sf
22891 = build_function_type_list (V8SF_type_node,
22894 tree v4df_ftype_pcv2df
22895 = build_function_type_list (V4DF_type_node,
22898 tree v32qi_ftype_pcchar
22899 = build_function_type_list (V32QI_type_node,
22902 tree void_ftype_pchar_v32qi
22903 = build_function_type_list (void_type_node,
22904 pchar_type_node, V32QI_type_node,
22906 tree v8si_ftype_v8si_v4si_int
22907 = build_function_type_list (V8SI_type_node,
22908 V8SI_type_node, V4SI_type_node,
22911 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22912 tree void_ftype_pv4di_v4di
22913 = build_function_type_list (void_type_node,
22914 pv4di_type_node, V4DI_type_node,
22916 tree v8sf_ftype_v8sf_v4sf_int
22917 = build_function_type_list (V8SF_type_node,
22918 V8SF_type_node, V4SF_type_node,
22921 tree v4df_ftype_v4df_v2df_int
22922 = build_function_type_list (V4DF_type_node,
22923 V4DF_type_node, V2DF_type_node,
22926 tree void_ftype_pfloat_v8sf
22927 = build_function_type_list (void_type_node,
22928 pfloat_type_node, V8SF_type_node,
22930 tree void_ftype_pdouble_v4df
22931 = build_function_type_list (void_type_node,
22932 pdouble_type_node, V4DF_type_node,
22934 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
22935 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
22936 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
22937 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
22938 tree pcv8sf_type_node
22939 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
22940 tree pcv4df_type_node
22941 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
22942 tree v8sf_ftype_pcv8sf_v8sf
22943 = build_function_type_list (V8SF_type_node,
22944 pcv8sf_type_node, V8SF_type_node,
22946 tree v4df_ftype_pcv4df_v4df
22947 = build_function_type_list (V4DF_type_node,
22948 pcv4df_type_node, V4DF_type_node,
22950 tree v4sf_ftype_pcv4sf_v4sf
22951 = build_function_type_list (V4SF_type_node,
22952 pcv4sf_type_node, V4SF_type_node,
22954 tree v2df_ftype_pcv2df_v2df
22955 = build_function_type_list (V2DF_type_node,
22956 pcv2df_type_node, V2DF_type_node,
22958 tree void_ftype_pv8sf_v8sf_v8sf
22959 = build_function_type_list (void_type_node,
22960 pv8sf_type_node, V8SF_type_node,
22963 tree void_ftype_pv4df_v4df_v4df
22964 = build_function_type_list (void_type_node,
22965 pv4df_type_node, V4DF_type_node,
22968 tree void_ftype_pv4sf_v4sf_v4sf
22969 = build_function_type_list (void_type_node,
22970 pv4sf_type_node, V4SF_type_node,
22973 tree void_ftype_pv2df_v2df_v2df
22974 = build_function_type_list (void_type_node,
22975 pv2df_type_node, V2DF_type_node,
22978 tree v4df_ftype_v2df
22979 = build_function_type_list (V4DF_type_node,
22982 tree v8sf_ftype_v4sf
22983 = build_function_type_list (V8SF_type_node,
22986 tree v8si_ftype_v4si
22987 = build_function_type_list (V8SI_type_node,
22990 tree v2df_ftype_v4df
22991 = build_function_type_list (V2DF_type_node,
22994 tree v4sf_ftype_v8sf
22995 = build_function_type_list (V4SF_type_node,
22998 tree v4si_ftype_v8si
22999 = build_function_type_list (V4SI_type_node,
23002 tree int_ftype_v4df
23003 = build_function_type_list (integer_type_node,
23006 tree int_ftype_v8sf
23007 = build_function_type_list (integer_type_node,
23010 tree int_ftype_v8sf_v8sf
23011 = build_function_type_list (integer_type_node,
23012 V8SF_type_node, V8SF_type_node,
23014 tree int_ftype_v4di_v4di
23015 = build_function_type_list (integer_type_node,
23016 V4DI_type_node, V4DI_type_node,
23018 tree int_ftype_v4df_v4df
23019 = build_function_type_list (integer_type_node,
23020 V4DF_type_node, V4DF_type_node,
23022 tree v8sf_ftype_v8sf_v8si
23023 = build_function_type_list (V8SF_type_node,
23024 V8SF_type_node, V8SI_type_node,
23026 tree v4df_ftype_v4df_v4di
23027 = build_function_type_list (V4DF_type_node,
23028 V4DF_type_node, V4DI_type_node,
23030 tree v4sf_ftype_v4sf_v4si
23031 = build_function_type_list (V4SF_type_node,
23032 V4SF_type_node, V4SI_type_node, NULL_TREE);
23033 tree v2df_ftype_v2df_v2di
23034 = build_function_type_list (V2DF_type_node,
23035 V2DF_type_node, V2DI_type_node, NULL_TREE);
23039 /* Add all special builtins with variable number of operands. */
23040 for (i = 0, d = bdesc_special_args;
23041 i < ARRAY_SIZE (bdesc_special_args);
23049 switch ((enum ix86_special_builtin_type) d->flag)
23051 case VOID_FTYPE_VOID:
23052 type = void_ftype_void;
23054 case V32QI_FTYPE_PCCHAR:
23055 type = v32qi_ftype_pcchar;
23057 case V16QI_FTYPE_PCCHAR:
23058 type = v16qi_ftype_pcchar;
23060 case V8SF_FTYPE_PCV4SF:
23061 type = v8sf_ftype_pcv4sf;
23063 case V8SF_FTYPE_PCFLOAT:
23064 type = v8sf_ftype_pcfloat;
23066 case V4DF_FTYPE_PCV2DF:
23067 type = v4df_ftype_pcv2df;
23069 case V4DF_FTYPE_PCDOUBLE:
23070 type = v4df_ftype_pcdouble;
23072 case V4SF_FTYPE_PCFLOAT:
23073 type = v4sf_ftype_pcfloat;
23075 case V2DI_FTYPE_PV2DI:
23076 type = v2di_ftype_pv2di;
23078 case V2DF_FTYPE_PCDOUBLE:
23079 type = v2df_ftype_pcdouble;
23081 case V8SF_FTYPE_PCV8SF_V8SF:
23082 type = v8sf_ftype_pcv8sf_v8sf;
23084 case V4DF_FTYPE_PCV4DF_V4DF:
23085 type = v4df_ftype_pcv4df_v4df;
23087 case V4SF_FTYPE_V4SF_PCV2SF:
23088 type = v4sf_ftype_v4sf_pcv2sf;
23090 case V4SF_FTYPE_PCV4SF_V4SF:
23091 type = v4sf_ftype_pcv4sf_v4sf;
23093 case V2DF_FTYPE_V2DF_PCDOUBLE:
23094 type = v2df_ftype_v2df_pcdouble;
23096 case V2DF_FTYPE_PCV2DF_V2DF:
23097 type = v2df_ftype_pcv2df_v2df;
23099 case VOID_FTYPE_PV2SF_V4SF:
23100 type = void_ftype_pv2sf_v4sf;
23102 case VOID_FTYPE_PV4DI_V4DI:
23103 type = void_ftype_pv4di_v4di;
23105 case VOID_FTYPE_PV2DI_V2DI:
23106 type = void_ftype_pv2di_v2di;
23108 case VOID_FTYPE_PCHAR_V32QI:
23109 type = void_ftype_pchar_v32qi;
23111 case VOID_FTYPE_PCHAR_V16QI:
23112 type = void_ftype_pchar_v16qi;
23114 case VOID_FTYPE_PFLOAT_V8SF:
23115 type = void_ftype_pfloat_v8sf;
23117 case VOID_FTYPE_PFLOAT_V4SF:
23118 type = void_ftype_pfloat_v4sf;
23120 case VOID_FTYPE_PDOUBLE_V4DF:
23121 type = void_ftype_pdouble_v4df;
23123 case VOID_FTYPE_PDOUBLE_V2DF:
23124 type = void_ftype_pdouble_v2df;
23126 case VOID_FTYPE_PDI_DI:
23127 type = void_ftype_pdi_di;
23129 case VOID_FTYPE_PINT_INT:
23130 type = void_ftype_pint_int;
23132 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23133 type = void_ftype_pv8sf_v8sf_v8sf;
23135 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23136 type = void_ftype_pv4df_v4df_v4df;
23138 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23139 type = void_ftype_pv4sf_v4sf_v4sf;
23141 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23142 type = void_ftype_pv2df_v2df_v2df;
23145 gcc_unreachable ();
23148 def_builtin (d->mask, d->name, type, d->code);
23151 /* Add all builtins with variable number of operands. */
23152 for (i = 0, d = bdesc_args;
23153 i < ARRAY_SIZE (bdesc_args);
23161 switch ((enum ix86_builtin_type) d->flag)
23163 case FLOAT_FTYPE_FLOAT:
23164 type = float_ftype_float;
23166 case INT_FTYPE_V8SF_V8SF_PTEST:
23167 type = int_ftype_v8sf_v8sf;
23169 case INT_FTYPE_V4DI_V4DI_PTEST:
23170 type = int_ftype_v4di_v4di;
23172 case INT_FTYPE_V4DF_V4DF_PTEST:
23173 type = int_ftype_v4df_v4df;
23175 case INT_FTYPE_V4SF_V4SF_PTEST:
23176 type = int_ftype_v4sf_v4sf;
23178 case INT_FTYPE_V2DI_V2DI_PTEST:
23179 type = int_ftype_v2di_v2di;
23181 case INT_FTYPE_V2DF_V2DF_PTEST:
23182 type = int_ftype_v2df_v2df;
23184 case INT64_FTYPE_V4SF:
23185 type = int64_ftype_v4sf;
23187 case INT64_FTYPE_V2DF:
23188 type = int64_ftype_v2df;
23190 case INT_FTYPE_V16QI:
23191 type = int_ftype_v16qi;
23193 case INT_FTYPE_V8QI:
23194 type = int_ftype_v8qi;
23196 case INT_FTYPE_V8SF:
23197 type = int_ftype_v8sf;
23199 case INT_FTYPE_V4DF:
23200 type = int_ftype_v4df;
23202 case INT_FTYPE_V4SF:
23203 type = int_ftype_v4sf;
23205 case INT_FTYPE_V2DF:
23206 type = int_ftype_v2df;
23208 case V16QI_FTYPE_V16QI:
23209 type = v16qi_ftype_v16qi;
23211 case V8SI_FTYPE_V8SF:
23212 type = v8si_ftype_v8sf;
23214 case V8SI_FTYPE_V4SI:
23215 type = v8si_ftype_v4si;
23217 case V8HI_FTYPE_V8HI:
23218 type = v8hi_ftype_v8hi;
23220 case V8HI_FTYPE_V16QI:
23221 type = v8hi_ftype_v16qi;
23223 case V8QI_FTYPE_V8QI:
23224 type = v8qi_ftype_v8qi;
23226 case V8SF_FTYPE_V8SF:
23227 type = v8sf_ftype_v8sf;
23229 case V8SF_FTYPE_V8SI:
23230 type = v8sf_ftype_v8si;
23232 case V8SF_FTYPE_V4SF:
23233 type = v8sf_ftype_v4sf;
23235 case V4SI_FTYPE_V4DF:
23236 type = v4si_ftype_v4df;
23238 case V4SI_FTYPE_V4SI:
23239 type = v4si_ftype_v4si;
23241 case V4SI_FTYPE_V16QI:
23242 type = v4si_ftype_v16qi;
23244 case V4SI_FTYPE_V8SI:
23245 type = v4si_ftype_v8si;
23247 case V4SI_FTYPE_V8HI:
23248 type = v4si_ftype_v8hi;
23250 case V4SI_FTYPE_V4SF:
23251 type = v4si_ftype_v4sf;
23253 case V4SI_FTYPE_V2DF:
23254 type = v4si_ftype_v2df;
23256 case V4HI_FTYPE_V4HI:
23257 type = v4hi_ftype_v4hi;
23259 case V4DF_FTYPE_V4DF:
23260 type = v4df_ftype_v4df;
23262 case V4DF_FTYPE_V4SI:
23263 type = v4df_ftype_v4si;
23265 case V4DF_FTYPE_V4SF:
23266 type = v4df_ftype_v4sf;
23268 case V4DF_FTYPE_V2DF:
23269 type = v4df_ftype_v2df;
23271 case V4SF_FTYPE_V4SF:
23272 case V4SF_FTYPE_V4SF_VEC_MERGE:
23273 type = v4sf_ftype_v4sf;
23275 case V4SF_FTYPE_V8SF:
23276 type = v4sf_ftype_v8sf;
23278 case V4SF_FTYPE_V4SI:
23279 type = v4sf_ftype_v4si;
23281 case V4SF_FTYPE_V4DF:
23282 type = v4sf_ftype_v4df;
23284 case V4SF_FTYPE_V2DF:
23285 type = v4sf_ftype_v2df;
23287 case V2DI_FTYPE_V2DI:
23288 type = v2di_ftype_v2di;
23290 case V2DI_FTYPE_V16QI:
23291 type = v2di_ftype_v16qi;
23293 case V2DI_FTYPE_V8HI:
23294 type = v2di_ftype_v8hi;
23296 case V2DI_FTYPE_V4SI:
23297 type = v2di_ftype_v4si;
23299 case V2SI_FTYPE_V2SI:
23300 type = v2si_ftype_v2si;
23302 case V2SI_FTYPE_V4SF:
23303 type = v2si_ftype_v4sf;
23305 case V2SI_FTYPE_V2DF:
23306 type = v2si_ftype_v2df;
23308 case V2SI_FTYPE_V2SF:
23309 type = v2si_ftype_v2sf;
23311 case V2DF_FTYPE_V4DF:
23312 type = v2df_ftype_v4df;
23314 case V2DF_FTYPE_V4SF:
23315 type = v2df_ftype_v4sf;
23317 case V2DF_FTYPE_V2DF:
23318 case V2DF_FTYPE_V2DF_VEC_MERGE:
23319 type = v2df_ftype_v2df;
23321 case V2DF_FTYPE_V2SI:
23322 type = v2df_ftype_v2si;
23324 case V2DF_FTYPE_V4SI:
23325 type = v2df_ftype_v4si;
23327 case V2SF_FTYPE_V2SF:
23328 type = v2sf_ftype_v2sf;
23330 case V2SF_FTYPE_V2SI:
23331 type = v2sf_ftype_v2si;
23333 case V16QI_FTYPE_V16QI_V16QI:
23334 type = v16qi_ftype_v16qi_v16qi;
23336 case V16QI_FTYPE_V8HI_V8HI:
23337 type = v16qi_ftype_v8hi_v8hi;
23339 case V8QI_FTYPE_V8QI_V8QI:
23340 type = v8qi_ftype_v8qi_v8qi;
23342 case V8QI_FTYPE_V4HI_V4HI:
23343 type = v8qi_ftype_v4hi_v4hi;
23345 case V8HI_FTYPE_V8HI_V8HI:
23346 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23347 type = v8hi_ftype_v8hi_v8hi;
23349 case V8HI_FTYPE_V16QI_V16QI:
23350 type = v8hi_ftype_v16qi_v16qi;
23352 case V8HI_FTYPE_V4SI_V4SI:
23353 type = v8hi_ftype_v4si_v4si;
23355 case V8HI_FTYPE_V8HI_SI_COUNT:
23356 type = v8hi_ftype_v8hi_int;
23358 case V8SF_FTYPE_V8SF_V8SF:
23359 type = v8sf_ftype_v8sf_v8sf;
23361 case V8SF_FTYPE_V8SF_V8SI:
23362 type = v8sf_ftype_v8sf_v8si;
23364 case V4SI_FTYPE_V4SI_V4SI:
23365 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23366 type = v4si_ftype_v4si_v4si;
23368 case V4SI_FTYPE_V8HI_V8HI:
23369 type = v4si_ftype_v8hi_v8hi;
23371 case V4SI_FTYPE_V4SF_V4SF:
23372 type = v4si_ftype_v4sf_v4sf;
23374 case V4SI_FTYPE_V2DF_V2DF:
23375 type = v4si_ftype_v2df_v2df;
23377 case V4SI_FTYPE_V4SI_SI_COUNT:
23378 type = v4si_ftype_v4si_int;
23380 case V4HI_FTYPE_V4HI_V4HI:
23381 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23382 type = v4hi_ftype_v4hi_v4hi;
23384 case V4HI_FTYPE_V8QI_V8QI:
23385 type = v4hi_ftype_v8qi_v8qi;
23387 case V4HI_FTYPE_V2SI_V2SI:
23388 type = v4hi_ftype_v2si_v2si;
23390 case V4HI_FTYPE_V4HI_SI_COUNT:
23391 type = v4hi_ftype_v4hi_int;
23393 case V4DF_FTYPE_V4DF_V4DF:
23394 type = v4df_ftype_v4df_v4df;
23396 case V4DF_FTYPE_V4DF_V4DI:
23397 type = v4df_ftype_v4df_v4di;
23399 case V4SF_FTYPE_V4SF_V4SF:
23400 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23401 type = v4sf_ftype_v4sf_v4sf;
23403 case V4SF_FTYPE_V4SF_V4SI:
23404 type = v4sf_ftype_v4sf_v4si;
23406 case V4SF_FTYPE_V4SF_V2SI:
23407 type = v4sf_ftype_v4sf_v2si;
23409 case V4SF_FTYPE_V4SF_V2DF:
23410 type = v4sf_ftype_v4sf_v2df;
23412 case V4SF_FTYPE_V4SF_DI:
23413 type = v4sf_ftype_v4sf_int64;
23415 case V4SF_FTYPE_V4SF_SI:
23416 type = v4sf_ftype_v4sf_int;
23418 case V2DI_FTYPE_V2DI_V2DI:
23419 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23420 type = v2di_ftype_v2di_v2di;
23422 case V2DI_FTYPE_V16QI_V16QI:
23423 type = v2di_ftype_v16qi_v16qi;
23425 case V2DI_FTYPE_V4SI_V4SI:
23426 type = v2di_ftype_v4si_v4si;
23428 case V2DI_FTYPE_V2DI_V16QI:
23429 type = v2di_ftype_v2di_v16qi;
23431 case V2DI_FTYPE_V2DF_V2DF:
23432 type = v2di_ftype_v2df_v2df;
23434 case V2DI_FTYPE_V2DI_SI_COUNT:
23435 type = v2di_ftype_v2di_int;
23437 case V2SI_FTYPE_V2SI_V2SI:
23438 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23439 type = v2si_ftype_v2si_v2si;
23441 case V2SI_FTYPE_V4HI_V4HI:
23442 type = v2si_ftype_v4hi_v4hi;
23444 case V2SI_FTYPE_V2SF_V2SF:
23445 type = v2si_ftype_v2sf_v2sf;
23447 case V2SI_FTYPE_V2SI_SI_COUNT:
23448 type = v2si_ftype_v2si_int;
23450 case V2DF_FTYPE_V2DF_V2DF:
23451 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23452 type = v2df_ftype_v2df_v2df;
23454 case V2DF_FTYPE_V2DF_V4SF:
23455 type = v2df_ftype_v2df_v4sf;
23457 case V2DF_FTYPE_V2DF_V2DI:
23458 type = v2df_ftype_v2df_v2di;
23460 case V2DF_FTYPE_V2DF_DI:
23461 type = v2df_ftype_v2df_int64;
23463 case V2DF_FTYPE_V2DF_SI:
23464 type = v2df_ftype_v2df_int;
23466 case V2SF_FTYPE_V2SF_V2SF:
23467 type = v2sf_ftype_v2sf_v2sf;
23469 case V1DI_FTYPE_V1DI_V1DI:
23470 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23471 type = v1di_ftype_v1di_v1di;
23473 case V1DI_FTYPE_V8QI_V8QI:
23474 type = v1di_ftype_v8qi_v8qi;
23476 case V1DI_FTYPE_V2SI_V2SI:
23477 type = v1di_ftype_v2si_v2si;
23479 case V1DI_FTYPE_V1DI_SI_COUNT:
23480 type = v1di_ftype_v1di_int;
23482 case UINT64_FTYPE_UINT64_UINT64:
23483 type = uint64_ftype_uint64_uint64;
23485 case UINT_FTYPE_UINT_UINT:
23486 type = unsigned_ftype_unsigned_unsigned;
23488 case UINT_FTYPE_UINT_USHORT:
23489 type = unsigned_ftype_unsigned_ushort;
23491 case UINT_FTYPE_UINT_UCHAR:
23492 type = unsigned_ftype_unsigned_uchar;
23494 case V8HI_FTYPE_V8HI_INT:
23495 type = v8hi_ftype_v8hi_int;
23497 case V8SF_FTYPE_V8SF_INT:
23498 type = v8sf_ftype_v8sf_int;
23500 case V4SI_FTYPE_V4SI_INT:
23501 type = v4si_ftype_v4si_int;
23503 case V4SI_FTYPE_V8SI_INT:
23504 type = v4si_ftype_v8si_int;
23506 case V4HI_FTYPE_V4HI_INT:
23507 type = v4hi_ftype_v4hi_int;
23509 case V4DF_FTYPE_V4DF_INT:
23510 type = v4df_ftype_v4df_int;
23512 case V4SF_FTYPE_V4SF_INT:
23513 type = v4sf_ftype_v4sf_int;
23515 case V4SF_FTYPE_V8SF_INT:
23516 type = v4sf_ftype_v8sf_int;
23518 case V2DI_FTYPE_V2DI_INT:
23519 case V2DI2TI_FTYPE_V2DI_INT:
23520 type = v2di_ftype_v2di_int;
23522 case V2DF_FTYPE_V2DF_INT:
23523 type = v2df_ftype_v2df_int;
23525 case V2DF_FTYPE_V4DF_INT:
23526 type = v2df_ftype_v4df_int;
23528 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23529 type = v16qi_ftype_v16qi_v16qi_v16qi;
23531 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23532 type = v8sf_ftype_v8sf_v8sf_v8sf;
23534 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23535 type = v4df_ftype_v4df_v4df_v4df;
23537 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23538 type = v4sf_ftype_v4sf_v4sf_v4sf;
23540 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23541 type = v2df_ftype_v2df_v2df_v2df;
23543 case V16QI_FTYPE_V16QI_V16QI_INT:
23544 type = v16qi_ftype_v16qi_v16qi_int;
23546 case V8SI_FTYPE_V8SI_V8SI_INT:
23547 type = v8si_ftype_v8si_v8si_int;
23549 case V8SI_FTYPE_V8SI_V4SI_INT:
23550 type = v8si_ftype_v8si_v4si_int;
23552 case V8HI_FTYPE_V8HI_V8HI_INT:
23553 type = v8hi_ftype_v8hi_v8hi_int;
23555 case V8SF_FTYPE_V8SF_V8SF_INT:
23556 type = v8sf_ftype_v8sf_v8sf_int;
23558 case V8SF_FTYPE_V8SF_V4SF_INT:
23559 type = v8sf_ftype_v8sf_v4sf_int;
23561 case V4SI_FTYPE_V4SI_V4SI_INT:
23562 type = v4si_ftype_v4si_v4si_int;
23564 case V4DF_FTYPE_V4DF_V4DF_INT:
23565 type = v4df_ftype_v4df_v4df_int;
23567 case V4DF_FTYPE_V4DF_V2DF_INT:
23568 type = v4df_ftype_v4df_v2df_int;
23570 case V4SF_FTYPE_V4SF_V4SF_INT:
23571 type = v4sf_ftype_v4sf_v4sf_int;
23573 case V2DI_FTYPE_V2DI_V2DI_INT:
23574 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23575 type = v2di_ftype_v2di_v2di_int;
23577 case V2DF_FTYPE_V2DF_V2DF_INT:
23578 type = v2df_ftype_v2df_v2df_int;
23580 case V2DI_FTYPE_V2DI_UINT_UINT:
23581 type = v2di_ftype_v2di_unsigned_unsigned;
23583 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23584 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23586 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23587 type = v1di_ftype_v1di_v1di_int;
23590 gcc_unreachable ();
23593 def_builtin_const (d->mask, d->name, type, d->code);
23596 /* pcmpestr[im] insns. */
23597 for (i = 0, d = bdesc_pcmpestr;
23598 i < ARRAY_SIZE (bdesc_pcmpestr);
23601 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23602 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23604 ftype = int_ftype_v16qi_int_v16qi_int_int;
23605 def_builtin_const (d->mask, d->name, ftype, d->code);
23608 /* pcmpistr[im] insns. */
23609 for (i = 0, d = bdesc_pcmpistr;
23610 i < ARRAY_SIZE (bdesc_pcmpistr);
23613 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23614 ftype = v16qi_ftype_v16qi_v16qi_int;
23616 ftype = int_ftype_v16qi_v16qi_int;
23617 def_builtin_const (d->mask, d->name, ftype, d->code);
23620 /* comi/ucomi insns. */
23621 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23622 if (d->mask == OPTION_MASK_ISA_SSE2)
23623 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23625 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23628 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23629 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23631 /* SSE or 3DNow!A */
23632 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23635 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23637 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23638 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23641 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23642 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23645 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23646 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23647 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23648 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23649 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23650 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23653 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23656 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23657 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23659 /* Access to the vec_init patterns. */
23660 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23661 integer_type_node, NULL_TREE);
23662 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23664 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23665 short_integer_type_node,
23666 short_integer_type_node,
23667 short_integer_type_node, NULL_TREE);
23668 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23670 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23671 char_type_node, char_type_node,
23672 char_type_node, char_type_node,
23673 char_type_node, char_type_node,
23674 char_type_node, NULL_TREE);
23675 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23677 /* Access to the vec_extract patterns. */
23678 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23679 integer_type_node, NULL_TREE);
23680 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23682 ftype = build_function_type_list (long_long_integer_type_node,
23683 V2DI_type_node, integer_type_node,
23685 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23687 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23688 integer_type_node, NULL_TREE);
23689 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23691 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23692 integer_type_node, NULL_TREE);
23693 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23695 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23696 integer_type_node, NULL_TREE);
23697 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23699 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23700 integer_type_node, NULL_TREE);
23701 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23703 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23704 integer_type_node, NULL_TREE);
23705 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23707 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23708 integer_type_node, NULL_TREE);
23709 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23711 /* Access to the vec_set patterns. */
23712 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23714 integer_type_node, NULL_TREE);
23715 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23717 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23719 integer_type_node, NULL_TREE);
23720 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23722 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23724 integer_type_node, NULL_TREE);
23725 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23727 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23729 integer_type_node, NULL_TREE);
23730 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23732 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23734 integer_type_node, NULL_TREE);
23735 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23737 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23739 integer_type_node, NULL_TREE);
23740 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23742 /* Add SSE5 multi-arg argument instructions */
23743 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23745 tree mtype = NULL_TREE;
23750 switch ((enum multi_arg_type)d->flag)
23752 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23753 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23754 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23755 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23756 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23757 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23758 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23759 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23760 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23761 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23762 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23763 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23764 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23765 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23766 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23767 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23768 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23769 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23770 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23771 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23772 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23773 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23774 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23775 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23776 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23777 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23778 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23779 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23780 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23781 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23782 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23783 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23784 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23785 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23786 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23787 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23788 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23789 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23790 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23791 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23792 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23793 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23794 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23795 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23796 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23797 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23798 case MULTI_ARG_UNKNOWN:
23800 gcc_unreachable ();
23804 def_builtin_const (d->mask, d->name, mtype, d->code);
23808 /* Internal method for ix86_init_builtins. */
/* Register the __builtin_ms_va_{start,end,copy} and
   __builtin_sysv_va_{start,end,copy} builtins.  Each trio is tagged with
   the matching "ms_abi" / "sysv_abi" function attribute so the va_list
   machinery sees the correct calling convention for cross-ABI calls.
   NOTE(review): this listing is missing intermediate source lines
   (return type, braces, TARGET_64BIT guard); code kept byte-identical.  */
23811 ix86_init_builtins_va_builtins_abi (void)
23813 tree ms_va_ref, sysv_va_ref;
23814 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23815 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23816 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23817 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists attached to the builtin decls below.  */
23821 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23822 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* References to the two va_list flavors; arguments are passed by
   reference so the builtins can update the caller's va_list.  */
23823 ms_va_ref = build_reference_type (ms_va_list_type_node);
23825 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Function types: va_end takes just the va_list; va_start is varargs;
   va_copy takes destination and source.  */
23828 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23829 fnvoid_va_start_ms =
23830 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23831 fnvoid_va_end_sysv =
23832 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23833 fnvoid_va_start_sysv =
23834 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23836 fnvoid_va_copy_ms =
23837 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23839 fnvoid_va_copy_sysv =
23840 build_function_type_list (void_type_node, sysv_va_ref,
23841 sysv_va_ref, NULL_TREE);
/* Register the six builtins.  Each maps onto the generic BUILT_IN_VA_*
   code; the attribute list selects the ABI at expansion time.  */
23843 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23844 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23845 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23846 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23847 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23848 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23849 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23850 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23851 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23852 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23853 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23854 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level builtin initialization for i386: registers the __float80 and
   __float128 types, the TFmode math builtins (__builtin_infq,
   __builtin_huge_valq, __builtin_fabsq, __builtin_copysignq), then the
   MMX/SSE builtins and the per-ABI va_list builtins.
   NOTE(review): listing has gaps (return type, braces, register_builtin_type
   name arguments are on dropped lines); code kept byte-identical.  */
23858 ix86_init_builtins (void)
23860 tree float128_type_node = make_node (REAL_TYPE);
/* The __float80 type: reuse long double when it is already XFmode,
   otherwise lay out a distinct 80-bit REAL_TYPE.  */
23863 /* The __float80 type. */
23864 if (TYPE_MODE (long_double_type_node) == XFmode)
23865 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23869 /* The __float80 type. */
23870 tree float80_type_node = make_node (REAL_TYPE);
23872 TYPE_PRECISION (float80_type_node) = 80;
23873 layout_type (float80_type_node);
23874 (*lang_hooks.types.register_builtin_type) (float80_type_node,
23878 /* The __float128 type. */
23879 TYPE_PRECISION (float128_type_node) = 128;
23880 layout_type (float128_type_node);
23881 (*lang_hooks.types.register_builtin_type) (float128_type_node,
23884 /* TFmode support builtins. */
23885 ftype = build_function_type (float128_type_node, void_list_node);
23886 decl = add_builtin_function ("__builtin_infq", ftype,
23887 IX86_BUILTIN_INFQ, BUILT_IN_MD,
/* Cache each decl in ix86_builtins[] so ix86_expand_builtin can find it.  */
23889 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23891 decl = add_builtin_function ("__builtin_huge_valq", ftype,
23892 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
23894 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
23896 /* We will expand them to normal call if SSE2 isn't available since
23897 they are used by libgcc. */
23898 ftype = build_function_type_list (float128_type_node,
23899 float128_type_node,
23901 decl = add_builtin_function ("__builtin_fabsq", ftype,
23902 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23903 "__fabstf2", NULL_TREE);
23904 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
/* fabsq/copysignq are pure value computations; mark READONLY so CSE can
   share calls.  */
23905 TREE_READONLY (decl) = 1;
23907 ftype = build_function_type_list (float128_type_node,
23908 float128_type_node,
23909 float128_type_node,
23911 decl = add_builtin_function ("__builtin_copysignq", ftype,
23912 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23913 "__copysigntf3", NULL_TREE);
23914 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23915 TREE_READONLY (decl) = 1;
23917 ix86_init_mmx_sse_builtins ();
23919 ix86_init_builtins_va_builtins_abi ();
23922 /* Errors in the source file can cause expand_expr to return const0_rtx
23923 where we expect a vector. To avoid crashing, use one of the vector
23924 clear instructions. */
/* Replace a scalar const0_rtx with the all-zeros vector constant of MODE
   so downstream predicates see an operand of the right mode.
   NOTE(review): the `return x;` and surrounding braces are on dropped
   lines of this listing.  */
23926 safe_vector_operand (rtx x, enum machine_mode mode)
23928 if (x == const0_rtx)
23929 x = CONST0_RTX (mode);
23933 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin via insn pattern ICODE: evaluate both call
   arguments, coerce them to the pattern's operand modes, and emit the insn
   into TARGET (allocating a fresh register when TARGET does not fit).
   NOTE(review): listing has gaps (function prologue, pat emission and
   return); code kept byte-identical.  */
23936 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23939 tree arg0 = CALL_EXPR_ARG (exp, 0);
23940 tree arg1 = CALL_EXPR_ARG (exp, 1);
23941 rtx op0 = expand_normal (arg0);
23942 rtx op1 = expand_normal (arg1);
23943 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23944 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23945 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector (error recovery).  */
23947 if (VECTOR_MODE_P (mode0))
23948 op0 = safe_vector_operand (op0, mode0);
23949 if (VECTOR_MODE_P (mode1))
23950 op1 = safe_vector_operand (op1, mode1);
23952 if (optimize || !target
23953 || GET_MODE (target) != tmode
23954 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23955 target = gen_reg_rtx (tmode);
/* An SImode second operand feeding a TImode pattern is widened by loading
   it into a V4SI register and taking the TImode lowpart.  */
23957 if (GET_MODE (op1) == SImode && mode1 == TImode)
23959 rtx x = gen_reg_rtx (V4SImode);
23960 emit_insn (gen_sse2_loadd (x, op1));
23961 op1 = gen_lowpart (TImode, x);
/* Force operands into registers when the pattern's predicates reject
   them as-is.  */
23964 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23965 op0 = copy_to_mode_reg (mode0, op0);
23966 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23967 op1 = copy_to_mode_reg (mode1, op1);
23969 pat = GEN_FCN (icode) (target, op0, op1);
23978 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expand a multi-argument (SSE5/XOP-style) builtin.  M_TYPE classifies the
   argument count and flavor: plain 1/2/3-operand forms, forms whose last
   argument must be an immediate (_IMM), comparison forms (_CMP, which emit
   SUB_CODE as an extra comparison rtx), and test forms (_TF, which pass
   SUB_CODE as an immediate).  NOTE(review): the nargs assignments inside
   each case group, the args[] declaration and the final emit/return are on
   dropped lines of this listing; code kept byte-identical.  */
23981 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23982 enum multi_arg_type m_type,
23983 enum rtx_code sub_code)
23988 bool comparison_p = false;
23990 bool last_arg_constant = false;
23991 int num_memory = 0;
23994 enum machine_mode mode;
23997 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: three-operand forms...  */
24001 case MULTI_ARG_3_SF:
24002 case MULTI_ARG_3_DF:
24003 case MULTI_ARG_3_DI:
24004 case MULTI_ARG_3_SI:
24005 case MULTI_ARG_3_SI_DI:
24006 case MULTI_ARG_3_HI:
24007 case MULTI_ARG_3_HI_SI:
24008 case MULTI_ARG_3_QI:
24009 case MULTI_ARG_3_PERMPS:
24010 case MULTI_ARG_3_PERMPD:
/* ...two-operand forms...  */
24014 case MULTI_ARG_2_SF:
24015 case MULTI_ARG_2_DF:
24016 case MULTI_ARG_2_DI:
24017 case MULTI_ARG_2_SI:
24018 case MULTI_ARG_2_HI:
24019 case MULTI_ARG_2_QI:
/* ...two-operand forms whose second operand must be an immediate...  */
24023 case MULTI_ARG_2_DI_IMM:
24024 case MULTI_ARG_2_SI_IMM:
24025 case MULTI_ARG_2_HI_IMM:
24026 case MULTI_ARG_2_QI_IMM:
24028 last_arg_constant = true;
/* ...one-operand forms...  */
24031 case MULTI_ARG_1_SF:
24032 case MULTI_ARG_1_DF:
24033 case MULTI_ARG_1_DI:
24034 case MULTI_ARG_1_SI:
24035 case MULTI_ARG_1_HI:
24036 case MULTI_ARG_1_QI:
24037 case MULTI_ARG_1_SI_DI:
24038 case MULTI_ARG_1_HI_DI:
24039 case MULTI_ARG_1_HI_SI:
24040 case MULTI_ARG_1_QI_DI:
24041 case MULTI_ARG_1_QI_SI:
24042 case MULTI_ARG_1_QI_HI:
24043 case MULTI_ARG_1_PH2PS:
24044 case MULTI_ARG_1_PS2PH:
/* ...comparison forms (emit SUB_CODE as a comparison rtx operand)...  */
24048 case MULTI_ARG_2_SF_CMP:
24049 case MULTI_ARG_2_DF_CMP:
24050 case MULTI_ARG_2_DI_CMP:
24051 case MULTI_ARG_2_SI_CMP:
24052 case MULTI_ARG_2_HI_CMP:
24053 case MULTI_ARG_2_QI_CMP:
24055 comparison_p = true;
/* ...and test forms (SUB_CODE passed as an integer immediate).  */
24058 case MULTI_ARG_2_SF_TF:
24059 case MULTI_ARG_2_DF_TF:
24060 case MULTI_ARG_2_DI_TF:
24061 case MULTI_ARG_2_SI_TF:
24062 case MULTI_ARG_2_HI_TF:
24063 case MULTI_ARG_2_QI_TF:
24068 case MULTI_ARG_UNKNOWN:
24070 gcc_unreachable ();
24073 if (optimize || !target
24074 || GET_MODE (target) != tmode
24075 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24076 target = gen_reg_rtx (tmode);
24078 gcc_assert (nargs <= 4);
/* Expand and legitimize each call argument.  For comparison forms the
   comparison rtx occupies operand 1, so argument operands shift by one
   (ADJUST below).  */
24080 for (i = 0; i < nargs; i++)
24082 tree arg = CALL_EXPR_ARG (exp, i);
24083 rtx op = expand_normal (arg);
24084 int adjust = (comparison_p) ? 1 : 0;
24085 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24087 if (last_arg_constant && i == nargs-1)
24089 if (!CONST_INT_P (op))
24091 error ("last argument must be an immediate");
/* Error recovery: return a dummy register so expansion can continue.  */
24092 return gen_reg_rtx (tmode);
24097 if (VECTOR_MODE_P (mode))
24098 op = safe_vector_operand (op, mode);
24100 /* If we aren't optimizing, only allow one memory operand to be
24102 if (memory_operand (op, mode))
24105 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24108 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24110 op = force_reg (mode, op);
24114 args[i].mode = mode;
/* Emit the insn with the operand arrangement matching nargs / flavor.  */
24120 pat = GEN_FCN (icode) (target, args[0].op);
24125 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24126 GEN_INT ((int)sub_code));
24127 else if (! comparison_p)
24128 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24131 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24135 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24140 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24144 gcc_unreachable ();
24154 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24155 insns with vec_merge. */
/* Expand a scalar unary builtin whose pattern is a vec_merge: the single
   source operand is used for both pattern inputs (op0 and op1 share mode0).
   NOTE(review): the op1 = op0 assignment and the emit/return tail are on
   dropped lines; code kept byte-identical.  */
24158 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24162 tree arg0 = CALL_EXPR_ARG (exp, 0);
24163 rtx op1, op0 = expand_normal (arg0);
24164 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24165 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24167 if (optimize || !target
24168 || GET_MODE (target) != tmode
24169 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24170 target = gen_reg_rtx (tmode);
24172 if (VECTOR_MODE_P (mode0))
24173 op0 = safe_vector_operand (op0, mode0);
24175 if ((optimize && !register_operand (op0, mode0))
24176 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24177 op0 = copy_to_mode_reg (mode0, op0);
24180 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24181 op1 = copy_to_mode_reg (mode0, op1);
24183 pat = GEN_FCN (icode) (target, op0, op1);
24190 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE compare builtin described by D.  When SWAP, exchange the
   two operands first (used for comparisons the hardware only implements in
   one direction).  Emits pattern d->icode with a comparison rtx built from
   d->comparison as the final operand.  NOTE(review): the operand-exchange
   body and the emit/return tail are on dropped lines; code kept
   byte-identical.  */
24193 ix86_expand_sse_compare (const struct builtin_description *d,
24194 tree exp, rtx target, bool swap)
24197 tree arg0 = CALL_EXPR_ARG (exp, 0);
24198 tree arg1 = CALL_EXPR_ARG (exp, 1);
24199 rtx op0 = expand_normal (arg0);
24200 rtx op1 = expand_normal (arg1);
24202 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24203 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24204 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24205 enum rtx_code comparison = d->comparison;
24207 if (VECTOR_MODE_P (mode0))
24208 op0 = safe_vector_operand (op0, mode0);
24209 if (VECTOR_MODE_P (mode1))
24210 op1 = safe_vector_operand (op1, mode1);
24212 /* Swap operands if we have a comparison that isn't available in
24216 rtx tmp = gen_reg_rtx (mode1);
24217 emit_move_insn (tmp, op1);
24222 if (optimize || !target
24223 || GET_MODE (target) != tmode
24224 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24225 target = gen_reg_rtx (tmode);
24227 if ((optimize && !register_operand (op0, mode0))
24228 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24229 op0 = copy_to_mode_reg (mode0, op0);
24230 if ((optimize && !register_operand (op1, mode1))
24231 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24232 op1 = copy_to_mode_reg (mode1, op1);
/* Operand 3 of the pattern is the comparison itself.  */
24234 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24235 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24242 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comiss/comisd-style builtin: emit the flags-setting compare,
   then materialize the boolean result by setting the low QImode part of an
   SImode register from the flags via d->comparison.  Returns the SImode
   register.  NOTE(review): swap body and parts of the tail are on dropped
   lines; code kept byte-identical.  */
24245 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24249 tree arg0 = CALL_EXPR_ARG (exp, 0);
24250 tree arg1 = CALL_EXPR_ARG (exp, 1);
24251 rtx op0 = expand_normal (arg0);
24252 rtx op1 = expand_normal (arg1);
24253 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24254 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24255 enum rtx_code comparison = d->comparison;
24257 if (VECTOR_MODE_P (mode0))
24258 op0 = safe_vector_operand (op0, mode0);
24259 if (VECTOR_MODE_P (mode1))
24260 op1 = safe_vector_operand (op1, mode1);
24262 /* Swap operands if we have a comparison that isn't available in
24264 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Result register: zero the full SImode word, then write only the low
   byte via a QImode SUBREG + STRICT_LOW_PART.  */
24271 target = gen_reg_rtx (SImode);
24272 emit_move_insn (target, const0_rtx);
24273 target = gen_rtx_SUBREG (QImode, target, 0);
24275 if ((optimize && !register_operand (op0, mode0))
24276 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24277 op0 = copy_to_mode_reg (mode0, op0);
24278 if ((optimize && !register_operand (op1, mode1))
24279 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24280 op1 = copy_to_mode_reg (mode1, op1);
24282 pat = GEN_FCN (d->icode) (op0, op1);
24286 emit_insn (gen_rtx_SET (VOIDmode,
24287 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24288 gen_rtx_fmt_ee (comparison, QImode,
/* Hand back the containing SImode register, not the QImode subreg.  */
24292 return SUBREG_REG (target);
24295 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a ptest/vtest builtin: emit the flags-setting test insn, then set
   the low byte of a zeroed SImode register from the flags using
   d->comparison.  Structure mirrors ix86_expand_sse_comi.
   NOTE(review): listing has gaps; code kept byte-identical.  */
24298 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24302 tree arg0 = CALL_EXPR_ARG (exp, 0);
24303 tree arg1 = CALL_EXPR_ARG (exp, 1);
24304 rtx op0 = expand_normal (arg0);
24305 rtx op1 = expand_normal (arg1);
24306 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24307 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24308 enum rtx_code comparison = d->comparison;
24310 if (VECTOR_MODE_P (mode0))
24311 op0 = safe_vector_operand (op0, mode0);
24312 if (VECTOR_MODE_P (mode1))
24313 op1 = safe_vector_operand (op1, mode1);
24315 target = gen_reg_rtx (SImode);
24316 emit_move_insn (target, const0_rtx);
24317 target = gen_rtx_SUBREG (QImode, target, 0);
24319 if ((optimize && !register_operand (op0, mode0))
24320 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24321 op0 = copy_to_mode_reg (mode0, op0);
24322 if ((optimize && !register_operand (op1, mode1))
24323 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24324 op1 = copy_to_mode_reg (mode1, op1);
24326 pat = GEN_FCN (d->icode) (op0, op1);
24330 emit_insn (gen_rtx_SET (VOIDmode,
24331 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24332 gen_rtx_fmt_ee (comparison, QImode,
24336 return SUBREG_REG (target);
24339 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expand SSE4.2 pcmpestri/pcmpestrm builtins (5 arguments: two vectors,
   their explicit lengths, and an 8-bit control immediate).  The pattern
   produces both an index (tmode0) and a mask (tmode1) result; depending on
   d->code one of them is the target and the other a scratch, or — for the
   flag-extracting variants — both are scratches and the requested EFLAGS
   bit (d->flag) is read out as a 0/1 SImode value.
   NOTE(review): listing has gaps (declarations, pat emission); code kept
   byte-identical.  */
24342 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24343 tree exp, rtx target)
24346 tree arg0 = CALL_EXPR_ARG (exp, 0);
24347 tree arg1 = CALL_EXPR_ARG (exp, 1);
24348 tree arg2 = CALL_EXPR_ARG (exp, 2);
24349 tree arg3 = CALL_EXPR_ARG (exp, 3);
24350 tree arg4 = CALL_EXPR_ARG (exp, 4);
24351 rtx scratch0, scratch1;
24352 rtx op0 = expand_normal (arg0);
24353 rtx op1 = expand_normal (arg1);
24354 rtx op2 = expand_normal (arg2);
24355 rtx op3 = expand_normal (arg3);
24356 rtx op4 = expand_normal (arg4);
24357 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24359 tmode0 = insn_data[d->icode].operand[0].mode;
24360 tmode1 = insn_data[d->icode].operand[1].mode;
24361 modev2 = insn_data[d->icode].operand[2].mode;
24362 modei3 = insn_data[d->icode].operand[3].mode;
24363 modev4 = insn_data[d->icode].operand[4].mode;
24364 modei5 = insn_data[d->icode].operand[5].mode;
24365 modeimm = insn_data[d->icode].operand[6].mode;
24367 if (VECTOR_MODE_P (modev2))
24368 op0 = safe_vector_operand (op0, modev2);
24369 if (VECTOR_MODE_P (modev4))
24370 op2 = safe_vector_operand (op2, modev4);
24372 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24373 op0 = copy_to_mode_reg (modev2, op0);
24374 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24375 op1 = copy_to_mode_reg (modei3, op1);
24376 if ((optimize && !register_operand (op2, modev4))
24377 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24378 op2 = copy_to_mode_reg (modev4, op2)
24379 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24380 op3 = copy_to_mode_reg (modei5, op3);
24382 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24384 error ("the fifth argument must be a 8-bit immediate");
/* pcmpestri: index result is the target, mask goes to a scratch.  */
24388 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24390 if (optimize || !target
24391 || GET_MODE (target) != tmode0
24392 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24393 target = gen_reg_rtx (tmode0);
24395 scratch1 = gen_reg_rtx (tmode1);
24397 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* pcmpestrm: mask result is the target, index goes to a scratch.  */
24399 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24401 if (optimize || !target
24402 || GET_MODE (target) != tmode1
24403 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24404 target = gen_reg_rtx (tmode1);
24406 scratch0 = gen_reg_rtx (tmode0);
24408 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag variants: both pattern outputs are scratches.  */
24412 gcc_assert (d->flag);
24414 scratch0 = gen_reg_rtx (tmode0);
24415 scratch1 = gen_reg_rtx (tmode1);
24417 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Extract the requested EFLAGS bit (d->flag names the flags register
   mode) as 0/1 into the low byte of a zeroed SImode register.  */
24427 target = gen_reg_rtx (SImode);
24428 emit_move_insn (target, const0_rtx);
24429 target = gen_rtx_SUBREG (QImode, target, 0);
24432 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24433 gen_rtx_fmt_ee (EQ, QImode,
24434 gen_rtx_REG ((enum machine_mode) d->flag,
24437 return SUBREG_REG (target);
24444 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expand SSE4.2 pcmpistri/pcmpistrm builtins (3 arguments: two vectors and
   an 8-bit control immediate; implicit NUL-terminated lengths).  Same
   target/scratch arrangement as ix86_expand_sse_pcmpestr above.
   NOTE(review): listing has gaps; code kept byte-identical.  */
24447 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24448 tree exp, rtx target)
24451 tree arg0 = CALL_EXPR_ARG (exp, 0);
24452 tree arg1 = CALL_EXPR_ARG (exp, 1);
24453 tree arg2 = CALL_EXPR_ARG (exp, 2);
24454 rtx scratch0, scratch1;
24455 rtx op0 = expand_normal (arg0);
24456 rtx op1 = expand_normal (arg1);
24457 rtx op2 = expand_normal (arg2);
24458 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24460 tmode0 = insn_data[d->icode].operand[0].mode;
24461 tmode1 = insn_data[d->icode].operand[1].mode;
24462 modev2 = insn_data[d->icode].operand[2].mode;
24463 modev3 = insn_data[d->icode].operand[3].mode;
24464 modeimm = insn_data[d->icode].operand[4].mode;
24466 if (VECTOR_MODE_P (modev2))
24467 op0 = safe_vector_operand (op0, modev2);
24468 if (VECTOR_MODE_P (modev3))
24469 op1 = safe_vector_operand (op1, modev3);
24471 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24472 op0 = copy_to_mode_reg (modev2, op0);
24473 if ((optimize && !register_operand (op1, modev3))
24474 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24475 op1 = copy_to_mode_reg (modev3, op1);
24477 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24479 error ("the third argument must be a 8-bit immediate");
/* pcmpistri: index result is the target, mask goes to a scratch.  */
24483 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24485 if (optimize || !target
24486 || GET_MODE (target) != tmode0
24487 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24488 target = gen_reg_rtx (tmode0);
24490 scratch1 = gen_reg_rtx (tmode1);
24492 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* pcmpistrm: mask result is the target, index goes to a scratch.  */
24494 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24496 if (optimize || !target
24497 || GET_MODE (target) != tmode1
24498 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24499 target = gen_reg_rtx (tmode1);
24501 scratch0 = gen_reg_rtx (tmode0);
24503 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag variants: both pattern outputs are scratches; the requested
   EFLAGS bit is read out below.  */
24507 gcc_assert (d->flag);
24509 scratch0 = gen_reg_rtx (tmode0);
24510 scratch1 = gen_reg_rtx (tmode1);
24512 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24522 target = gen_reg_rtx (SImode);
24523 emit_move_insn (target, const0_rtx);
24524 target = gen_rtx_SUBREG (QImode, target, 0);
24527 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24528 gen_rtx_fmt_ee (EQ, QImode,
24529 gen_rtx_REG ((enum machine_mode) d->flag,
24532 return SUBREG_REG (target);
24538 /* Subroutine of ix86_expand_builtin to take care of insns with
24539 variable number of operands. */
/* Generic expander driven by the builtin's function-type tag (d->flag):
   the big switch maps each ix86_builtin_type to an argument count, how
   many trailing arguments must be immediates (nargs_constant), whether
   the last argument is a shift count (last_arg_count), and whether to
   delegate to a specialized helper (ptest / vec_merge / binop / compare).
   NOTE(review): many lines of this listing are missing (nargs
   assignments, declarations, break statements, final emit/return); code
   kept byte-identical.  */
24542 ix86_expand_args_builtin (const struct builtin_description *d,
24543 tree exp, rtx target)
24545 rtx pat, real_target;
24546 unsigned int i, nargs;
24547 unsigned int nargs_constant = 0;
24548 int num_memory = 0;
24552 enum machine_mode mode;
24554 bool last_arg_count = false;
24555 enum insn_code icode = d->icode;
24556 const struct insn_data *insn_p = &insn_data[icode];
24557 enum machine_mode tmode = insn_p->operand[0].mode;
24558 enum machine_mode rmode = VOIDmode;
24560 enum rtx_code comparison = d->comparison;
24562 switch ((enum ix86_builtin_type) d->flag)
/* ptest-style types delegate immediately.  */
24564 case INT_FTYPE_V8SF_V8SF_PTEST:
24565 case INT_FTYPE_V4DI_V4DI_PTEST:
24566 case INT_FTYPE_V4DF_V4DF_PTEST:
24567 case INT_FTYPE_V4SF_V4SF_PTEST:
24568 case INT_FTYPE_V2DI_V2DI_PTEST:
24569 case INT_FTYPE_V2DF_V2DF_PTEST:
24570 return ix86_expand_sse_ptest (d, exp, target);
/* One-argument (unary) types.  */
24571 case FLOAT128_FTYPE_FLOAT128:
24572 case FLOAT_FTYPE_FLOAT:
24573 case INT64_FTYPE_V4SF:
24574 case INT64_FTYPE_V2DF:
24575 case INT_FTYPE_V16QI:
24576 case INT_FTYPE_V8QI:
24577 case INT_FTYPE_V8SF:
24578 case INT_FTYPE_V4DF:
24579 case INT_FTYPE_V4SF:
24580 case INT_FTYPE_V2DF:
24581 case V16QI_FTYPE_V16QI:
24582 case V8SI_FTYPE_V8SF:
24583 case V8SI_FTYPE_V4SI:
24584 case V8HI_FTYPE_V8HI:
24585 case V8HI_FTYPE_V16QI:
24586 case V8QI_FTYPE_V8QI:
24587 case V8SF_FTYPE_V8SF:
24588 case V8SF_FTYPE_V8SI:
24589 case V8SF_FTYPE_V4SF:
24590 case V4SI_FTYPE_V4SI:
24591 case V4SI_FTYPE_V16QI:
24592 case V4SI_FTYPE_V4SF:
24593 case V4SI_FTYPE_V8SI:
24594 case V4SI_FTYPE_V8HI:
24595 case V4SI_FTYPE_V4DF:
24596 case V4SI_FTYPE_V2DF:
24597 case V4HI_FTYPE_V4HI:
24598 case V4DF_FTYPE_V4DF:
24599 case V4DF_FTYPE_V4SI:
24600 case V4DF_FTYPE_V4SF:
24601 case V4DF_FTYPE_V2DF:
24602 case V4SF_FTYPE_V4SF:
24603 case V4SF_FTYPE_V4SI:
24604 case V4SF_FTYPE_V8SF:
24605 case V4SF_FTYPE_V4DF:
24606 case V4SF_FTYPE_V2DF:
24607 case V2DI_FTYPE_V2DI:
24608 case V2DI_FTYPE_V16QI:
24609 case V2DI_FTYPE_V8HI:
24610 case V2DI_FTYPE_V4SI:
24611 case V2DF_FTYPE_V2DF:
24612 case V2DF_FTYPE_V4SI:
24613 case V2DF_FTYPE_V4DF:
24614 case V2DF_FTYPE_V4SF:
24615 case V2DF_FTYPE_V2SI:
24616 case V2SI_FTYPE_V2SI:
24617 case V2SI_FTYPE_V4SF:
24618 case V2SI_FTYPE_V2SF:
24619 case V2SI_FTYPE_V2DF:
24620 case V2SF_FTYPE_V2SF:
24621 case V2SF_FTYPE_V2SI:
/* Scalar unops implemented with vec_merge delegate to the helper.  */
24624 case V4SF_FTYPE_V4SF_VEC_MERGE:
24625 case V2DF_FTYPE_V2DF_VEC_MERGE:
24626 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Two-argument types: plain binops unless a comparison is requested.  */
24627 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24628 case V16QI_FTYPE_V16QI_V16QI:
24629 case V16QI_FTYPE_V8HI_V8HI:
24630 case V8QI_FTYPE_V8QI_V8QI:
24631 case V8QI_FTYPE_V4HI_V4HI:
24632 case V8HI_FTYPE_V8HI_V8HI:
24633 case V8HI_FTYPE_V16QI_V16QI:
24634 case V8HI_FTYPE_V4SI_V4SI:
24635 case V8SF_FTYPE_V8SF_V8SF:
24636 case V8SF_FTYPE_V8SF_V8SI:
24637 case V4SI_FTYPE_V4SI_V4SI:
24638 case V4SI_FTYPE_V8HI_V8HI:
24639 case V4SI_FTYPE_V4SF_V4SF:
24640 case V4SI_FTYPE_V2DF_V2DF:
24641 case V4HI_FTYPE_V4HI_V4HI:
24642 case V4HI_FTYPE_V8QI_V8QI:
24643 case V4HI_FTYPE_V2SI_V2SI:
24644 case V4DF_FTYPE_V4DF_V4DF:
24645 case V4DF_FTYPE_V4DF_V4DI:
24646 case V4SF_FTYPE_V4SF_V4SF:
24647 case V4SF_FTYPE_V4SF_V4SI:
24648 case V4SF_FTYPE_V4SF_V2SI:
24649 case V4SF_FTYPE_V4SF_V2DF:
24650 case V4SF_FTYPE_V4SF_DI:
24651 case V4SF_FTYPE_V4SF_SI:
24652 case V2DI_FTYPE_V2DI_V2DI:
24653 case V2DI_FTYPE_V16QI_V16QI:
24654 case V2DI_FTYPE_V4SI_V4SI:
24655 case V2DI_FTYPE_V2DI_V16QI:
24656 case V2DI_FTYPE_V2DF_V2DF:
24657 case V2SI_FTYPE_V2SI_V2SI:
24658 case V2SI_FTYPE_V4HI_V4HI:
24659 case V2SI_FTYPE_V2SF_V2SF:
24660 case V2DF_FTYPE_V2DF_V2DF:
24661 case V2DF_FTYPE_V2DF_V4SF:
24662 case V2DF_FTYPE_V2DF_V2DI:
24663 case V2DF_FTYPE_V2DF_DI:
24664 case V2DF_FTYPE_V2DF_SI:
24665 case V2SF_FTYPE_V2SF_V2SF:
24666 case V1DI_FTYPE_V1DI_V1DI:
24667 case V1DI_FTYPE_V8QI_V8QI:
24668 case V1DI_FTYPE_V2SI_V2SI:
24669 if (comparison == UNKNOWN)
24670 return ix86_expand_binop_builtin (icode, exp, target);
/* Comparison types whose operands must be swapped.  */
24673 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24674 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24675 gcc_assert (comparison != UNKNOWN);
/* Shift/rotate types: the last argument is a count.  */
24679 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24680 case V8HI_FTYPE_V8HI_SI_COUNT:
24681 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24682 case V4SI_FTYPE_V4SI_SI_COUNT:
24683 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24684 case V4HI_FTYPE_V4HI_SI_COUNT:
24685 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24686 case V2DI_FTYPE_V2DI_SI_COUNT:
24687 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24688 case V2SI_FTYPE_V2SI_SI_COUNT:
24689 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24690 case V1DI_FTYPE_V1DI_SI_COUNT:
24692 last_arg_count = true;
24694 case UINT64_FTYPE_UINT64_UINT64:
24695 case UINT_FTYPE_UINT_UINT:
24696 case UINT_FTYPE_UINT_USHORT:
24697 case UINT_FTYPE_UINT_UCHAR:
/* Types with a trailing immediate; rmode handling for the TI forms is
   on dropped lines.  */
24700 case V2DI2TI_FTYPE_V2DI_INT:
24703 nargs_constant = 1;
24705 case V8HI_FTYPE_V8HI_INT:
24706 case V8SF_FTYPE_V8SF_INT:
24707 case V4SI_FTYPE_V4SI_INT:
24708 case V4SI_FTYPE_V8SI_INT:
24709 case V4HI_FTYPE_V4HI_INT:
24710 case V4DF_FTYPE_V4DF_INT:
24711 case V4SF_FTYPE_V4SF_INT:
24712 case V4SF_FTYPE_V8SF_INT:
24713 case V2DI_FTYPE_V2DI_INT:
24714 case V2DF_FTYPE_V2DF_INT:
24715 case V2DF_FTYPE_V4DF_INT:
24717 nargs_constant = 1;
/* Three-operand types.  */
24719 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24720 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24721 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24722 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24723 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24726 case V16QI_FTYPE_V16QI_V16QI_INT:
24727 case V8HI_FTYPE_V8HI_V8HI_INT:
24728 case V8SI_FTYPE_V8SI_V8SI_INT:
24729 case V8SI_FTYPE_V8SI_V4SI_INT:
24730 case V8SF_FTYPE_V8SF_V8SF_INT:
24731 case V8SF_FTYPE_V8SF_V4SF_INT:
24732 case V4SI_FTYPE_V4SI_V4SI_INT:
24733 case V4DF_FTYPE_V4DF_V4DF_INT:
24734 case V4DF_FTYPE_V4DF_V2DF_INT:
24735 case V4SF_FTYPE_V4SF_V4SF_INT:
24736 case V2DI_FTYPE_V2DI_V2DI_INT:
24737 case V2DF_FTYPE_V2DF_V2DF_INT:
24739 nargs_constant = 1;
24741 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24744 nargs_constant = 1;
24746 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24749 nargs_constant = 1;
24751 case V2DI_FTYPE_V2DI_UINT_UINT:
24753 nargs_constant = 2;
24755 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24757 nargs_constant = 2;
24760 gcc_unreachable ();
24763 gcc_assert (nargs <= ARRAY_SIZE (args));
24765 if (comparison != UNKNOWN)
24767 gcc_assert (nargs == 2);
24768 return ix86_expand_sse_compare (d, exp, target, swap);
/* When the pattern's result mode (rmode) differs from the builtin's
   declared mode (tmode), allocate in rmode and view it as tmode.  */
24771 if (rmode == VOIDmode || rmode == tmode)
24775 || GET_MODE (target) != tmode
24776 || ! (*insn_p->operand[0].predicate) (target, tmode))
24777 target = gen_reg_rtx (tmode);
24778 real_target = target;
24782 target = gen_reg_rtx (rmode);
24783 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24786 for (i = 0; i < nargs; i++)
24788 tree arg = CALL_EXPR_ARG (exp, i);
24789 rtx op = expand_normal (arg);
24790 enum machine_mode mode = insn_p->operand[i + 1].mode;
24791 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24793 if (last_arg_count && (i + 1) == nargs)
24795 /* SIMD shift insns take either an 8-bit immediate or
24796 register as count. But builtin functions take int as
24797 count. If count doesn't match, we put it in register. */
24800 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24801 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24802 op = copy_to_reg (op);
/* Trailing-immediate argument: diagnose a non-matching value with a
   message giving the exact bit-width the pattern accepts.  */
24805 else if ((nargs - i) <= nargs_constant)
24810 case CODE_FOR_sse4_1_roundpd:
24811 case CODE_FOR_sse4_1_roundps:
24812 case CODE_FOR_sse4_1_roundsd:
24813 case CODE_FOR_sse4_1_roundss:
24814 case CODE_FOR_sse4_1_blendps:
24815 case CODE_FOR_avx_blendpd256:
24816 case CODE_FOR_avx_vpermilv4df:
24817 case CODE_FOR_avx_roundpd256:
24818 case CODE_FOR_avx_roundps256:
24819 error ("the last argument must be a 4-bit immediate");
24822 case CODE_FOR_sse4_1_blendpd:
24823 case CODE_FOR_avx_vpermilv2df:
24824 error ("the last argument must be a 2-bit immediate");
24827 case CODE_FOR_avx_vextractf128v4df:
24828 case CODE_FOR_avx_vextractf128v8sf:
24829 case CODE_FOR_avx_vextractf128v8si:
24830 case CODE_FOR_avx_vinsertf128v4df:
24831 case CODE_FOR_avx_vinsertf128v8sf:
24832 case CODE_FOR_avx_vinsertf128v8si:
24833 error ("the last argument must be a 1-bit immediate");
24836 case CODE_FOR_avx_cmpsdv2df3:
24837 case CODE_FOR_avx_cmpssv4sf3:
24838 case CODE_FOR_avx_cmppdv2df3:
24839 case CODE_FOR_avx_cmppsv4sf3:
24840 case CODE_FOR_avx_cmppdv4df3:
24841 case CODE_FOR_avx_cmppsv8sf3:
24842 error ("the last argument must be a 5-bit immediate");
24846 switch (nargs_constant)
24849 if ((nargs - i) == nargs_constant)
24851 error ("the next to last argument must be an 8-bit immediate");
24855 error ("the last argument must be an 8-bit immediate");
24858 gcc_unreachable ();
/* Ordinary argument: legitimize and register-copy as needed.  */
24865 if (VECTOR_MODE_P (mode))
24866 op = safe_vector_operand (op, mode);
24868 /* If we aren't optimizing, only allow one memory operand to
24870 if (memory_operand (op, mode))
24873 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24875 if (optimize || !match || num_memory > 1)
24876 op = copy_to_mode_reg (mode, op);
24880 op = copy_to_reg (op);
24881 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24886 args[i].mode = mode;
/* Emit the pattern with the right number of operands.  */
24892 pat = GEN_FCN (icode) (real_target, args[0].op);
24895 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24898 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24902 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24903 args[2].op, args[3].op);
24906 gcc_unreachable ();
24916 /* Subroutine of ix86_expand_builtin to take care of special insns
24917 with variable number of operands. */
/* Expander for builtins with memory semantics (loads/stores and
   maskmoves).  The switch classifies d->flag into a load or store KLASS,
   the argument count, and which operand slot is the memory operand; for
   stores the memory destination comes from argument 0 and TARGET must be
   unused.  Returns the loaded value, or 0 for stores.
   NOTE(review): listing has gaps (nargs/klass assignments, break
   statements, emit/return tail); code kept byte-identical.  */
24920 ix86_expand_special_args_builtin (const struct builtin_description *d,
24921 tree exp, rtx target)
24925 unsigned int i, nargs, arg_adjust, memory;
24929 enum machine_mode mode;
24931 enum insn_code icode = d->icode;
24932 bool last_arg_constant = false;
24933 const struct insn_data *insn_p = &insn_data[icode];
24934 enum machine_mode tmode = insn_p->operand[0].mode;
24935 enum { load, store } klass;
24937 switch ((enum ix86_special_builtin_type) d->flag)
24939 case VOID_FTYPE_VOID:
24940 emit_insn (GEN_FCN (icode) (target));
/* Load forms: single pointer argument.  */
24942 case V2DI_FTYPE_PV2DI:
24943 case V32QI_FTYPE_PCCHAR:
24944 case V16QI_FTYPE_PCCHAR:
24945 case V8SF_FTYPE_PCV4SF:
24946 case V8SF_FTYPE_PCFLOAT:
24947 case V4SF_FTYPE_PCFLOAT:
24948 case V4DF_FTYPE_PCV2DF:
24949 case V4DF_FTYPE_PCDOUBLE:
24950 case V2DF_FTYPE_PCDOUBLE:
/* Store forms: pointer destination plus a value.  */
24955 case VOID_FTYPE_PV2SF_V4SF:
24956 case VOID_FTYPE_PV4DI_V4DI:
24957 case VOID_FTYPE_PV2DI_V2DI:
24958 case VOID_FTYPE_PCHAR_V32QI:
24959 case VOID_FTYPE_PCHAR_V16QI:
24960 case VOID_FTYPE_PFLOAT_V8SF:
24961 case VOID_FTYPE_PFLOAT_V4SF:
24962 case VOID_FTYPE_PDOUBLE_V4DF:
24963 case VOID_FTYPE_PDOUBLE_V2DF:
24964 case VOID_FTYPE_PDI_DI:
24965 case VOID_FTYPE_PINT_INT:
24968 /* Reserve memory operand for target. */
24969 memory = ARRAY_SIZE (args);
/* Partial loads merging with an existing vector value.  */
24971 case V4SF_FTYPE_V4SF_PCV2SF:
24972 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Masked-load forms (mask pointer first).  */
24977 case V8SF_FTYPE_PCV8SF_V8SF:
24978 case V4DF_FTYPE_PCV4DF_V4DF:
24979 case V4SF_FTYPE_PCV4SF_V4SF:
24980 case V2DF_FTYPE_PCV2DF_V2DF:
/* Masked-store forms.  */
24985 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24986 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24987 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24988 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24991 /* Reserve memory operand for target. */
24992 memory = ARRAY_SIZE (args);
24995 gcc_unreachable ();
24998 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores the target IS the memory destination built from arg 0.  */
25000 if (klass == store)
25002 arg = CALL_EXPR_ARG (exp, 0);
25003 op = expand_normal (arg);
25004 gcc_assert (target == 0);
25005 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25013 || GET_MODE (target) != tmode
25014 || ! (*insn_p->operand[0].predicate) (target, tmode))
25015 target = gen_reg_rtx (tmode);
25018 for (i = 0; i < nargs; i++)
25020 enum machine_mode mode = insn_p->operand[i + 1].mode;
25023 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25024 op = expand_normal (arg);
25025 match = (*insn_p->operand[i + 1].predicate) (op, mode);
25027 if (last_arg_constant && (i + 1) == nargs)
25033 error ("the last argument must be an 8-bit immediate");
25041 /* This must be the memory operand. */
25042 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25043 gcc_assert (GET_MODE (op) == mode
25044 || GET_MODE (op) == VOIDmode);
25048 /* This must be register. */
25049 if (VECTOR_MODE_P (mode))
25050 op = safe_vector_operand (op, mode);
25052 gcc_assert (GET_MODE (op) == mode
25053 || GET_MODE (op) == VOIDmode);
25054 op = copy_to_mode_reg (mode, op);
25059 args[i].mode = mode;
25065 pat = GEN_FCN (icode) (target, args[0].op);
25068 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25071 gcc_unreachable ();
/* Stores produce no value.  */
25077 return klass == store ? 0 : target;
25080 /* Return the integer constant in ARG. Constrain it to be in the range
25081 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): lines elided in this excerpt -- the success-path return of
   ELT (and the error-path return) are not visible; verify in full file.  */
25084 get_element_number (tree vec_type, tree arg)
25086 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject ARG unless it is a host-representable unsigned integer constant
   no larger than the highest valid lane index.  */
25088 if (!host_integerp (arg, 1)
25089 || (elt = tree_low_cst (arg, 1), elt > max))
25091 error ("selector must be an integer constant in the range 0..%wi", max);
25098 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25099 ix86_expand_vector_init. We DO have language-level syntax for this, in
25100 the form of (type){ init-list }. Except that since we can't place emms
25101 instructions from inside the compiler, we can't allow the use of MMX
25102 registers unless the user explicitly asks for it. So we do *not* define
25103 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25104 we have builtins invoked by mmintrin.h that gives us license to emit
25105 these sorts of instructions. */
/* NOTE(review): excerpt is elided (e.g. the final return of TARGET is not
   visible); confirm against the full file.  */
25108 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25110 enum machine_mode tmode = TYPE_MODE (type);
25111 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25112 int i, n_elt = GET_MODE_NUNITS (tmode);
25113 rtvec v = rtvec_alloc (n_elt);
25115 gcc_assert (VECTOR_MODE_P (tmode));
25116 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each call argument into the vector's element mode.  */
25118 for (i = 0; i < n_elt; ++i)
25120 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25121 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
/* Make sure the result lands in a register of the vector mode.  */
25124 if (!target || !register_operand (target, tmode))
25125 target = gen_reg_rtx (tmode);
25127 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25131 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25132 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25133 had a language-level syntax for referencing vector elements. */
/* NOTE(review): excerpt is elided (declarations of arg0/arg1/op0/elt and
   the final return are not visible); confirm against the full file.  */
25136 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25138 enum machine_mode tmode, mode0;
25143 arg0 = CALL_EXPR_ARG (exp, 0);
25144 arg1 = CALL_EXPR_ARG (exp, 1);
25146 op0 = expand_normal (arg0);
/* ARG1 must be a compile-time constant lane selector; get_element_number
   diagnoses out-of-range values.  */
25147 elt = get_element_number (TREE_TYPE (arg0), arg1);
25149 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25150 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25151 gcc_assert (VECTOR_MODE_P (mode0));
25153 op0 = force_reg (mode0, op0);
25155 if (optimize || !target || !register_operand (target, tmode))
25156 target = gen_reg_rtx (tmode);
25158 ix86_expand_vector_extract (true, target, op0, elt);
25163 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25164 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25165 a language-level syntax for referencing vector elements. */
/* NOTE(review): excerpt is elided (e.g. the declaration of ELT and the
   final `return target;' are not visible); confirm against full file.  */
25168 ix86_expand_vec_set_builtin (tree exp)
25170 enum machine_mode tmode, mode1;
25171 tree arg0, arg1, arg2;
25173 rtx op0, op1, target;
25175 arg0 = CALL_EXPR_ARG (exp, 0);
25176 arg1 = CALL_EXPR_ARG (exp, 1);
25177 arg2 = CALL_EXPR_ARG (exp, 2);
25179 tmode = TYPE_MODE (TREE_TYPE (arg0));
25180 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25181 gcc_assert (VECTOR_MODE_P (tmode));
25183 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25184 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25185 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Coerce the scalar into the element mode if expansion gave another mode.  */
25187 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25188 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25190 op0 = force_reg (tmode, op0);
25191 op1 = force_reg (mode1, op1);
25193 /* OP0 is the source of these builtin functions and shouldn't be
25194 modified. Create a copy, use it and return it as target. */
25195 target = gen_reg_rtx (tmode);
25196 emit_move_insn (target, op0);
25197 ix86_expand_vector_set (true, target, op1, elt);
25202 /* Expand an expression EXP that calls a built-in function,
25203 with result going to TARGET if that's convenient
25204 (and in mode MODE if that's convenient).
25205 SUBTARGET may be used as the target for computing one of EXP's operands.
25206 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): large portions of this function are elided in this excerpt
   (the `switch (fcode)' head, many `break'/`return' statements, several
   case bodies).  Verify any edit against the full source file.  */
25209 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25210 enum machine_mode mode ATTRIBUTE_UNUSED,
25211 int ignore ATTRIBUTE_UNUSED)
25213 const struct builtin_description *d;
25215 enum insn_code icode;
25216 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25217 tree arg0, arg1, arg2;
25218 rtx op0, op1, op2, pat;
25219 enum machine_mode mode0, mode1, mode2;
25220 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25222 /* Determine whether the builtin function is available under the current ISA.
25223 Originally the builtin was not created if it wasn't applicable to the
25224 current ISA based on the command line switches. With function specific
25225 options, we need to check in the context of the function making the call
25226 whether it is supported. */
25227 if (ix86_builtins_isa[fcode].isa
25228 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25230 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25231 NULL, NULL, false);
25234 error ("%qE needs unknown isa option", fndecl);
25237 gcc_assert (opts != NULL);
25238 error ("%qE needs isa option %s", fndecl, opts);
/* Builtins handled specially, by fcode.  */
25246 case IX86_BUILTIN_MASKMOVQ:
25247 case IX86_BUILTIN_MASKMOVDQU:
25248 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25249 ? CODE_FOR_mmx_maskmovq
25250 : CODE_FOR_sse2_maskmovdqu);
25251 /* Note the arg order is different from the operand order. */
25252 arg1 = CALL_EXPR_ARG (exp, 0);
25253 arg2 = CALL_EXPR_ARG (exp, 1);
25254 arg0 = CALL_EXPR_ARG (exp, 2);
25255 op0 = expand_normal (arg0);
25256 op1 = expand_normal (arg1);
25257 op2 = expand_normal (arg2);
25258 mode0 = insn_data[icode].operand[0].mode;
25259 mode1 = insn_data[icode].operand[1].mode;
25260 mode2 = insn_data[icode].operand[2].mode;
/* Operand 0 is the destination address, dereferenced as a memory ref.  */
25262 op0 = force_reg (Pmode, op0);
25263 op0 = gen_rtx_MEM (mode1, op0);
25265 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25266 op0 = copy_to_mode_reg (mode0, op0);
25267 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25268 op1 = copy_to_mode_reg (mode1, op1);
25269 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25270 op2 = copy_to_mode_reg (mode2, op2);
25271 pat = GEN_FCN (icode) (op0, op1, op2);
25277 case IX86_BUILTIN_LDMXCSR:
/* Load MXCSR from a stack slot holding the user value.  */
25278 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25279 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25280 emit_move_insn (target, op0);
25281 emit_insn (gen_sse_ldmxcsr (target));
25284 case IX86_BUILTIN_STMXCSR:
/* Store MXCSR into a stack slot, then return it in a register.  */
25285 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25286 emit_insn (gen_sse_stmxcsr (target));
25287 return copy_to_mode_reg (SImode, target);
25289 case IX86_BUILTIN_CLFLUSH:
25290 arg0 = CALL_EXPR_ARG (exp, 0);
25291 op0 = expand_normal (arg0);
25292 icode = CODE_FOR_sse2_clflush;
25293 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25294 op0 = copy_to_mode_reg (Pmode, op0);
25296 emit_insn (gen_sse2_clflush (op0));
25299 case IX86_BUILTIN_MONITOR:
25300 arg0 = CALL_EXPR_ARG (exp, 0);
25301 arg1 = CALL_EXPR_ARG (exp, 1);
25302 arg2 = CALL_EXPR_ARG (exp, 2);
25303 op0 = expand_normal (arg0);
25304 op1 = expand_normal (arg1);
25305 op2 = expand_normal (arg2);
25307 op0 = copy_to_mode_reg (Pmode, op0);
25309 op1 = copy_to_mode_reg (SImode, op1);
25311 op2 = copy_to_mode_reg (SImode, op2);
/* ix86_gen_monitor is a function pointer; presumably selects the SI/DI
   variant by target -- confirm in full file.  */
25312 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25315 case IX86_BUILTIN_MWAIT:
25316 arg0 = CALL_EXPR_ARG (exp, 0);
25317 arg1 = CALL_EXPR_ARG (exp, 1);
25318 op0 = expand_normal (arg0);
25319 op1 = expand_normal (arg1);
25321 op0 = copy_to_mode_reg (SImode, op0);
25323 op1 = copy_to_mode_reg (SImode, op1);
25324 emit_insn (gen_sse3_mwait (op0, op1));
25327 case IX86_BUILTIN_VEC_INIT_V2SI:
25328 case IX86_BUILTIN_VEC_INIT_V4HI:
25329 case IX86_BUILTIN_VEC_INIT_V8QI:
25330 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25332 case IX86_BUILTIN_VEC_EXT_V2DF:
25333 case IX86_BUILTIN_VEC_EXT_V2DI:
25334 case IX86_BUILTIN_VEC_EXT_V4SF:
25335 case IX86_BUILTIN_VEC_EXT_V4SI:
25336 case IX86_BUILTIN_VEC_EXT_V8HI:
25337 case IX86_BUILTIN_VEC_EXT_V2SI:
25338 case IX86_BUILTIN_VEC_EXT_V4HI:
25339 case IX86_BUILTIN_VEC_EXT_V16QI:
25340 return ix86_expand_vec_ext_builtin (exp, target);
25342 case IX86_BUILTIN_VEC_SET_V2DI:
25343 case IX86_BUILTIN_VEC_SET_V4SF:
25344 case IX86_BUILTIN_VEC_SET_V4SI:
25345 case IX86_BUILTIN_VEC_SET_V8HI:
25346 case IX86_BUILTIN_VEC_SET_V4HI:
25347 case IX86_BUILTIN_VEC_SET_V16QI:
25348 return ix86_expand_vec_set_builtin (exp);
25350 case IX86_BUILTIN_INFQ:
25351 case IX86_BUILTIN_HUGE_VALQ:
/* Materialize a __float128 infinity via the constant pool.  */
25353 REAL_VALUE_TYPE inf;
25357 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25359 tmp = validize_mem (force_const_mem (mode, tmp));
25362 target = gen_reg_rtx (mode);
25364 emit_move_insn (target, tmp);
/* Not handled above: search the builtin description tables.  */
25372 for (i = 0, d = bdesc_special_args;
25373 i < ARRAY_SIZE (bdesc_special_args);
25375 if (d->code == fcode)
25376 return ix86_expand_special_args_builtin (d, exp, target);
25378 for (i = 0, d = bdesc_args;
25379 i < ARRAY_SIZE (bdesc_args);
25381 if (d->code == fcode)
25384 case IX86_BUILTIN_FABSQ:
25385 case IX86_BUILTIN_COPYSIGNQ:
25387 /* Emit a normal call if SSE2 isn't available. */
25388 return expand_call (exp, target, ignore);
25390 return ix86_expand_args_builtin (d, exp, target);
25393 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25394 if (d->code == fcode)
25395 return ix86_expand_sse_comi (d, exp, target);
25397 for (i = 0, d = bdesc_pcmpestr;
25398 i < ARRAY_SIZE (bdesc_pcmpestr);
25400 if (d->code == fcode)
25401 return ix86_expand_sse_pcmpestr (d, exp, target);
25403 for (i = 0, d = bdesc_pcmpistr;
25404 i < ARRAY_SIZE (bdesc_pcmpistr);
25406 if (d->code == fcode)
25407 return ix86_expand_sse_pcmpistr (d, exp, target);
25409 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25410 if (d->code == fcode)
25411 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25412 (enum multi_arg_type)d->flag,
/* Every builtin must be handled by one of the paths above.  */
25415 gcc_unreachable ();
25418 /* Returns a function decl for a vectorized version of the builtin function
25419 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25420 if it is not available. */
/* NOTE(review): excerpt is elided (return type line, `break's, NULL_TREE
   returns not visible); confirm against the full file.  */
25423 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25426 enum machine_mode in_mode, out_mode;
/* Only vector-to-vector mappings are supported.  */
25429 if (TREE_CODE (type_out) != VECTOR_TYPE
25430 || TREE_CODE (type_in) != VECTOR_TYPE)
25433 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25434 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25435 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25436 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Match the scalar builtin against an SSE vector equivalent with the
   exact element mode and lane count.  */
25440 case BUILT_IN_SQRT:
25441 if (out_mode == DFmode && out_n == 2
25442 && in_mode == DFmode && in_n == 2)
25443 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25446 case BUILT_IN_SQRTF:
25447 if (out_mode == SFmode && out_n == 4
25448 && in_mode == SFmode && in_n == 4)
25449 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25452 case BUILT_IN_LRINT:
25453 if (out_mode == SImode && out_n == 4
25454 && in_mode == DFmode && in_n == 2)
25455 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25458 case BUILT_IN_LRINTF:
25459 if (out_mode == SImode && out_n == 4
25460 && in_mode == SFmode && in_n == 4)
25461 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25468 /* Dispatch to a handler for a vectorization library. */
25469 if (ix86_veclib_handler)
25470 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
25476 /* Handler for an SVML-style interface to
25477 a library with vectorized intrinsics. */
/* NOTE(review): excerpt is elided (the `name' buffer declaration, several
   returns and the uppercase-conversion loop body are missing).  */
25480 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25483 tree fntype, new_fndecl, args;
25486 enum machine_mode el_mode, in_mode;
25489 /* The SVML is suitable for unsafe math only. */
25490 if (!flag_unsafe_math_optimizations)
25493 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25494 n = TYPE_VECTOR_SUBPARTS (type_out);
25495 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25496 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Element mode and lane count must agree between input and output.  */
25497 if (el_mode != in_mode
/* Double-precision functions: require a 2-lane DFmode vector.  */
25505 case BUILT_IN_LOG10:
25507 case BUILT_IN_TANH:
25509 case BUILT_IN_ATAN:
25510 case BUILT_IN_ATAN2:
25511 case BUILT_IN_ATANH:
25512 case BUILT_IN_CBRT:
25513 case BUILT_IN_SINH:
25515 case BUILT_IN_ASINH:
25516 case BUILT_IN_ASIN:
25517 case BUILT_IN_COSH:
25519 case BUILT_IN_ACOSH:
25520 case BUILT_IN_ACOS:
25521 if (el_mode != DFmode || n != 2)
/* Single-precision functions: require a 4-lane SFmode vector.  */
25525 case BUILT_IN_EXPF:
25526 case BUILT_IN_LOGF:
25527 case BUILT_IN_LOG10F:
25528 case BUILT_IN_POWF:
25529 case BUILT_IN_TANHF:
25530 case BUILT_IN_TANF:
25531 case BUILT_IN_ATANF:
25532 case BUILT_IN_ATAN2F:
25533 case BUILT_IN_ATANHF:
25534 case BUILT_IN_CBRTF:
25535 case BUILT_IN_SINHF:
25536 case BUILT_IN_SINF:
25537 case BUILT_IN_ASINHF:
25538 case BUILT_IN_ASINF:
25539 case BUILT_IN_COSHF:
25540 case BUILT_IN_COSF:
25541 case BUILT_IN_ACOSHF:
25542 case BUILT_IN_ACOSF:
25543 if (el_mode != SFmode || n != 4)
/* Derive the SVML entry-point name from the scalar builtin's name
   (bname+10 skips the "__builtin_" prefix).  */
25551 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25553 if (fn == BUILT_IN_LOGF)
25554 strcpy (name, "vmlsLn4");
25555 else if (fn == BUILT_IN_LOG)
25556 strcpy (name, "vmldLn2");
25559 sprintf (name, "vmls%s", bname+10);
25560 name[strlen (name)-1] = '4';
25563 sprintf (name, "vmld%s2", bname+10);
25565 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a 1- or 2-ary fntype.  */
25569 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25570 args = TREE_CHAIN (args))
25574 fntype = build_function_type_list (type_out, type_in, NULL);
25576 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25578 /* Build a function declaration for the vectorized function. */
25579 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25580 TREE_PUBLIC (new_fndecl) = 1;
25581 DECL_EXTERNAL (new_fndecl) = 1;
25582 DECL_IS_NOVOPS (new_fndecl) = 1;
25583 TREE_READONLY (new_fndecl) = 1;
25588 /* Handler for an ACML-style interface to
25589 a library with vectorized intrinsics. */
/* NOTE(review): excerpt is elided (returns, `break's, and part of the
   mode/lane checks are missing); confirm against the full file.  */
25592 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25594 char name[20] = "__vr.._";
25595 tree fntype, new_fndecl, args;
25598 enum machine_mode el_mode, in_mode;
25601 /* The ACML is 64bits only and suitable for unsafe math only as
25602 it does not correctly support parts of IEEE with the required
25603 precision such as denormals. */
25605 || !flag_unsafe_math_optimizations)
25608 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25609 n = TYPE_VECTOR_SUBPARTS (type_out);
25610 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25611 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25612 if (el_mode != in_mode
/* Double-precision subset supported by ACML.  */
25622 case BUILT_IN_LOG2:
25623 case BUILT_IN_LOG10:
25626 if (el_mode != DFmode
/* Single-precision subset supported by ACML.  */
25631 case BUILT_IN_SINF:
25632 case BUILT_IN_COSF:
25633 case BUILT_IN_EXPF:
25634 case BUILT_IN_POWF:
25635 case BUILT_IN_LOGF:
25636 case BUILT_IN_LOG2F:
25637 case BUILT_IN_LOG10F:
25640 if (el_mode != SFmode
/* Splice the scalar name (minus "__builtin_") into the "__vr.._" stem.  */
25649 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25650 sprintf (name + 7, "%s", bname+10);
25653 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25654 args = TREE_CHAIN (args))
25658 fntype = build_function_type_list (type_out, type_in, NULL);
25660 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25662 /* Build a function declaration for the vectorized function. */
25663 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25664 TREE_PUBLIC (new_fndecl) = 1;
25665 DECL_EXTERNAL (new_fndecl) = 1;
25666 DECL_IS_NOVOPS (new_fndecl) = 1;
25667 TREE_READONLY (new_fndecl) = 1;
25673 /* Returns a decl of a function that implements conversion of an integer vector
25674 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25675 side of the conversion.
25676 Return NULL_TREE if it is not available. */
/* NOTE(review): excerpt is elided (outer switch on CODE, default returns,
   and mode case labels are missing); confirm against the full file.  */
25679 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25681 if (TREE_CODE (type) != VECTOR_TYPE
25682 /* There are only conversions from/to signed integers. */
25683 || TYPE_UNSIGNED (TREE_TYPE (type)))
/* int -> float direction (presumably FLOAT_EXPR -- elided).  */
25689 switch (TYPE_MODE (type))
25692 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int with truncation.  */
25697 case FIX_TRUNC_EXPR:
25698 switch (TYPE_MODE (type))
25701 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25711 /* Returns a code for a target-specific builtin that implements
25712 reciprocal of the function, or NULL_TREE if not available. */
/* NOTE(review): excerpt is elided (the md_fn branch structure and default
   NULL_TREE returns are missing); confirm against the full file.  */
25715 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25716 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations are only valid under -ffast-math-style flags
   and when not optimizing for size.  */
25718 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25719 && flag_finite_math_only && !flag_trapping_math
25720 && flag_unsafe_math_optimizations))
25724 /* Machine dependent builtins. */
25727 /* Vectorized version of sqrt to rsqrt conversion. */
25728 case IX86_BUILTIN_SQRTPS_NR:
25729 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25735 /* Normal builtins. */
25738 /* Sqrt to rsqrt conversion. */
25739 case BUILT_IN_SQRTF:
25740 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25747 /* Store OPERAND to the memory after reload is completed. This means
25748 that we can't easily use assign_stack_local. */
/* NOTE(review): excerpt is elided (the `result'/`operands' declarations,
   the mode switch heads, emit_insn wrappers and final return are missing).
   Do not modify without the full file.  */
25750 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25754 gcc_assert (reload_completed);
/* With a red zone (SysV x86-64), spill below the stack pointer without
   adjusting it.  */
25755 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25757 result = gen_rtx_MEM (mode,
25758 gen_rtx_PLUS (Pmode,
25760 GEN_INT (-RED_ZONE_SIZE)));
25761 emit_move_insn (result, operand);
/* No red zone on 64-bit: push the value with a pre-decrement store.  */
25763 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25769 operand = gen_lowpart (DImode, operand);
25773 gen_rtx_SET (VOIDmode,
25774 gen_rtx_MEM (DImode,
25775 gen_rtx_PRE_DEC (DImode,
25776 stack_pointer_rtx)),
25780 gcc_unreachable ();
25782 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit: split DImode values into two SImode pushes.  */
25791 split_di (&operand, 1, operands, operands + 1);
25793 gen_rtx_SET (VOIDmode,
25794 gen_rtx_MEM (SImode,
25795 gen_rtx_PRE_DEC (Pmode,
25796 stack_pointer_rtx)),
25799 gen_rtx_SET (VOIDmode,
25800 gen_rtx_MEM (SImode,
25801 gen_rtx_PRE_DEC (Pmode,
25802 stack_pointer_rtx)),
25807 /* Store HImodes as SImodes. */
25808 operand = gen_lowpart (SImode, operand);
25812 gen_rtx_SET (VOIDmode,
25813 gen_rtx_MEM (GET_MODE (operand),
25814 gen_rtx_PRE_DEC (SImode,
25815 stack_pointer_rtx)),
25819 gcc_unreachable ();
25821 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25826 /* Free operand from the memory. */
/* NOTE(review): excerpt is elided (the `size' variable and the GEN_INT of
   the adjustment are missing); confirm against the full file.  */
25828 ix86_free_from_memory (enum machine_mode mode)
/* Red-zone spills (see ix86_force_to_memory) need no deallocation; only
   the push-based paths adjust the stack pointer back.  */
25830 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25834 if (mode == DImode || TARGET_64BIT)
25838 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25839 to pop or add instruction if registers are available. */
25840 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25841 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25846 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25847 QImode must go into class Q_REGS.
25848 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25849 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): excerpt is elided (several `return' statements and
   condition heads are missing); confirm against the full file.  */
25851 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25853 enum machine_mode mode = GET_MODE (x);
25855 /* We're only allowed to return a subclass of CLASS. Many of the
25856 following checks fail for NO_REGS, so eliminate that early. */
25857 if (regclass == NO_REGS)
25860 /* All classes can load zeros. */
25861 if (x == CONST0_RTX (mode))
25864 /* Force constants into memory if we are loading a (nonzero) constant into
25865 an MMX or SSE register. This is because there are no MMX/SSE instructions
25866 to load from a constant. */
25868 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25871 /* Prefer SSE regs only, if we can use them for math. */
25872 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25873 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25875 /* Floating-point constants need more complex checks. */
25876 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25878 /* General regs can load everything. */
25879 if (reg_class_subset_p (regclass, GENERAL_REGS))
25882 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25883 zero above. We only want to wind up preferring 80387 registers if
25884 we plan on doing computation with them. */
25886 && standard_80387_constant_p (x))
25888 /* Limit class to non-sse. */
25889 if (regclass == FLOAT_SSE_REGS)
25891 if (regclass == FP_TOP_SSE_REGS)
25893 if (regclass == FP_SECOND_SSE_REGS)
25894 return FP_SECOND_REG;
25895 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25902 /* Generally when we see PLUS here, it's the function invariant
25903 (plus soft-fp const_int). Which can only be computed into general
25905 if (GET_CODE (x) == PLUS)
25906 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25908 /* QImode constants are easy to load, but non-constant QImode data
25909 must go into Q_REGS. */
25910 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25912 if (reg_class_subset_p (regclass, Q_REGS))
25914 if (reg_class_subset_p (Q_REGS, regclass))
25922 /* Discourage putting floating-point values in SSE registers unless
25923 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): excerpt is elided (return type line and two return
   statements are missing); confirm against the full file.  */
25925 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25927 enum machine_mode mode = GET_MODE (x);
25929 /* Restrict the output reload class to the register bank that we are doing
25930 math on. If we would like not to return a subset of CLASS, reject this
25931 alternative: if reload cannot do this, it will still use its choice. */
25932 mode = GET_MODE (x);
25933 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25934 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* For x87 math, strip any SSE component from mixed classes.  */
25936 if (X87_FLOAT_MODE_P (mode))
25938 if (regclass == FP_TOP_SSE_REGS)
25940 else if (regclass == FP_SECOND_SSE_REGS)
25941 return FP_SECOND_REG;
25943 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* NOTE(review): excerpt is heavily elided (the `regno' declaration, the
   body of the QImode branch, and the final return are missing).  */
25949 static enum reg_class
25950 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25951 enum machine_mode mode,
25952 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25954 /* QImode spills from non-QI registers require
25955 intermediate register on 32bit targets. */
25956 if (!in_p && mode == QImode && !TARGET_64BIT
25957 && (rclass == GENERAL_REGS
25958 || rclass == LEGACY_REGS
25959 || rclass == INDEX_REGS))
/* Resolve hard register number through subregs/pseudos.  */
25968 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25969 regno = true_regnum (x);
25971 /* Return Q_REGS if the operand is in memory. */
25979 /* If we are copying between general and FP registers, we need a memory
25980 location. The same is true for SSE and MMX registers.
25982 To optimize register_move_cost performance, allow inline variant.
25984 The macro can't work reliably when one of the CLASSES is class containing
25985 registers from multiple units (SSE, MMX, integer). We avoid this by never
25986 combining those units in single alternative in the machine description.
25987 Ensure that this constraint holds to avoid unexpected surprises.
25989 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25990 enforce these sanity checks. */
/* NOTE(review): excerpt is elided (`static inline bool' head, `return'
   keywords on several lines are missing); confirm against the full file.  */
25993 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25994 enum machine_mode mode, int strict)
/* Reject (non-strict) or assert (strict) on mixed-unit classes.  */
25996 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25997 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25998 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25999 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26000 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26001 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26003 gcc_assert (!strict);
26007 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26010 /* ??? This is a lie. We do have moves between mmx/general, and for
26011 mmx/sse2. But by saying we need secondary memory we discourage the
26012 register allocator from using the mmx registers unless needed. */
26013 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26016 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26018 /* SSE1 doesn't have any direct moves from other classes. */
26022 /* If the target says that inter-unit moves are more expensive
26023 than moving through memory, then don't generate them. */
26024 if (!TARGET_INTER_UNIT_MOVES)
26027 /* Between SSE and general, we have moves no larger than word size. */
26028 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed; presumably
   the target's SECONDARY_MEMORY_NEEDED entry point (head/braces elided in
   this excerpt -- confirm in full file).  */
26036 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26037 enum machine_mode mode, int strict)
26039 return inline_secondary_memory_needed (class1, class2, mode, strict);
26042 /* Return true if the registers in CLASS cannot represent the change from
26043 modes FROM to TO. */
/* NOTE(review): excerpt is elided (return type line, early-exit when the
   mode sizes match, and `return true/false' lines are missing).  */
26046 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26047 enum reg_class regclass)
26052 /* x87 registers can't do subreg at all, as all values are reformatted
26053 to extended precision. */
26054 if (MAYBE_FLOAT_CLASS_P (regclass))
26057 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26059 /* Vector registers do not support QI or HImode loads. If we don't
26060 disallow a change to these modes, reload will assume it's ok to
26061 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26062 the vec_dupv4hi pattern. */
26063 if (GET_MODE_SIZE (from) < 4)
26066 /* Vector registers do not support subreg with nonzero offsets, which
26067 are otherwise valid for integer registers. Since we can't see
26068 whether we have a nonzero offset from here, prohibit all
26069 nonparadoxical subregs changing size. */
26070 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26077 /* Return the cost of moving data of mode M between a
26078 register and memory. A value of 2 is the default; this cost is
26079 relative to those in `REGISTER_MOVE_COST'.
26081 This function is used extensively by register_move_cost that is used to
26082 build tables at startup. Make it inline in this case.
26083 When IN is 2, return maximum of in and out move cost.
26085 If moving between registers and memory is more expensive than
26086 between two registers, you should define this macro to express the
26089 Model also increased moving costs of QImode registers in non
26093 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* NOTE(review): excerpt is elided -- the `in' parameter, the `index'
   computations, size-switch case labels and several returns are missing.
   Costs below index the ix86_cost tables by a size-derived index.  */
/* x87 float classes: use the fp load/store cost tables.  */
26097 if (FLOAT_CLASS_P (regclass))
26115 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26116 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: sse load/store tables, indexed by operand size.  */
26118 if (SSE_CLASS_P (regclass))
26121 switch (GET_MODE_SIZE (mode))
26136 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26137 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX classes: mmx load/store tables.  */
26139 if (MMX_CLASS_P (regclass))
26142 switch (GET_MODE_SIZE (mode))
26154 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26155 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: byte moves are penalized on non-Q registers.  */
26157 switch (GET_MODE_SIZE (mode))
26160 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26163 return ix86_cost->int_store[0];
26164 if (TARGET_PARTIAL_REG_DEPENDENCY
26165 && optimize_function_for_speed_p (cfun))
26166 cost = ix86_cost->movzbl_load;
26168 cost = ix86_cost->int_load[0];
26170 return MAX (cost, ix86_cost->int_store[0]);
/* Non-Q byte access: account for the extra movzbl / partial store.  */
26176 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26178 return ix86_cost->movzbl_load;
26180 return ix86_cost->int_store[0] + 4;
26185 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26186 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26188 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26189 if (mode == TFmode)
26192 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26194 cost = ix86_cost->int_load[2];
26196 cost = ix86_cost->int_store[2];
/* Scale the word cost by the number of words the mode occupies.  */
26197 return (cost * (((int) GET_MODE_SIZE (mode)
26198 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper around inline_memory_move_cost (head/braces elided
   in this excerpt -- confirm in full file).  */
26203 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26205 return inline_memory_move_cost (mode, regclass, in);
26209 /* Return the cost of moving data from a register in class CLASS1 to
26210 one in class CLASS2.
26212 It is not required that the cost always equal 2 when FROM is the same as TO;
26213 on some machines it is expensive to move between registers if they are not
26214 general registers. */
/* NOTE(review): excerpt is elided (the `cost' declaration and some returns
   are missing); confirm against the full file.  */
26217 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26218 enum reg_class class2)
26220 /* In case we require secondary memory, compute cost of the store followed
26221 by load. In order to avoid bad register allocation choices, we need
26222 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26224 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN == 2 asks inline_memory_move_cost for max(load, store).  */
26228 cost += inline_memory_move_cost (mode, class1, 2);
26229 cost += inline_memory_move_cost (mode, class2, 2);
26231 /* In case of copying from general_purpose_register we may emit multiple
26232 stores followed by single load causing memory size mismatch stall.
26233 Count this as arbitrarily high cost of 20. */
26234 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26237 /* In the case of FP/MMX moves, the registers actually overlap, and we
26238 have to switch modes in order to treat them differently. */
26239 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26240 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26246 /* Moves between SSE/MMX and integer unit are expensive. */
26247 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26248 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26250 /* ??? By keeping returned value relatively high, we limit the number
26251 of moves between integer and MMX/SSE registers for all targets.
26252 Additionally, high value prevents problem with x86_modes_tieable_p(),
26253 where integer modes in MMX/SSE registers are not tieable
26254 because of missing QImode and HImode moves to, from or between
26255 MMX/SSE registers. */
26256 return MAX (8, ix86_cost->mmxsse_to_integer);
26258 if (MAYBE_FLOAT_CLASS_P (class1))
26259 return ix86_cost->fp_move;
26260 if (MAYBE_SSE_CLASS_P (class1))
26261 return ix86_cost->sse_move;
26262 if (MAYBE_MMX_CLASS_P (class1))
26263 return ix86_cost->mmx_move;
26267 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): excerpt is elided (several `return 0/1' lines are
   missing); confirm against the full file.  */
26270 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26272 /* Flags and only flags can only hold CCmode values. */
26273 if (CC_REGNO_P (regno))
26274 return GET_MODE_CLASS (mode) == MODE_CC;
26275 if (GET_MODE_CLASS (mode) == MODE_CC
26276 || GET_MODE_CLASS (mode) == MODE_RANDOM
26277 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26279 if (FP_REGNO_P (regno))
26280 return VALID_FP_MODE_P (mode);
26281 if (SSE_REGNO_P (regno))
26283 /* We implement the move patterns for all vector modes into and
26284 out of SSE registers, even when no operation instructions
26285 are available. OImode move is available only when AVX is
26287 return ((TARGET_AVX && mode == OImode)
26288 || VALID_AVX256_REG_MODE (mode)
26289 || VALID_SSE_REG_MODE (mode)
26290 || VALID_SSE2_REG_MODE (mode)
26291 || VALID_MMX_REG_MODE (mode)
26292 || VALID_MMX_REG_MODE_3DNOW (mode));
26294 if (MMX_REGNO_P (regno))
26296 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26297 so if the register is available at all, then we can move data of
26298 the given mode into or out of it. */
26299 return (VALID_MMX_REG_MODE (mode)
26300 || VALID_MMX_REG_MODE_3DNOW (mode));
26303 if (mode == QImode)
26305 /* Take care for QImode values - they can be in non-QI regs,
26306 but then they do cause partial register stalls. */
26307 if (regno <= BX_REG || TARGET_64BIT)
26309 if (!TARGET_PARTIAL_REG_STALL)
26311 return reload_in_progress || reload_completed;
26313 /* We handle both integer and floats in the general purpose registers. */
26314 else if (VALID_INT_MODE_P (mode))
26316 else if (VALID_FP_MODE_P (mode))
26318 else if (VALID_DFP_MODE_P (mode))
26320 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26321 on to use that value in smaller contexts, this can easily force a
26322 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26323 supporting DImode, allow it. */
26324 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26330 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26331 tieable integer mode. */
/* NOTE(review): the switch statement and its case labels are missing from
   this excerpt; only two of the return expressions are visible.  */
26334 ix86_tieable_integer_mode_p (enum machine_mode mode)
/* Presumably the HI/QI-mode case: tieable unless partial-register stalls
   matter on 32-bit targets — confirm against full source.  */
26343 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
/* Presumably the DImode case: only tieable on 64-bit targets.  */
26346 return TARGET_64BIT;
26353 /* Return true if MODE1 is accessible in a register that can hold MODE2
26354 without copying. That is, all register classes that can hold MODE2
26355 can also hold MODE1. */
/* NOTE(review): lossy excerpt — return type, braces and the final
   "return false" are not visible; code kept byte-identical.  */
26358 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26360 if (mode1 == mode2)
26363 if (ix86_tieable_integer_mode_p (mode1)
26364 && ix86_tieable_integer_mode_p (mode2))
26367 /* MODE2 being XFmode implies fp stack or general regs, which means we
26368 can tie any smaller floating point modes to it. Note that we do not
26369 tie this with TFmode. */
26370 if (mode2 == XFmode)
26371 return mode1 == SFmode || mode1 == DFmode;
26373 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26374 that we can tie it with SFmode. */
26375 if (mode2 == DFmode)
26376 return mode1 == SFmode;
26378 /* If MODE2 is only appropriate for an SSE register, then tie with
26379 any other mode acceptable to SSE registers. */
26380 if (GET_MODE_SIZE (mode2) == 16
26381 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26382 return (GET_MODE_SIZE (mode1) == 16
26383 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26385 /* If MODE2 is appropriate for an MMX register, then tie
26386 with any other mode acceptable to MMX registers. */
26387 if (GET_MODE_SIZE (mode2) == 8
26388 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26389 return (GET_MODE_SIZE (mode1) == 8
26390 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26395 /* Compute a (partial) cost for rtx X. Return true if the complete
26396 cost has been computed, and false if subexpressions should be
26397 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): this is a large switch over GET_CODE (x), but the excerpt
   has dropped the "switch" line and every "case" label, so the grouping
   comments below are inferred from the cost fields used (fmul, fdiv, lea,
   shift_*, fadd, fchs, fabs, fsqrt ...) — confirm against full source.
   Code is left byte-identical.  */
26400 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26402 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26403 enum machine_mode mode = GET_MODE (x);
/* Size tuning uses the byte-based cost table; speed tuning the cycle one.  */
26404 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* Constant operands: 64-bit immediates and PIC symbolic constants are
   more expensive to materialize.  */
26412 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26414 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26416 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): "!GET_CODE (x) != LABEL_REF" double-negates; in upstream
   GCC this condition reads "GET_CODE (x) != LABEL_REF" — the line here is
   left untouched because surrounding lines are missing, but it is worth
   checking against the full source.  */
26418 || (!GET_CODE (x) != LABEL_REF
26419 && (GET_CODE (x) != SYMBOL_REF
26420 || !SYMBOL_REF_LOCAL_P (x)))))
/* FP constants: cheap if loadable by fldz/fld1 etc., else a memory load.  */
26427 if (mode == VOIDmode)
26430 switch (standard_80387_constant_p (x))
26435 default: /* Other constants */
26440 /* Start with (MEM (SYMBOL_REF)), since that's where
26441 it'll probably end up. Add a penalty for size. */
26442 *total = (COSTS_N_INSNS (1)
26443 + (flag_pic != 0 && !TARGET_64BIT)
26444 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
/* Zero/sign extension costs.  */
26450 /* The zero extensions is often completely free on x86_64, so make
26451 it as cheap as possible. */
26452 if (TARGET_64BIT && mode == DImode
26453 && GET_MODE (XEXP (x, 0)) == SImode)
26455 else if (TARGET_ZERO_EXTEND_WITH_AND)
26456 *total = cost->add;
26458 *total = cost->movzx;
26462 *total = cost->movsx;
/* Shift costs; small constant left shifts may become an LEA.  */
26466 if (CONST_INT_P (XEXP (x, 1))
26467 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26469 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26472 *total = cost->add;
26475 if ((value == 2 || value == 3)
26476 && cost->lea <= cost->shift_const)
26478 *total = cost->lea;
/* DImode shifts on 32-bit targets need a multi-insn sequence.  */
26488 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26490 if (CONST_INT_P (XEXP (x, 1)))
26492 if (INTVAL (XEXP (x, 1)) > 32)
26493 *total = cost->shift_const + COSTS_N_INSNS (2);
26495 *total = cost->shift_const * 2;
26499 if (GET_CODE (XEXP (x, 1)) == AND)
26500 *total = cost->shift_var * 2;
26502 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26507 if (CONST_INT_P (XEXP (x, 1)))
26508 *total = cost->shift_const;
26510 *total = cost->shift_var;
/* Multiply (presumably the MULT case): FP uses fmul; integer cost is
   modeled per set bit of a constant multiplier plus widening handling.  */
26515 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26517 /* ??? SSE scalar cost should be used here. */
26518 *total = cost->fmul;
26521 else if (X87_FLOAT_MODE_P (mode))
26523 *total = cost->fmul;
26526 else if (FLOAT_MODE_P (mode))
26528 /* ??? SSE vector cost should be used here. */
26529 *total = cost->fmul;
26534 rtx op0 = XEXP (x, 0);
26535 rtx op1 = XEXP (x, 1);
26537 if (CONST_INT_P (XEXP (x, 1)))
/* Count set bits of the constant multiplier (Kernighan's trick).  */
26539 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26540 for (nbits = 0; value != 0; value &= value - 1)
26544 /* This is arbitrary. */
26547 /* Compute costs correctly for widening multiplication. */
26548 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26549 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26550 == GET_MODE_SIZE (mode))
26552 int is_mulwiden = 0;
26553 enum machine_mode inner_mode = GET_MODE (op0);
26555 if (GET_CODE (op0) == GET_CODE (op1))
26556 is_mulwiden = 1, op1 = XEXP (op1, 0);
26557 else if (CONST_INT_P (op1))
26559 if (GET_CODE (op0) == SIGN_EXTEND)
26560 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26563 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26567 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26570 *total = (cost->mult_init[MODE_INDEX (mode)]
26571 + nbits * cost->mult_bit
26572 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
/* Division (presumably DIV/UDIV/MOD/UMOD).  */
26581 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26582 /* ??? SSE cost should be used here. */
26583 *total = cost->fdiv;
26584 else if (X87_FLOAT_MODE_P (mode))
26585 *total = cost->fdiv;
26586 else if (FLOAT_MODE_P (mode))
26587 /* ??? SSE vector cost should be used here. */
26588 *total = cost->fdiv;
26590 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize address-like forms (base + index*scale + disp) that can
   be emitted as a single LEA.  */
26594 if (GET_MODE_CLASS (mode) == MODE_INT
26595 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26597 if (GET_CODE (XEXP (x, 0)) == PLUS
26598 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26599 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26600 && CONSTANT_P (XEXP (x, 1)))
26602 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26603 if (val == 2 || val == 4 || val == 8)
26605 *total = cost->lea;
26606 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26607 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26608 outer_code, speed);
26609 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26613 else if (GET_CODE (XEXP (x, 0)) == MULT
26614 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26616 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26617 if (val == 2 || val == 4 || val == 8)
26619 *total = cost->lea;
26620 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26621 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26625 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26627 *total = cost->lea;
26628 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26629 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26630 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
/* FP add/sub.  */
26637 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26639 /* ??? SSE cost should be used here. */
26640 *total = cost->fadd;
26643 else if (X87_FLOAT_MODE_P (mode))
26645 *total = cost->fadd;
26648 else if (FLOAT_MODE_P (mode))
26650 /* ??? SSE vector cost should be used here. */
26651 *total = cost->fadd;
/* DImode add/sub on 32-bit: two adds; shift operand costs doubled when a
   mode change is also required.  */
26659 if (!TARGET_64BIT && mode == DImode)
26661 *total = (cost->add * 2
26662 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26663 << (GET_MODE (XEXP (x, 0)) != DImode))
26664 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26665 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* Negation: FP negate uses fchs.  */
26671 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26673 /* ??? SSE cost should be used here. */
26674 *total = cost->fchs;
26677 else if (X87_FLOAT_MODE_P (mode))
26679 *total = cost->fchs;
26682 else if (FLOAT_MODE_P (mode))
26684 /* ??? SSE vector cost should be used here. */
26685 *total = cost->fchs;
/* Integer NOT/NEG-like unary: two adds for 32-bit DImode, else one.  */
26691 if (!TARGET_64BIT && mode == DImode)
26692 *total = cost->add * 2;
26694 *total = cost->add;
/* COMPARE of a single extracted bit against zero → test[bwl].  */
26698 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26699 && XEXP (XEXP (x, 0), 1) == const1_rtx
26700 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26701 && XEXP (x, 1) == const0_rtx)
26703 /* This kind of construct is implemented using test[bwl].
26704 Treat it as if we had an AND. */
26705 *total = (cost->add
26706 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26707 + rtx_cost (const1_rtx, outer_code, speed))\u003b
26713 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS and SQRT cost fields.  */
26718 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26719 /* ??? SSE cost should be used here. */
26720 *total = cost->fabs;
26721 else if (X87_FLOAT_MODE_P (mode))
26722 *total = cost->fabs;
26723 else if (FLOAT_MODE_P (mode))
26724 /* ??? SSE vector cost should be used here. */
26725 *total = cost->fabs;
26729 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26730 /* ??? SSE cost should be used here. */
26731 *total = cost->fsqrt;
26732 else if (X87_FLOAT_MODE_P (mode))
26733 *total = cost->fsqrt;
26734 else if (FLOAT_MODE_P (mode))
26735 /* ??? SSE vector cost should be used here. */
26736 *total = cost->fsqrt;
/* UNSPEC: thread-pointer reference.  */
26740 if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique local labels for Mach-O stubs.  */
26751 static int current_machopic_label_num;
26753 /* Given a symbol name and its associated stub, write out the
26754 definition of the stub. */
/* NOTE(review): Darwin (Mach-O) only; lossy excerpt — return type, braces
   and some conditional lines (e.g. the PIC test guarding the section
   choice) are missing.  Code kept byte-identical.  */
26757 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26759 unsigned int length;
26760 char *binder_name, *symbol_name, lazy_ptr_name[32];
26761 int label = ++current_machopic_label_num;
26763 /* For 64-bit we shouldn't get here. */
26764 gcc_assert (!TARGET_64BIT);
26766 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26767 symb = (*targetm.strip_name_encoding) (symb);
26769 length = strlen (stub);
26770 binder_name = XALLOCAVEC (char, length + 32);
26771 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26773 length = strlen (symb);
26774 symbol_name = XALLOCAVEC (char, length + 32);
26775 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26777 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the pic vs non-pic stub section (guard condition not visible).  */
26780 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26782 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26784 fprintf (file, "%s:\n", stub);
26785 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC stub: get own address via call/pop, then jump through the lazy
   pointer.  */
26789 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26790 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26791 fprintf (file, "\tjmp\t*%%edx\n");
26794 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Binder: push the lazy-pointer address and enter dyld.  */
26796 fprintf (file, "%s:\n", binder_name);
26800 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26801 fprintf (file, "\tpushl\t%%eax\n");
26804 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26806 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer initially points at the binder.  */
26808 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26809 fprintf (file, "%s:\n", lazy_ptr_name);
26810 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26811 fprintf (file, "\t.long %s\n", binder_name);
/* Target hook: finish assembly output on Darwin (delegates to the common
   Darwin end-of-file handling; additional lines not visible here).  */
26815 darwin_x86_file_end (void)
26817 darwin_file_end ();
26820 #endif /* TARGET_MACHO */
26822 /* Order the registers for register allocator. */
/* NOTE(review): lossy excerpt — return type, braces and the declarations
   of `i`/`pos` are not visible.  Fills reg_alloc_order[] so the allocator
   prefers call-clobbered GPRs first, then the FP/SSE units in an order
   depending on TARGET_SSE_MATH.  Code kept byte-identical.  */
26825 x86_order_regs_for_local_alloc (void)
26830 /* First allocate the local general purpose registers. */
26831 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26832 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26833 reg_alloc_order [pos++] = i;
26835 /* Global general purpose registers. */
26836 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26837 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26838 reg_alloc_order [pos++] = i;
26840 /* x87 registers come first in case we are doing FP math
26842 if (!TARGET_SSE_MATH)
26843 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26844 reg_alloc_order [pos++] = i;
26846 /* SSE registers. */
26847 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26848 reg_alloc_order [pos++] = i;
26849 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26850 reg_alloc_order [pos++] = i;
26852 /* x87 registers. */
26853 if (TARGET_SSE_MATH)
26854 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26855 reg_alloc_order [pos++] = i;
26857 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26858 reg_alloc_order [pos++] = i;
26860 /* Initialize the rest of array as we do not allocate some registers
26862 while (pos < FIRST_PSEUDO_REGISTER)
26863 reg_alloc_order [pos++] = 0;
26866 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26867 struct attribute_spec.handler. */
/* NOTE(review): lossy excerpt — return type/braces and some guard lines
   (e.g. the !TARGET_64BIT test before the "only available for 64-bit"
   warning) are missing.  Rejects the attribute on non-function types and
   diagnoses the mutually exclusive ms_abi/sysv_abi pair.  */
26869 ix86_handle_abi_attribute (tree *node, tree name,
26870 tree args ATTRIBUTE_UNUSED,
26871 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26873 if (TREE_CODE (*node) != FUNCTION_TYPE
26874 && TREE_CODE (*node) != METHOD_TYPE
26875 && TREE_CODE (*node) != FIELD_DECL
26876 && TREE_CODE (*node) != TYPE_DECL)
26878 warning (OPT_Wattributes, "%qE attribute only applies to functions",
26880 *no_add_attrs = true;
26885 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
26887 *no_add_attrs = true;
26891 /* Can combine regparm with all attributes but fastcall. */
26892 if (is_attribute_p ("ms_abi", name))
26894 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26896 error ("ms_abi and sysv_abi attributes are not compatible");
26901 else if (is_attribute_p ("sysv_abi", name))
26903 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26905 error ("ms_abi and sysv_abi attributes are not compatible");
26914 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26915 struct attribute_spec.handler. */
/* NOTE(review): lossy excerpt — return type/braces and the DECL fallback
   branch are missing.  Accepts the attribute only on record/union types
   and diagnoses the mutually exclusive ms_struct/gcc_struct pair.  */
26917 ix86_handle_struct_attribute (tree *node, tree name,
26918 tree args ATTRIBUTE_UNUSED,
26919 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26922 if (DECL_P (*node))
26924 if (TREE_CODE (*node) == TYPE_DECL)
26925 type = &TREE_TYPE (*node);
26930 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26931 || TREE_CODE (*type) == UNION_TYPE)))
26933 warning (OPT_Wattributes, "%qE attribute ignored",
26935 *no_add_attrs = true;
26938 else if ((is_attribute_p ("ms_struct", name)
26939 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26940 || ((is_attribute_p ("gcc_struct", name)
26941 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26943 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
26945 *no_add_attrs = true;
/* Return whether RECORD_TYPE should use MS (rather than GCC) bitfield
   layout: either the target defaults to MS layout and the type is not
   marked "gcc_struct", or the type is explicitly marked "ms_struct".  */
26952 ix86_ms_bitfield_layout_p (const_tree record_type)
26954 return (TARGET_MS_BITFIELD_LAYOUT &&
26955 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26956 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
26959 /* Returns an expression indicating where the this parameter is
26960 located on entry to the FUNCTION. */
/* NOTE(review): lossy excerpt — the TARGET_64BIT guard, braces and some
   regparm branches are missing.  64-bit: `this` is in the first (or
   second, for aggregate-return) integer parameter register of the
   function's ABI.  32-bit: in a register for regparm/fastcall functions,
   otherwise on the stack at 4(%esp) (8 with a hidden aggregate return).  */
26963 x86_this_parameter (tree function)
26965 tree type = TREE_TYPE (function);
/* aggr: function returns in memory, so a hidden return pointer precedes
   `this`.  */
26966 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26971 const int *parm_regs;
26973 if (ix86_function_type_abi (type) == MS_ABI)
26974 parm_regs = x86_64_ms_abi_int_parameter_registers;
26976 parm_regs = x86_64_int_parameter_registers;
26977 return gen_rtx_REG (DImode, parm_regs[aggr]);
26980 nregs = ix86_function_regparm (type, function);
26982 if (nregs > 0 && !stdarg_p (type))
26986 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26987 regno = aggr ? DX_REG : CX_REG;
26995 return gen_rtx_MEM (SImode,
26996 plus_constant (stack_pointer_rtx, 4));
26999 return gen_rtx_REG (SImode, regno);
27002 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27005 /* Determine whether x86_output_mi_thunk can succeed. */
/* NOTE(review): lossy excerpt — return type, braces and the actual
   return-value lines are missing; only the guard conditions survive.  */
27008 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27009 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27010 HOST_WIDE_INT vcall_offset, const_tree function)
27012 /* 64-bit can handle anything. */
27016 /* For 32-bit, everything's fine if we have one free register. */
27017 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27020 /* Need a free register for vcall_offset. */
27024 /* Need a free register for GOT references. */
27025 if (flag_pic && !(*targetm.binds_local_p) (function))
27028 /* Otherwise ok. */
27032 /* Output the assembler code for a thunk function. THUNK_DECL is the
27033 declaration for the thunk function itself, FUNCTION is the decl for
27034 the target function. DELTA is an immediate constant offset to be
27035 added to THIS. If VCALL_OFFSET is nonzero, the word at
27036 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): lossy excerpt — many guard lines (TARGET_64BIT tests,
   braces, xops[] declarations) are missing; code kept byte-identical.
   Emits assembly directly with output_asm_insn rather than RTL.  */
27039 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
27040 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27041 HOST_WIDE_INT vcall_offset, tree function)
27044 rtx this_param = x86_this_parameter (function);
27047 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27048 pull it in now and let DELTA benefit. */
27049 if (REG_P (this_param))
27050 this_reg = this_param;
27051 else if (vcall_offset)
27053 /* Put the this parameter into %eax. */
27054 xops[0] = this_param;
27055 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27056 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27059 this_reg = NULL_RTX;
27061 /* Adjust the this parameter by a fixed constant. */
27064 xops[0] = GEN_INT (delta);
27065 xops[1] = this_reg ? this_reg : this_param;
/* DELTA too wide for an immediate on 64-bit: stage it through %r10
   (call-clobbered, not used for argument passing).  */
27068 if (!x86_64_general_operand (xops[0], DImode))
27070 tmp = gen_rtx_REG (DImode, R10_REG);
27072 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27074 xops[1] = this_param;
27076 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27079 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27082 /* Adjust the this parameter by a value stored in the vtable. */
27086 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit scratch register: %ecx, or %eax for fastcall (where %ecx
   carries an argument).  */
27089 int tmp_regno = CX_REG;
27090 if (lookup_attribute ("fastcall",
27091 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27092 tmp_regno = AX_REG;
27093 tmp = gen_rtx_REG (SImode, tmp_regno);
27096 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27098 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27100 /* Adjust the this parameter. */
27101 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27102 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27104 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27105 xops[0] = GEN_INT (vcall_offset);
27107 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27108 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27110 xops[1] = this_reg;
27111 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27114 /* If necessary, drop THIS back to its stack slot. */
27115 if (this_reg && this_reg != this_param)
27117 xops[0] = this_reg;
27118 xops[1] = this_param;
27119 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
/* Tail-jump to the real function; PIC and Darwin need indirection.  */
27122 xops[0] = XEXP (DECL_RTL (function), 0);
27125 if (!flag_pic || (*targetm.binds_local_p) (function))
27126 output_asm_insn ("jmp\t%P0", xops);
27127 /* All thunks should be in the same object as their target,
27128 and thus binds_local_p should be true. */
27129 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27130 gcc_unreachable ();
27133 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27134 tmp = gen_rtx_CONST (Pmode, tmp);
27135 tmp = gen_rtx_MEM (QImode, tmp);
27137 output_asm_insn ("jmp\t%A0", xops);
27142 if (!flag_pic || (*targetm.binds_local_p) (function))
27143 output_asm_insn ("jmp\t%P0", xops);
27148 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27149 tmp = (gen_rtx_SYMBOL_REF
27151 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27152 tmp = gen_rtx_MEM (QImode, tmp);
27154 output_asm_insn ("jmp\t%0", xops);
27157 #endif /* TARGET_MACHO */
/* 32-bit PIC non-local: materialize the GOT in %ecx and jump through
   the GOT entry.  */
27159 tmp = gen_rtx_REG (SImode, CX_REG);
27160 output_set_got (tmp, NULL_RTX);
27163 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27164 output_asm_insn ("jmp\t{*}%1", xops);
/* Target hook: emit file-start boilerplate (version directive, __fltused
   global, Intel-syntax directive).  NOTE(review): the TARGET_MACHO guard
   around darwin_file_start and the braces are missing from this excerpt.  */
27170 x86_file_start (void)
27172 default_file_start ();
27174 darwin_file_start ();
27176 if (X86_FILE_START_VERSION_DIRECTIVE)
27177 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27178 if (X86_FILE_START_FLTUSED)
27179 fputs ("\t.global\t__fltused\n", asm_out_file);
27180 if (ix86_asm_dialect == ASM_INTEL)
27181 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* Compute alignment for FIELD given the COMPUTED default.  On 32-bit
   targets without -malign-double, cap the alignment of double/integer
   fields at 32 bits (ia32 psABI struct layout).  NOTE(review): the
   "return computed" lines are missing from this excerpt.  */
27185 x86_field_alignment (tree field, int computed)
27187 enum machine_mode mode;
27188 tree type = TREE_TYPE (field);
27190 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27192 mode = TYPE_MODE (strip_array_types (type));
27193 if (mode == DFmode || mode == DCmode
27194 || GET_MODE_CLASS (mode) == MODE_INT
27195 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27196 return MIN (32, computed);
27200 /* Output assembler code to FILE to increment profiler label # LABELNO
27201 for profiling a function entry. */
/* NOTE(review): lossy excerpt — the TARGET_64BIT / flag_pic branch
   structure and #else/#endif lines are missing; the fragments below are
   the 64-bit, 32-bit PIC, and 32-bit non-PIC mcount call sequences.  */
27203 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27207 #ifndef NO_PROFILE_COUNTERS
27208 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27211 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27212 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27214 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27218 #ifndef NO_PROFILE_COUNTERS
27219 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27220 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27222 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
27226 #ifndef NO_PROFILE_COUNTERS
27227 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27228 PROFILE_COUNT_REGISTER);
27230 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27234 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27235 /* We don't have exact information about the insn sizes, but we may assume
27236 quite safely that we are informed about all 1 byte insns and memory
27237 address sizes. This is enough to eliminate unnecessary padding in
/* NOTE(review): lossy excerpt — return type, several "return N" lines and
   the CALL_P test are missing; returns a conservative lower bound on the
   encoded size of INSN, in bytes.  */
27241 min_insn_size (rtx insn)
27245 if (!INSN_P (insn) || !active_insn_p (insn))
27248 /* Discard alignments we've emit and jump instructions. */
27249 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27250 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27252 if (JUMP_TABLE_DATA_P(insn))
27255 /* Important case - calls are always 5 bytes.
27256 It is common to have many calls in the row. */
27258 && symbolic_reference_mentioned_p (PATTERN (insn))
27259 && !SIBLING_CALL_P (insn))
27261 if (get_attr_length (insn) <= 1)
27264 /* For normal instructions we may rely on the sizes of addresses
27265 and the presence of symbol to require 4 bytes of encoding.
27266 This is not the case for jumps where references are PC relative. */
27267 if (!JUMP_P (insn))
27269 l = get_attr_length_address (insn);
27270 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27279 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* NOTE(review): lossy excerpt — braces, some declarations (isjump,
   min_size) and a few statements are missing.  Slides a window [START,
   INSN] over the insn stream tracking byte size and jump count, and pads
   before INSN when 4 jumps could share one 16-byte fetch window.  */
27283 ix86_avoid_jump_mispredicts (void)
27285 rtx insn, start = get_insns ();
27286 int nbytes = 0, njumps = 0;
27289 /* Look for all minimal intervals of instructions containing 4 jumps.
27290 The intervals are bounded by START and INSN. NBYTES is the total
27291 size of instructions in the interval including INSN and not including
27292 START. When the NBYTES is smaller than 16 bytes, it is possible
27293 that the end of START and INSN ends up in the same 16byte page.
27295 The smallest offset in the page INSN can start is the case where START
27296 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
27297 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
27299 for (insn = start; insn; insn = NEXT_INSN (insn))
27303 if (LABEL_P (insn))
27305 int align = label_to_alignment (insn);
27306 int max_skip = label_to_max_skip (insn);
27310 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27311 already in the current 16 byte page, because otherwise
27312 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27313 bytes to reach 16 byte boundary. */
27315 || (align <= 3 && max_skip != (1 << align) - 1))
27318 fprintf (dump_file, "Label %i with max_skip %i\n",
27319 INSN_UID (insn), max_skip);
/* A sufficiently aligned label resets the window: retire insns from
   START until the remaining bytes fit before the alignment skip.  */
27322 while (nbytes + max_skip >= 16)
27324 start = NEXT_INSN (start);
27325 if ((JUMP_P (start)
27326 && GET_CODE (PATTERN (start)) != ADDR_VEC
27327 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27329 njumps--, isjump = 1;
27332 nbytes -= min_insn_size (start);
27338 min_size = min_insn_size (insn);
27339 nbytes += min_size;
27341 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27342 INSN_UID (insn), min_size);
/* Count real jumps (not dispatch tables).  */
27344 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27345 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Shrink the window from the front while it holds more than 4 jumps
   (exact loop condition not visible in this excerpt).  */
27353 start = NEXT_INSN (start);
27354 if ((JUMP_P (start)
27355 && GET_CODE (PATTERN (start)) != ADDR_VEC
27356 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27358 njumps--, isjump = 1;
27361 nbytes -= min_insn_size (start);
27363 gcc_assert (njumps >= 0);
27365 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27366 INSN_UID (start), INSN_UID (insn), nbytes);
27368 if (njumps == 3 && isjump && nbytes < 16)
27370 int padsize = 15 - nbytes + min_insn_size (insn);
27373 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27374 INSN_UID (insn), padsize);
27375 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27381 /* AMD Athlon works faster
27382 when RET is not destination of conditional jump or directly preceded
27383 by other jump instruction. We avoid the penalty by inserting NOP just
27384 before the RET instructions in such cases. */
/* NOTE(review): lossy excerpt — braces, the `replace = true` assignments
   and the final delete/replace of the RET are missing.  Scans predecessor
   edges of the exit block for RETURN jumps that need padding.  */
27386 ix86_pad_returns (void)
27391 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27393 basic_block bb = e->src;
27394 rtx ret = BB_END (bb);
27396 bool replace = false;
27398 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27399 || optimize_bb_for_size_p (bb))
27401 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27402 if (active_insn_p (prev) || LABEL_P (prev))
/* RET directly after a label: pad if any non-fallthru edge can reach it
   (i.e. the RET is a jump target).  */
27404 if (prev && LABEL_P (prev))
27409 FOR_EACH_EDGE (e, ei, bb->preds)
27410 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27411 && !(e->flags & EDGE_FALLTHRU))
27416 prev = prev_active_insn (ret);
27418 && ((JUMP_P (prev) && any_condjump_p (prev))
27421 /* Empty functions get branch mispredict even when the jump destination
27422 is not visible to us. */
27423 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27428 emit_insn_before (gen_return_internal_long (), ret);
27434 /* Implement machine specific optimizations. We implement padding of returns
27435 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function name line ("static void ix86_reorg (void)")
   is missing from this excerpt; only the body fragments are visible.  */
27439 if (optimize && optimize_function_for_speed_p (cfun))
27441 if (TARGET_PAD_RETURNS)
27442 ix86_pad_returns ();
27443 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27444 if (TARGET_FOUR_JUMP_LIMIT)
27445 ix86_avoid_jump_mispredicts ();
27450 /* Return nonzero when QImode register that must be represented via REX prefix
/* NOTE(review): return type, braces and the return statements are missing
   from this excerpt.  Checks the cached operand extraction of INSN for a
   hard register above the four legacy byte-addressable GPRs.  */
27453 x86_extended_QIreg_mentioned_p (rtx insn)
27456 extract_insn_cached (insn);
27457 for (i = 0; i < recog_data.n_operands; i++)
27458 if (REG_P (recog_data.operand[i])
27459 && REGNO (recog_data.operand[i]) > BX_REG)
27464 /* Return nonzero when P points to register encoded via REX prefix.
27465 Called via for_each_rtx. */
/* NOTE(review): the REG_P guard before reading REGNO is missing from this
   excerpt; code kept byte-identical.  */
27467 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27469 unsigned int regno;
27472 regno = REGNO (*p);
27473 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27476 /* Return true when INSN mentions register that must be encoded using REX
/* Walks the insn pattern (or the raw rtx, if INSN is not an insn) with
   for_each_rtx, using extended_reg_mentioned_1 as the predicate.  */
27479 x86_extended_reg_mentioned_p (rtx insn)
27481 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27482 extended_reg_mentioned_1, NULL);
27485 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27486 optabs would emit if we didn't have TFmode patterns. */
/* NOTE(review): lossy excerpt — braces and the `out = operands[0]`-style
   assignment are missing.  Standard unsigned->float idiom: signed convert
   when non-negative; otherwise halve with the low bit folded in, convert,
   and double the result.  */
27489 x86_emit_floatuns (rtx operands[2])
27491 rtx neglab, donelab, i0, i1, f0, in, out;
27492 enum machine_mode mode, inmode;
27494 inmode = GET_MODE (operands[1]);
27495 gcc_assert (inmode == SImode || inmode == DImode);
27498 in = force_reg (inmode, operands[1]);
27499 mode = GET_MODE (out);
27500 neglab = gen_label_rtx ();
27501 donelab = gen_label_rtx ();
27502 f0 = gen_reg_rtx (mode);
/* If IN >= 0 (as signed), an ordinary signed conversion is exact.  */
27504 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27506 expand_float (out, in, 0);
27508 emit_jump_insn (gen_jump (donelab));
27511 emit_label (neglab);
/* Negative path: i0 = (in >> 1) | (in & 1), convert, then out = f0 + f0.
   ORing in the low bit keeps rounding correct.  */
27513 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27515 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27517 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27519 expand_float (f0, i0, 0);
27521 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27523 emit_label (donelab);
27526 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27527 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): lossy excerpt — the switch over MODE and its case labels,
   plus several braces and returns, are missing; the per-mode comments
   below are inferred from the modes used in each fragment and should be
   confirmed against the full source.  Code kept byte-identical.  */
27530 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27531 rtx target, rtx val)
27533 enum machine_mode hmode, smode, wsmode, wvmode;
/* Simple case: modes with a native VEC_DUPLICATE pattern.  */
27548 val = force_reg (GET_MODE_INNER (mode), val);
27549 x = gen_rtx_VEC_DUPLICATE (mode, val);
27550 emit_insn (gen_rtx_SET (VOIDmode, target, x))\u003b
/* HImode duplicate via TRUNCATE of an SImode value (SSE/3DNOW_A).  */
27556 if (TARGET_SSE || TARGET_3DNOW_A)
27558 val = gen_lowpart (SImode, val);
27559 x = gen_rtx_TRUNCATE (HImode, val);
27560 x = gen_rtx_VEC_DUPLICATE (mode, x);
27561 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V8HImode broadcast: insert into V4SI, punpcklwd, then pshufd.  */
27583 /* Extend HImode to SImode using a paradoxical SUBREG. */
27584 tmp1 = gen_reg_rtx (SImode);
27585 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27586 /* Insert the SImode value as low element of V4SImode vector. */
27587 tmp2 = gen_reg_rtx (V4SImode);
27588 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27589 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27590 CONST0_RTX (V4SImode),
27592 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27593 /* Cast the V4SImode vector back to a V8HImode vector. */
27594 tmp1 = gen_reg_rtx (V8HImode);
27595 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27596 /* Duplicate the low short through the whole low SImode word. */
27597 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27598 /* Cast the V8HImode vector back to a V4SImode vector. */
27599 tmp2 = gen_reg_rtx (V4SImode);
27600 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27601 /* Replicate the low element of the V4SImode vector. */
27602 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27603 /* Cast the V2SImode back to V8HImode, and store in target. */
27604 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
/* V16QImode broadcast: same scheme with two punpcklbw steps.  */
27615 /* Extend QImode to SImode using a paradoxical SUBREG. */
27616 tmp1 = gen_reg_rtx (SImode);
27617 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27618 /* Insert the SImode value as low element of V4SImode vector. */
27619 tmp2 = gen_reg_rtx (V4SImode);
27620 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27621 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27622 CONST0_RTX (V4SImode),
27624 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27625 /* Cast the V4SImode vector back to a V16QImode vector. */
27626 tmp1 = gen_reg_rtx (V16QImode);
27627 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27628 /* Duplicate the low byte through the whole low SImode word. */
27629 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27630 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27631 /* Cast the V16QImode vector back to a V4SImode vector. */
27632 tmp2 = gen_reg_rtx (V4SImode);
27633 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27634 /* Replicate the low element of the V4SImode vector. */
27635 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27636 /* Cast the V2SImode back to V16QImode, and store in target. */
27637 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
/* Fallback: widen the scalar (shift+or it beside itself) and recurse in
   the wider vector mode.  */
27645 /* Replicate the value once into the next wider mode and recurse. */
27646 val = convert_modes (wsmode, smode, val, true);
27647 x = expand_simple_binop (wsmode, ASHIFT, val,
27648 GEN_INT (GET_MODE_BITSIZE (smode)),
27649 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27650 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27652 x = gen_reg_rtx (wvmode);
27653 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27654 gcc_unreachable ();
27655 emit_move_insn (target, gen_lowpart (mode, x));
/* 256-bit case: duplicate into a half-width vector, then VEC_CONCAT.  */
27678 rtx tmp = gen_reg_rtx (hmode);
27679 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27680 emit_insn (gen_rtx_SET (VOIDmode, target,
27681 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
/* NOTE(review): elided extract -- the embedded line numbers jump, so
   source lines (braces, case labels, declarations) are missing between
   the lines shown below.  Code tokens are left untouched.  */
27690 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27691 whose ONE_VAR element is VAR, and other elements are zero. Return true
27695 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27696 rtx target, rtx var, int one_var)
27698 enum machine_mode vsimode;
27701 bool use_vector_set = false;
/* Decide, per target ISA (SSE4.1/SSE2/SSE/3DNow!/AVX as visible below),
   whether a direct vector-set insertion is profitable.  The governing
   switch on MODE is elided from this extract.  */
27706 /* For SSE4.1, we normally use vector set. But if the second
27707 element is zero and inter-unit moves are OK, we use movq
27709 use_vector_set = (TARGET_64BIT
27711 && !(TARGET_INTER_UNIT_MOVES
27717 use_vector_set = TARGET_SSE4_1;
27720 use_vector_set = TARGET_SSE2;
27723 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27730 use_vector_set = TARGET_AVX;
27733 /* Use ix86_expand_vector_set in 64bit mode only. */
27734 use_vector_set = TARGET_AVX && TARGET_64BIT;
/* Fast path: zero the whole vector, then insert VAR at ONE_VAR.  */
27740 if (use_vector_set)
27742 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27743 var = force_reg (GET_MODE_INNER (mode), var);
27744 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27760 var = force_reg (GET_MODE_INNER (mode), var);
27761 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27762 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Build the value in a fresh pseudo when TARGET is a hard register,
   so the shuffles below operate on a pseudo.  */
27767 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27768 new_target = gen_reg_rtx (mode);
27770 new_target = target;
27771 var = force_reg (GET_MODE_INNER (mode), var);
27772 x = gen_rtx_VEC_DUPLICATE (mode, var);
27773 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27774 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27777 /* We need to shuffle the value to the correct position, so
27778 create a new pseudo to store the intermediate result. */
27780 /* With SSE2, we can use the integer shuffle insns. */
27781 if (mode != V4SFmode && TARGET_SSE2)
27783 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27785 GEN_INT (one_var == 1 ? 0 : 1),
27786 GEN_INT (one_var == 2 ? 0 : 1),
27787 GEN_INT (one_var == 3 ? 0 : 1)));
27788 if (target != new_target)
27789 emit_move_insn (target, new_target);
27793 /* Otherwise convert the intermediate result to V4SFmode and
27794 use the SSE1 shuffle instructions. */
27795 if (mode != V4SFmode)
27797 tmp = gen_reg_rtx (V4SFmode);
27798 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27803 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27805 GEN_INT (one_var == 1 ? 0 : 1),
27806 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27807 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27809 if (mode != V4SFmode)
27810 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27811 else if (tmp != target)
27812 emit_move_insn (target, tmp);
27814 else if (target != new_target)
27815 emit_move_insn (target, new_target);
27820 vsimode = V4SImode;
27826 vsimode = V2SImode;
/* Narrow element modes: widen the element to SImode and recurse on the
   corresponding SImode vector, then view-convert the result back.  */
27832 /* Zero extend the variable element to SImode and recurse. */
27833 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27835 x = gen_reg_rtx (vsimode);
27836 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27838 gcc_unreachable ();
27840 emit_move_insn (target, gen_lowpart (mode, x));
/* NOTE(review): elided extract -- intermediate source lines are missing
   between the numbered lines below.  Code tokens are left untouched.  */
27848 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27849 consisting of the values in VALS. It is known that all elements
27850 except ONE_VAR are constants. Return true if successful. */
27853 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27854 rtx target, rtx vals, int one_var)
27856 rtx var = XVECEXP (vals, 0, one_var);
27857 enum machine_mode wmode;
/* Build CONST_VEC: a copy of VALS with the variable slot zeroed, suitable
   for loading from the constant pool.  */
27860 const_vec = copy_rtx (vals);
27861 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27862 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27870 /* For the two element vectors, it's just as easy to use
27871 the general case. */
27875 /* Use ix86_expand_vector_set in 64bit mode only. */
27898 /* There's no way to set one QImode entry easily. Combine
27899 the variable value with its adjacent constant value, and
27900 promote to an HImode set. */
27901 x = XVECEXP (vals, 0, one_var ^ 1);
/* The two branches below differ by whether ONE_VAR is the high or the
   low byte of the combined HImode value; the selecting condition is
   elided from this extract.  */
27904 var = convert_modes (HImode, QImode, var, true);
27905 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27906 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27907 x = GEN_INT (INTVAL (x) & 0xff);
27911 var = convert_modes (HImode, QImode, var, true);
27912 x = gen_int_mode (INTVAL (x) << 8, HImode);
27914 if (x != const0_rtx)
27915 var = expand_simple_binop (HImode, IOR, var, x, var,
27916 1, OPTAB_LIB_WIDEN);
27918 x = gen_reg_rtx (wmode);
27919 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27920 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1)
27922 emit_move_insn (target, gen_lowpart (mode, x));
/* Default strategy: load the constant image, then overwrite the one
   variable element in place.  */
27929 emit_move_insn (target, const_vec);
27930 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* NOTE(review): elided extract -- the switch arms selecting CMODE/HMODE
   and several braces are missing between the numbered lines below.  */
27934 /* A subroutine of ix86_expand_vector_init_general. Use vector
27935 concatenate to handle the most general case: all values variable,
27936 and none identical. */
27939 ix86_expand_vector_init_concat (enum machine_mode mode,
27940 rtx target, rtx *ops, int n)
27942 enum machine_mode cmode, hmode = VOIDmode;
27943 rtx first[8], second[4];
27983 gcc_unreachable ();
/* n == 2: concatenate the two operands directly into TARGET.  */
27986 if (!register_operand (ops[1], cmode))
27987 ops[1] = force_reg (cmode, ops[1]);
27988 if (!register_operand (ops[0], cmode))
27989 ops[0] = force_reg (cmode, ops[0]);
27990 emit_insn (gen_rtx_SET (VOIDmode, target,
27991 gen_rtx_VEC_CONCAT (mode, ops[0],
28011 gcc_unreachable ();
28027 gcc_unreachable ();
28032 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* Pair up operands into half-width vectors, then recurse to combine
   the halves.  Loop initialization is elided from this extract.  */
28035 for (; i > 0; i -= 2, j--)
28037 first[j] = gen_reg_rtx (cmode);
28038 v = gen_rtvec (2, ops[i - 1], ops[i]);
28039 ix86_expand_vector_init (false, first[j],
28040 gen_rtx_PARALLEL (cmode, v));
28046 gcc_assert (hmode != VOIDmode);
28047 for (i = j = 0; i < n; i += 2, j++)
28049 second[j] = gen_reg_rtx (hmode);
28050 ix86_expand_vector_init_concat (hmode, second [j],
28054 ix86_expand_vector_init_concat (mode, target, second, n);
28057 ix86_expand_vector_init_concat (mode, target, first, n);
28061 gcc_unreachable ();
/* NOTE(review): elided extract -- switch labels and braces are missing
   between the numbered lines below.  Code tokens are left untouched.  */
28065 /* A subroutine of ix86_expand_vector_init_general. Use vector
28066 interleave to handle the most general case: all values variable,
28067 and none identical. */
28070 ix86_expand_vector_init_interleave (enum machine_mode mode,
28071 rtx target, rtx *ops, int n)
28073 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28076 rtx (*gen_load_even) (rtx, rtx, rtx);
28077 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28078 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* Select per-mode insn generators: the V8HI arm uses HImode elements,
   the V16QI arm QImode elements (switch labels elided).  */
28083 gen_load_even = gen_vec_setv8hi;
28084 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28085 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28086 inner_mode = HImode;
28087 first_imode = V4SImode;
28088 second_imode = V2DImode;
28089 third_imode = VOIDmode;
28092 gen_load_even = gen_vec_setv16qi;
28093 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28094 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28095 inner_mode = QImode;
28096 first_imode = V8HImode;
28097 second_imode = V4SImode;
28098 third_imode = V2DImode;
28101 gcc_unreachable ();
/* Pack pairs of scalar operands (ops[2i], ops[2i+1]) into vectors.  */
28104 for (i = 0; i < n; i++)
28106 /* Extend the odd element to SImode using a paradoxical SUBREG. */
28107 op0 = gen_reg_rtx (SImode);
28108 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28110 /* Insert the SImode value as low element of V4SImode vector. */
28111 op1 = gen_reg_rtx (V4SImode);
28112 op0 = gen_rtx_VEC_MERGE (V4SImode,
28113 gen_rtx_VEC_DUPLICATE (V4SImode,
28115 CONST0_RTX (V4SImode),
28117 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28119 /* Cast the V4SImode vector back to a vector in original mode. */
28120 op0 = gen_reg_rtx (mode);
28121 emit_move_insn (op0, gen_lowpart (mode, op1));
28123 /* Load even elements into the second position. */
28124 emit_insn ((*gen_load_even) (op0,
28125 force_reg (inner_mode,
28129 /* Cast vector to FIRST_IMODE vector. */
28130 ops[i] = gen_reg_rtx (first_imode);
28131 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28134 /* Interleave low FIRST_IMODE vectors. */
28135 for (i = j = 0; i < n; i += 2, j++)
28137 op0 = gen_reg_rtx (first_imode);
28138 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28140 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28141 ops[j] = gen_reg_rtx (second_imode);
28142 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28145 /* Interleave low SECOND_IMODE vectors. */
28146 switch (second_imode)
28149 for (i = j = 0; i < n / 2; i += 2, j++)
28151 op0 = gen_reg_rtx (second_imode);
28152 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28155 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28157 ops[j] = gen_reg_rtx (third_imode);
28158 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* Fall through to a final V2DI interleave after narrowing once.  */
28160 second_imode = V2DImode;
28161 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28165 op0 = gen_reg_rtx (second_imode);
28166 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28169 /* Cast the SECOND_IMODE vector back to a vector on original
28171 emit_insn (gen_rtx_SET (VOIDmode, target,
28172 gen_lowpart (mode, op0)));
28176 gcc_unreachable ();
/* NOTE(review): elided extract -- the mode switch labels and braces are
   missing between the numbered lines below.  Code tokens untouched.  */
28180 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28181 all values variable, and none identical. */
28184 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28185 rtx target, rtx vals)
28187 rtx ops[32], op0, op1;
28188 enum machine_mode half_mode = VOIDmode;
28195 if (!mmx_ok && !TARGET_SSE)
/* Strategy 1 (mode cases elided): build by recursive concatenation.  */
28207 n = GET_MODE_NUNITS (mode);
28208 for (i = 0; i < n; i++)
28209 ops[i] = XVECEXP (vals, 0, i);
28210 ix86_expand_vector_init_concat (mode, target, ops, n);
28214 half_mode = V16QImode;
28218 half_mode = V8HImode;
/* Strategy 2: interleave each half separately, then concatenate the
   two half-mode results into the full vector.  */
28222 n = GET_MODE_NUNITS (mode);
28223 for (i = 0; i < n; i++)
28224 ops[i] = XVECEXP (vals, 0, i);
28225 op0 = gen_reg_rtx (half_mode);
28226 op1 = gen_reg_rtx (half_mode);
28227 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28229 ix86_expand_vector_init_interleave (half_mode, op1,
28230 &ops [n >> 1], n >> 2);
28231 emit_insn (gen_rtx_SET (VOIDmode, target,
28232 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28236 if (!TARGET_SSE4_1)
28244 /* Don't use ix86_expand_vector_init_interleave if we can't
28245 move from GPR to SSE register directly. */
28246 if (!TARGET_INTER_UNIT_MOVES)
28249 n = GET_MODE_NUNITS (mode);
28250 for (i = 0; i < n; i++)
28251 ops[i] = XVECEXP (vals, 0, i);
28252 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28260 gcc_unreachable ();
/* Fallback: assemble word-sized chunks with shift/IOR in GPRs, then
   move the words into the vector register.  */
28264 int i, j, n_elts, n_words, n_elt_per_word;
28265 enum machine_mode inner_mode;
28266 rtx words[4], shift;
28268 inner_mode = GET_MODE_INNER (mode);
28269 n_elts = GET_MODE_NUNITS (mode);
28270 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28271 n_elt_per_word = n_elts / n_words;
28272 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28274 for (i = 0; i < n_words; ++i)
28276 rtx word = NULL_RTX;
28278 for (j = 0; j < n_elt_per_word; ++j)
/* Elements are packed highest-first so each shift makes room for
   the next lower element.  */
28280 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28281 elt = convert_modes (word_mode, inner_mode, elt, true);
28287 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28288 word, 1, OPTAB_LIB_WIDEN);
28289 word = expand_simple_binop (word_mode, IOR, word, elt,
28290 word, 1, OPTAB_LIB_WIDEN);
28298 emit_move_insn (target, gen_lowpart (mode, words[0]));
28299 else if (n_words == 2)
28301 rtx tmp = gen_reg_rtx (mode);
/* Clobber TMP first so the two partial word stores are not seen as
   uses of an uninitialized pseudo.  */
28302 emit_clobber (tmp);
28303 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28304 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28305 emit_move_insn (target, tmp);
28307 else if (n_words == 4)
28309 rtx tmp = gen_reg_rtx (V4SImode);
28310 gcc_assert (word_mode == SImode);
28311 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28312 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28313 emit_move_insn (target, gen_lowpart (mode, tmp));
28316 gcc_unreachable ();
/* NOTE(review): elided extract -- a few lines (braces, early returns)
   are missing between the numbered lines below.  */
28320 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28321 instructions unless MMX_OK is true. */
28324 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28326 enum machine_mode mode = GET_MODE (target);
28327 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28328 int n_elts = GET_MODE_NUNITS (mode);
28329 int n_var = 0, one_var = -1;
28330 bool all_same = true, all_const_zero = true;
/* Classify VALS: count variable elements, remember the last variable
   index, and track whether all elements are equal / all zero.  */
28334 for (i = 0; i < n_elts; ++i)
28336 x = XVECEXP (vals, 0, i);
28337 if (!(CONST_INT_P (x)
28338 || GET_CODE (x) == CONST_DOUBLE
28339 || GET_CODE (x) == CONST_FIXED))
28340 n_var++, one_var = i;
28341 else if (x != CONST0_RTX (inner_mode))
28342 all_const_zero = false;
28343 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28347 /* Constants are best loaded from the constant pool. */
28350 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28354 /* If all values are identical, broadcast the value. */
28356 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28357 XVECEXP (vals, 0, 0)))
28360 /* Values where only one field is non-constant are best loaded from
28361 the pool and overwritten via move later. */
28365 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28366 XVECEXP (vals, 0, one_var),
28370 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* All cheaper strategies failed or did not apply: fully general path.  */
28374 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Set element ELT of vector TARGET to VAL.  NOTE(review): elided
   extract -- the mode switch labels and many braces are missing between
   the numbered lines below.  Code tokens are left untouched.  */
28378 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28380 enum machine_mode mode = GET_MODE (target);
28381 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28382 enum machine_mode half_mode;
28383 bool use_vec_merge = false;
/* AVX 256-bit handling: tables of lo/hi half extract and insert insn
   generators, indexed by [mode][half].  */
28385 static rtx (*gen_extract[6][2]) (rtx, rtx)
28387 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28388 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28389 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28390 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28391 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28392 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28394 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28396 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28397 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28398 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28399 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28400 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28401 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element vectors: extract the untouched element, then rebuild the
   vector by concatenation in the correct order.  */
28411 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28412 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28414 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28416 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28417 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28423 use_vec_merge = TARGET_SSE4_1;
28431 /* For the two element vectors, we implement a VEC_CONCAT with
28432 the extraction of the other element. */
28434 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28435 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28438 op0 = val, op1 = tmp;
28440 op0 = tmp, op1 = val;
28442 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28443 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28448 use_vec_merge = TARGET_SSE4_1;
28455 use_vec_merge = true;
/* V4SF without SSE4.1: per-element unpck/shufps sequences.  The ASCII
   diagrams show the lane contents after each insn (X = new value).  */
28459 /* tmp = target = A B C D */
28460 tmp = copy_to_reg (target);
28461 /* target = A A B B */
28462 emit_insn (gen_sse_unpcklps (target, target, target));
28463 /* target = X A B B */
28464 ix86_expand_vector_set (false, target, val, 0);
28465 /* target = A X C D */
28466 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28467 GEN_INT (1), GEN_INT (0),
28468 GEN_INT (2+4), GEN_INT (3+4)));
28472 /* tmp = target = A B C D */
28473 tmp = copy_to_reg (target);
28474 /* tmp = X B C D */
28475 ix86_expand_vector_set (false, tmp, val, 0);
28476 /* target = A B X D */
28477 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28478 GEN_INT (0), GEN_INT (1),
28479 GEN_INT (0+4), GEN_INT (3+4)));
28483 /* tmp = target = A B C D */
28484 tmp = copy_to_reg (target);
28485 /* tmp = X B C D */
28486 ix86_expand_vector_set (false, tmp, val, 0);
28487 /* target = A B X D */
28488 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28489 GEN_INT (0), GEN_INT (1),
28490 GEN_INT (2+4), GEN_INT (0+4)));
28494 gcc_unreachable ();
28499 use_vec_merge = TARGET_SSE4_1;
28503 /* Element 0 handled by vec_merge below. */
28506 use_vec_merge = true;
28512 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28513 store into element 0, then shuffle them back. */
28517 order[0] = GEN_INT (elt);
28518 order[1] = const1_rtx;
28519 order[2] = const2_rtx;
28520 order[3] = GEN_INT (3);
28521 order[elt] = const0_rtx;
28523 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28524 order[1], order[2], order[3]));
28526 ix86_expand_vector_set (false, target, val, 0);
/* The swap permutation is its own inverse, so reapplying it restores
   the original element order with the new value in place.  */
28528 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28529 order[1], order[2], order[3]));
28533 /* For SSE1, we have to reuse the V4SF code. */
28534 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28535 gen_lowpart (SFmode, val), elt);
28540 use_vec_merge = TARGET_SSE2;
28543 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28547 use_vec_merge = TARGET_SSE4_1;
28554 half_mode = V16QImode;
28560 half_mode = V8HImode;
28566 half_mode = V4SImode;
28572 half_mode = V2DImode;
28578 half_mode = V4SFmode;
28584 half_mode = V2DFmode;
/* AVX 256-bit path: work on the relevant 128-bit half.  */
28590 /* Compute offset. */
28594 gcc_assert (i <= 1);
28596 /* Extract the half. */
28597 tmp = gen_reg_rtx (half_mode);
28598 emit_insn ((*gen_extract[j][i]) (tmp, target));
28600 /* Put val in tmp at elt. */
28601 ix86_expand_vector_set (false, tmp, val, elt);
28604 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
/* Generic vec_merge path.  */
28613 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28614 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28615 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: bounce through a stack slot.  */
28619 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28621 emit_move_insn (mem, target);
28623 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28624 emit_move_insn (tmp, val);
28626 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  NOTE(review):
   elided extract -- the mode switch labels and braces are missing
   between the numbered lines below.  */
28631 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28633 enum machine_mode mode = GET_MODE (vec);
28634 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28635 bool use_vec_extr = false;
28648 use_vec_extr = true;
28652 use_vec_extr = TARGET_SSE4_1;
/* V4SF: shuffle/unpack the wanted element into lane 0, then fall
   through to the vec_select extraction below.  */
28664 tmp = gen_reg_rtx (mode);
28665 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28666 GEN_INT (elt), GEN_INT (elt),
28667 GEN_INT (elt+4), GEN_INT (elt+4)));
28671 tmp = gen_reg_rtx (mode);
28672 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28676 gcc_unreachable ();
28679 use_vec_extr = true;
28684 use_vec_extr = TARGET_SSE4_1;
/* V4SI analogue of the V4SF path, using integer shuffles.  */
28698 tmp = gen_reg_rtx (mode);
28699 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28700 GEN_INT (elt), GEN_INT (elt),
28701 GEN_INT (elt), GEN_INT (elt)));
28705 tmp = gen_reg_rtx (mode);
28706 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28710 gcc_unreachable ();
28713 use_vec_extr = true;
28718 /* For SSE1, we have to reuse the V4SF code. */
28719 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28720 gen_lowpart (V4SFmode, vec), elt);
28726 use_vec_extr = TARGET_SSE2;
28729 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28733 use_vec_extr = TARGET_SSE4_1;
28737 /* ??? Could extract the appropriate HImode element and shift. */
28744 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28745 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28747 /* Let the rtl optimizers know about the zero extension performed. */
28748 if (inner_mode == QImode || inner_mode == HImode)
28750 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28751 target = gen_lowpart (SImode, target);
28754 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill to a stack slot and load the element back.  */
28758 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28760 emit_move_insn (mem, vec);
28762 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28763 emit_move_insn (target, tmp);
28767 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28768 pattern to reduce; DEST is the destination; IN is the input vector. */
28771 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28773 rtx tmp1, tmp2, tmp3;
28775 tmp1 = gen_reg_rtx (V4SFmode);
28776 tmp2 = gen_reg_rtx (V4SFmode);
28777 tmp3 = gen_reg_rtx (V4SFmode);
/* Fold the high pair onto the low pair (movhlps), reduce, then fold
   element 1 onto element 0 (shufps) and reduce again.  */
28779 emit_insn (gen_sse_movhlps (tmp1, in, in));
28780 emit_insn (fn (tmp2, tmp1, in));
28782 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28783 GEN_INT (1), GEN_INT (1),
28784 GEN_INT (1+4), GEN_INT (1+4)));
28785 emit_insn (fn (dest, tmp2, tmp3));
28788 /* Target hook for scalar_mode_supported_p. */
28790 ix86_scalar_mode_supported_p (enum machine_mode mode)
/* NOTE(review): the return values for the decimal-float and TFmode
   branches are elided from this extract.  */
28792 if (DECIMAL_FLOAT_MODE_P (mode))
28794 else if (mode == TFmode)
28797 return default_scalar_mode_supported_p (mode);
28800 /* Implements target hook vector_mode_supported_p. */
28802 ix86_vector_mode_supported_p (enum machine_mode mode)
/* A vector mode is supported when the matching ISA extension is
   enabled (return statements elided from this extract).  */
28804 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28806 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28808 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28810 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28812 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28817 /* Target hook for c_mode_for_suffix. */
28818 static enum machine_mode
/* NOTE(review): the function body is elided from this extract.  */
28819 ix86_c_mode_for_suffix (char suffix)
28829 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28831 We do this in the new i386 backend to maintain source compatibility
28832 with the old cc0-based compiler. */
28835 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28836 tree inputs ATTRIBUTE_UNUSED,
/* Every asm implicitly clobbers the flags and the x87 status word.  */
28839 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28841 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28846 /* Implements target vector targetm.asm.encode_section_info. This
28847 is not used by netware. */
28849 static void ATTRIBUTE_UNUSED
28850 ix86_encode_section_info (tree decl, rtx rtl, int first)
28852 default_encode_section_info (decl, rtl, first);
/* Mark static/external variables living in the large data section so
   addressing code knows they need far addressing.  */
28854 if (TREE_CODE (decl) == VAR_DECL
28855 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28856 && ix86_in_large_data_p (decl))
28857 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28860 /* Worker function for REVERSE_CONDITION. */
28863 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
/* FP compares must preserve unordered semantics when reversed.  */
28865 return (mode != CCFPmode && mode != CCFPUmode
28866 ? reverse_condition (code)
28867 : reverse_condition_maybe_unordered (code));
28870 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template for the move.  NOTE(review): a few
   lines (function header tail, braces) are elided from this extract.  */
28874 output_387_reg_move (rtx insn, rtx *operands)
28876 if (REG_P (operands[0]))
/* reg <- dying reg: pop the source off the x87 stack.  */
28878 if (REG_P (operands[1])
28879 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28881 if (REGNO (operands[0]) == FIRST_STACK_REG)
28882 return output_387_ffreep (operands, 0);
28883 return "fstp\t%y0";
28885 if (STACK_TOP_P (operands[0]))
28886 return "fld%Z1\t%y1";
28889 else if (MEM_P (operands[0]))
28891 gcc_assert (REG_P (operands[1]));
28892 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28893 return "fstp%Z0\t%y0";
28896 /* There is no non-popping store to memory for XFmode.
28897 So if we need one, follow the store with a load. */
28898 if (GET_MODE (operands[0]) == XFmode)
28899 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
28901 return "fst%Z0\t%y0";
28908 /* Output code to perform a conditional jump to LABEL, if C2 flag in
28909 FP status register is set. */
28912 ix86_emit_fp_unordered_jump (rtx label)
28914 rtx reg = gen_reg_rtx (HImode);
/* Read the x87 status word into REG.  */
28917 emit_insn (gen_x86_fnstsw_1 (reg));
28919 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
/* sahf copies AH into EFLAGS, letting us test via the CC register.  */
28921 emit_insn (gen_x86_sahf_1 (reg));
28923 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28924 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* Otherwise test the C2 bit (0x04 of the high byte) directly.  */
28928 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28930 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28931 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28934 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28935 gen_rtx_LABEL_REF (VOIDmode, label),
28937 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28939 emit_jump_insn (temp);
/* The unordered case is expected to be rare.  */
28940 predict_jump (REG_BR_PROB_BASE * 10 / 100);
28943 /* Output code to perform a log1p XFmode calculation. */
28945 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28947 rtx label1 = gen_label_rtx ();
28948 rtx label2 = gen_label_rtx ();
28950 rtx tmp = gen_reg_rtx (XFmode);
28951 rtx tmp2 = gen_reg_rtx (XFmode);
/* fyl2xp1 is only accurate for |x| below 1 - sqrt(2)/2; branch to the
   fyl2x fallback when |op1| exceeds that threshold.  */
28954 emit_insn (gen_absxf2 (tmp, op1));
28955 test = gen_rtx_GE (VOIDmode, tmp,
28956 CONST_DOUBLE_FROM_REAL_VALUE (
28957 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28959 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
28961 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28962 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28963 emit_jump (label2);
/* Fallback: compute log(1 + op1) as fyl2x(op1 + 1) * ln2.  */
28965 emit_label (label1);
28966 emit_move_insn (tmp, CONST1_RTX (XFmode));
28967 emit_insn (gen_addxf3 (tmp, op1, tmp));
28968 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28969 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28971 emit_label (label2);
28974 /* Output code to perform a Newton-Raphson approximation of a single precision
28975 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
28977 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28979 rtx x0, x1, e0, e1, two;
28981 x0 = gen_reg_rtx (mode);
28982 e0 = gen_reg_rtx (mode);
28983 e1 = gen_reg_rtx (mode);
28984 x1 = gen_reg_rtx (mode);
28986 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
/* Broadcast the constant 2.0 across the vector when MODE is a vector.  */
28988 if (VECTOR_MODE_P (mode))
28989 two = ix86_build_const_vector (SFmode, true, two);
28991 two = force_reg (mode, two);
28993 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28995 /* x0 = rcp(b) estimate */
28996 emit_insn (gen_rtx_SET (VOIDmode, x0,
28997 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0 (the residual of the estimate).  */
29000 emit_insn (gen_rtx_SET (VOIDmode, e0,
29001 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0  */
29003 emit_insn (gen_rtx_SET (VOIDmode, e1,
29004 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1 (one Newton-Raphson refinement of 1/b).  */
29006 emit_insn (gen_rtx_SET (VOIDmode, x1,
29007 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1  */
29009 emit_insn (gen_rtx_SET (VOIDmode, res,
29010 gen_rtx_MULT (mode, a, x1)));
29013 /* Output code to perform a Newton-Raphson approximation of a
29014 single precision floating point [reciprocal] square root. */
/* NOTE(review): the parameter selecting sqrt vs. rsqrt (used by the
   e3 branches below) is on an elided line of this extract.  */
29016 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29019 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29022 x0 = gen_reg_rtx (mode);
29023 e0 = gen_reg_rtx (mode);
29024 e1 = gen_reg_rtx (mode);
29025 e2 = gen_reg_rtx (mode);
29026 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5.  */
29028 real_from_integer (&r, VOIDmode, -3, -1, 0);
29029 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29031 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29032 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29034 if (VECTOR_MODE_P (mode))
29036 mthree = ix86_build_const_vector (SFmode, true, mthree);
29037 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29040 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29041 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29043 /* x0 = rsqrt(a) estimate */
29044 emit_insn (gen_rtx_SET (VOIDmode, x0,
29045 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29048 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
29053 zero = gen_reg_rtx (mode);
29054 mask = gen_reg_rtx (mode);
/* mask = (a != 0); zero out the rsqrt estimate where a == 0.  */
29056 zero = force_reg (mode, CONST0_RTX(mode));
29057 emit_insn (gen_rtx_SET (VOIDmode, mask,
29058 gen_rtx_NE (mode, zero, a)));
29060 emit_insn (gen_rtx_SET (VOIDmode, x0,
29061 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a; e1 = e0 * x0; e2 = e1 - 3.0  */
29065 emit_insn (gen_rtx_SET (VOIDmode, e0,
29066 gen_rtx_MULT (mode, x0, a)));
29068 emit_insn (gen_rtx_SET (VOIDmode, e1,
29069 gen_rtx_MULT (mode, e0, x0)));
29072 mthree = force_reg (mode, mthree);
29073 emit_insn (gen_rtx_SET (VOIDmode, e2,
29074 gen_rtx_PLUS (mode, e1, mthree)));
29076 mhalf = force_reg (mode, mhalf);
29078 /* e3 = -.5 * x0 */
29079 emit_insn (gen_rtx_SET (VOIDmode, e3,
29080 gen_rtx_MULT (mode, x0, mhalf)));
29082 /* e3 = -.5 * e0 */
29083 emit_insn (gen_rtx_SET (VOIDmode, e3,
29084 gen_rtx_MULT (mode, e0, mhalf)));
29085 /* ret = e2 * e3 */
29086 emit_insn (gen_rtx_SET (VOIDmode, res,
29087 gen_rtx_MULT (mode, e2, e3)));
29090 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29092 static void ATTRIBUTE_UNUSED
29093 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29096 /* With Binutils 2.15, the "@unwind" marker must be specified on
29097 every occurrence of the ".eh_frame" section, not just the first
/* NOTE(review): part of the guarding condition is elided here.  */
29100 && strcmp (name, ".eh_frame") == 0)
29102 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29103 flags & SECTION_WRITE ? "aw" : "a");
/* Otherwise fall back to the generic ELF section directive.  */
29106 default_elf_asm_named_section (name, flags, decl);
29109 /* Return the mangling of TYPE if it is an extended fundamental type. */
29111 static const char *
29112 ix86_mangle_type (const_tree type)
29114 type = TYPE_MAIN_VARIANT (type);
/* Only scalar arithmetic/void/bool types can have extended manglings;
   anything else gets the default (return on an elided line).  */
29116 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29117 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29120 switch (TYPE_MODE (type))
29123 /* __float128 is "g". */
29126 /* "long double" or __float80 is "e". */
29133 /* For 32-bit code we can save PIC register setup by using
29134 __stack_chk_fail_local hidden function instead of calling
29135 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
29136 register, so it is better to call __stack_chk_fail directly. */
29139 ix86_stack_protect_fail (void)
29141 return TARGET_64BIT
29142 ? default_external_stack_protect_fail ()
29143 : default_hidden_stack_protect_fail ();
29146 /* Select a format to encode pointers in exception handling data. CODE
29147 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29148 true if the symbol may be affected by dynamic relocations.
29150 ??? All x86 object file formats are capable of representing this.
29151 After all, the relocation needed is the same as for the call insn.
29152 Whether or not a particular assembler allows us to enter such, I
29153 guess we'll have to see. */
29155 asm_preferred_eh_data_format (int code, int global)
/* PIC path (guard partially elided): pc-relative, indirect for
   dynamically-relocatable symbols; sdata4 for small/medium models.  */
29159 int type = DW_EH_PE_sdata8;
29161 || ix86_cmodel == CM_SMALL_PIC
29162 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29163 type = DW_EH_PE_sdata4;
29164 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: absolute pointers, or udata4 when the model guarantees
   addresses fit in 32 bits.  */
29166 if (ix86_cmodel == CM_SMALL
29167 || (ix86_cmodel == CM_MEDIUM && code))
29168 return DW_EH_PE_udata4;
29169 return DW_EH_PE_absptr;
29172 /* Expand copysign from SIGN to the positive value ABS_VALUE
29173 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
29176 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
/* Emit RESULT = ABS_VALUE | (SIGN & sign-bit mask).  ABS_VALUE must
   already be non-negative.  When MASK is NULL a fresh sign-bit mask is
   built here; a caller-provided MASK is stored in complemented form,
   hence the NOT in the else path below.  */
29178 enum machine_mode mode = GET_MODE (sign);
29179 rtx sgn = gen_reg_rtx (mode);
29180 if (mask == NULL_RTX)
29182 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29183 if (!VECTOR_MODE_P (mode))
29185 /* We need to generate a scalar mode mask in this case. */
29186 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29187 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29188 mask = gen_reg_rtx (mode);
29189 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* else branch (bracketing elided in this excerpt): undo the stored
   complement of the caller's mask.  */
29193 mask = gen_rtx_NOT (mode, mask);
29194 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29195 gen_rtx_AND (mode, mask, sign)));
29196 emit_insn (gen_rtx_SET (VOIDmode, result,
29197 gen_rtx_IOR (mode, abs_value, sgn)));
29200 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29201 mask for masking out the sign-bit is stored in *SMASK, if that is
29204 ix86_expand_sse_fabs (rtx op0, rtx *smask)
/* Compute |OP0| by ANDing with an inverted sign-bit mask; the mask is
   handed back through SMASK so the caller can later restore the sign
   with ix86_sse_copysign_to_positive.  */
29206 enum machine_mode mode = GET_MODE (op0);
29209 xa = gen_reg_rtx (mode);
29210 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29211 if (!VECTOR_MODE_P (mode))
29213 /* We need to generate a scalar mode mask in this case. */
29214 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29215 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29216 mask = gen_reg_rtx (mode);
29217 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29219 emit_insn (gen_rtx_SET (VOIDmode, xa,
29220 gen_rtx_AND (mode, op0, mask)));
29228 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29229 swapping the operands if SWAP_OPERANDS is true. The expanded
29230 code is a forward jump to a newly created label in case the
29231 comparison is true. The generated label rtx is returned. */
29233 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29234 bool swap_operands)
/* Emit an FP compare in CCFPUmode (unordered-aware flags) and a
   conditional forward jump on CODE; the fresh label is returned so
   the caller can place it after the conditional code.  */
29245 label = gen_label_rtx ();
29246 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29247 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29248 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29249 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29250 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29251 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29252 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29253 JUMP_LABEL (tmp) = label;
29258 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29259 using comparison code CODE. Operands are swapped for the comparison if
29260 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29262 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29263 bool swap_operands)
29265 enum machine_mode mode = GET_MODE (op0);
29266 rtx mask = gen_reg_rtx (mode);
/* Use the scalar SSE mask-compare patterns (cmpsd for DFmode, cmpss
   for SFmode); MASK ends up all-ones when the comparison holds,
   all-zeros otherwise.  */
29275 if (mode == DFmode)
29276 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29277 gen_rtx_fmt_ee (code, mode, op0, op1)));
29279 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29280 gen_rtx_fmt_ee (code, mode, op0, op1)));
29285 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29286 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29288 ix86_gen_TWO52 (enum machine_mode mode)
29290 REAL_VALUE_TYPE TWO52r;
/* 2**52 for DFmode; the SFmode analogue is 2**23 — i.e. 2**(mantissa
   bits), the threshold above which every value is already integral.  */
29293 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29294 TWO52 = const_double_from_real_value (TWO52r, mode);
29295 TWO52 = force_reg (mode, TWO52);
29300 /* Expand SSE sequence for computing lround from OP1 storing
29303 ix86_expand_lround (rtx op0, rtx op1)
29305 /* C code for the stuff we're doing below:
29306 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29309 enum machine_mode mode = GET_MODE (op1);
29310 const struct real_format *fmt;
29311 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
/* pred_half = 0.5 - 2**(-p-1): the largest representable value
   strictly below 0.5, so exact halfway inputs round away from zero
   without a double-rounding error from the addition.  */
29314 /* load nextafter (0.5, 0.0) */
29315 fmt = REAL_MODE_FORMAT (mode);
29316 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29317 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29319 /* adj = copysign (0.5, op1) */
29320 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29321 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29323 /* adj = op1 + adj */
29324 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29326 /* op0 = (imode)adj */
29327 expand_fix (op0, adj, 0);
29330 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
29333 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29335 /* C code for the stuff we're doing below (for do_floor):
29337 xi -= (double)xi > op1 ? 1 : 0;
29340 enum machine_mode fmode = GET_MODE (op1);
29341 enum machine_mode imode = GET_MODE (op0);
29342 rtx ireg, freg, label, tmp;
29344 /* reg = (long)op1 */
29345 ireg = gen_reg_rtx (imode);
29346 expand_fix (ireg, op1, 0);
29348 /* freg = (double)reg */
29349 freg = gen_reg_rtx (fmode);
29350 expand_float (freg, ireg, 0);
/* Truncation rounds toward zero, so floor needs a -1 correction when
   the round-trip overshot (freg > op1); ceil is the mirror image,
   obtained by swapping operands and adding 1.  */
29352 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29353 label = ix86_expand_sse_compare_and_jump (UNLE,
29354 freg, op1, !do_floor);
29355 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29356 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29357 emit_move_insn (ireg, tmp);
29359 emit_label (label);
29360 LABEL_NUSES (label) = 1;
29362 emit_move_insn (op0, ireg);
29365 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29366 result in OPERAND0. */
29368 ix86_expand_rint (rtx operand0, rtx operand1)
29370 /* C code for the stuff we're doing below:
29371 xa = fabs (operand1);
29372 if (!isless (xa, 2**52))
29374 xa = xa + 2**52 - 2**52;
29375 return copysign (xa, operand1);
/* Values with |x| >= 2**52 are already integral (and the compare also
   filters NaNs), so the add/subtract trick is skipped for them.  The
   current SSE rounding mode performs the actual rounding.  */
29377 enum machine_mode mode = GET_MODE (operand0);
29378 rtx res, xa, label, TWO52, mask;
29380 res = gen_reg_rtx (mode);
29381 emit_move_insn (res, operand1);
29383 /* xa = abs (operand1) */
29384 xa = ix86_expand_sse_fabs (res, &mask);
29386 /* if (!isless (xa, TWO52)) goto label; */
29387 TWO52 = ix86_gen_TWO52 (mode);
29388 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29390 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29391 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29393 ix86_sse_copysign_to_positive (res, xa, res, mask);
29395 emit_label (label);
29396 LABEL_NUSES (label) = 1;
29398 emit_move_insn (operand0, res);
29401 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29404 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29406 /* C code for the stuff we expand below.
29407 double xa = fabs (x), x2;
29408 if (!isless (xa, TWO52))
29410 xa = xa + TWO52 - TWO52;
29411 x2 = copysign (xa, x);
/* 32-bit-safe variant: avoids DImode fix/float (cvttsd2siq needs
   64-bit), using the TWO52 add/sub trick plus a mask-based +/-1
   compensation instead.  */
29420 enum machine_mode mode = GET_MODE (operand0);
29421 rtx xa, TWO52, tmp, label, one, res, mask;
29423 TWO52 = ix86_gen_TWO52 (mode);
29425 /* Temporary for holding the result, initialized to the input
29426 operand to ease control flow. */
29427 res = gen_reg_rtx (mode);
29428 emit_move_insn (res, operand1);
29430 /* xa = abs (operand1) */
29431 xa = ix86_expand_sse_fabs (res, &mask);
29433 /* if (!isless (xa, TWO52)) goto label; */
29434 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29436 /* xa = xa + TWO52 - TWO52; */
29437 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29438 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29440 /* xa = copysign (xa, operand1) */
29441 ix86_sse_copysign_to_positive (xa, xa, res, mask);
/* The compensation constant is +1 for floor and -1 for ceil; combined
   with the swapped compare below this implements both directions with
   a single MINUS.  */
29443 /* generate 1.0 or -1.0 */
29444 one = force_reg (mode,
29445 const_double_from_real_value (do_floor
29446 ? dconst1 : dconstm1, mode));
29448 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29449 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29450 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29451 gen_rtx_AND (mode, one, tmp)));
29452 /* We always need to subtract here to preserve signed zero. */
29453 tmp = expand_simple_binop (mode, MINUS,
29454 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29455 emit_move_insn (res, tmp);
29457 emit_label (label);
29458 LABEL_NUSES (label) = 1;
29460 emit_move_insn (operand0, res);
29463 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29466 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29468 /* C code for the stuff we expand below.
29469 double xa = fabs (x), x2;
29470 if (!isless (xa, TWO52))
29472 x2 = (double)(long)x;
29479 if (HONOR_SIGNED_ZEROS (mode))
29480 return copysign (x2, x);
/* Variant relying on DImode truncation (cvttsd2siq, 64-bit targets
   for DFmode): round-trip through the integer mode, then correct by
   +/-1 where truncation went the wrong way.  */
29483 enum machine_mode mode = GET_MODE (operand0);
29484 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29486 TWO52 = ix86_gen_TWO52 (mode);
29488 /* Temporary for holding the result, initialized to the input
29489 operand to ease control flow. */
29490 res = gen_reg_rtx (mode);
29491 emit_move_insn (res, operand1);
29493 /* xa = abs (operand1) */
29494 xa = ix86_expand_sse_fabs (res, &mask);
29496 /* if (!isless (xa, TWO52)) goto label; */
29497 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29499 /* xa = (double)(long)x */
29500 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29501 expand_fix (xi, res, 0);
29502 expand_float (xa, xi, 0);
29505 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29507 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29508 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29509 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29510 gen_rtx_AND (mode, one, tmp)));
29511 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29512 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29513 emit_move_insn (res, tmp);
/* Restore the sign so floor/ceil of -0.0 stays -0.0 when signed
   zeros are honored.  */
29515 if (HONOR_SIGNED_ZEROS (mode))
29516 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29518 emit_label (label);
29519 LABEL_NUSES (label) = 1;
29521 emit_move_insn (operand0, res);
29524 /* Expand SSE sequence for computing round from OPERAND1 storing
29525 into OPERAND0. Sequence that works without relying on DImode truncation
29526 via cvttsd2siq that is only available on 64bit targets. */
29528 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29530 /* C code for the stuff we expand below.
29531 double xa = fabs (x), xa2, x2;
29532 if (!isless (xa, TWO52))
29534 Using the absolute value and copying back sign makes
29535 -0.0 -> -0.0 correct.
29536 xa2 = xa + TWO52 - TWO52;
29541 else if (dxa > 0.5)
29543 x2 = copysign (xa2, x);
29546 enum machine_mode mode = GET_MODE (operand0);
29547 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29549 TWO52 = ix86_gen_TWO52 (mode);
29551 /* Temporary for holding the result, initialized to the input
29552 operand to ease control flow. */
29553 res = gen_reg_rtx (mode);
29554 emit_move_insn (res, operand1);
29556 /* xa = abs (operand1) */
29557 xa = ix86_expand_sse_fabs (res, &mask);
29559 /* if (!isless (xa, TWO52)) goto label; */
29560 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* xa2 rounds per the current (round-to-nearest-even) mode; dxa is the
   signed rounding error, used below to convert ties-to-even into the
   round-half-away-from-zero semantics of round().  */
29562 /* xa2 = xa + TWO52 - TWO52; */
29563 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29564 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29566 /* dxa = xa2 - xa; */
29567 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29569 /* generate 0.5, 1.0 and -0.5 */
29570 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29571 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29572 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29576 tmp = gen_reg_rtx (mode);
29577 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29578 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29579 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29580 gen_rtx_AND (mode, one, tmp)));
29581 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29582 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29583 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29584 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29585 gen_rtx_AND (mode, one, tmp)));
29586 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29588 /* res = copysign (xa2, operand1) */
29589 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29591 emit_label (label);
29592 LABEL_NUSES (label) = 1;
29594 emit_move_insn (operand0, res);
29597 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29600 ix86_expand_trunc (rtx operand0, rtx operand1)
29602 /* C code for SSE variant we expand below.
29603 double xa = fabs (x), x2;
29604 if (!isless (xa, TWO52))
29606 x2 = (double)(long)x;
29607 if (HONOR_SIGNED_ZEROS (mode))
29608 return copysign (x2, x);
/* cvttsd2si/cvttss2si truncate toward zero, so a plain fix/float
   round trip is exact trunc() for |x| < 2**mantissa-bits.  */
29611 enum machine_mode mode = GET_MODE (operand0);
29612 rtx xa, xi, TWO52, label, res, mask;
29614 TWO52 = ix86_gen_TWO52 (mode);
29616 /* Temporary for holding the result, initialized to the input
29617 operand to ease control flow. */
29618 res = gen_reg_rtx (mode);
29619 emit_move_insn (res, operand1);
29621 /* xa = abs (operand1) */
29622 xa = ix86_expand_sse_fabs (res, &mask);
29624 /* if (!isless (xa, TWO52)) goto label; */
29625 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29627 /* x = (double)(long)x */
29628 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29629 expand_fix (xi, res, 0);
29630 expand_float (res, xi, 0);
/* Keep -0.0 -> -0.0 when signed zeros are honored.  */
29632 if (HONOR_SIGNED_ZEROS (mode))
29633 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29635 emit_label (label);
29636 LABEL_NUSES (label) = 1;
29638 emit_move_insn (operand0, res);
29641 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29644 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29646 enum machine_mode mode = GET_MODE (operand0);
29647 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29649 /* C code for SSE variant we expand below.
29650 double xa = fabs (x), x2;
29651 if (!isless (xa, TWO52))
29653 xa2 = xa + TWO52 - TWO52;
29657 x2 = copysign (xa2, x);
/* 32-bit-safe trunc: no DImode truncation available, so round with
   the TWO52 trick and subtract 1 when the rounding overshot |x|.  */
29661 TWO52 = ix86_gen_TWO52 (mode);
29663 /* Temporary for holding the result, initialized to the input
29664 operand to ease control flow. */
29665 res = gen_reg_rtx (mode);
29666 emit_move_insn (res, operand1);
29668 /* xa = abs (operand1) */
29669 xa = ix86_expand_sse_fabs (res, &smask);
29671 /* if (!isless (xa, TWO52)) goto label; */
29672 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29674 /* res = xa + TWO52 - TWO52; */
29675 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29676 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29677 emit_move_insn (res, tmp);
29680 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29682 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29683 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29684 emit_insn (gen_rtx_SET (VOIDmode, mask,
29685 gen_rtx_AND (mode, mask, one)));
29686 tmp = expand_simple_binop (mode, MINUS,
29687 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29688 emit_move_insn (res, tmp);
29690 /* res = copysign (res, operand1) */
29691 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29693 emit_label (label);
29694 LABEL_NUSES (label) = 1;
29696 emit_move_insn (operand0, res);
29699 /* Expand SSE sequence for computing round from OPERAND1 storing
29702 ix86_expand_round (rtx operand0, rtx operand1)
29704 /* C code for the stuff we're doing below:
29705 double xa = fabs (x);
29706 if (!isless (xa, TWO52))
29708 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29709 return copysign (xa, x);
29711 enum machine_mode mode = GET_MODE (operand0);
29712 rtx res, TWO52, xa, label, xi, half, mask;
29713 const struct real_format *fmt;
29714 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29716 /* Temporary for holding the result, initialized to the input
29717 operand to ease control flow. */
29718 res = gen_reg_rtx (mode);
29719 emit_move_insn (res, operand1);
29721 TWO52 = ix86_gen_TWO52 (mode);
29722 xa = ix86_expand_sse_fabs (res, &mask);
29723 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* As in ix86_expand_lround: 0.5 - 2**(-p-1) avoids double rounding so
   exact .5 inputs still round away from zero.  */
29725 /* load nextafter (0.5, 0.0) */
29726 fmt = REAL_MODE_FORMAT (mode);
29727 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29728 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29730 /* xa = xa + 0.5 */
29731 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29732 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29734 /* xa = (double)(int64_t)xa */
29735 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29736 expand_fix (xi, xa, 0);
29737 expand_float (xa, xi, 0);
29739 /* res = copysign (xa, operand1) */
29740 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29742 emit_label (label);
29743 LABEL_NUSES (label) = 1;
29745 emit_move_insn (operand0, res);
29749 /* Validate whether a SSE5 instruction is valid or not.
29750 OPERANDS is the array of operands.
29751 NUM is the number of operands.
29752 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29753 NUM_MEMORY is the maximum number of memory operands to accept.
29754 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
29757 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29758 bool uses_oc0, int num_memory, bool commutative)
29764 /* Count the number of memory arguments */
29767 for (i = 0; i < num; i++)
29769 enum machine_mode mode = GET_MODE (operands[i]);
29770 if (register_operand (operands[i], mode))
29773 else if (memory_operand (operands[i], mode))
29775 mem_mask |= (1 << i);
29781 rtx pattern = PATTERN (insn);
29783 /* allow 0 for pcmov */
29784 if (GET_CODE (pattern) != SET
29785 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29787 || operands[i] != CONST0_RTX (mode))
29792 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29793 a memory operation. */
29794 if (num_memory < 0)
29796 num_memory = -num_memory;
29797 if ((mem_mask & (1 << (num-1))) != 0)
29799 mem_mask &= ~(1 << (num-1));
29804 /* If there were no memory operations, allow the insn */
29808 /* Do not allow the destination register to be a memory operand. */
29809 else if (mem_mask & (1 << 0))
29812 /* If there are too many memory operations, disallow the instruction. While
29813 the hardware only allows 1 memory reference, before register allocation
29814 for some insns, we allow two memory operations sometimes in order to allow
29815 code like the following to be optimized:
29817 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29819 or similar cases that are vectorized into using the fmaddss
29821 else if (mem_count > num_memory)
29824 /* Don't allow more than one memory operation if not optimizing. */
29825 else if (mem_count > 1 && !optimize)
29828 else if (num == 4 && mem_count == 1)
29830 /* formats (destination is the first argument), example fmaddss:
29831 xmm1, xmm1, xmm2, xmm3/mem
29832 xmm1, xmm1, xmm2/mem, xmm3
29833 xmm1, xmm2, xmm3/mem, xmm1
29834 xmm1, xmm2/mem, xmm3, xmm1 */
29836 return ((mem_mask == (1 << 1))
29837 || (mem_mask == (1 << 2))
29838 || (mem_mask == (1 << 3)));
29840 /* format, example pmacsdd:
29841 xmm1, xmm2, xmm3/mem, xmm1 */
29843 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29845 return (mem_mask == (1 << 2));
29848 else if (num == 4 && num_memory == 2)
29850 /* If there are two memory operations, we can load one of the memory ops
29851 into the destination register. This is for optimizing the
29852 multiply/add ops, which the combiner has optimized both the multiply
29853 and the add insns to have a memory operation. We have to be careful
29854 that the destination doesn't overlap with the inputs. */
29855 rtx op0 = operands[0];
29857 if (reg_mentioned_p (op0, operands[1])
29858 || reg_mentioned_p (op0, operands[2])
29859 || reg_mentioned_p (op0, operands[3]))
29862 /* formats (destination is the first argument), example fmaddss:
29863 xmm1, xmm1, xmm2, xmm3/mem
29864 xmm1, xmm1, xmm2/mem, xmm3
29865 xmm1, xmm2, xmm3/mem, xmm1
29866 xmm1, xmm2/mem, xmm3, xmm1
29868 For the oc0 case, we will load either operands[1] or operands[3] into
29869 operands[0], so any combination of 2 memory operands is ok. */
29873 /* format, example pmacsdd:
29874 xmm1, xmm2, xmm3/mem, xmm1
29876 For the integer multiply/add instructions be more restrictive and
29877 require operands[2] and operands[3] to be the memory operands. */
29879 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
29881 return (mem_mask == ((1 << 2) | (1 << 3)));
29884 else if (num == 3 && num_memory == 1)
29886 /* formats, example protb:
29887 xmm1, xmm2, xmm3/mem
29888 xmm1, xmm2/mem, xmm3 */
29890 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29892 /* format, example comeq:
29893 xmm1, xmm2, xmm3/mem */
29895 return (mem_mask == (1 << 2));
29899 gcc_unreachable ();
29905 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
29906 hardware will allow by using the destination register to load one of the
29907 memory operations. Presently this is used by the multiply/add routines to
29908 allow 2 memory references. */
29911 ix86_expand_sse5_multiple_memory (rtx operands[],
29913 enum machine_mode mode)
29915 rtx op0 = operands[0];
/* Precondition (must match what ix86_sse5_valid_op_p accepted): the
   destination is a register that does not overlap any input, so it is
   safe to use it as a scratch for one of the memory operands.  */
29917 || memory_operand (op0, mode)
29918 || reg_mentioned_p (op0, operands[1])
29919 || reg_mentioned_p (op0, operands[2])
29920 || reg_mentioned_p (op0, operands[3]))
29921 gcc_unreachable ();
29923 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29924 the destination register. */
29925 if (memory_operand (operands[1], mode))
29927 emit_move_insn (op0, operands[1]);
29930 else if (memory_operand (operands[3], mode))
29932 emit_move_insn (op0, operands[3]);
29936 gcc_unreachable ();
29942 /* Table of valid machine attributes. */
29942 /* Table of valid machine attributes. */
29943 static const struct attribute_spec ix86_attribute_table[] =
29945 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29946 /* Stdcall attribute says callee is responsible for popping arguments
29947 if they are not variable. */
29948 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29949 /* Fastcall attribute says callee is responsible for popping arguments
29950 if they are not variable. */
29951 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29952 /* Cdecl attribute says the callee is a normal C declaration */
29953 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29954 /* Regparm attribute specifies how many integer arguments are to be
29955 passed in registers. */
29956 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29957 /* Sseregparm attribute says we are using x86_64 calling conventions
29958 for FP arguments. */
29959 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29960 /* force_align_arg_pointer says this function realigns the stack at entry. */
29961 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29962 false, true, true, ix86_handle_cconv_attribute },
29963 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29964 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29965 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29966 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
/* ms_struct/gcc_struct select MSVC- vs GCC-compatible record layout.  */
29968 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29969 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29970 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29971 SUBTARGET_ATTRIBUTE_TABLE,
29973 /* ms_abi and sysv_abi calling convention function attributes. */
29974 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29975 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
/* Sentinel entry — must stay last.  */
29977 { NULL, 0, 0, false, false, false, NULL }
29980 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29982 x86_builtin_vectorization_cost (bool runtime_test)
29984 /* If the branch of the runtime test is taken - i.e. - the vectorized
29985 version is skipped - this incurs a misprediction cost (because the
29986 vectorized version is expected to be the fall-through). So we subtract
29987 the latency of a mispredicted branch from the costs that are incured
29988 when the vectorized version is executed.
29990 TODO: The values in individual target tables have to be tuned or new
29991 fields may be needed. For eg. on K8, the default branch path is the
29992 not-taken path. If the taken path is predicted correctly, the minimum
29993 penalty of going down the taken-path is 1 cycle. If the taken-path is
29994 not predicted correctly, then the minimum penalty is 10 cycles. */
/* Returned only when RUNTIME_TEST is set; the zero-cost return for
   the other case is elided in this excerpt.  */
29998 return (-(ix86_cost->cond_taken_branch_cost));
30004 /* This function returns the calling abi specific va_list type node.
30005 It returns the FNDECL specific va_list type. */
30008 ix86_fn_abi_va_list (tree fndecl)
/* Early return (condition elided in this excerpt — presumably the
   !TARGET_64BIT case, where there is only one va_list type).  */
30011 return va_list_type_node;
30012 gcc_assert (fndecl != NULL_TREE);
/* 64-bit: pick the va_list flavor matching FNDECL's calling ABI.  */
30014 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
30015 return ms_va_list_type_node;
30017 return sysv_va_list_type_node;
30020 /* Returns the canonical va_list type specified by TYPE. If there
30021 is no valid TYPE provided, it return NULL_TREE. */
30024 ix86_canonical_va_list_type (tree type)
30028 /* Resolve references and pointers to va_list type. */
30029 if (INDIRECT_REF_P (type))
30030 type = TREE_TYPE (type);
30031 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
30032 type = TREE_TYPE (type);
/* Try each candidate va_list type in turn: the default one, then
   sysv, then ms.  The three stanzas below are intentionally
   identical except for the candidate being compared.  */
30036 wtype = va_list_type_node;
30037 gcc_assert (wtype != NULL_TREE);
30039 if (TREE_CODE (wtype) == ARRAY_TYPE)
30041 /* If va_list is an array type, the argument may have decayed
30042 to a pointer type, e.g. by being passed to another function.
30043 In that case, unwrap both types so that we can compare the
30044 underlying records. */
30045 if (TREE_CODE (htype) == ARRAY_TYPE
30046 || POINTER_TYPE_P (htype))
30048 wtype = TREE_TYPE (wtype);
30049 htype = TREE_TYPE (htype);
30052 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30053 return va_list_type_node;
30054 wtype = sysv_va_list_type_node;
30055 gcc_assert (wtype != NULL_TREE);
30057 if (TREE_CODE (wtype) == ARRAY_TYPE)
30059 /* If va_list is an array type, the argument may have decayed
30060 to a pointer type, e.g. by being passed to another function.
30061 In that case, unwrap both types so that we can compare the
30062 underlying records. */
30063 if (TREE_CODE (htype) == ARRAY_TYPE
30064 || POINTER_TYPE_P (htype))
30066 wtype = TREE_TYPE (wtype);
30067 htype = TREE_TYPE (htype);
30070 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30071 return sysv_va_list_type_node;
30072 wtype = ms_va_list_type_node;
30073 gcc_assert (wtype != NULL_TREE);
30075 if (TREE_CODE (wtype) == ARRAY_TYPE)
30077 /* If va_list is an array type, the argument may have decayed
30078 to a pointer type, e.g. by being passed to another function.
30079 In that case, unwrap both types so that we can compare the
30080 underlying records. */
30081 if (TREE_CODE (htype) == ARRAY_TYPE
30082 || POINTER_TYPE_P (htype))
30084 wtype = TREE_TYPE (wtype);
30085 htype = TREE_TYPE (htype);
30088 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30089 return ms_va_list_type_node;
/* No target-specific match: defer to the generic handling.  */
30092 return std_canonical_va_list_type (type);
30095 /* Iterate through the target-specific builtin types for va_list.
30096 IDX denotes the iterator, *PTREE is set to the result type of
30097 the va_list builtin, and *PNAME to its internal type.
30098 Returns zero if there is no element for this index, otherwise
30099 IDX should be increased upon the next call.
30100 Note, do not iterate a base builtin's name like __builtin_va_list.
30101 Used from c_common_nodes_and_builtins. */
30104 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* Iterates over the two extra 64-bit va_list builtins; the switch on
   IDX and its return statements are elided in this excerpt.  */
30110 *ptree = ms_va_list_type_node;
30111 *pname = "__builtin_ms_va_list";
30114 *ptree = sysv_va_list_type_node;
30115 *pname = "__builtin_sysv_va_list";
30123 /* Initialize the GCC target structure. */
/* Each pair below undefines the hook's default and points it at the
   i386 implementation defined earlier in this file; the table is
   consumed by the TARGET_INITIALIZER at the end of the file.  */
30124 #undef TARGET_RETURN_IN_MEMORY
30125 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30127 #undef TARGET_LEGITIMIZE_ADDRESS
30128 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
30130 #undef TARGET_ATTRIBUTE_TABLE
30131 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30132 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30133 # undef TARGET_MERGE_DECL_ATTRIBUTES
30134 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30137 #undef TARGET_COMP_TYPE_ATTRIBUTES
30138 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
30140 #undef TARGET_INIT_BUILTINS
30141 #define TARGET_INIT_BUILTINS ix86_init_builtins
30142 #undef TARGET_EXPAND_BUILTIN
30143 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30145 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30146 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30147 ix86_builtin_vectorized_function
30149 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30150 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30152 #undef TARGET_BUILTIN_RECIPROCAL
30153 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30155 #undef TARGET_ASM_FUNCTION_EPILOGUE
30156 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30158 #undef TARGET_ENCODE_SECTION_INFO
30159 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30160 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30162 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30165 #undef TARGET_ASM_OPEN_PAREN
30166 #define TARGET_ASM_OPEN_PAREN ""
30167 #undef TARGET_ASM_CLOSE_PAREN
30168 #define TARGET_ASM_CLOSE_PAREN ""
30170 #undef TARGET_ASM_ALIGNED_HI_OP
30171 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30172 #undef TARGET_ASM_ALIGNED_SI_OP
30173 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30175 #undef TARGET_ASM_ALIGNED_DI_OP
30176 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* x86 has no alignment constraints for data emission, so the
   unaligned ops reuse the aligned ones.  */
30179 #undef TARGET_ASM_UNALIGNED_HI_OP
30180 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30181 #undef TARGET_ASM_UNALIGNED_SI_OP
30182 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30183 #undef TARGET_ASM_UNALIGNED_DI_OP
30184 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
30186 #undef TARGET_SCHED_ADJUST_COST
30187 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30188 #undef TARGET_SCHED_ISSUE_RATE
30189 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30190 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30191 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30192 ia32_multipass_dfa_lookahead
30194 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30195 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30198 #undef TARGET_HAVE_TLS
30199 #define TARGET_HAVE_TLS true
30201 #undef TARGET_CANNOT_FORCE_CONST_MEM
30202 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30203 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30204 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30206 #undef TARGET_DELEGITIMIZE_ADDRESS
30207 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30209 #undef TARGET_MS_BITFIELD_LAYOUT_P
30210 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
30213 #undef TARGET_BINDS_LOCAL_P
30214 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30216 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30217 #undef TARGET_BINDS_LOCAL_P
30218 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30221 #undef TARGET_ASM_OUTPUT_MI_THUNK
30222 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30223 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30224 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30226 #undef TARGET_ASM_FILE_START
30227 #define TARGET_ASM_FILE_START x86_file_start
30229 #undef TARGET_DEFAULT_TARGET_FLAGS
30230 #define TARGET_DEFAULT_TARGET_FLAGS \
30232 | TARGET_SUBTARGET_DEFAULT \
30233 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
30235 #undef TARGET_HANDLE_OPTION
30236 #define TARGET_HANDLE_OPTION ix86_handle_option
30238 #undef TARGET_RTX_COSTS
30239 #define TARGET_RTX_COSTS ix86_rtx_costs
30240 #undef TARGET_ADDRESS_COST
30241 #define TARGET_ADDRESS_COST ix86_address_cost
30243 #undef TARGET_FIXED_CONDITION_CODE_REGS
30244 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30245 #undef TARGET_CC_MODES_COMPATIBLE
30246 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
30248 #undef TARGET_MACHINE_DEPENDENT_REORG
30249 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30251 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30252 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
30254 #undef TARGET_BUILD_BUILTIN_VA_LIST
30255 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30257 #undef TARGET_FN_ABI_VA_LIST
30258 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30260 #undef TARGET_CANONICAL_VA_LIST_TYPE
30261 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30263 #undef TARGET_EXPAND_BUILTIN_VA_START
30264 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
30266 #undef TARGET_MD_ASM_CLOBBERS
30267 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
30269 #undef TARGET_PROMOTE_PROTOTYPES
30270 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30271 #undef TARGET_STRUCT_VALUE_RTX
30272 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30273 #undef TARGET_SETUP_INCOMING_VARARGS
30274 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30275 #undef TARGET_MUST_PASS_IN_STACK
30276 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30277 #undef TARGET_PASS_BY_REFERENCE
30278 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30279 #undef TARGET_INTERNAL_ARG_POINTER
30280 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
30281 #undef TARGET_UPDATE_STACK_BOUNDARY
30282 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30283 #undef TARGET_GET_DRAP_RTX
30284 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
30285 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
30286 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
30287 #undef TARGET_STRICT_ARGUMENT_NAMING
30288 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
30290 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
30291 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
30293 #undef TARGET_SCALAR_MODE_SUPPORTED_P
30294 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30296 #undef TARGET_VECTOR_MODE_SUPPORTED_P
30297 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30299 #undef TARGET_C_MODE_FOR_SUFFIX
30300 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
30303 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30304 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
30307 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30308 #undef TARGET_INSERT_ATTRIBUTES
30309 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost

/* Per-function target-option (attribute/pragma) support.  */
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_CAN_INLINE_P
#define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
30351 struct gcc_target targetm = TARGET_INITIALIZER;
30353 #include "gt-i386.h"