1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
53 #include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
60 #ifndef CHECK_STACK_LIMIT
61 #define CHECK_STACK_LIMIT (-1)
64 /* Return index of given mode in mult and division cost tables. */
65 #define MODE_INDEX(mode) \
66 ((mode) == QImode ? 0 \
67 : (mode) == HImode ? 1 \
68 : (mode) == SImode ? 2 \
69 : (mode) == DImode ? 3 \
72 /* Processor costs (relative to an add) */
73 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
74 #define COSTS_N_BYTES(N) ((N) * 2)
76 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when tuning for size (-Os): every entry is measured in
   bytes via COSTS_N_BYTES rather than in cycles, so the optimizers compare
   instruction sizes instead of latencies.
   NOTE(review): this extract drops some original lines (the closing "};"
   and at least one field, e.g. the "large" insn entry, are not visible) —
   verify against the complete file.  */
79 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
/* String-operation algorithm tables — presumably memcpy then memset, each
   with 32-bit and 64-bit variants; confirm against struct processor_costs. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
/* Cost table for the original Intel 386.  Entries are in COSTS_N_INSNS
   units, i.e. latencies relative to an add.
   NOTE(review): this extract drops some original lines (the closing "};"
   is not visible) — verify against the complete file.  */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
/* String-operation algorithm tables — presumably memcpy then memset;
   DUMMY_STRINGOP_ALGS fills the (unused) 64-bit slot.  Confirm against
   struct processor_costs. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Intel 486.  Entries are in COSTS_N_INSNS units,
   i.e. latencies relative to an add.
   NOTE(review): this extract drops some original lines (the closing "};"
   is not visible) — verify against the complete file.  */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
/* String-operation algorithm tables — presumably memcpy then memset;
   confirm against struct processor_costs. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Intel Pentium.  Entries are in COSTS_N_INSNS units,
   i.e. latencies relative to an add.
   NOTE(review): this extract drops some original lines (the closing "};"
   is not visible) — verify against the complete file.  */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
/* String-operation algorithm tables — presumably memcpy then memset;
   confirm against struct processor_costs. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Intel PentiumPro / P6 family.  Entries are in
   COSTS_N_INSNS units, i.e. latencies relative to an add.
   NOTE(review): this extract drops some original lines (the closing "};"
   is not visible) — verify against the complete file.  */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.
   [NOTE(review): the rest of this comment was lost in this extract; the
   closing delimiter is restored here so the code parses.]  */
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
/* Cost table for the AMD Geode.  Entries are in COSTS_N_INSNS units,
   i.e. latencies relative to an add.
   NOTE(review): this extract drops some original lines (the closing "};"
   is not visible) — verify against the complete file.  */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
/* String-operation algorithm tables — presumably memcpy then memset;
   confirm against struct processor_costs. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
/* Cost table for the AMD K6.  Entries are in COSTS_N_INSNS units,
   i.e. latencies relative to an add.
   NOTE(review): this extract drops some original lines (the closing "};"
   is not visible) — verify against the complete file.  */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* String-operation algorithm tables — presumably memcpy then memset;
   confirm against struct processor_costs. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
/* Cost table for the AMD Athlon.  Entries are in COSTS_N_INSNS units,
   i.e. latencies relative to an add.
   NOTE(review): this extract drops some original lines (the closing "};"
   is not visible) — verify against the complete file.  */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
/* Cost table for the AMD K8 (Opteron/Athlon 64).  Entries are in
   COSTS_N_INSNS units, i.e. latencies relative to an add.
   NOTE(review): this extract drops some original lines (the closing "};"
   is not visible) — verify against the complete file.  */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
   [NOTE(review): the rest of this comment was lost in this extract; the
   closing delimiter is restored here so the code parses.]  */
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
/* Cost table for AMD Family 10h (Barcelona).  Entries are in
   COSTS_N_INSNS units, i.e. latencies relative to an add.
   NOTE(review): this extract drops some original lines (the closing "};"
   is not visible) — verify against the complete file.  */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
/* NOTE(review): the following latency notes lost their comment delimiters
   in this extract; re-wrapped here so the code parses:
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
   */
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
   [NOTE(review): the rest of this comment was lost in this extract; the
   closing delimiter is restored here so the code parses.]  */
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
/* Cost table for the Intel Pentium 4 (NetBurst).  Entries are in
   COSTS_N_INSNS units, i.e. latencies relative to an add.
   NOTE(review): this extract drops some original lines (the closing "};"
   is not visible) — verify against the complete file.  */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
/* NOTE(review): an initializer continuation line (likely the terminal
   {-1, ...} entry and closing braces) appears to be missing here in
   this extract — verify against the complete file. */
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
/* Cost table (relative to an add) for tuning for Nocona. */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
/* Cost table (relative to an add) for tuning for Core 2. */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
995 in SFmode, DFmode and XFmode */
996 2, /* cost of moving MMX register */
997 {6, 6}, /* cost of loading MMX registers
998 in SImode and DImode */
999 {4, 4}, /* cost of storing MMX registers
1000 in SImode and DImode */
1001 2, /* cost of moving SSE register */
1002 {6, 6, 6}, /* cost of loading SSE registers
1003 in SImode, DImode and TImode */
1004 {4, 4, 4}, /* cost of storing SSE registers
1005 in SImode, DImode and TImode */
1006 2, /* MMX or SSE register to integer */
1007 32, /* size of l1 cache. */
1008 2048, /* size of l2 cache. */
1009 128, /* size of prefetch block */
1010 8, /* number of parallel prefetches */
1011 3, /* Branch cost */
1012 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1013 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1014 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1015 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1016 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1017 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1018 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1019 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 {{libcall, {{8, loop}, {15, unrolled_loop},
1022 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1023 {libcall, {{24, loop}, {32, unrolled_loop},
1024 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1025 1, /* scalar_stmt_cost. */
1026 1, /* scalar load_cost. */
1027 1, /* scalar_store_cost. */
1028 1, /* vec_stmt_cost. */
1029 1, /* vec_to_scalar_cost. */
1030 1, /* scalar_to_vec_cost. */
1031 1, /* vec_align_load_cost. */
1032 2, /* vec_unalign_load_cost. */
1033 1, /* vec_store_cost. */
1034 3, /* cond_taken_branch_cost. */
1035 1, /* cond_not_taken_branch_cost. */
/* Cost table (relative to an add) for tuning for Intel Atom. */
1039 struct processor_costs atom_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1042 COSTS_N_INSNS (1), /* variable shift costs */
1043 COSTS_N_INSNS (1), /* constant shift costs */
1044 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1045 COSTS_N_INSNS (4), /* HI */
1046 COSTS_N_INSNS (3), /* SI */
1047 COSTS_N_INSNS (4), /* DI */
1048 COSTS_N_INSNS (2)}, /* other */
1049 0, /* cost of multiply per each bit set */
1050 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1051 COSTS_N_INSNS (26), /* HI */
1052 COSTS_N_INSNS (42), /* SI */
1053 COSTS_N_INSNS (74), /* DI */
1054 COSTS_N_INSNS (74)}, /* other */
1055 COSTS_N_INSNS (1), /* cost of movsx */
1056 COSTS_N_INSNS (1), /* cost of movzx */
1057 8, /* "large" insn */
1058 17, /* MOVE_RATIO */
1059 2, /* cost for loading QImode using movzbl */
1060 {4, 4, 4}, /* cost of loading integer registers
1061 in QImode, HImode and SImode.
1062 Relative to reg-reg move (2). */
1063 {4, 4, 4}, /* cost of storing integer registers */
1064 4, /* cost of reg,reg fld/fst */
1065 {12, 12, 12}, /* cost of loading fp registers
1066 in SFmode, DFmode and XFmode */
1067 {6, 6, 8}, /* cost of storing fp registers
1068 in SFmode, DFmode and XFmode */
1069 2, /* cost of moving MMX register */
1070 {8, 8}, /* cost of loading MMX registers
1071 in SImode and DImode */
1072 {8, 8}, /* cost of storing MMX registers
1073 in SImode and DImode */
1074 2, /* cost of moving SSE register */
1075 {8, 8, 8}, /* cost of loading SSE registers
1076 in SImode, DImode and TImode */
1077 {8, 8, 8}, /* cost of storing SSE registers
1078 in SImode, DImode and TImode */
1079 5, /* MMX or SSE register to integer */
1080 32, /* size of l1 cache. */
1081 256, /* size of l2 cache. */
1082 64, /* size of prefetch block */
1083 6, /* number of parallel prefetches */
1084 3, /* Branch cost */
1085 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1086 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1087 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1088 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1089 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1090 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1091 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1092 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1093 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1094 {{libcall, {{8, loop}, {15, unrolled_loop},
1095 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1096 {libcall, {{24, loop}, {32, unrolled_loop},
1097 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1111 /* Generic64 should produce code tuned for Nocona and K8. */
/* All entries below are relative to the cost of an add instruction. */
1113 struct processor_costs generic64_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 /* On all chips taken into consideration lea is 2 cycles and more. With
1116 this cost however our current implementation of synth_mult results in
1117 use of unnecessary temporary registers causing regression on several
1118 SPECfp benchmarks. */
1119 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1120 COSTS_N_INSNS (1), /* variable shift costs */
1121 COSTS_N_INSNS (1), /* constant shift costs */
1122 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1123 COSTS_N_INSNS (4), /* HI */
1124 COSTS_N_INSNS (3), /* SI */
1125 COSTS_N_INSNS (4), /* DI */
1126 COSTS_N_INSNS (2)}, /* other */
1127 0, /* cost of multiply per each bit set */
1128 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1129 COSTS_N_INSNS (26), /* HI */
1130 COSTS_N_INSNS (42), /* SI */
1131 COSTS_N_INSNS (74), /* DI */
1132 COSTS_N_INSNS (74)}, /* other */
1133 COSTS_N_INSNS (1), /* cost of movsx */
1134 COSTS_N_INSNS (1), /* cost of movzx */
1135 8, /* "large" insn */
1136 17, /* MOVE_RATIO */
1137 4, /* cost for loading QImode using movzbl */
1138 {4, 4, 4}, /* cost of loading integer registers
1139 in QImode, HImode and SImode.
1140 Relative to reg-reg move (2). */
1141 {4, 4, 4}, /* cost of storing integer registers */
1142 4, /* cost of reg,reg fld/fst */
1143 {12, 12, 12}, /* cost of loading fp registers
1144 in SFmode, DFmode and XFmode */
1145 {6, 6, 8}, /* cost of storing fp registers
1146 in SFmode, DFmode and XFmode */
1147 2, /* cost of moving MMX register */
1148 {8, 8}, /* cost of loading MMX registers
1149 in SImode and DImode */
1150 {8, 8}, /* cost of storing MMX registers
1151 in SImode and DImode */
1152 2, /* cost of moving SSE register */
1153 {8, 8, 8}, /* cost of loading SSE registers
1154 in SImode, DImode and TImode */
1155 {8, 8, 8}, /* cost of storing SSE registers
1156 in SImode, DImode and TImode */
1157 5, /* MMX or SSE register to integer */
1158 32, /* size of l1 cache. */
1159 512, /* size of l2 cache. */
1160 64, /* size of prefetch block */
1161 6, /* number of parallel prefetches */
1162 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1163 is increased to perhaps more appropriate value of 5. */
1164 3, /* Branch cost */
1165 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1166 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1167 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1168 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1169 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1170 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1171 {DUMMY_STRINGOP_ALGS,
1172 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1173 {DUMMY_STRINGOP_ALGS,
1174 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1175 1, /* scalar_stmt_cost. */
1176 1, /* scalar load_cost. */
1177 1, /* scalar_store_cost. */
1178 1, /* vec_stmt_cost. */
1179 1, /* vec_to_scalar_cost. */
1180 1, /* scalar_to_vec_cost. */
1181 1, /* vec_align_load_cost. */
1182 2, /* vec_unalign_load_cost. */
1183 1, /* vec_store_cost. */
1184 3, /* cond_taken_branch_cost. */
1185 1, /* cond_not_taken_branch_cost. */
1188 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* All entries below are relative to the cost of an add instruction. */
1190 struct processor_costs generic32_cost = {
1191 COSTS_N_INSNS (1), /* cost of an add instruction */
1192 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1193 COSTS_N_INSNS (1), /* variable shift costs */
1194 COSTS_N_INSNS (1), /* constant shift costs */
1195 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1196 COSTS_N_INSNS (4), /* HI */
1197 COSTS_N_INSNS (3), /* SI */
1198 COSTS_N_INSNS (4), /* DI */
1199 COSTS_N_INSNS (2)}, /* other */
1200 0, /* cost of multiply per each bit set */
1201 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1202 COSTS_N_INSNS (26), /* HI */
1203 COSTS_N_INSNS (42), /* SI */
1204 COSTS_N_INSNS (74), /* DI */
1205 COSTS_N_INSNS (74)}, /* other */
1206 COSTS_N_INSNS (1), /* cost of movsx */
1207 COSTS_N_INSNS (1), /* cost of movzx */
1208 8, /* "large" insn */
1209 17, /* MOVE_RATIO */
1210 4, /* cost for loading QImode using movzbl */
1211 {4, 4, 4}, /* cost of loading integer registers
1212 in QImode, HImode and SImode.
1213 Relative to reg-reg move (2). */
1214 {4, 4, 4}, /* cost of storing integer registers */
1215 4, /* cost of reg,reg fld/fst */
1216 {12, 12, 12}, /* cost of loading fp registers
1217 in SFmode, DFmode and XFmode */
1218 {6, 6, 8}, /* cost of storing fp registers
1219 in SFmode, DFmode and XFmode */
1220 2, /* cost of moving MMX register */
1221 {8, 8}, /* cost of loading MMX registers
1222 in SImode and DImode */
1223 {8, 8}, /* cost of storing MMX registers
1224 in SImode and DImode */
1225 2, /* cost of moving SSE register */
1226 {8, 8, 8}, /* cost of loading SSE registers
1227 in SImode, DImode and TImode */
1228 {8, 8, 8}, /* cost of storing SSE registers
1229 in SImode, DImode and TImode */
1230 5, /* MMX or SSE register to integer */
1231 32, /* size of l1 cache. */
1232 256, /* size of l2 cache. */
1233 64, /* size of prefetch block */
1234 6, /* number of parallel prefetches */
1235 3, /* Branch cost */
1236 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1237 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1238 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1239 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1240 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1241 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1242 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1243 DUMMY_STRINGOP_ALGS},
1244 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1245 DUMMY_STRINGOP_ALGS},
1246 1, /* scalar_stmt_cost. */
1247 1, /* scalar load_cost. */
1248 1, /* scalar_store_cost. */
1249 1, /* vec_stmt_cost. */
1250 1, /* vec_to_scalar_cost. */
1251 1, /* scalar_to_vec_cost. */
1252 1, /* vec_align_load_cost. */
1253 2, /* vec_unalign_load_cost. */
1254 1, /* vec_store_cost. */
1255 3, /* cond_taken_branch_cost. */
1256 1, /* cond_not_taken_branch_cost. */
/* The cost table in effect for the current target; defaults to
   pentium_cost (presumably reset when target options are processed —
   confirm against the option-override code). */
1259 const struct processor_costs *ix86_cost = &pentium_cost;
1261 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* value; OR-ed together below to form the
   per-feature tuning masks. */
1262 #define m_386 (1<<PROCESSOR_I386)
1263 #define m_486 (1<<PROCESSOR_I486)
1264 #define m_PENT (1<<PROCESSOR_PENTIUM)
1265 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1266 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1267 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1268 #define m_CORE2 (1<<PROCESSOR_CORE2)
1269 #define m_ATOM (1<<PROCESSOR_ATOM)
1271 #define m_GEODE (1<<PROCESSOR_GEODE)
1272 #define m_K6 (1<<PROCESSOR_K6)
1273 #define m_K6_GEODE (m_K6 | m_GEODE)
1274 #define m_K8 (1<<PROCESSOR_K8)
1275 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1276 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1277 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1278 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1280 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1281 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1283 /* Generic instruction choice should be common subset of supported CPUs
1284 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1285 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1287 /* Feature tests against the various tunings. */
1288 unsigned char ix86_tune_features[X86_TUNE_LAST];
1290 /* Feature tests against the various tunings used to create ix86_tune_features
1291 based on the processor mask. */
1292 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1293 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1294 negatively, so enabling for Generic64 seems like good code size
1295 tradeoff. We can't enable it for 32bit generic because it does not
1296 work well with PPro base chips. */
1297 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1299 /* X86_TUNE_PUSH_MEMORY */
1300 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1301 | m_NOCONA | m_CORE2 | m_GENERIC,
1303 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1306 /* X86_TUNE_UNROLL_STRLEN */
1307 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1308 | m_CORE2 | m_GENERIC,
1310 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1311 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1313 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1314 on simulation result. But after P4 was made, no performance benefit
1315 was observed with branch hints. It also increases the code size.
1316 As a result, icc never generates branch hints. */
1319 /* X86_TUNE_DOUBLE_WITH_ADD */
1322 /* X86_TUNE_USE_SAHF */
1323 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1324 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1327 partial dependencies. */
1328 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1329 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1331 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1332 register stalls on Generic32 compilation setting as well. However
1333 in current implementation the partial register stalls are not eliminated
1334 very well - they can be introduced via subregs synthesized by combine
1335 and can happen in caller/callee saving sequences. Because this option
1336 pays back little on PPro based chips and is in conflict with partial reg
1337 dependencies used by Athlon/P4 based chips, it is better to leave it off
1338 for generic32 for now. */
1341 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1342 m_CORE2 | m_GENERIC,
1344 /* X86_TUNE_USE_HIMODE_FIOP */
1345 m_386 | m_486 | m_K6_GEODE,
1347 /* X86_TUNE_USE_SIMODE_FIOP */
1348 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1350 /* X86_TUNE_USE_MOV0 */
1353 /* X86_TUNE_USE_CLTD */
1354 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1356 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1359 /* X86_TUNE_SPLIT_LONG_MOVES */
1362 /* X86_TUNE_READ_MODIFY_WRITE */
1365 /* X86_TUNE_READ_MODIFY */
1368 /* X86_TUNE_PROMOTE_QIMODE */
1369 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1370 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1372 /* X86_TUNE_FAST_PREFIX */
1373 ~(m_PENT | m_486 | m_386),
1375 /* X86_TUNE_SINGLE_STRINGOP */
1376 m_386 | m_PENT4 | m_NOCONA,
1378 /* X86_TUNE_QIMODE_MATH */
1381 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1382 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1383 might be considered for Generic32 if our scheme for avoiding partial
1384 stalls was more effective. */
1387 /* X86_TUNE_PROMOTE_QI_REGS */
1390 /* X86_TUNE_PROMOTE_HI_REGS */
1393 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1394 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1395 | m_CORE2 | m_GENERIC,
1397 /* X86_TUNE_ADD_ESP_8 */
1398 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1399 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_SUB_ESP_4 */
1402 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1405 /* X86_TUNE_SUB_ESP_8 */
1406 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1407 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1409 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1410 for DFmode copies */
1411 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1412 | m_GENERIC | m_GEODE),
1414 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1415 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1417 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1418 conflict here in between PPro/Pentium4 based chips that treat 128bit
1419 SSE registers as single units versus K8 based chips that divide SSE
1420 registers to two 64bit halves. This knob promotes all store destinations
1421 to be 128bit to allow register renaming on 128bit SSE units, but usually
1422 results in one extra microop on 64bit SSE units. Experimental results
1423 show that disabling this option on P4 brings over 20% SPECfp regression,
1424 while enabling it on K8 brings roughly 2.4% regression that can be partly
1425 masked by careful scheduling of moves. */
1426 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1429 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1432 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1433 are resolved on SSE register parts instead of whole registers, so we may
1434 maintain just lower part of scalar values in proper format leaving the
1435 upper part undefined. */
1438 /* X86_TUNE_SSE_TYPELESS_STORES */
1441 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1442 m_PPRO | m_PENT4 | m_NOCONA,
1444 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1445 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1447 /* X86_TUNE_PROLOGUE_USING_MOVE */
1448 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1450 /* X86_TUNE_EPILOGUE_USING_MOVE */
1451 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1453 /* X86_TUNE_SHIFT1 */
1456 /* X86_TUNE_USE_FFREEP */
1459 /* X86_TUNE_INTER_UNIT_MOVES */
1460 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1462 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1465 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1466 than 4 branch instructions in the 16 byte window. */
1467 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1470 /* X86_TUNE_SCHEDULE */
1471 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1474 /* X86_TUNE_USE_BT */
1475 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1477 /* X86_TUNE_USE_INCDEC */
1478 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1480 /* X86_TUNE_PAD_RETURNS */
1481 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1483 /* X86_TUNE_EXT_80387_CONSTANTS */
1484 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_SHORTEN_X87_SSE */
1490 /* X86_TUNE_AVOID_VECTOR_DECODE */
1493 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1494 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1497 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1498 vector path on AMD machines. */
1499 m_K8 | m_GENERIC64 | m_AMDFAM10,
1501 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1503 m_K8 | m_GENERIC64 | m_AMDFAM10,
1505 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1509 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1510 but one byte longer. */
1513 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1514 operand that cannot be represented using a modRM byte. The XOR
1515 replacement is long decoded, so this split helps here as well. */
1518 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1520 m_AMDFAM10 | m_GENERIC,
1522 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1523 from integer to FP. */
1526 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1527 with a subsequent conditional jump instruction into a single
1528 compare-and-branch uop. */
1531 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1532 will impact LEA instruction selection. */
1536 /* Feature tests against the various architecture variations. */
1537 unsigned char ix86_arch_features[X86_ARCH_LAST];
1539 /* Feature tests against the various architecture variations, used to create
1540 ix86_arch_features based on the processor mask. */
1541 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1542 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1543 ~(m_386 | m_486 | m_PENT | m_K6),
1545 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1548 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1551 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1554 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Processors for which outgoing arguments are accumulated rather than
   pushed individually. */
1558 static const unsigned int x86_accumulate_outgoing_args
1559 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
/* Processors on which x87 ("fancy") math instructions are always
   available — presumably consulted when deciding on 387 codegen;
   confirm against uses elsewhere in the file. */
1562 static const unsigned int x86_arch_always_fancy_math_387
1563 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1564 | m_NOCONA | m_CORE2 | m_GENERIC;
/* String-operation strategy override; no_stringop means no strategy has
   been forced (presumably settable via a -mstringop-strategy option —
   confirm against option handling). */
1566 static enum stringop_alg stringop_alg = no_stringop;
1568 /* In case the average insn count for single function invocation is
1569 lower than this constant, emit fast (but longer) prologue and epilogue. */
1571 #define FAST_PROLOGUE_INSN_COUNT 20
1573 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1574 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1575 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1576 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1578 /* Array of the smallest class containing reg number REGNO, indexed by
1579 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1581 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1583 /* ax, dx, cx, bx */
1584 AREG, DREG, CREG, BREG,
1585 /* si, di, bp, sp */
1586 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* FP stack registers.  */
1588 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1589 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1592 /* flags, fpsr, fpcr, frame */
1593 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* SSE registers.  */
1595 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* MMX registers.  */
1598 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
/* REX integer registers, presumably r8-r15 — confirm against
   the register ordering in i386.h.  */
1601 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 /* SSE REX registers */
1604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1608 /* The "default" register map used in 32bit mode. */
/* Maps GCC hard register numbers to debug-info register numbers;
   -1 marks registers with no assigned debug number here. */
1610 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1612 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1613 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1614 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1615 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1616 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1617 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1621 /* The "default" register map used in 64bit mode. */
/* Unlike the 32-bit map, the extended (REX) integer and SSE registers
   have debug numbers here; -1 still marks unmapped registers. */
1623 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1625 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1626 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1627 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1628 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1629 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1630 8,9,10,11,12,13,14,15, /* extended integer registers */
1631 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1634 /* Define the register numbers to be used in Dwarf debugging information.
1635 The SVR4 reference port C compiler uses the following register numbers
1636 in its Dwarf output code:
1637 0 for %eax (gcc regno = 0)
1638 1 for %ecx (gcc regno = 2)
1639 2 for %edx (gcc regno = 1)
1640 3 for %ebx (gcc regno = 3)
1641 4 for %esp (gcc regno = 7)
1642 5 for %ebp (gcc regno = 6)
1643 6 for %esi (gcc regno = 4)
1644 7 for %edi (gcc regno = 5)
1645 The following three DWARF register numbers are never generated by
1646 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1647 believes these numbers have these meanings.
1648 8 for %eip (no gcc equivalent)
1649 9 for %eflags (gcc regno = 17)
1650 10 for %trapno (no gcc equivalent)
1651 It is not at all clear how we should number the FP stack registers
1652 for the x86 architecture. If the version of SDB on x86/svr4 were
1653 a bit less brain dead with respect to floating-point then we would
1654 have a precedent to follow with respect to DWARF register numbers
1655 for x86 FP registers, but the SDB on x86/svr4 is so completely
1656 broken with respect to FP registers that it is hardly worth thinking
1657 of it as something to strive for compatibility with.
1658 The version of x86/svr4 SDB I have at the moment does (partially)
1659 seem to believe that DWARF register number 11 is associated with
1660 the x86 register %st(0), but that's about all. Higher DWARF
1661 register numbers don't seem to be associated with anything in
1662 particular, and even for DWARF regno 11, SDB only seems to under-
1663 stand that it should say that a variable lives in %st(0) (when
1664 asked via an `=' command) if we said it was in DWARF regno 11,
1665 but SDB still prints garbage when asked for the value of the
1666 variable in question (via a `/' command).
1667 (Also note that the labels SDB prints for various FP stack regs
1668 when doing an `x' command are all wrong.)
1669 Note that these problems generally don't affect the native SVR4
1670 C compiler because it doesn't allow the use of -O with -g and
1671 because when it is *not* optimizing, it allocates a memory
1672 location for each floating-point variable, and the memory
1673 location is what gets described in the DWARF AT_location
1674 attribute for the variable in question.
1675 Regardless of the severe mental illness of the x86/svr4 SDB, we
1676 do something sensible here and we use the following DWARF
1677 register numbers. Note that these are all stack-top-relative
1679 11 for %st(0) (gcc regno = 8)
1680 12 for %st(1) (gcc regno = 9)
1681 13 for %st(2) (gcc regno = 10)
1682 14 for %st(3) (gcc regno = 11)
1683 15 for %st(4) (gcc regno = 12)
1684 16 for %st(5) (gcc regno = 13)
1685 17 for %st(6) (gcc regno = 14)
1686 18 for %st(7) (gcc regno = 15)
/* Map GCC hard register numbers to SVR4/DWARF debug register numbers,
   using the stack-top-relative FP numbering described above.
   -1 marks a register with no corresponding debug number.  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */
rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
/* Define parameter passing and return registers.  */
/* Integer argument registers for the SysV x86-64 calling convention,
   in argument order.  */
static int const x86_64_int_parameter_registers[6] =
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
/* Integer argument registers for the Microsoft x86-64 calling
   convention, in argument order.  */
static int const x86_64_ms_abi_int_parameter_registers[4] =
  CX_REG, DX_REG, R8_REG, R9_REG
/* Registers used for returning integer values.  */
static int const x86_64_int_return_registers[4] =
  AX_REG, DX_REG, DI_REG, SI_REG
/* Define the structure for the machine field in struct function.  */
/* One entry in a per-function list of reusable stack slots,
   chained through NEXT.  */
struct GTY(()) stack_local_entry {
  unsigned short mode;
  struct stack_local_entry *next;
/* Structure describing stack frame layout.
   Stack grows downward:
   saved frame pointer if frame_pointer_needed
				<- HARD_FRAME_POINTER
   [va_arg registers] (
   > to_allocate	<- FRAME_POINTER
  HOST_WIDE_INT frame;
  int outgoing_arguments_size;
  /* Total size the prologue must subtract from the stack pointer.  */
  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
/* Code model option.  */
enum cmodel ix86_cmodel;
/* Assembler dialect (AT&T vs. Intel syntax); defaults to AT&T.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS access dialect; defaults to the GNU sequences.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;
/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;
/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;
/* ix86_regparm_string as a number */
static int ix86_regparm;
/* -mstackrealign option */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
/* Insn-generator callbacks; each returns RTL for the named operation.
   NOTE(review): presumably assigned to word-size-appropriate gen_*
   patterns during option override -- confirm where they are set.  */
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_pop1) (rtx);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;
/* Alignment for incoming stack boundary in bits specified at
   the command line.  */
static unsigned int ix86_user_incoming_stack_boundary;
/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;
/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;
/* The abi used by target.  */
enum calling_abi ix86_abi;
/* Values 1-5: see jump.c */
int ix86_branch_cost;
/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;
/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */
int ix86_section_threshold = 65536;
/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Fence to use after loop using movnt.  */
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_COMPLEX_X87_CLASS,
/* NOTE(review): presumably the maximum number of 64-bit chunks an
   argument may be classified into -- confirm against the classifier.  */
#define MAX_CLASSES 4
/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Set once ext_80387_constants_table has been filled in.  */
static bool ext_80387_constants_init = 0;
/* Forward declarations of local helpers defined later in this file.  */
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
static void ix86_add_new_builtins (int);
/* Indices naming the per-function target option strings
   (arch, tune, fpmath) saved for attribute(target) handling.  */
enum ix86_function_specific_strings
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_FPMATH,
  IX86_FUNCTION_SPECIFIC_MAX
static char *ix86_target_string (int, int, const char *, const char *,
				 const char *, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static enum calling_abi ix86_function_abi (const_tree);
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
/* Bit flags that specify the ISA we are compiling for.  */
int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
/* A mask of ix86_isa_flags that includes bit X if X
   was set or cleared on the command line.  */
static int ix86_isa_flags_explicit;
/* Define a set of ISAs which are available when a given ISA is
   enabled.  MMX and SSE ISAs are handled separately.  Each FOO_SET
   mask is the transitive closure of everything FOO implies.  */
#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
#define OPTION_MASK_ISA_3DNOW_SET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
#define OPTION_MASK_ISA_SSE2_SET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
#define OPTION_MASK_ISA_SSE3_SET \
  (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
#define OPTION_MASK_ISA_SSSE3_SET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
#define OPTION_MASK_ISA_SSE4_1_SET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
#define OPTION_MASK_ISA_SSE4_2_SET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
#define OPTION_MASK_ISA_AVX_SET \
  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
#define OPTION_MASK_ISA_FMA_SET \
  (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
/* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
   as -msse4.2.  */
#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
#define OPTION_MASK_ISA_SSE4A_SET \
  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
#define OPTION_MASK_ISA_SSE5_SET \
  (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
/* AES and PCLMUL need SSE2 because they use xmm registers */
#define OPTION_MASK_ISA_AES_SET \
  (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
#define OPTION_MASK_ISA_PCLMUL_SET \
  (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
/* ABM also enables POPCNT.  */
#define OPTION_MASK_ISA_ABM_SET \
  (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
/* Define a set of ISAs which aren't available when a given ISA is
   disabled.  MMX and SSE ISAs are handled separately.  Each FOO_UNSET
   mask covers FOO plus everything that implies FOO.  */
#define OPTION_MASK_ISA_MMX_UNSET \
  (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
#define OPTION_MASK_ISA_3DNOW_UNSET \
  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
#define OPTION_MASK_ISA_SSE_UNSET \
  (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
#define OPTION_MASK_ISA_SSE2_UNSET \
  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
#define OPTION_MASK_ISA_SSE3_UNSET \
  (OPTION_MASK_ISA_SSE3 \
   | OPTION_MASK_ISA_SSSE3_UNSET \
   | OPTION_MASK_ISA_SSE4A_UNSET )
#define OPTION_MASK_ISA_SSSE3_UNSET \
  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE4_1_UNSET \
  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
#define OPTION_MASK_ISA_SSE4_2_UNSET \
  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
#define OPTION_MASK_ISA_AVX_UNSET \
  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
   as -mno-sse4.1.  */
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
#define OPTION_MASK_ISA_SSE4A_UNSET \
  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
#define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
/* Vectorization library interface and handlers.  */
tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
  const struct processor_costs *cost;		/* Processor costs */
  const int align_loop;				/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
/* Per-processor costs and alignments; row order must match
   enum processor_type.  Columns: cost table, loop align, loop max
   skip, jump align, jump max skip, function align.  */
static const struct ptt processor_target_table[PROCESSOR_max] =
  {&i386_cost, 4, 3, 4, 3, 4},
  {&i486_cost, 16, 15, 16, 15, 16},
  {&pentium_cost, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 16, 15, 16, 10, 16},
  {&geode_cost, 0, 0, 0, 0, 0},
  {&k6_cost, 32, 7, 32, 7, 32},
  {&athlon_cost, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0},
  {&k8_cost, 16, 7, 16, 7, 16},
  {&nocona_cost, 0, 0, 0, 0, 0},
  {&core2_cost, 16, 10, 16, 10, 16},
  {&generic32_cost, 16, 7, 16, 7, 16},
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&amdfam10_cost, 32, 24, 32, 7, 32},
  {&atom_cost, 16, 7, 16, 7, 16}
/* Printable processor names, indexed by TARGET_CPU_DEFAULT value.  */
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* Implement TARGET_HANDLE_OPTION.

   For each -mFOO / -mno-FOO option pair: enabling ORs in the FOO_SET
   closure mask (FOO plus everything it implies); disabling clears the
   FOO_UNSET closure mask (FOO plus everything that implies it).  In
   both cases the touched bits are recorded in ix86_isa_flags_explicit
   so that later -march defaults do not override an explicit choice.  */
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
      ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
      ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
      ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
      ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2333 /* Return a string the documents the current -m options. The caller is
2334 responsible for freeing the string. */
2337 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2338 const char *fpmath, bool add_nl_p)
2340 struct ix86_target_opts
2342 const char *option; /* option string */
2343 int mask; /* isa mask options */
2346 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2347 preceding options while match those first. */
2348 static struct ix86_target_opts isa_opts[] =
2350 { "-m64", OPTION_MASK_ISA_64BIT },
2351 { "-msse5", OPTION_MASK_ISA_SSE5 },
2352 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2353 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2354 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2355 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2356 { "-msse3", OPTION_MASK_ISA_SSE3 },
2357 { "-msse2", OPTION_MASK_ISA_SSE2 },
2358 { "-msse", OPTION_MASK_ISA_SSE },
2359 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2360 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2361 { "-mmmx", OPTION_MASK_ISA_MMX },
2362 { "-mabm", OPTION_MASK_ISA_ABM },
2363 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2364 { "-maes", OPTION_MASK_ISA_AES },
2365 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2369 static struct ix86_target_opts flag_opts[] =
2371 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2372 { "-m80387", MASK_80387 },
2373 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2374 { "-malign-double", MASK_ALIGN_DOUBLE },
2375 { "-mcld", MASK_CLD },
2376 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2377 { "-mieee-fp", MASK_IEEE_FP },
2378 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2379 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2380 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2381 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2382 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2383 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2384 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2385 { "-mno-red-zone", MASK_NO_RED_ZONE },
2386 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2387 { "-mrecip", MASK_RECIP },
2388 { "-mrtd", MASK_RTD },
2389 { "-msseregparm", MASK_SSEREGPARM },
2390 { "-mstack-arg-probe", MASK_STACK_PROBE },
2391 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2394 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2397 char target_other[40];
2406 memset (opts, '\0', sizeof (opts));
2408 /* Add -march= option. */
2411 opts[num][0] = "-march=";
2412 opts[num++][1] = arch;
2415 /* Add -mtune= option. */
2418 opts[num][0] = "-mtune=";
2419 opts[num++][1] = tune;
2422 /* Pick out the options in isa options. */
2423 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2425 if ((isa & isa_opts[i].mask) != 0)
2427 opts[num++][0] = isa_opts[i].option;
2428 isa &= ~ isa_opts[i].mask;
2432 if (isa && add_nl_p)
2434 opts[num++][0] = isa_other;
2435 sprintf (isa_other, "(other isa: 0x%x)", isa);
2438 /* Add flag options. */
2439 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2441 if ((flags & flag_opts[i].mask) != 0)
2443 opts[num++][0] = flag_opts[i].option;
2444 flags &= ~ flag_opts[i].mask;
2448 if (flags && add_nl_p)
2450 opts[num++][0] = target_other;
2451 sprintf (target_other, "(other flags: 0x%x)", isa);
2454 /* Add -fpmath= option. */
2457 opts[num][0] = "-mfpmath=";
2458 opts[num++][1] = fpmath;
2465 gcc_assert (num < ARRAY_SIZE (opts));
2467 /* Size the string. */
2469 sep_len = (add_nl_p) ? 3 : 1;
2470 for (i = 0; i < num; i++)
2473 for (j = 0; j < 2; j++)
2475 len += strlen (opts[i][j]);
2478 /* Build the string. */
2479 ret = ptr = (char *) xmalloc (len);
2482 for (i = 0; i < num; i++)
2486 for (j = 0; j < 2; j++)
2487 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2494 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2502 for (j = 0; j < 2; j++)
2505 memcpy (ptr, opts[i][j], len2[j]);
2507 line_len += len2[j];
2512 gcc_assert (ret + len >= ptr);
/* Function that is callable from the debugger to print the current
   -m options (via ix86_target_string) to stderr.  */
ix86_debug_options (void)
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath_string, true);
      fprintf (stderr, "%s\n\n", opts);
      fprintf (stderr, "<no options>\n\n");
2537 /* Sometimes certain combinations of command options do not make
2538 sense on a particular target machine. You can define a macro
2539 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2540 defined, is executed once just after all the command options have
2543 Don't use this macro to turn on various extra optimizations for
2544 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2547 override_options (bool main_args_p)
2550 unsigned int ix86_arch_mask, ix86_tune_mask;
2555 /* Comes from final.c -- no real reason to change it. */
2556 #define MAX_CODE_ALIGN 16
2564 PTA_PREFETCH_SSE = 1 << 4,
2566 PTA_3DNOW_A = 1 << 6,
2570 PTA_POPCNT = 1 << 10,
2572 PTA_SSE4A = 1 << 12,
2573 PTA_NO_SAHF = 1 << 13,
2574 PTA_SSE4_1 = 1 << 14,
2575 PTA_SSE4_2 = 1 << 15,
2578 PTA_PCLMUL = 1 << 18,
2585 const char *const name; /* processor name or nickname. */
2586 const enum processor_type processor;
2587 const enum attr_cpu schedule;
2588 const unsigned /*enum pta_flags*/ flags;
2590 const processor_alias_table[] =
2592 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2593 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2594 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2595 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2596 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2597 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2598 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2599 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2600 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2601 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2602 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2603 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2604 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2606 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2608 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2609 PTA_MMX | PTA_SSE | PTA_SSE2},
2610 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2611 PTA_MMX |PTA_SSE | PTA_SSE2},
2612 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2613 PTA_MMX | PTA_SSE | PTA_SSE2},
2614 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2615 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2616 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2617 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2618 | PTA_CX16 | PTA_NO_SAHF},
2619 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2620 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2621 | PTA_SSSE3 | PTA_CX16},
2622 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2623 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2624 | PTA_SSSE3 | PTA_CX16},
2625 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2626 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2627 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2628 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2629 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2630 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2631 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2632 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2633 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2634 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2635 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2636 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2637 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2638 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2639 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2640 {"x86-64", PROCESSOR_K8, CPU_K8,
2641 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2642 {"k8", PROCESSOR_K8, CPU_K8,
2643 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2644 | PTA_SSE2 | PTA_NO_SAHF},
2645 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2646 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2647 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2648 {"opteron", PROCESSOR_K8, CPU_K8,
2649 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2650 | PTA_SSE2 | PTA_NO_SAHF},
2651 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2652 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2653 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2654 {"athlon64", PROCESSOR_K8, CPU_K8,
2655 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2656 | PTA_SSE2 | PTA_NO_SAHF},
2657 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2658 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2659 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2660 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2661 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2662 | PTA_SSE2 | PTA_NO_SAHF},
2663 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2664 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2665 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2666 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2667 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2668 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2669 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2670 0 /* flags are only used for -march switch. */ },
2671 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2672 PTA_64BIT /* flags are only used for -march switch. */ },
2675 int const pta_size = ARRAY_SIZE (processor_alias_table);
2677 /* Set up prefix/suffix so the error messages refer to either the command
2678 line argument, or the attribute(target). */
2687 prefix = "option(\"";
2692 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2693 SUBTARGET_OVERRIDE_OPTIONS;
2696 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2697 SUBSUBTARGET_OVERRIDE_OPTIONS;
2700 /* -fPIC is the default for x86_64. */
2701 if (TARGET_MACHO && TARGET_64BIT)
2704 /* Set the default values for switches whose default depends on TARGET_64BIT
2705 in case they weren't overwritten by command line options. */
2708 /* Mach-O doesn't support omitting the frame pointer for now. */
2709 if (flag_omit_frame_pointer == 2)
2710 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2711 if (flag_asynchronous_unwind_tables == 2)
2712 flag_asynchronous_unwind_tables = 1;
2713 if (flag_pcc_struct_return == 2)
2714 flag_pcc_struct_return = 0;
2718 if (flag_omit_frame_pointer == 2)
2719 flag_omit_frame_pointer = 0;
2720 if (flag_asynchronous_unwind_tables == 2)
2721 flag_asynchronous_unwind_tables = 0;
2722 if (flag_pcc_struct_return == 2)
2723 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2726 /* Need to check -mtune=generic first. */
2727 if (ix86_tune_string)
2729 if (!strcmp (ix86_tune_string, "generic")
2730 || !strcmp (ix86_tune_string, "i686")
2731 /* As special support for cross compilers we read -mtune=native
2732 as -mtune=generic. With native compilers we won't see the
2733 -mtune=native, as it was changed by the driver. */
2734 || !strcmp (ix86_tune_string, "native"))
2737 ix86_tune_string = "generic64";
2739 ix86_tune_string = "generic32";
2741 /* If this call is for setting the option attribute, allow the
2742 generic32/generic64 that was previously set. */
2743 else if (!main_args_p
2744 && (!strcmp (ix86_tune_string, "generic32")
2745 || !strcmp (ix86_tune_string, "generic64")))
2747 else if (!strncmp (ix86_tune_string, "generic", 7))
2748 error ("bad value (%s) for %stune=%s %s",
2749 ix86_tune_string, prefix, suffix, sw);
2753 if (ix86_arch_string)
2754 ix86_tune_string = ix86_arch_string;
2755 if (!ix86_tune_string)
2757 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2758 ix86_tune_defaulted = 1;
2761 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2762 need to use a sensible tune option. */
2763 if (!strcmp (ix86_tune_string, "generic")
2764 || !strcmp (ix86_tune_string, "x86-64")
2765 || !strcmp (ix86_tune_string, "i686"))
2768 ix86_tune_string = "generic64";
2770 ix86_tune_string = "generic32";
2773 if (ix86_stringop_string)
2775 if (!strcmp (ix86_stringop_string, "rep_byte"))
2776 stringop_alg = rep_prefix_1_byte;
2777 else if (!strcmp (ix86_stringop_string, "libcall"))
2778 stringop_alg = libcall;
2779 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2780 stringop_alg = rep_prefix_4_byte;
2781 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2783 /* rep; movq isn't available in 32-bit code. */
2784 stringop_alg = rep_prefix_8_byte;
2785 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2786 stringop_alg = loop_1_byte;
2787 else if (!strcmp (ix86_stringop_string, "loop"))
2788 stringop_alg = loop;
2789 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2790 stringop_alg = unrolled_loop;
2792 error ("bad value (%s) for %sstringop-strategy=%s %s",
2793 ix86_stringop_string, prefix, suffix, sw);
2795 if (!strcmp (ix86_tune_string, "x86-64"))
2796 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2797 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2798 prefix, suffix, prefix, suffix, prefix, suffix);
2800 if (!ix86_arch_string)
2801 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2803 ix86_arch_specified = 1;
2805 if (!strcmp (ix86_arch_string, "generic"))
2806 error ("generic CPU can be used only for %stune=%s %s",
2807 prefix, suffix, sw);
2808 if (!strncmp (ix86_arch_string, "generic", 7))
2809 error ("bad value (%s) for %sarch=%s %s",
2810 ix86_arch_string, prefix, suffix, sw);
2812 /* Validate -mabi= value. */
2813 if (ix86_abi_string)
2815 if (strcmp (ix86_abi_string, "sysv") == 0)
2816 ix86_abi = SYSV_ABI;
2817 else if (strcmp (ix86_abi_string, "ms") == 0)
2820 error ("unknown ABI (%s) for %sabi=%s %s",
2821 ix86_abi_string, prefix, suffix, sw);
2824 ix86_abi = DEFAULT_ABI;
2826 if (ix86_cmodel_string != 0)
2828 if (!strcmp (ix86_cmodel_string, "small"))
2829 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2830 else if (!strcmp (ix86_cmodel_string, "medium"))
2831 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2832 else if (!strcmp (ix86_cmodel_string, "large"))
2833 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2835 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2836 else if (!strcmp (ix86_cmodel_string, "32"))
2837 ix86_cmodel = CM_32;
2838 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2839 ix86_cmodel = CM_KERNEL;
2841 error ("bad value (%s) for %scmodel=%s %s",
2842 ix86_cmodel_string, prefix, suffix, sw);
2846 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2847 use of rip-relative addressing. This eliminates fixups that
2848 would otherwise be needed if this object is to be placed in a
2849 DLL, and is essentially just as efficient as direct addressing. */
2850 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2851 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2852 else if (TARGET_64BIT)
2853 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2855 ix86_cmodel = CM_32;
2857 if (ix86_asm_string != 0)
2860 && !strcmp (ix86_asm_string, "intel"))
2861 ix86_asm_dialect = ASM_INTEL;
2862 else if (!strcmp (ix86_asm_string, "att"))
2863 ix86_asm_dialect = ASM_ATT;
2865 error ("bad value (%s) for %sasm=%s %s",
2866 ix86_asm_string, prefix, suffix, sw);
2868 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2869 error ("code model %qs not supported in the %s bit mode",
2870 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2871 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2872 sorry ("%i-bit mode not compiled in",
2873 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2875 for (i = 0; i < pta_size; i++)
2876 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2878 ix86_schedule = processor_alias_table[i].schedule;
2879 ix86_arch = processor_alias_table[i].processor;
2880 /* Default cpu tuning to the architecture. */
2881 ix86_tune = ix86_arch;
2883 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2884 error ("CPU you selected does not support x86-64 "
2887 if (processor_alias_table[i].flags & PTA_MMX
2888 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2889 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2890 if (processor_alias_table[i].flags & PTA_3DNOW
2891 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2892 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2893 if (processor_alias_table[i].flags & PTA_3DNOW_A
2894 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2895 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2896 if (processor_alias_table[i].flags & PTA_SSE
2897 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2898 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2899 if (processor_alias_table[i].flags & PTA_SSE2
2900 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2901 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2902 if (processor_alias_table[i].flags & PTA_SSE3
2903 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2904 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2905 if (processor_alias_table[i].flags & PTA_SSSE3
2906 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2907 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2908 if (processor_alias_table[i].flags & PTA_SSE4_1
2909 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2910 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2911 if (processor_alias_table[i].flags & PTA_SSE4_2
2912 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2913 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2914 if (processor_alias_table[i].flags & PTA_AVX
2915 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2916 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2917 if (processor_alias_table[i].flags & PTA_FMA
2918 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2919 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2920 if (processor_alias_table[i].flags & PTA_SSE4A
2921 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2922 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2923 if (processor_alias_table[i].flags & PTA_SSE5
2924 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2925 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2926 if (processor_alias_table[i].flags & PTA_ABM
2927 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2928 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2929 if (processor_alias_table[i].flags & PTA_CX16
2930 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2931 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2932 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2933 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2934 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2935 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2936 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2937 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2938 if (processor_alias_table[i].flags & PTA_AES
2939 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2940 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2941 if (processor_alias_table[i].flags & PTA_PCLMUL
2942 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2943 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2944 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2945 x86_prefetch_sse = true;
2951 error ("bad value (%s) for %sarch=%s %s",
2952 ix86_arch_string, prefix, suffix, sw);
2954 ix86_arch_mask = 1u << ix86_arch;
2955 for (i = 0; i < X86_ARCH_LAST; ++i)
2956 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2958 for (i = 0; i < pta_size; i++)
2959 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2961 ix86_schedule = processor_alias_table[i].schedule;
2962 ix86_tune = processor_alias_table[i].processor;
2963 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2965 if (ix86_tune_defaulted)
2967 ix86_tune_string = "x86-64";
2968 for (i = 0; i < pta_size; i++)
2969 if (! strcmp (ix86_tune_string,
2970 processor_alias_table[i].name))
2972 ix86_schedule = processor_alias_table[i].schedule;
2973 ix86_tune = processor_alias_table[i].processor;
2976 error ("CPU you selected does not support x86-64 "
2979 /* Intel CPUs have always interpreted SSE prefetch instructions as
2980 NOPs; so, we can enable SSE prefetch instructions even when
2981 -mtune (rather than -march) points us to a processor that has them.
2982 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2983 higher processors. */
2985 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2986 x86_prefetch_sse = true;
2990 error ("bad value (%s) for %stune=%s %s",
2991 ix86_tune_string, prefix, suffix, sw);
2993 ix86_tune_mask = 1u << ix86_tune;
2994 for (i = 0; i < X86_TUNE_LAST; ++i)
2995 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2998 ix86_cost = &ix86_size_cost;
3000 ix86_cost = processor_target_table[ix86_tune].cost;
3002 /* Arrange to set up i386_stack_locals for all functions. */
3003 init_machine_status = ix86_init_machine_status;
3005 /* Validate -mregparm= value. */
3006 if (ix86_regparm_string)
3009 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3010 i = atoi (ix86_regparm_string);
3011 if (i < 0 || i > REGPARM_MAX)
3012 error ("%sregparm=%d%s is not between 0 and %d",
3013 prefix, i, suffix, REGPARM_MAX);
3018 ix86_regparm = REGPARM_MAX;
3020 /* If the user has provided any of the -malign-* options,
3021 warn and use that value only if -falign-* is not set.
3022 Remove this code in GCC 3.2 or later. */
3023 if (ix86_align_loops_string)
3025 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3026 prefix, suffix, suffix);
3027 if (align_loops == 0)
3029 i = atoi (ix86_align_loops_string);
3030 if (i < 0 || i > MAX_CODE_ALIGN)
3031 error ("%salign-loops=%d%s is not between 0 and %d",
3032 prefix, i, suffix, MAX_CODE_ALIGN);
3034 align_loops = 1 << i;
3038 if (ix86_align_jumps_string)
3040 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3041 prefix, suffix, suffix);
3042 if (align_jumps == 0)
3044 i = atoi (ix86_align_jumps_string);
3045 if (i < 0 || i > MAX_CODE_ALIGN)
3046 error ("%salign-loops=%d%s is not between 0 and %d",
3047 prefix, i, suffix, MAX_CODE_ALIGN);
3049 align_jumps = 1 << i;
3053 if (ix86_align_funcs_string)
3055 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3056 prefix, suffix, suffix);
3057 if (align_functions == 0)
3059 i = atoi (ix86_align_funcs_string);
3060 if (i < 0 || i > MAX_CODE_ALIGN)
3061 error ("%salign-loops=%d%s is not between 0 and %d",
3062 prefix, i, suffix, MAX_CODE_ALIGN);
3064 align_functions = 1 << i;
3068 /* Default align_* from the processor table. */
3069 if (align_loops == 0)
3071 align_loops = processor_target_table[ix86_tune].align_loop;
3072 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3074 if (align_jumps == 0)
3076 align_jumps = processor_target_table[ix86_tune].align_jump;
3077 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3079 if (align_functions == 0)
3081 align_functions = processor_target_table[ix86_tune].align_func;
3084 /* Validate -mbranch-cost= value, or provide default. */
3085 ix86_branch_cost = ix86_cost->branch_cost;
3086 if (ix86_branch_cost_string)
3088 i = atoi (ix86_branch_cost_string);
3090 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3092 ix86_branch_cost = i;
3094 if (ix86_section_threshold_string)
3096 i = atoi (ix86_section_threshold_string);
3098 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3100 ix86_section_threshold = i;
3103 if (ix86_tls_dialect_string)
3105 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3106 ix86_tls_dialect = TLS_DIALECT_GNU;
3107 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3108 ix86_tls_dialect = TLS_DIALECT_GNU2;
3109 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3110 ix86_tls_dialect = TLS_DIALECT_SUN;
3112 error ("bad value (%s) for %stls-dialect=%s %s",
3113 ix86_tls_dialect_string, prefix, suffix, sw);
3116 if (ix87_precision_string)
3118 i = atoi (ix87_precision_string);
3119 if (i != 32 && i != 64 && i != 80)
3120 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3125 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3127 /* Enable by default the SSE and MMX builtins. Do allow the user to
3128 explicitly disable any of these. In particular, disabling SSE and
3129 MMX for kernel code is extremely useful. */
3130 if (!ix86_arch_specified)
3132 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3133 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3136 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3140 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3142 if (!ix86_arch_specified)
3144 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3146 /* i386 ABI does not specify red zone. It still makes sense to use it
3147 when programmer takes care to stack from being destroyed. */
3148 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3149 target_flags |= MASK_NO_RED_ZONE;
3152 /* Keep nonleaf frame pointers. */
3153 if (flag_omit_frame_pointer)
3154 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3155 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3156 flag_omit_frame_pointer = 1;
3158 /* If we're doing fast math, we don't care about comparison order
3159 wrt NaNs. This lets us use a shorter comparison sequence. */
3160 if (flag_finite_math_only)
3161 target_flags &= ~MASK_IEEE_FP;
3163 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3164 since the insns won't need emulation. */
3165 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3166 target_flags &= ~MASK_NO_FANCY_MATH_387;
3168 /* Likewise, if the target doesn't have a 387, or we've specified
3169 software floating point, don't use 387 inline intrinsics. */
3171 target_flags |= MASK_NO_FANCY_MATH_387;
3173 /* Turn on MMX builtins for -msse. */
3176 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3177 x86_prefetch_sse = true;
3180 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3181 if (TARGET_SSE4_2 || TARGET_ABM)
3182 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3184 /* Validate -mpreferred-stack-boundary= value or default it to
3185 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3186 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3187 if (ix86_preferred_stack_boundary_string)
3189 i = atoi (ix86_preferred_stack_boundary_string);
3190 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3191 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3192 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3194 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3197 /* Set the default value for -mstackrealign. */
3198 if (ix86_force_align_arg_pointer == -1)
3199 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3201 /* Validate -mincoming-stack-boundary= value or default it to
3202 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3203 if (ix86_force_align_arg_pointer)
3204 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3206 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3207 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3208 if (ix86_incoming_stack_boundary_string)
3210 i = atoi (ix86_incoming_stack_boundary_string);
3211 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3212 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3213 i, TARGET_64BIT ? 4 : 2);
3216 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3217 ix86_incoming_stack_boundary
3218 = ix86_user_incoming_stack_boundary;
3222 /* Accept -msseregparm only if at least SSE support is enabled. */
3223 if (TARGET_SSEREGPARM
3225 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3227 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3228 if (ix86_fpmath_string != 0)
3230 if (! strcmp (ix86_fpmath_string, "387"))
3231 ix86_fpmath = FPMATH_387;
3232 else if (! strcmp (ix86_fpmath_string, "sse"))
3236 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3237 ix86_fpmath = FPMATH_387;
3240 ix86_fpmath = FPMATH_SSE;
3242 else if (! strcmp (ix86_fpmath_string, "387,sse")
3243 || ! strcmp (ix86_fpmath_string, "387+sse")
3244 || ! strcmp (ix86_fpmath_string, "sse,387")
3245 || ! strcmp (ix86_fpmath_string, "sse+387")
3246 || ! strcmp (ix86_fpmath_string, "both"))
3250 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3251 ix86_fpmath = FPMATH_387;
3253 else if (!TARGET_80387)
3255 warning (0, "387 instruction set disabled, using SSE arithmetics");
3256 ix86_fpmath = FPMATH_SSE;
3259 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3262 error ("bad value (%s) for %sfpmath=%s %s",
3263 ix86_fpmath_string, prefix, suffix, sw);
3266 /* If the i387 is disabled, then do not return values in it. */
3268 target_flags &= ~MASK_FLOAT_RETURNS;
3270 /* Use external vectorized library in vectorizing intrinsics. */
3271 if (ix86_veclibabi_string)
3273 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3274 ix86_veclib_handler = ix86_veclibabi_svml;
3275 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3276 ix86_veclib_handler = ix86_veclibabi_acml;
3278 error ("unknown vectorization library ABI type (%s) for "
3279 "%sveclibabi=%s %s", ix86_veclibabi_string,
3280 prefix, suffix, sw);
3283 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3284 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3286 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3288 /* ??? Unwind info is not correct around the CFG unless either a frame
3289 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3290 unwind info generation to be aware of the CFG and propagating states
3292 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3293 || flag_exceptions || flag_non_call_exceptions)
3294 && flag_omit_frame_pointer
3295 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3297 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3298 warning (0, "unwind tables currently require either a frame pointer "
3299 "or %saccumulate-outgoing-args%s for correctness",
3301 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3304 /* If stack probes are required, the space used for large function
3305 arguments on the stack must also be probed, so enable
3306 -maccumulate-outgoing-args so this happens in the prologue. */
3307 if (TARGET_STACK_PROBE
3308 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3310 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3311 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3312 "for correctness", prefix, suffix);
3313 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3316 /* For sane SSE instruction set generation we need fcomi instruction.
3317 It is safe to enable all CMOVE instructions. */
3321 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3324 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3325 p = strchr (internal_label_prefix, 'X');
3326 internal_label_prefix_len = p - internal_label_prefix;
3330 /* When scheduling description is not available, disable scheduler pass
3331 so it won't slow down the compilation and make x87 code slower. */
3332 if (!TARGET_SCHEDULE)
3333 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3335 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3336 set_param_value ("simultaneous-prefetches",
3337 ix86_cost->simultaneous_prefetches);
3338 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3339 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3340 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3341 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3342 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3343 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3345 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3346 can be optimized to ap = __builtin_next_arg (0). */
3348 targetm.expand_builtin_va_start = NULL;
3352 ix86_gen_leave = gen_leave_rex64;
3353 ix86_gen_pop1 = gen_popdi1;
3354 ix86_gen_add3 = gen_adddi3;
3355 ix86_gen_sub3 = gen_subdi3;
3356 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3357 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3358 ix86_gen_monitor = gen_sse3_monitor64;
3359 ix86_gen_andsp = gen_anddi3;
3363 ix86_gen_leave = gen_leave;
3364 ix86_gen_pop1 = gen_popsi1;
3365 ix86_gen_add3 = gen_addsi3;
3366 ix86_gen_sub3 = gen_subsi3;
3367 ix86_gen_sub3_carry = gen_subsi3_carry;
3368 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3369 ix86_gen_monitor = gen_sse3_monitor;
3370 ix86_gen_andsp = gen_andsi3;
3374 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3376 target_flags |= MASK_CLD & ~target_flags_explicit;
3379 /* Save the initial options in case the user does function specific options */
3381 target_option_default_node = target_option_current_node
3382 = build_target_option_node ();
3385 /* Save the current options */
/* Save the current global target option state into PTR so that it can
   be restored later when switching between functions with different
   attribute((target(...))) settings.
   NOTE(review): this extract is lossy (braces/blank lines were dropped
   by the tooling); code lines below are kept verbatim.  */
3388 ix86_function_specific_save (struct cl_target_option *ptr)
/* Guard the narrowing stores below; the corresponding cl_target_option
   fields are presumably single-byte -- TODO confirm against the struct
   definition.  */
3390 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3391 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3392 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3393 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3394 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
/* Mirror each target global into the save structure.  */
3396 ptr->arch = ix86_arch;
3397 ptr->schedule = ix86_schedule;
3398 ptr->tune = ix86_tune;
3399 ptr->fpmath = ix86_fpmath;
3400 ptr->branch_cost = ix86_branch_cost;
3401 ptr->tune_defaulted = ix86_tune_defaulted;
3402 ptr->arch_specified = ix86_arch_specified;
3403 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3404 ptr->target_flags_explicit = target_flags_explicit;
3407 /* Restore the current options */
/* Restore the global target option state from PTR (the inverse of
   ix86_function_specific_save).  Derived feature tables are rebuilt
   only when arch or tune actually changed.
   NOTE(review): extract is lossy; braces around the rebuild bodies
   were dropped by the tooling.  */
3410 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Remember previous arch/tune so we can skip rebuilding the feature
   masks when they are unchanged.  */
3412 enum processor_type old_tune = ix86_tune;
3413 enum processor_type old_arch = ix86_arch;
3414 unsigned int ix86_arch_mask, ix86_tune_mask;
/* Copy the saved values back into the target globals, widening the
   narrow saved fields back to their enum types.  */
3417 ix86_arch = (enum processor_type) ptr->arch;
3418 ix86_schedule = (enum attr_cpu) ptr->schedule;
3419 ix86_tune = (enum processor_type) ptr->tune;
3420 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3421 ix86_branch_cost = ptr->branch_cost;
3422 ix86_tune_defaulted = ptr->tune_defaulted;
3423 ix86_arch_specified = ptr->arch_specified;
3424 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3425 target_flags_explicit = ptr->target_flags_explicit;
3427 /* Recreate the arch feature tests if the arch changed.  */
3428 if (old_arch != ix86_arch)
3430 ix86_arch_mask = 1u << ix86_arch;
3431 for (i = 0; i < X86_ARCH_LAST; ++i)
3432 ix86_arch_features[i]
3433 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3436 /* Recreate the tune optimization tests.  */
3437 if (old_tune != ix86_tune)
3439 ix86_tune_mask = 1u << ix86_tune;
3440 for (i = 0; i < X86_TUNE_LAST; ++i)
3441 ix86_tune_features[i]
3442 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3446 /* Print the current options */
/* Dump the target options in PTR to FILE for debugging, indented by
   INDENT columns.  Prints arch/tune (by name when the index is within
   cpu_names), fpmath, branch_cost, and a textual rendering of the ISA
   and target flags obtained from ix86_target_string (whose result is
   heap-allocated and freed here).
   NOTE(review): extract is lossy; some argument lines of the fprintf
   calls were dropped by the tooling.  */
3449 ix86_function_specific_print (FILE *file, int indent,
3450 struct cl_target_option *ptr)
3453 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3454 NULL, NULL, NULL, false);
3456 fprintf (file, "%*sarch = %d (%s)\n",
/* Fall back to a placeholder when the arch index is out of range of
   the cpu_names table.  */
3459 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3460 ? cpu_names[ptr->arch]
3463 fprintf (file, "%*stune = %d (%s)\n",
3466 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3467 ? cpu_names[ptr->tune]
/* fpmath is a bit-set: 387 and sse may both be enabled.  */
3470 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3471 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3472 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3473 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3477 fprintf (file, "%*s%s\n", indent, "", target_string);
3478 free (target_string);
3483 /* Inner function to process the attribute((target(...))), take an argument and
3484 set the current options from the argument. If we have a list, recursively go
/* Parse the argument(s) of attribute((target(...))): ARGS is either a
   TREE_LIST (handled by recursion) or a STRING_CST holding a
   comma-separated option string.  ISA and yes/no options are applied
   to the global option state immediately; string-valued options
   (arch=/tune=/fpmath=) are stashed into P_STRINGS for the caller.
   Returns false on any unrecognized or duplicated option.
   NOTE(review): extract is lossy; braces, some declarations and
   return statements were dropped by the tooling.  */
3488 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Table-entry constructors: S is the option name, O the OPT_* enum,
   M the target_flags mask (0 where unused).  */
3493 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3494 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3495 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3496 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3511 enum ix86_opt_type type;
/* ISA options: toggled through ix86_handle_option below.  */
3516 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3517 IX86_ATTR_ISA ("abm", OPT_mabm),
3518 IX86_ATTR_ISA ("aes", OPT_maes),
3519 IX86_ATTR_ISA ("avx", OPT_mavx),
3520 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3521 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3522 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3523 IX86_ATTR_ISA ("sse", OPT_msse),
3524 IX86_ATTR_ISA ("sse2", OPT_msse2),
3525 IX86_ATTR_ISA ("sse3", OPT_msse3),
3526 IX86_ATTR_ISA ("sse4", OPT_msse4),
3527 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3528 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3529 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3530 IX86_ATTR_ISA ("sse5", OPT_msse5),
3531 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3533 /* String options, saved into p_strings[] by index.  */
3534 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3535 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3536 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Flag options: set (YES) or clear (NO) a target_flags mask.  */
3539 IX86_ATTR_YES ("cld",
3543 IX86_ATTR_NO ("fancy-math-387",
3544 OPT_mfancy_math_387,
3545 MASK_NO_FANCY_MATH_387),
3547 IX86_ATTR_NO ("fused-madd",
3549 MASK_NO_FUSED_MADD),
3551 IX86_ATTR_YES ("ieee-fp",
3555 IX86_ATTR_YES ("inline-all-stringops",
3556 OPT_minline_all_stringops,
3557 MASK_INLINE_ALL_STRINGOPS),
3559 IX86_ATTR_YES ("inline-stringops-dynamically",
3560 OPT_minline_stringops_dynamically,
3561 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3563 IX86_ATTR_NO ("align-stringops",
3564 OPT_mno_align_stringops,
3565 MASK_NO_ALIGN_STRINGOPS),
3567 IX86_ATTR_YES ("recip",
3573 /* If this is a list, recurse to get the options.  */
3574 if (TREE_CODE (args) == TREE_LIST)
3578 for (; args; args = TREE_CHAIN (args))
3579 if (TREE_VALUE (args)
3580 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3586 else if (TREE_CODE (args) != STRING_CST)
3589 /* Handle multiple arguments separated by commas.  */
3590 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3592 while (next_optstr && *next_optstr != '\0')
3594 char *p = next_optstr;
3596 char *comma = strchr (next_optstr, ',');
3597 const char *opt_string;
3598 size_t len, opt_len;
3603 enum ix86_opt_type type = ix86_opt_unknown;
/* Split off the piece before the comma; advance past it.  */
3609 len = comma - next_optstr;
3610 next_optstr = comma + 1;
3618 /* Recognize no-xxx.  */
3619 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3628 /* Find the option.  */
3631 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3633 type = attrs[i].type;
3634 opt_len = attrs[i].len;
/* String options match as a prefix (their value follows the '=');
   all other options must match exactly.  */
3635 if (ch == attrs[i].string[0]
3636 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3637 && memcmp (p, attrs[i].string, opt_len) == 0)
3640 mask = attrs[i].mask;
3641 opt_string = attrs[i].string;
3646 /* Process the option.  */
3649 error ("attribute(target(\"%s\")) is unknown", orig_p);
3653 else if (type == ix86_opt_isa)
3654 ix86_handle_option (opt, p, opt_set_p);
3656 else if (type == ix86_opt_yes || type == ix86_opt_no)
/* A NO-table entry inverts the sense of the "no-" prefix.  */
3658 if (type == ix86_opt_no)
3659 opt_set_p = !opt_set_p;
3662 target_flags |= mask;
3664 target_flags &= ~mask;
3667 else if (type == ix86_opt_str)
/* Each string option may appear at most once.  */
3671 error ("option(\"%s\") was already specified", opt_string);
/* Caller owns the xstrdup'd value and must free it.  */
3675 p_strings[opt] = xstrdup (p + opt_len);
3685 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Build and return a TARGET_OPTION_NODE tree for the attribute
   arguments ARGS, or NULL on invalid options.  Temporarily installs
   the attribute's option strings, reruns override_options, snapshots
   the result, then restores the original strings and frees the
   temporaries.
   NOTE(review): extract is lossy; braces, early returns and some
   declarations were dropped by the tooling.  */
3688 ix86_valid_target_attribute_tree (tree args)
/* Remember the option strings so they can be restored after the
   temporary override_options run below.  */
3690 const char *orig_arch_string = ix86_arch_string;
3691 const char *orig_tune_string = ix86_tune_string;
3692 const char *orig_fpmath_string = ix86_fpmath_string;
3693 int orig_tune_defaulted = ix86_tune_defaulted;
3694 int orig_arch_specified = ix86_arch_specified;
3695 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3698 struct cl_target_option *def
3699 = TREE_TARGET_OPTION (target_option_default_node);
3701 /* Process each of the options on the chain.  */
3702 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3705 /* If the changed options are different from the default, rerun override_options,
3706 and then save the options away. The string options are attribute options,
3707 and will be undone when we copy the save structure. */
3708 if (ix86_isa_flags != def->ix86_isa_flags
3709 || target_flags != def->target_flags
3710 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3711 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3712 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3714 /* If we are using the default tune= or arch=, undo the string assigned,
3715 and use the default.  */
3716 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3717 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3718 else if (!orig_arch_specified)
3719 ix86_arch_string = NULL;
3721 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3722 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3723 else if (orig_tune_defaulted)
3724 ix86_tune_string = NULL;
3726 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
3727 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3728 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3729 else if (!TARGET_64BIT && TARGET_SSE)
3730 ix86_fpmath_string = "sse,387";
3732 /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
3733 override_options (false);
3735 /* Add any builtin functions with the new isa if any.  */
3736 ix86_add_new_builtins (ix86_isa_flags);
3738 /* Save the current options unless we are validating options for
3740 t = build_target_option_node ();
/* Put the original option strings back; the attribute copies live on
   only inside the saved target-option node.  */
3742 ix86_arch_string = orig_arch_string;
3743 ix86_tune_string = orig_tune_string;
3744 ix86_fpmath_string = orig_fpmath_string;
3746 /* Free up memory allocated to hold the strings.  */
3747 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3748 if (option_strings[i])
3749 free (option_strings[i]);
3755 /* Hook to validate attribute((target("string"))). */
/* Hook to validate attribute((target("string"))) on FNDECL.  Saves the
   current target/optimization state, evaluates the attribute via
   ix86_valid_target_attribute_tree, records the resulting nodes on
   FNDECL, then restores the saved state so global options are left
   untouched.
   NOTE(review): extract is lossy; braces, the args parameter line and
   return statements were dropped by the tooling.  */
3758 ix86_valid_target_attribute_p (tree fndecl,
3759 tree ARG_UNUSED (name),
3761 int ARG_UNUSED (flags))
3763 struct cl_target_option cur_target;
3765 tree old_optimize = build_optimization_node ();
3766 tree new_target, new_optimize;
3767 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3769 /* If the function changed the optimization levels as well as setting target
3770 options, start with the optimizations specified.  */
3771 if (func_optimize && func_optimize != old_optimize)
3772 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3774 /* The target attributes may also change some optimization flags, so update
3775 the optimization options if necessary.  */
3776 cl_target_option_save (&cur_target);
3777 new_target = ix86_valid_target_attribute_tree (args);
3778 new_optimize = build_optimization_node ();
/* Record the per-function target and (if changed) optimization
   nodes on the declaration.  */
3785 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3787 if (old_optimize != new_optimize)
3788 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary state changes made above.  */
3791 cl_target_option_restore (&cur_target);
3793 if (old_optimize != new_optimize)
3794 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3800 /* Hook to determine if one function can safely inline another. */
/* Hook: decide whether CALLEE may be inlined into CALLER given their
   per-function target options.  Inlining is allowed when the callee
   has no target attributes, or when its ISA flags are a subset of the
   caller's and the remaining option fields match exactly.
   NOTE(review): extract is lossy; braces and the return statements
   for each branch were dropped by the tooling.  */
3803 ix86_can_inline_p (tree caller, tree callee)
3806 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3807 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3809 /* If callee has no option attributes, then it is ok to inline.  */
3813 /* If caller has no option attributes, but callee does then it is not ok to
3815 else if (!caller_tree)
3820 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3821 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3823 /* Callee's isa options should be a subset of the caller's, i.e. a SSE5 function
3824 can inline a SSE2 function but a SSE2 function can't inline a SSE5
3826 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3827 != callee_opts->ix86_isa_flags)
3830 /* See if we have the same non-isa options.  */
3831 else if (caller_opts->target_flags != callee_opts->target_flags)
3834 /* See if arch, tune, etc. are the same.  */
3835 else if (caller_opts->arch != callee_opts->arch)
3838 else if (caller_opts->tune != callee_opts->tune)
3841 else if (caller_opts->fpmath != callee_opts->fpmath)
3844 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3855 /* Remember the last target of ix86_set_current_function. */
/* GTY-rooted so the cached FNDECL survives garbage collection; lets
   the hook below return early when re-invoked for the same function.  */
3856 static GTY(()) tree ix86_previous_fndecl;
3858 /* Establish appropriate back-end context for processing the function
3859 FNDECL. The argument might be NULL to indicate processing at top
3860 level, outside of any function scope. */
/* Switch the back end's global option state to that of FNDECL,
   restoring either the function's own target-option node or the
   current default when the function has none.
   NOTE(review): extract is lossy; braces, the NULL alternatives of
   the conditional expressions, and early returns were dropped.  */
3862 ix86_set_current_function (tree fndecl)
3864 /* Only change the context if the function changes. This hook is called
3865 several times in the course of compiling a function, and we don't want to
3866 slow things down too much or call target_reinit when it isn't safe.  */
3867 if (fndecl && fndecl != ix86_previous_fndecl)
3869 tree old_tree = (ix86_previous_fndecl
3870 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3873 tree new_tree = (fndecl
3874 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
/* Update the cache before any early-out on identical state.  */
3877 ix86_previous_fndecl = fndecl;
3878 if (old_tree == new_tree)
3883 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* No per-function node: fall back to the current global defaults.  */
3889 struct cl_target_option *def
3890 = TREE_TARGET_OPTION (target_option_current_node);
3892 cl_target_option_restore (def);
3899 /* Return true if this goes in large data/bss. */
/* Return whether EXP should go in the large data/bss sections used by
   the x86-64 medium code models.  Only relevant for CM_MEDIUM and
   CM_MEDIUM_PIC; functions never qualify, explicit .ldata/.lbss
   section placement always qualifies, and otherwise the decision is
   by size against ix86_section_threshold.
   NOTE(review): extract is lossy; braces and the return statements
   were dropped by the tooling.  */
3902 ix86_in_large_data_p (tree exp)
3904 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3907 /* Functions are never large data.  */
3908 if (TREE_CODE (exp) == FUNCTION_DECL)
/* Honor an explicit section placement into the large sections.  */
3911 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3913 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3914 if (strcmp (section, ".ldata") == 0
3915 || strcmp (section, ".lbss") == 0)
3921 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3923 /* If this is an incomplete type with size 0, then we can't put it
3924 in data because it might be too big when completed.  */
3925 if (!size || size > ix86_section_threshold)
3932 /* Switch to the appropriate section for output of DECL.
3933 DECL is either a `VAR_DECL' node or a constant of some sort.
3934 RELOC indicates whether forming the initial value of DECL requires
3935 link-time relocations. */
/* TARGET_ASM_SELECT_SECTION implementation: for medium-model large
   data, map the decl's section category to the corresponding ".l*"
   large section; everything else defers to the generic ELF chooser.
   NOTE(review): extract is lossy; braces, break statements and
   several case labels/assignments were dropped by the tooling.  */
3937 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3941 x86_64_elf_select_section (tree decl, int reloc,
3942 unsigned HOST_WIDE_INT align)
3944 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3945 && ix86_in_large_data_p (decl))
3947 const char *sname = NULL;
3948 unsigned int flags = SECTION_WRITE;
/* Pick the large-model section name for the decl's category.  */
3949 switch (categorize_decl_for_section (decl, reloc))
3954 case SECCAT_DATA_REL:
3955 sname = ".ldata.rel";
3957 case SECCAT_DATA_REL_LOCAL:
3958 sname = ".ldata.rel.local";
3960 case SECCAT_DATA_REL_RO:
3961 sname = ".ldata.rel.ro";
3963 case SECCAT_DATA_REL_RO_LOCAL:
3964 sname = ".ldata.rel.ro.local";
3968 flags |= SECTION_BSS;
3971 case SECCAT_RODATA_MERGE_STR:
3972 case SECCAT_RODATA_MERGE_STR_INIT:
3973 case SECCAT_RODATA_MERGE_CONST:
3977 case SECCAT_SRODATA:
3984 /* We don't split these for medium model. Place them into
3985 default sections and hope for best.  */
3987 case SECCAT_EMUTLS_VAR:
3988 case SECCAT_EMUTLS_TMPL:
3993 /* We might get called with string constants, but get_named_section
3994 doesn't like them as they are not DECLs. Also, we need to set
3995 flags in that case.  */
3997 return get_section (sname, flags, NULL);
3998 return get_named_section (decl, sname, reloc);
/* Not medium-model large data: use the default ELF selection.  */
4001 return default_elf_select_section (decl, reloc, align);
4004 /* Build up a unique section name, expressed as a
4005 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4006 RELOC indicates whether the initial value of EXP requires
4007 link-time relocations. */
/* TARGET_ASM_UNIQUE_SECTION implementation: build a unique ".l*"
   section name for medium-model large data (optionally with a
   .gnu.linkonce prefix when COMDAT groups are unavailable) and store
   it as DECL_SECTION_NAME; other decls defer to the generic handler.
   NOTE(review): extract is lossy; braces, break statements and some
   case bodies were dropped by the tooling.  */
4009 static void ATTRIBUTE_UNUSED
4010 x86_64_elf_unique_section (tree decl, int reloc)
4012 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4013 && ix86_in_large_data_p (decl))
4015 const char *prefix = NULL;
4016 /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
4017 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
/* Map the decl's section category to a large-section prefix; the
   shorter ".ld"/".lb"/".lr" forms are used for one-only decls.  */
4019 switch (categorize_decl_for_section (decl, reloc))
4022 case SECCAT_DATA_REL:
4023 case SECCAT_DATA_REL_LOCAL:
4024 case SECCAT_DATA_REL_RO:
4025 case SECCAT_DATA_REL_RO_LOCAL:
4026 prefix = one_only ? ".ld" : ".ldata";
4029 prefix = one_only ? ".lb" : ".lbss";
4032 case SECCAT_RODATA_MERGE_STR:
4033 case SECCAT_RODATA_MERGE_STR_INIT:
4034 case SECCAT_RODATA_MERGE_CONST:
4035 prefix = one_only ? ".lr" : ".lrodata";
4037 case SECCAT_SRODATA:
4044 /* We don't split these for medium model. Place them into
4045 default sections and hope for best.  */
4047 case SECCAT_EMUTLS_VAR:
4048 prefix = targetm.emutls.var_section;
4050 case SECCAT_EMUTLS_TMPL:
4051 prefix = targetm.emutls.tmpl_section;
4056 const char *name, *linkonce;
/* Use the decl's assembler name, stripped of target encoding.  */
4059 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4060 name = targetm.strip_name_encoding (name);
4062 /* If we're using one_only, then there needs to be a .gnu.linkonce
4063 prefix to the section name.  */
4064 linkonce = one_only ? ".gnu.linkonce" : "";
4066 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4068 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
/* Not medium-model large data: use the default naming scheme.  */
4072 default_unique_section (decl, reloc);
4075 #ifdef COMMON_ASM_OP
4076 /* This says how to output assembler code to declare an
4077 uninitialized external linkage data object.
4079 For medium model x86-64 we need to use .largecomm opcode for
4082 x86_elf_aligned_common (FILE *file,
4083 const char *name, unsigned HOST_WIDE_INT size,
/* Emit ".largecomm" instead of the ordinary COMMON_ASM_OP when the
   object exceeds the -mlarge-data-threshold under the medium code
   model, so the common symbol is placed in the large data area.
   NOTE(review): the else branch / braces are elided in this excerpt. */
4086 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4087 && size > (unsigned int)ix86_section_threshold)
4088 fprintf (file, ".largecomm\t");
4090 fprintf (file, "%s", COMMON_ASM_OP);
/* Output: <op> NAME,SIZE,ALIGN-in-bytes.  */
4091 assemble_name (file, name);
4092 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4093 size, align / BITS_PER_UNIT);
4097 /* Utility function for targets to use in implementing
4098 ASM_OUTPUT_ALIGNED_BSS. */
/* Emit an aligned BSS object: pick .lbss for medium-model large data,
   else the normal bss section; align, declare/label the object, and
   reserve SIZE bytes (at least 1 so the label is distinct).  */
4101 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4102 const char *name, unsigned HOST_WIDE_INT size,
4105 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4106 && size > (unsigned int)ix86_section_threshold)
4107 switch_to_section (get_named_section (decl, ".lbss", 0));
4109 switch_to_section (bss_section);
4110 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4111 #ifdef ASM_DECLARE_OBJECT_NAME
4112 last_assemble_variable_decl = decl;
4113 ASM_DECLARE_OBJECT_NAME (file, name, decl)
4115 /* Standard thing is just output label for the object. */
4116 ASM_OUTPUT_LABEL (file, name);
4117 #endif /* ASM_DECLARE_OBJECT_NAME */
4118 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set target-specific optimization defaults for -O LEVEL.  Runs before
   the command line is fully parsed; several flags are set to the
   sentinel value 2 ("unset") and resolved later in override_options
   once TARGET_64BIT is known.  */
4122 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4124 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4125 make the problem with not enough registers even worse. */
4126 #ifdef INSN_SCHEDULING
4128 flag_schedule_insns = 0;
4132 /* The Darwin libraries never set errno, so we might as well
4133 avoid calling them when that's the only reason we would. */
4134 flag_errno_math = 0;
4136 /* The default values of these switches depend on TARGET_64BIT,
4137 which is not known at this moment. Mark these values with 2 and
4138 let the user override them. In case there is no command line option
4139 specifying them, we will set the defaults in override_options. */
4141 flag_omit_frame_pointer = 2;
4142 flag_pcc_struct_return = 2;
4143 flag_asynchronous_unwind_tables = 2;
4144 flag_vect_cost_model = 1;
4145 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4146 SUBTARGET_OPTIMIZATION_OPTIONS;
4150 /* Decide whether we can make a sibling call to a function. DECL is the
4151 declaration of the function being targeted by the call and EXP is the
4152 CALL_EXPR representing the call. */
/* Returns false whenever a tail call would be unsafe: PIC/PLT %ebx
   liveness, x87 return-register mismatch, register pressure for the
   indirect-call address, dllimport indirection, or stack realignment.
   NOTE(review): the early "return false;" lines of each guard are
   elided in this excerpt — each condition below rejects the sibcall. */
4155 ix86_function_ok_for_sibcall (tree decl, tree exp)
4160 /* If we are generating position-independent code, we cannot sibcall
4161 optimize any indirect call, or a direct call to a global function,
4162 as the PLT requires %ebx be live. */
4163 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Obtain the callee's function type from the CALL_EXPR.  */
4170 func = TREE_TYPE (CALL_EXPR_FN (exp));
4171 if (POINTER_TYPE_P (func))
4172 func = TREE_TYPE (func);
4175 /* Check that the return value locations are the same. Like
4176 if we are returning floats on the 80387 register stack, we cannot
4177 make a sibcall from a function that doesn't return a float to a
4178 function that does or, conversely, from a function that does return
4179 a float to a function that doesn't; the necessary stack adjustment
4180 would not be executed. This is also the place we notice
4181 differences in the return value ABI. Note that it is ok for one
4182 of the functions to have void return type as long as the return
4183 value of the other is passed in a register. */
4184 a = ix86_function_value (TREE_TYPE (exp), func, false);
4185 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4187 if (STACK_REG_P (a) || STACK_REG_P (b))
4189 if (!rtx_equal_p (a, b))
4192 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4194 else if (!rtx_equal_p (a, b))
4197 /* If this call is indirect, we'll need to be able to use a call-clobbered
4198 register for the address of the target function. Make sure that all
4199 such registers are not used for passing parameters. */
4200 if (!decl && !TARGET_64BIT)
4204 /* We're looking at the CALL_EXPR, we need the type of the function. */
4205 type = CALL_EXPR_FN (exp); /* pointer expression */
4206 type = TREE_TYPE (type); /* pointer type */
4207 type = TREE_TYPE (type); /* function type */
/* With regparm >= 3 all of eax/edx/ecx may carry arguments, leaving no
   call-clobbered register free for the call target address.  */
4209 if (ix86_function_regparm (type, NULL) >= 3)
4211 /* ??? Need to count the actual number of registers to be used,
4212 not the possible number of registers. Fix later. */
4217 /* Dllimport'd functions are also called indirectly. */
4218 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4220 && decl && DECL_DLLIMPORT_P (decl)
4221 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4224 /* If we need to align the outgoing stack, then sibcalling would
4225 unalign the stack, which may break the called function. */
4226 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4229 /* Otherwise okay. That also includes certain types of indirect calls. */
4233 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4234 calling convention attributes;
4235 arguments as in struct attribute_spec.handler. */
/* Attribute handler: validates the calling-convention attribute NAME
   being attached to *NODE, diagnosing mutually-exclusive combinations
   and malformed regparm arguments.  Sets *no_add_attrs to suppress
   attaching the attribute when it is rejected or ignored.
   NOTE(review): several braces / returns are elided in this excerpt. */
4238 ix86_handle_cconv_attribute (tree *node, tree name,
4240 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function types (or decls that
   carry one).  */
4243 if (TREE_CODE (*node) != FUNCTION_TYPE
4244 && TREE_CODE (*node) != METHOD_TYPE
4245 && TREE_CODE (*node) != FIELD_DECL
4246 && TREE_CODE (*node) != TYPE_DECL)
4248 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4250 *no_add_attrs = true;
4254 /* Can combine regparm with all attributes but fastcall. */
4255 if (is_attribute_p ("regparm", name))
4259 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4261 error ("fastcall and regparm attributes are not compatible");
/* regparm takes a single integer-constant argument, at most
   REGPARM_MAX.  */
4264 cst = TREE_VALUE (args);
4265 if (TREE_CODE (cst) != INTEGER_CST)
4267 warning (OPT_Wattributes,
4268 "%qE attribute requires an integer constant argument",
4270 *no_add_attrs = true;
4272 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4274 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4276 *no_add_attrs = true;
4284 /* Do not warn when emulating the MS ABI. */
4285 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4286 warning (OPT_Wattributes, "%qE attribute ignored",
4288 *no_add_attrs = true;
4292 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4293 if (is_attribute_p ("fastcall", name))
4295 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4297 error ("fastcall and cdecl attributes are not compatible");
4299 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4301 error ("fastcall and stdcall attributes are not compatible");
4303 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4305 error ("fastcall and regparm attributes are not compatible");
4309 /* Can combine stdcall with fastcall (redundant), regparm and
4311 else if (is_attribute_p ("stdcall", name))
4313 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4315 error ("stdcall and cdecl attributes are not compatible");
4317 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4319 error ("stdcall and fastcall attributes are not compatible");
4323 /* Can combine cdecl with regparm and sseregparm. */
4324 else if (is_attribute_p ("cdecl", name))
4326 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4328 error ("stdcall and cdecl attributes are not compatible");
4330 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4332 error ("fastcall and cdecl attributes are not compatible");
4336 /* Can combine sseregparm with all attributes. */
4341 /* Return 0 if the attributes for two types are incompatible, 1 if they
4342 are compatible, and 2 if they are nearly compatible (which causes a
4343 warning to be generated). */
/* Compares the calling-convention-relevant attributes of TYPE1 and
   TYPE2: fastcall presence + effective regparm count, sseregparm, and
   the stack-popping convention (cdecl vs stdcall, whose "default"
   flips under -mrtd).  NOTE(review): the return statements inside each
   mismatch branch are elided in this excerpt.  */
4346 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4348 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default is stdcall, so the *non-default* marker to
   compare is "cdecl"; otherwise it is "stdcall".  */
4349 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4351 if (TREE_CODE (type1) != FUNCTION_TYPE
4352 && TREE_CODE (type1) != METHOD_TYPE)
4355 /* Check for mismatched fastcall/regparm types. */
4356 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4357 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4358 || (ix86_function_regparm (type1, NULL)
4359 != ix86_function_regparm (type2, NULL)))
4362 /* Check for mismatched sseregparm types. */
4363 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4364 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4367 /* Check for mismatched return types (cdecl vs stdcall). */
4368 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4369 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4375 /* Return the regparm value for a function with the indicated TYPE and DECL.
4376 DECL may be NULL when calling function indirectly
4377 or considering a libcall. */
/* Computes how many integer registers are used for argument passing:
   - 64-bit: fixed per-ABI maximum;
   - explicit regparm attribute value (error for regparm(3) on nested
     functions, which need %ecx for the static chain);
   - fastcall implies 2;
   - otherwise, for local (non-exported) functions, bump regparm up to
     the registers not consumed by fixed/global register variables.
   NOTE(review): braces, early returns and parts of conditions are
   elided in this excerpt.  */
4380 ix86_function_regparm (const_tree type, const_tree decl)
/* Emit the nested-function diagnostic only once per compilation.  */
4385 static bool error_issued;
4388 return (ix86_function_type_abi (type) == SYSV_ABI
4389 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
4391 regparm = ix86_regparm;
4392 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4396 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4398 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4400 /* We can't use regparm(3) for nested functions because
4401 these pass static chain pointer in %ecx register. */
4402 if (!error_issued && regparm == 3
4403 && decl_function_context (decl)
4404 && !DECL_NO_STATIC_CHAIN (decl))
4406 error ("nested functions are limited to 2 register parameters");
4407 error_issued = true;
4415 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4418 /* Use register calling convention for local functions when possible. */
4420 && TREE_CODE (decl) == FUNCTION_DECL
4424 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4425 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4428 int local_regparm, globals = 0, regno;
4431 /* Make sure no regparm register is taken by a
4432 fixed register variable. */
4433 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4434 if (fixed_regs[local_regparm])
4437 /* We can't use regparm(3) for nested functions as these use
4438 static chain pointer in third argument. */
4439 if (local_regparm == 3
4440 && decl_function_context (decl)
4441 && !DECL_NO_STATIC_CHAIN (decl))
4444 /* If the function realigns its stackpointer, the prologue will
4445 clobber %ecx. If we've already generated code for the callee,
4446 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4447 scanning the attributes for the self-realigning property. */
4448 f = DECL_STRUCT_FUNCTION (decl);
4449 /* Since current internal arg pointer won't conflict with
4450 parameter passing regs, so no need to change stack
4451 realignment and adjust regparm number.
4453 Each fixed register usage increases register pressure,
4454 so less registers should be used for argument passing.
4455 This functionality can be overriden by an explicit
4457 for (regno = 0; regno <= DI_REG; regno++)
4458 if (fixed_regs[regno])
4462 = globals < local_regparm ? local_regparm - globals : 0;
4464 if (local_regparm > regparm)
4465 regparm = local_regparm;
4472 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4473 DFmode (2) arguments in SSE registers for a function with the
4474 indicated TYPE and DECL. DECL may be NULL when calling function
4475 indirectly or considering a libcall. Otherwise return 0. */
/* 32-bit only (asserted below).  The sseregparm attribute or
   -msseregparm requests SSE argument passing; it is an error when
   SSE/SSE2 is not enabled.  Local functions get it implicitly when
   compiled with SSE math under optimization.  The WARN flag governs
   whether diagnostics are emitted (behavior partly elided here).
   NOTE(review): braces / intervening returns are missing from this
   excerpt.  */
4478 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4480 gcc_assert (!TARGET_64BIT);
4482 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4483 by the sseregparm attribute. */
4484 if (TARGET_SSEREGPARM
4485 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4492 error ("Calling %qD with attribute sseregparm without "
4493 "SSE/SSE2 enabled", decl);
4495 error ("Calling %qT with attribute sseregparm without "
4496 "SSE/SSE2 enabled", type);
4504 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4505 (and DFmode for SSE2) arguments in SSE registers. */
4506 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4508 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4509 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
/* 2 => SFmode and DFmode in SSE regs; 1 => SFmode only.  */
4511 return TARGET_SSE2 ? 2 : 1;
4517 /* Return true if EAX is live at the start of the function. Used by
4518 ix86_expand_prologue to determine if we need special help before
4519 calling allocate_stack_worker. */
4522 ix86_eax_live_at_start_p (void)
4524 /* Cheat. Don't bother working forward from ix86_function_regparm
4525 to the function type to whether an actual argument is located in
4526 eax. Instead just look at cfg info, which is still close enough
4527 to correct at this point. This gives false positives for broken
4528 functions that might use uninitialized data that happens to be
4529 allocated in eax, but who cares? */
/* Register 0 is %eax; test its membership in the entry block's
   live-out set from the dataflow framework.  */
4530 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4533 /* Value is the number of bytes of arguments automatically
4534 popped when returning from a subroutine call.
4535 FUNDECL is the declaration node of the function (as a tree),
4536 FUNTYPE is the data type of the function (as a tree),
4537 or for a library call it is an identifier node for the subroutine name.
4538 SIZE is the number of bytes of arguments passed on the stack.
4540 On the 80386, the RTD insn may be used to pop them if the number
4541 of args is fixed, but if the number is variable then the caller
4542 must pop them all. RTD can't be used for library calls now
4543 because the library is compiled with the Unix compiler.
4544 Use of RTD is a selectable option, since it is incompatible with
4545 standard Unix calling sequences. If the option is not selected,
4546 the caller must always pop the args.
4548 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): return statements inside the branches below are elided
   in this excerpt; each popping case returns SIZE (or Pmode size for
   the hidden aggregate-return pointer), otherwise 0.  */
4551 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4555 /* None of the 64-bit ABIs pop arguments. */
/* -mrtd applies to real functions only, never library-call
   identifier nodes.  */
4559 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4561 /* Cdecl functions override -mrtd, and never pop the stack. */
4562 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4564 /* Stdcall and fastcall functions will pop the stack if not
4566 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4567 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4570 if (rtd && ! stdarg_p (funtype))
4574 /* Lose any fake structure return argument if it is passed on the stack. */
4575 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4576 && !KEEP_AGGREGATE_RETURN_POINTER)
4578 int nregs = ix86_function_regparm (funtype, fundecl);
4580 return GET_MODE_SIZE (Pmode);
4586 /* Argument support functions. */
4588 /* Return true when register may be used to pass function parameters. */
/* Covers integer regparm registers, MMX/SSE parameter registers in
   32-bit mode, SSE parameter registers in 64-bit mode, and the
   per-ABI 64-bit integer parameter register lists.
   NOTE(review): braces and the interleaved TARGET_64BIT guards are
   partially elided in this excerpt.  */
4590 ix86_function_arg_regno_p (int regno)
4593 const int *parm_regs;
4598 return (regno < REGPARM_MAX
4599 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4601 return (regno < REGPARM_MAX
4602 || (TARGET_MMX && MMX_REGNO_P (regno)
4603 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4604 || (TARGET_SSE && SSE_REGNO_P (regno)
4605 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4610 if (SSE_REGNO_P (regno) && TARGET_SSE)
4615 if (TARGET_SSE && SSE_REGNO_P (regno)
4616 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4620 /* TODO: The function should depend on current function ABI but
4621 builtins.c would need updating then. Therefore we use the
4624 /* RAX is used as hidden argument to va_arg functions. */
4625 if (ix86_abi == SYSV_ABI && regno == AX_REG)
/* Scan the default ABI's integer parameter register list.  */
4628 if (ix86_abi == MS_ABI)
4629 parm_regs = x86_64_ms_abi_int_parameter_registers;
4631 parm_regs = x86_64_int_parameter_registers;
4632 for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX
4633 : X86_64_REGPARM_MAX); i++)
4634 if (regno == parm_regs[i])
4639 /* Return if we do not know how to pass TYPE solely in registers. */
/* Target hook TARGET_MUST_PASS_IN_STACK: true when the generic
   variable-size/padding test fires, or for 32-bit TImode aggregates
   that are not genuine vector types.  */
4642 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4644 if (must_pass_in_stack_var_size_or_pad (mode, type))
4647 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4648 The layout_type routine is crafty and tries to trick us into passing
4649 currently unsupported vector types on the stack by using TImode. */
4650 return (!TARGET_64BIT && mode == TImode
4651 && type && TREE_CODE (type) != VECTOR_TYPE);
4654 /* It returns the size, in bytes, of the area reserved for arguments passed
4655 in registers for the function represented by fndecl dependent to the used
/* FNDECL may be a FUNCTION_DECL or a function type; the MS ABI
   reserves a 32-byte home area for register arguments (returned in the
   elided tail of this function), SYSV reserves none.  */
4658 ix86_reg_parm_stack_space (const_tree fndecl)
4660 enum calling_abi call_abi = SYSV_ABI;
4661 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4662 call_abi = ix86_function_abi (fndecl);
4664 call_abi = ix86_function_type_abi (fndecl);
4665 if (call_abi == MS_ABI)
4670 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* 64-bit only: the default ix86_abi can be overridden per-type by the
   ms_abi / sysv_abi attributes (only the opposite attribute of the
   current default is checked).  */
4673 ix86_function_type_abi (const_tree fntype)
4675 if (TARGET_64BIT && fntype != NULL)
4677 enum calling_abi abi = ix86_abi;
4678 if (abi == SYSV_ABI)
4680 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4683 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* Return the calling ABI of the function declaration FNDECL by
   delegating to the type-based query on its function type.  */
4690 static enum calling_abi
4691 ix86_function_abi (const_tree fndecl)
4695 return ix86_function_type_abi (TREE_TYPE (fndecl));
4698 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* Outside a 64-bit function context the cached per-function ABI is not
   meaningful (the elided branch returns the default).  */
4701 ix86_cfun_abi (void)
4703 if (! cfun || ! TARGET_64BIT)
4705 return cfun->machine->call_abi;
4709 extern void init_regs (void);
4711 /* Implementation of call abi switching target hook. Specific to FNDECL
4712 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4713 for more details. */
/* Record the callee's ABI in the per-function machine state; a NULL
   FNDECL (e.g. a libcall) gets the compilation default ix86_abi.  */
4715 ix86_call_abi_override (const_tree fndecl)
4717 if (fndecl == NULL_TREE)
4718 cfun->machine->call_abi = ix86_abi;
4720 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4723 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4724 re-initialization of init_regs each time we switch function context since
4725 this is needed only during RTL expansion. */
/* %esi is call-used under MS ABI but call-saved under SYSV, so its
   current call_used_regs entry reveals which ABI the register tables
   were last initialized for; re-run init_regs only on a mismatch
   (the init_regs call itself is elided from this excerpt).  */
4727 ix86_maybe_switch_abi (void)
4730 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4734 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4735 for a call to a function whose data type is FNTYPE.
4736 For a library call, FNTYPE is 0. */
/* Zeroes CUM, records the callee's ABI, then fills in the register
   budgets (integer/SSE/MMX) and warning flags used by the per-argument
   advance/assign routines.
   NOTE(review): braces and several TARGET_64BIT guards are elided in
   this excerpt.  */
4739 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4740 tree fntype, /* tree ptr for function decl */
4741 rtx libname, /* SYMBOL_REF of library name or 0 */
4744 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4745 memset (cum, 0, sizeof (*cum));
4748 cum->call_abi = ix86_function_abi (fndecl);
4750 cum->call_abi = ix86_function_type_abi (fntype);
4751 /* Set up the number of registers to use for passing arguments. */
4753 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4754 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it")
4755 cum->nregs = ix86_regparm;
/* Cross-ABI calls use the callee ABI's register counts, not the
   compilation default's.  */
4758 if (cum->call_abi != ix86_abi)
4759 cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX
4764 cum->sse_nregs = SSE_REGPARM_MAX;
4767 if (cum->call_abi != ix86_abi)
4768 cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4769 : X64_SSE_REGPARM_MAX;
4773 cum->mmx_nregs = MMX_REGPARM_MAX;
4774 cum->warn_avx = true;
4775 cum->warn_sse = true;
4776 cum->warn_mmx = true;
4778 /* Because type might mismatch in between caller and callee, we need to
4779 use actual type of function for local calls.
4780 FIXME: cgraph_analyze can be told to actually record if function uses
4781 va_start so for local functions maybe_vaarg can be made aggressive
4783 FIXME: once typesytem is fixed, we won't need this code anymore. */
4785 fntype = TREE_TYPE (fndecl);
4786 cum->maybe_vaarg = (fntype
4787 ? (!prototype_p (fntype) || stdarg_p (fntype))
4792 /* If there are variable arguments, then we won't pass anything
4793 in registers in 32-bit mode. */
4794 if (stdarg_p (fntype))
4805 /* Use ecx and edx registers if function has fastcall attribute,
4806 else look for regparm information. */
4809 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4815 cum->nregs = ix86_function_regparm (fntype, fndecl);
4818 /* Set up the number of SSE registers used for passing SFmode
4819 and DFmode arguments. Warn for mismatching ABI. */
4820 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4824 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4825 But in the case of vector types, it is some vector mode.
4827 When we have only some of our vector isa extensions enabled, then there
4828 are some modes for which vector_mode_supported_p is false. For these
4829 modes, the generic vector support in gcc will choose some non-vector mode
4830 in order to implement the type. By computing the natural mode, we'll
4831 select the proper ABI location for the operand and not depend on whatever
4832 the middle-end decides to do with these vector types.
4834 The midde-end can't deal with the vector types > 16 bytes. In this
4835 case, we return the original mode and warn ABI change if CUM isn't
4838 static enum machine_mode
4839 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4841 enum machine_mode mode = TYPE_MODE (type);
/* Only 8/16/32-byte vector types whose TYPE_MODE is not already a
   vector mode need the search below.  */
4843 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4845 HOST_WIDE_INT size = int_size_in_bytes (type);
4846 if ((size == 8 || size == 16 || size == 32)
4847 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4848 && TYPE_VECTOR_SUBPARTS (type) > 1)
4850 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4852 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4853 mode = MIN_MODE_VECTOR_FLOAT;
4855 mode = MIN_MODE_VECTOR_INT;
4857 /* Get the mode which has this inner mode and number of units. */
/* Walk the vector modes from the smallest upward and take the first
   one matching both element mode and element count.  */
4858 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4859 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4860 && GET_MODE_INNER (mode) == innermode)
4862 if (size == 32 && !TARGET_AVX)
/* One-shot ABI-change warning for 32-byte vectors without AVX.  */
4864 static bool warnedavx;
4871 warning (0, "AVX vector argument without AVX "
4872 "enabled changes the ABI");
4874 return TYPE_MODE (type);
4887 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4888 this may not agree with the mode that the type system has chosen for the
4889 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4890 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4893 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4898 if (orig_mode != BLKmode)
4899 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the natural-mode register in a one-element PARALLEL
   at offset 0 so callers can still describe the location.  */
4902 tmp = gen_rtx_REG (mode, regno);
4903 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4904 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4910 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4911 of this code is to classify each 8bytes of incoming argument by the register
4912 class and assign registers accordingly. */
4914 /* Return the union class of CLASS1 and CLASS2.
4915 See the x86-64 PS ABI for details. */
/* Implements the psABI class-merging rules, applied pairwise when a
   struct field and a previously classified eightbyte overlap.
   NOTE(review): the "return class2;/return class1;" bodies of rule #2
   are elided in this excerpt.  */
4917 static enum x86_64_reg_class
4918 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4920 /* Rule #1: If both classes are equal, this is the resulting class. */
4921 if (class1 == class2)
4924 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4926 if (class1 == X86_64_NO_CLASS)
4928 if (class2 == X86_64_NO_CLASS)
4931 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4932 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4933 return X86_64_MEMORY_CLASS;
4935 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF both fit in 32 bits, so the narrower INTEGERSI
   result is preserved; any other INTEGER pairing widens to INTEGER.  */
4936 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4937 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4938 return X86_64_INTEGERSI_CLASS;
4939 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4940 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4941 return X86_64_INTEGER_CLASS;
4943 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4945 if (class1 == X86_64_X87_CLASS
4946 || class1 == X86_64_X87UP_CLASS
4947 || class1 == X86_64_COMPLEX_X87_CLASS
4948 || class2 == X86_64_X87_CLASS
4949 || class2 == X86_64_X87UP_CLASS
4950 || class2 == X86_64_COMPLEX_X87_CLASS)
4951 return X86_64_MEMORY_CLASS;
4953 /* Rule #6: Otherwise class SSE is used. */
4954 return X86_64_SSE_CLASS;
4957 /* Classify the argument of type TYPE and mode MODE.
4958 CLASSES will be filled by the register class used to pass each word
4959 of the operand. The number of words is returned. In case the parameter
4960 should be passed in memory, 0 is returned. As a special case for zero
4961 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4963 BIT_OFFSET is used internally for handling records and specifies offset
4964 of the offset in bits modulo 256 to avoid overflow cases.
4966 See the x86-64 PS ABI for details.
/* Core of the x86-64 psABI eightbyte classification.  Aggregates are
   classified recursively field-by-field and merged with merge_classes;
   scalars/vectors are classified by mode in the large switch below.
   NOTE(review): many structural lines (braces, breaks, case labels,
   "return 0;" memory-class bail-outs) are elided in this excerpt —
   consult the full file for exact control flow.  */
4970 classify_argument (enum machine_mode mode, const_tree type,
4971 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4973 HOST_WIDE_INT bytes =
4974 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Number of eightbytes the argument occupies, accounting for its
   starting bit offset within the containing eightbyte.  */
4975 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4977 /* Variable sized entities are always passed/returned in memory. */
4981 if (mode != VOIDmode
4982 && targetm.calls.must_pass_in_stack (mode, type))
4985 if (type && AGGREGATE_TYPE_P (type))
4989 enum x86_64_reg_class subclasses[MAX_CLASSES];
4991 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
4995 for (i = 0; i < words; i++)
4996 classes[i] = X86_64_NO_CLASS;
4998 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
4999 signalize memory class, so handle it as special case. */
5002 classes[0] = X86_64_NO_CLASS;
5006 /* Classify each field of record and merge classes. */
5007 switch (TREE_CODE (type))
5010 /* And now merge the fields of structure. */
5011 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5013 if (TREE_CODE (field) == FIELD_DECL)
5017 if (TREE_TYPE (field) == error_mark_node)
5020 /* Bitfields are always classified as integer. Handle them
5021 early, since later code would consider them to be
5022 misaligned integers. */
5023 if (DECL_BIT_FIELD (field))
/* Mark every eightbyte the bitfield touches as INTEGER.  */
5025 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5026 i < ((int_bit_position (field) + (bit_offset % 64))
5027 + tree_low_cst (DECL_SIZE (field), 0)
5030 merge_classes (X86_64_INTEGER_CLASS,
5037 type = TREE_TYPE (field);
5039 /* Flexible array member is ignored. */
5040 if (TYPE_MODE (type) == BLKmode
5041 && TREE_CODE (type) == ARRAY_TYPE
5042 && TYPE_SIZE (type) == NULL_TREE
5043 && TYPE_DOMAIN (type) != NULL_TREE
5044 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5049 if (!warned && warn_psabi)
5052 inform (input_location,
5053 "The ABI of passing struct with"
5054 " a flexible array member has"
5055 " changed in GCC 4.4");
/* Recursively classify the field, then merge its eightbyte classes
   into the enclosing aggregate's at the field's position.  */
5059 num = classify_argument (TYPE_MODE (type), type,
5061 (int_bit_position (field)
5062 + bit_offset) % 256);
5065 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5066 for (i = 0; i < num && (i + pos) < words; i++)
5068 merge_classes (subclasses[i], classes[i + pos]);
5075 /* Arrays are handled as small records. */
5078 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5079 TREE_TYPE (type), subclasses, bit_offset);
5083 /* The partial classes are now full classes. */
5084 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5085 subclasses[0] = X86_64_SSE_CLASS;
5086 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5087 && !((bit_offset % 64) == 0 && bytes == 4))
5088 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element's classification across every eightbyte of
   the array.  */
5090 for (i = 0; i < words; i++)
5091 classes[i] = subclasses[i % num];
5096 case QUAL_UNION_TYPE:
5097 /* Unions are similar to RECORD_TYPE but offset is always 0.
5099 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5101 if (TREE_CODE (field) == FIELD_DECL)
5105 if (TREE_TYPE (field) == error_mark_node)
5108 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5109 TREE_TYPE (field), subclasses,
5113 for (i = 0; i < num; i++)
5114 classes[i] = merge_classes (subclasses[i], classes[i]);
5125 /* When size > 16 bytes, if the first one isn't
5126 X86_64_SSE_CLASS or any other ones aren't
5127 X86_64_SSEUP_CLASS, everything should be passed in
5129 if (classes[0] != X86_64_SSE_CLASS)
5132 for (i = 1; i < words; i++)
5133 if (classes[i] != X86_64_SSEUP_CLASS)
5137 /* Final merger cleanup. */
5138 for (i = 0; i < words; i++)
5140 /* If one class is MEMORY, everything should be passed in
5142 if (classes[i] == X86_64_MEMORY_CLASS)
5145 /* The X86_64_SSEUP_CLASS should be always preceded by
5146 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5147 if (classes[i] == X86_64_SSEUP_CLASS
5148 && classes[i - 1] != X86_64_SSE_CLASS
5149 && classes[i - 1] != X86_64_SSEUP_CLASS)
5151 /* The first one should never be X86_64_SSEUP_CLASS. */
5152 gcc_assert (i != 0);
5153 classes[i] = X86_64_SSE_CLASS;
5156 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5157 everything should be passed in memory. */
5158 if (classes[i] == X86_64_X87UP_CLASS
5159 && (classes[i - 1] != X86_64_X87_CLASS))
5163 /* The first one should never be X86_64_X87UP_CLASS. */
5164 gcc_assert (i != 0);
5165 if (!warned && warn_psabi)
5168 inform (input_location,
5169 "The ABI of passing union with long double"
5170 " has changed in GCC 4.4");
5178 /* Compute alignment needed. We align all types to natural boundaries with
5179 exception of XFmode that is aligned to 64bits. */
5180 if (mode != VOIDmode && mode != BLKmode)
5182 int mode_alignment = GET_MODE_BITSIZE (mode);
5185 mode_alignment = 128;
5186 else if (mode == XCmode)
5187 mode_alignment = 256;
5188 if (COMPLEX_MODE_P (mode))
5189 mode_alignment /= 2;
5190 /* Misaligned fields are always returned in memory. */
5191 if (bit_offset % mode_alignment)
5195 /* for V1xx modes, just use the base mode */
5196 if (VECTOR_MODE_P (mode) && mode != V1DImode
5197 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5198 mode = GET_MODE_INNER (mode);
5200 /* Classification of atomic types. */
5205 classes[0] = X86_64_SSE_CLASS;
5208 classes[0] = X86_64_SSE_CLASS;
5209 classes[1] = X86_64_SSEUP_CLASS;
/* Integer modes: pick INTEGERSI when the value (including its offset
   within the eightbyte) fits in the low 32 bits, INTEGER otherwise,
   splitting across two eightbytes as needed.  */
5219 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5223 classes[0] = X86_64_INTEGERSI_CLASS;
5226 else if (size <= 64)
5228 classes[0] = X86_64_INTEGER_CLASS;
5231 else if (size <= 64+32)
5233 classes[0] = X86_64_INTEGER_CLASS;
5234 classes[1] = X86_64_INTEGERSI_CLASS;
5237 else if (size <= 64+64)
5239 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5247 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5251 /* OImode shouldn't be used directly. */
/* Float scalar/complex modes.  */
5256 if (!(bit_offset % 64))
5257 classes[0] = X86_64_SSESF_CLASS;
5259 classes[0] = X86_64_SSE_CLASS;
5262 classes[0] = X86_64_SSEDF_CLASS;
5265 classes[0] = X86_64_X87_CLASS;
5266 classes[1] = X86_64_X87UP_CLASS;
5269 classes[0] = X86_64_SSE_CLASS;
5270 classes[1] = X86_64_SSEUP_CLASS;
5273 classes[0] = X86_64_SSE_CLASS;
5274 if (!(bit_offset % 64))
5280 if (!warned && warn_psabi)
5283 inform (input_location,
5284 "The ABI of passing structure with complex float"
5285 " member has changed in GCC 4.4");
5287 classes[1] = X86_64_SSESF_CLASS;
5291 classes[0] = X86_64_SSEDF_CLASS;
5292 classes[1] = X86_64_SSEDF_CLASS;
5295 classes[0] = X86_64_COMPLEX_X87_CLASS;
5298 /* This modes is larger than 16 bytes. */
/* Vector modes: 32-byte AVX vectors occupy four eightbytes, 16-byte
   vectors two, 8-byte vectors one.  */
5306 classes[0] = X86_64_SSE_CLASS;
5307 classes[1] = X86_64_SSEUP_CLASS;
5308 classes[2] = X86_64_SSEUP_CLASS;
5309 classes[3] = X86_64_SSEUP_CLASS;
5317 classes[0] = X86_64_SSE_CLASS;
5318 classes[1] = X86_64_SSEUP_CLASS;
5325 classes[0] = X86_64_SSE_CLASS;
5331 gcc_assert (VECTOR_MODE_P (mode));
5336 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5338 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5339 classes[0] = X86_64_INTEGERSI_CLASS;
5341 classes[0] = X86_64_INTEGER_CLASS;
5342 classes[1] = X86_64_INTEGER_CLASS;
5343 return 1 + (bytes > 8);
5347 /* Examine the argument and return set number of register required in each
5348 class. Return 0 iff parameter should be passed in memory. */
/* MODE/TYPE describe the argument; IN_RETURN is nonzero when classifying a
   function return value rather than a parameter.  Walks the per-8-byte-word
   classification produced by classify_argument and accumulates the register
   demand into *INT_NREGS and *SSE_NREGS.  NOTE(review): the per-case
   increments are elided in this listing — only the case labels are visible.  */
5350 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5351 int *int_nregs, int *sse_nregs)
5353 enum x86_64_reg_class regclass[MAX_CLASSES];
5354 int n = classify_argument (mode, type, regclass, 0);
/* Iterate the classified words and bucket them by register class.  */
5360 for (n--; n >= 0; n--)
5361 switch (regclass[n])
5363 case X86_64_INTEGER_CLASS:
5364 case X86_64_INTEGERSI_CLASS:
5367 case X86_64_SSE_CLASS:
5368 case X86_64_SSESF_CLASS:
5369 case X86_64_SSEDF_CLASS:
5372 case X86_64_NO_CLASS:
5373 case X86_64_SSEUP_CLASS:
5375 case X86_64_X87_CLASS:
5376 case X86_64_X87UP_CLASS:
5380 case X86_64_COMPLEX_X87_CLASS:
/* x87 classes are usable only for return values (two stack slots);
   as an argument the value must be passed in memory (0).  */
5381 return in_return ? 2 : 0;
5382 case X86_64_MEMORY_CLASS:
5388 /* Construct container for the argument used by GCC interface. See
5389 FUNCTION_ARG for the detailed description. */
/* Builds the RTL describing where an argument (or, when IN_RETURN, a return
   value) of MODE/TYPE lives: a single REG for the simple one-register cases,
   or a PARALLEL of EXPR_LISTs pairing each 8-byte word with its register.
   NINTREGS/NSSEREGS are the registers still available; INTREG points at the
   next integer register numbers and SSE_REGNO at the next SSE register.  */
5392 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5393 const_tree type, int in_return, int nintregs, int nsseregs,
5394 const int *intreg, int sse_regno)
5396 /* The following variables hold the static issued_error state.  These are
   static so each diagnostic below is emitted at most once per compilation. */
5397 static bool issued_sse_arg_error;
5398 static bool issued_sse_ret_error;
5399 static bool issued_x87_ret_error;
5401 enum machine_mode tmpmode;
5403 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5404 enum x86_64_reg_class regclass[MAX_CLASSES];
5408 int needed_sseregs, needed_intregs;
5409 rtx exp[MAX_CLASSES];
5412 n = classify_argument (mode, type, regclass, 0);
5415 if (!examine_argument (mode, type, in_return, &needed_intregs,
/* If the classification needs more registers than remain, fall back to
   memory passing (elided return path).  */
5418 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5421 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5422 some less clueful developer tries to use floating-point anyway. */
5423 if (needed_sseregs && !TARGET_SSE)
5427 if (!issued_sse_ret_error)
5429 error ("SSE register return with SSE disabled");
5430 issued_sse_ret_error = true;
5433 else if (!issued_sse_arg_error)
5435 error ("SSE register argument with SSE disabled");
5436 issued_sse_arg_error = true;
5441 /* Likewise, error if the ABI requires us to return values in the
5442 x87 registers and the user specified -mno-80387. */
5443 if (!TARGET_80387 && in_return)
5444 for (i = 0; i < n; i++)
5445 if (regclass[i] == X86_64_X87_CLASS
5446 || regclass[i] == X86_64_X87UP_CLASS
5447 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5449 if (!issued_x87_ret_error)
5451 error ("x87 register return with x87 disabled");
5452 issued_x87_ret_error = true;
5457 /* First construct simple cases. Avoid SCmode, since we want to use
5458 single register to pass this type. */
5459 if (n == 1 && mode != SCmode)
5460 switch (regclass[0])
5462 case X86_64_INTEGER_CLASS:
5463 case X86_64_INTEGERSI_CLASS:
5464 return gen_rtx_REG (mode, intreg[0]);
5465 case X86_64_SSE_CLASS:
5466 case X86_64_SSESF_CLASS:
5467 case X86_64_SSEDF_CLASS:
5468 if (mode != BLKmode)
5469 return gen_reg_or_parallel (mode, orig_mode,
5470 SSE_REGNO (sse_regno));
5472 case X86_64_X87_CLASS:
5473 case X86_64_COMPLEX_X87_CLASS:
5474 return gen_rtx_REG (mode, FIRST_STACK_REG);
5475 case X86_64_NO_CLASS:
5476 /* Zero sized array, struct or class. */
/* Two-word SSE+SSEUP pairs (16-byte vectors) fit in one XMM register.  */
5481 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5482 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5483 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5485 && regclass[0] == X86_64_SSE_CLASS
5486 && regclass[1] == X86_64_SSEUP_CLASS
5487 && regclass[2] == X86_64_SSEUP_CLASS
5488 && regclass[3] == X86_64_SSEUP_CLASS
5490 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5493 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5494 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* A pair of consecutive integer registers can carry CDImode/TImode/TFmode
   as a single double-word REG.  */
5495 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5496 && regclass[1] == X86_64_INTEGER_CLASS
5497 && (mode == CDImode || mode == TImode || mode == TFmode)
5498 && intreg[0] + 1 == intreg[1])
5499 return gen_rtx_REG (mode, intreg[0]);
5501 /* Otherwise figure out the entries of the PARALLEL. */
5502 for (i = 0; i < n; i++)
5506 switch (regclass[i])
5508 case X86_64_NO_CLASS:
5510 case X86_64_INTEGER_CLASS:
5511 case X86_64_INTEGERSI_CLASS:
5512 /* Merge TImodes on aligned occasions here too. */
5513 if (i * 8 + 8 > bytes)
5514 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5515 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5519 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5520 if (tmpmode == BLKmode)
5522 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5523 gen_rtx_REG (tmpmode, *intreg),
5527 case X86_64_SSESF_CLASS:
5528 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5529 gen_rtx_REG (SFmode,
5530 SSE_REGNO (sse_regno)),
5534 case X86_64_SSEDF_CLASS:
5535 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5536 gen_rtx_REG (DFmode,
5537 SSE_REGNO (sse_regno)),
5541 case X86_64_SSE_CLASS:
5549 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5559 && regclass[1] == X86_64_SSEUP_CLASS
5560 && regclass[2] == X86_64_SSEUP_CLASS
5561 && regclass[3] == X86_64_SSEUP_CLASS);
5568 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5569 gen_rtx_REG (tmpmode,
5570 SSE_REGNO (sse_regno)),
5579 /* Empty aligned struct, union or class. */
/* Assemble the collected EXPR_LIST entries into the final PARALLEL.  */
5583 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5584 for (i = 0; i < nexps; i++)
5585 XVECEXP (ret, 0, i) = exp [i];
5589 /* Update the data in CUM to advance over an argument of mode MODE
5590 and data type TYPE. (TYPE is null for libcalls where that information
5591 may not be available.) */
/* 32-bit (ia32) flavor: consumes WORDS general registers, or one SSE/MMX
   register for vector arguments that are not aggregates.  BYTES is the
   argument size in bytes.  NOTE(review): the mode dispatch (switch/cases)
   is elided in this listing.  */
5594 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5595 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-register path: charge WORDS words against the GPR budget.  */
5611 cum->words += words;
5612 cum->nregs -= words;
5613 cum->regno += words;
5615 if (cum->nregs <= 0)
5623 /* OImode shouldn't be used directly. */
5627 if (cum->float_in_sse < 2)
5630 if (cum->float_in_sse < 1)
/* SSE vectors passed in registers only when not an aggregate type.  */
5647 if (!type || !AGGREGATE_TYPE_P (type))
5649 cum->sse_words += words;
5650 cum->sse_nregs -= 1;
5651 cum->sse_regno += 1;
5652 if (cum->sse_nregs <= 0)
/* Likewise for MMX vector arguments.  */
5665 if (!type || !AGGREGATE_TYPE_P (type))
5667 cum->mmx_words += words;
5668 cum->mmx_nregs -= 1;
5669 cum->mmx_regno += 1;
5670 if (cum->mmx_nregs <= 0)
/* x86-64 SysV flavor of argument advance: use examine_argument to decide
   whether the argument fits in the remaining integer/SSE registers; if so
   charge both budgets, otherwise charge stack words only.  */
5681 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5682 tree type, HOST_WIDE_INT words, int named)
5684 int int_nregs, sse_nregs;
5686 /* Unnamed 256bit vector mode parameters are passed on stack. */
5687 if (!named && VALID_AVX256_REG_MODE (mode))
5690 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5691 cum->words += words;
5692 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5694 cum->nregs -= int_nregs;
5695 cum->sse_nregs -= sse_nregs;
5696 cum->regno += int_nregs;
5697 cum->sse_regno += sse_nregs;
/* Not enough registers left: the whole argument goes on the stack.  */
5700 cum->words += words;
/* Microsoft x64 flavor: every register-passed argument is 1/2/4/8 bytes
   (anything else is passed indirectly, asserted below) and consumes one
   slot regardless of class.  */
5704 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5705 HOST_WIDE_INT words)
5707 /* Otherwise, this should be passed indirect. */
5708 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5710 cum->words += words;
/* Dispatch argument-advance to the ABI-specific helper: MS x64, SysV
   x86-64, or ia32.  Computes BYTES/WORDS from MODE (or TYPE for BLKmode)
   before dispatching.  */
5719 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5720 tree type, int named)
5722 HOST_WIDE_INT bytes, words;
5724 if (mode == BLKmode)
5725 bytes = int_size_in_bytes (type);
5727 bytes = GET_MODE_SIZE (mode);
5728 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Re-derive the natural mode so vector types advance consistently.  */
5731 mode = type_natural_mode (type, NULL);
5733 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5734 function_arg_advance_ms_64 (cum, bytes, words);
5735 else if (TARGET_64BIT)
5736 function_arg_advance_64 (cum, mode, type, words, named);
5738 function_arg_advance_32 (cum, mode, type, bytes, words);
5741 /* Define where to put the arguments to a function.
5742 Value is zero to push the argument on the stack,
5743 or a hard register in which to store the argument.
5745 MODE is the argument's machine mode.
5746 TYPE is the data type of the argument (as a tree).
5747 This is null for libcalls where that information may
5749 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5750 the preceding args and about the function being called.
5751 NAMED is nonzero if this argument is a named parameter
5752 (otherwise it is an extra parameter matching an ellipsis). */
/* ia32 flavor; warns once each for SSE/MMX vector args when the
   corresponding ISA is disabled.  */
5755 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5756 enum machine_mode orig_mode, tree type,
5757 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5759 static bool warnedsse, warnedmmx;
5761 /* Avoid the AL settings for the Unix64 ABI. */
5762 if (mode == VOIDmode)
/* Integer path: only if the whole argument fits in remaining GPRs.  */
5778 if (words <= cum->nregs)
5780 int regno = cum->regno;
5782 /* Fastcall allocates the first two DWORD (SImode) or
5783 smaller arguments to ECX and EDX if it isn't an
5789 || (type && AGGREGATE_TYPE_P (type)))
5792 /* ECX not EAX is the first allocated register. */
5793 if (regno == AX_REG)
5796 return gen_rtx_REG (mode, regno);
5801 if (cum->float_in_sse < 2)
5804 if (cum->float_in_sse < 1)
5808 /* In 32bit, we pass TImode in xmm registers. */
5815 if (!type || !AGGREGATE_TYPE_P (type))
5817 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5820 warning (0, "SSE vector argument without SSE enabled "
5824 return gen_reg_or_parallel (mode, orig_mode,
5825 cum->sse_regno + FIRST_SSE_REG)
5830 /* OImode shouldn't be used directly. */
/* 16-byte vectors also go in XMM registers when not aggregates.  */
5839 if (!type || !AGGREGATE_TYPE_P (type))
5842 return gen_reg_or_parallel (mode, orig_mode,
5843 cum->sse_regno + FIRST_SSE_REG);
/* 8-byte vectors go in MMX registers when not aggregates.  */
5852 if (!type || !AGGREGATE_TYPE_P (type))
5854 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5857 warning (0, "MMX vector argument without MMX enabled "
5861 return gen_reg_or_parallel (mode, orig_mode,
5862 cum->mmx_regno + FIRST_MMX_REG);
/* x86-64 SysV flavor of FUNCTION_ARG.  A VOIDmode "argument" is the hidden
   AL register value communicating how many SSE registers a varargs callee
   must save; otherwise delegate to construct_container.  */
5871 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5872 enum machine_mode orig_mode, tree type, int named)
5874 /* Handle a hidden AL argument containing number of registers
5875 for varargs x86-64 functions. */
5876 if (mode == VOIDmode)
5877 return GEN_INT (cum->maybe_vaarg
5878 ? (cum->sse_nregs < 0
5879 ? (cum->call_abi == ix86_abi
5881 : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5882 : X64_SSE_REGPARM_MAX))
5897 /* Unnamed 256bit vector mode parameters are passed on stack. */
5903 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5905 &x86_64_int_parameter_registers [cum->regno],
/* Microsoft x64 flavor of FUNCTION_ARG.  Each of the first four arguments
   occupies one fixed slot; SFmode/DFmode go in the matching XMM register,
   and unnamed float args are described in both SSE and integer registers
   via a two-element PARALLEL.  */
5910 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5911 enum machine_mode orig_mode, int named,
5912 HOST_WIDE_INT bytes)
5916 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5917 We use value of -2 to specify that current function call is MSABI. */
5918 if (mode == VOIDmode)
5919 return GEN_INT (-2);
5921 /* If we've run out of registers, it goes on the stack. */
5922 if (cum->nregs == 0)
5925 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5927 /* Only floating point modes are passed in anything but integer regs. */
5928 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5931 regno = cum->regno + FIRST_SSE_REG;
5936 /* Unnamed floating parameters are passed in both the
5937 SSE and integer registers. */
5938 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5939 t2 = gen_rtx_REG (mode, regno);
5940 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5941 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5942 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5945 /* Handle aggregated types passed in register. */
5946 if (orig_mode == BLKmode)
5948 if (bytes > 0 && bytes <= 8)
5949 mode = (bytes > 4 ? DImode : SImode);
5950 if (mode == BLKmode)
5954 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Top-level FUNCTION_ARG dispatcher: compute size, normalize vector types
   to their natural mode, then route to the MS x64, SysV x86-64, or ia32
   helper.  */
5958 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5959 tree type, int named)
5961 enum machine_mode mode = omode;
5962 HOST_WIDE_INT bytes, words;
5964 if (mode == BLKmode)
5965 bytes = int_size_in_bytes (type);
5967 bytes = GET_MODE_SIZE (mode);
5968 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5970 /* To simplify the code below, represent vector types with a vector mode
5971 even if MMX/SSE are not active. */
5972 if (type && TREE_CODE (type) == VECTOR_TYPE)
5973 mode = type_natural_mode (type, cum);
5975 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5976 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5977 else if (TARGET_64BIT)
5978 return function_arg_64 (cum, mode, omode, type, named);
5980 return function_arg_32 (cum, mode, omode, type, bytes, words);
5983 /* A C expression that indicates when an argument must be passed by
5984 reference. If nonzero for an argument, a copy of that argument is
5985 made in memory and a pointer to the argument is passed instead of
5986 the argument itself. The pointer is passed in whatever way is
5987 appropriate for passing a pointer to that type. */
/* MS x64: arrays and aggregates whose size is not 1/2/4/8 bytes are passed
   by reference.  SysV x86-64: variable-size types (int_size_in_bytes == -1)
   are passed by reference.  */
5990 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5991 enum machine_mode mode ATTRIBUTE_UNUSED,
5992 const_tree type, bool named ATTRIBUTE_UNUSED)
5994 /* See Windows x64 Software Convention. */
5995 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5997 int msize = (int) GET_MODE_SIZE (mode);
6000 /* Arrays are passed by reference. */
6001 if (TREE_CODE (type) == ARRAY_TYPE)
6004 if (AGGREGATE_TYPE_P (type))
6006 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6007 are passed by reference. */
6008 msize = int_size_in_bytes (type);
6012 /* __m128 is passed by reference. */
6014 case 1: case 2: case 4: case 8:
6020 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6026 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* True if TYPE itself is a 128-bit-aligned SSE-mode value, or an aggregate
   that (recursively) contains one; arrays are walked for languages that
   pass arrays by value.  */
6029 contains_aligned_value_p (tree type)
6031 enum machine_mode mode = TYPE_MODE (type);
6032 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6036 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6038 if (TYPE_ALIGN (type) < 128)
6041 if (AGGREGATE_TYPE_P (type))
6043 /* Walk the aggregates recursively. */
6044 switch (TREE_CODE (type))
6048 case QUAL_UNION_TYPE:
6052 /* Walk all the structure fields. */
6053 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6055 if (TREE_CODE (field) == FIELD_DECL
6056 && contains_aligned_value_p (TREE_TYPE (field)))
6063 /* Just for use if some languages passes arrays by value. */
6064 if (contains_aligned_value_p (TREE_TYPE (type)))
6075 /* Gives the alignment boundary, in bits, of an argument with the
6076 specified mode and type. */
/* Starts from the type's (canonical) or mode's alignment, clamps below to
   PARM_BOUNDARY and above to BIGGEST_ALIGNMENT; on ia32 only SSE modes and
   types containing 128-bit-aligned values keep alignment above
   PARM_BOUNDARY.  */
6079 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6084 /* Since canonical type is used for call, we convert it to
6085 canonical type if needed. */
6086 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6087 type = TYPE_CANONICAL (type);
6088 align = TYPE_ALIGN (type);
6091 align = GET_MODE_ALIGNMENT (mode);
6092 if (align < PARM_BOUNDARY)
6093 align = PARM_BOUNDARY;
6094 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6095 natural boundaries. */
6096 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6098 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6099 make an exception for SSE modes since these require 128bit
6102 The handling here differs from field_alignment. ICC aligns MMX
6103 arguments to 4 byte boundaries, while structure fields are aligned
6104 to 8 byte boundaries. */
6107 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6108 align = PARM_BOUNDARY;
6112 if (!contains_aligned_value_p (type))
6113 align = PARM_BOUNDARY;
6116 if (align > BIGGEST_ALIGNMENT)
6117 align = BIGGEST_ALIGNMENT;
6121 /* Return true if N is a possible register number of function value. */
6124 ix86_function_value_regno_p (int regno)
/* NOTE(review): the switch header and several cases are elided here; only
   the x87 and (presumably MMX/SSE) cases are visible.  */
6131 case FIRST_FLOAT_REG:
6132 /* TODO: The function should depend on current function ABI but
6133 builtins.c would need updating then. Therefore we use the
/* MS x64 never returns values in x87 registers.  */
6135 if (TARGET_64BIT && ix86_abi == MS_ABI)
6137 return TARGET_FLOAT_RETURNS_IN_80387;
6143 if (TARGET_MACHO || TARGET_64BIT)
6151 /* Define how to find the value returned by a function.
6152 VALTYPE is the data type of the value (as a tree).
6153 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6154 otherwise, FUNC is 0. */
/* ia32 return-value register selection: MM0 for 8-byte vectors, XMM0 for
   TImode/16-byte vectors, YMM0 for 32-byte vectors, st(0) for x87 floats,
   otherwise %eax — with an SSE override for sseregparm/SSE-math
   SFmode/DFmode returns.  */
6157 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6158 const_tree fntype, const_tree fn)
6162 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6163 we normally prevent this case when mmx is not available. However
6164 some ABIs may require the result to be returned like DImode. */
6165 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6166 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6168 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6169 we prevent this case when sse is not available. However some ABIs
6170 may require the result to be returned like integer TImode. */
6171 else if (mode == TImode
6172 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6173 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6175 /* 32-byte vector modes in %ymm0. */
6176 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6177 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6179 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6180 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6181 regno = FIRST_FLOAT_REG;
6183 /* Most things go in %eax. */
6186 /* Override FP return register with %xmm0 for local functions when
6187 SSE math is enabled or for functions with sseregparm attribute. */
6188 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6190 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6191 if ((sse_level >= 1 && mode == SFmode)
6192 || (sse_level == 2 && mode == DFmode))
6193 regno = FIRST_SSE_REG;
6196 /* OImode shouldn't be used directly. */
6197 gcc_assert (mode != OImode);
6199 return gen_rtx_REG (orig_mode, regno);
/* SysV x86-64 return-value selection.  Libcalls (VALTYPE == NULL) get a
   fixed register by mode (elided dispatch); typed values go through
   construct_container, falling back to %rax for zero-sized aggregates.  */
6203 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6208 /* Handle libcalls, which don't provide a type node. */
6209 if (valtype == NULL)
6221 return gen_rtx_REG (mode, FIRST_SSE_REG);
6224 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6228 return gen_rtx_REG (mode, AX_REG);
6232 ret = construct_container (mode, orig_mode, valtype, 1,
6233 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6234 x86_64_int_return_registers, 0);
6236 /* For zero sized structures, construct_container returns NULL, but we
6237 need to keep rest of compiler happy by returning meaningful value. */
6239 ret = gen_rtx_REG (orig_mode, AX_REG);
/* MS x64 return-value selection: default %rax; 16-byte non-complex
   scalar-int/vector values and SFmode/DFmode go in %xmm0.  */
6245 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6247 unsigned int regno = AX_REG;
6251 switch (GET_MODE_SIZE (mode))
6254 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6255 && !COMPLEX_MODE_P (mode))
6256 regno = FIRST_SSE_REG;
6260 if (mode == SFmode || mode == DFmode)
6261 regno = FIRST_SSE_REG;
6267 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value: resolve
   FNTYPE_OR_DECL to a decl/type pair, then route to the ABI-specific
   return-value helper.  */
6271 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6272 enum machine_mode orig_mode, enum machine_mode mode)
6274 const_tree fn, fntype;
6277 if (fntype_or_decl && DECL_P (fntype_or_decl))
6278 fn = fntype_or_decl;
6279 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6281 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6282 return function_value_ms_64 (orig_mode, mode);
6283 else if (TARGET_64BIT)
6284 return function_value_64 (orig_mode, mode, valtype);
6286 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: normalize VALTYPE to its natural mode and
   delegate to the common worker.  */
6290 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6291 bool outgoing ATTRIBUTE_UNUSED)
6293 enum machine_mode mode, orig_mode;
6295 orig_mode = TYPE_MODE (valtype);
6296 mode = type_natural_mode (valtype, NULL);
6297 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* Return-value location for a libcall: no type information available.  */
6301 ix86_libcall_value (enum machine_mode mode)
6303 return ix86_function_value_1 (NULL, NULL, mode, mode);
6306 /* Return true iff type is returned in memory. */
/* ia32 rules: small MS-style aggregates (<= 8 bytes) in registers; vector
   returns depend on whether the matching register file (MMX/SSE/AVX)
   exists.  Nonzero result means "return in memory".  */
6308 static int ATTRIBUTE_UNUSED
6309 return_in_memory_32 (const_tree type, enum machine_mode mode)
6313 if (mode == BLKmode)
6316 size = int_size_in_bytes (type);
6318 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6321 if (VECTOR_MODE_P (mode) || mode == TImode)
6323 /* User-created vectors small enough to fit in EAX. */
6327 /* MMX/3dNow values are returned in MM0,
6328 except when it doesn't exits. */
6330 return (TARGET_MMX ? 0 : 1);
6332 /* SSE values are returned in XMM0, except when it doesn't exist. */
6334 return (TARGET_SSE ? 0 : 1);
6336 /* AVX values are returned in YMM0, except when it doesn't exist. */
6338 return TARGET_AVX ? 0 : 1;
6347 /* OImode shouldn't be used directly. */
6348 gcc_assert (mode != OImode);
/* SysV x86-64: a value is returned in memory exactly when
   examine_argument cannot classify it into registers.  */
6353 static int ATTRIBUTE_UNUSED
6354 return_in_memory_64 (const_tree type, enum machine_mode mode)
6356 int needed_intregs, needed_sseregs;
6357 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* MS x64: 16-byte non-complex scalar-int/vector values (__m128) return in
   xmm0; everything else must be exactly 1, 2, 4 or 8 bytes to use a
   register.  */
6360 static int ATTRIBUTE_UNUSED
6361 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6363 HOST_WIDE_INT size = int_size_in_bytes (type);
6365 /* __m128 is returned in xmm0. */
6366 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6367 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6370 /* Otherwise, the size must be exactly in [1248]. */
6371 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: subtarget override if defined, otherwise
   dispatch on ABI (MS x64 / SysV x86-64 / ia32) using the type's natural
   mode.  */
6375 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6377 #ifdef SUBTARGET_RETURN_IN_MEMORY
6378 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6380 const enum machine_mode mode = type_natural_mode (type, NULL);
6384 if (ix86_function_type_abi (fntype) == MS_ABI)
6385 return return_in_memory_ms_64 (type, mode);
6387 return return_in_memory_64 (type, mode);
6390 return return_in_memory_32 (type, mode);
6394 /* Return false iff TYPE is returned in memory. This version is used
6395 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6396 but differs notably in that when MMX is available, 8-byte vectors
6397 are returned in memory, rather than in MMX registers. */
6400 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6403 enum machine_mode mode = type_natural_mode (type, NULL);
6406 return return_in_memory_64 (type, mode);
6408 if (mode == BLKmode)
6411 size = int_size_in_bytes (type);
6413 if (VECTOR_MODE_P (mode))
6415 /* Return in memory only if MMX registers *are* available. This
6416 seems backwards, but it is consistent with the existing
/* TImode/XFmode paths — elided bodies decide the remaining cases.  */
6423 else if (mode == TImode)
6425 else if (mode == XFmode)
6431 /* When returning SSE vector types, we have a choice of either
6432 (1) being abi incompatible with a -march switch, or
6433 (2) generating an error.
6434 Given no good solution, I think the safest thing is one warning.
6435 The user won't be able to use -Werror, but....
6437 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6438 called in response to actually generating a caller or callee that
6439 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6440 via aggregate_value_p for general type probing from tree-ssa. */
/* Warns once each for SSE/MMX vector returns compiled without the
   corresponding ISA on ia32.  */
6443 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6445 static bool warnedsse, warnedmmx;
6447 if (!TARGET_64BIT && type)
6449 /* Look at the return type of the function, not the function type. */
6450 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6452 if (!TARGET_SSE && !warnedsse)
6455 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6458 warning (0, "SSE vector return without SSE enabled "
6463 if (!TARGET_MMX && !warnedmmx)
6465 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6468 warning (0, "MMX vector return without MMX enabled "
6478 /* Create the va_list data type. */
6480 /* Returns the calling convention specific va_list date type.
6481 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
/* For ia32 and MS x64 this is just `char *`; for SysV x86-64 it is the
   four-field __va_list_tag record (gp_offset, fp_offset,
   overflow_arg_area, reg_save_area) wrapped in a one-element array.  */
6484 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6486 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6488 /* For i386 we use plain pointer to argument area. */
6489 if (!TARGET_64BIT || abi == MS_ABI)
6490 return build_pointer_type (char_type_node);
6492 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6493 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6495 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6496 unsigned_type_node);
6497 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6498 unsigned_type_node);
6499 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6501 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so the va_list usage analysis can find them. */
6504 va_list_gpr_counter_field = f_gpr;
6505 va_list_fpr_counter_field = f_fpr;
6507 DECL_FIELD_CONTEXT (f_gpr) = record;
6508 DECL_FIELD_CONTEXT (f_fpr) = record;
6509 DECL_FIELD_CONTEXT (f_ovf) = record;
6510 DECL_FIELD_CONTEXT (f_sav) = record;
6512 TREE_CHAIN (record) = type_decl;
6513 TYPE_NAME (record) = type_decl;
6514 TYPE_FIELDS (record) = f_gpr;
6515 TREE_CHAIN (f_gpr) = f_fpr;
6516 TREE_CHAIN (f_fpr) = f_ovf;
6517 TREE_CHAIN (f_ovf) = f_sav;
6519 layout_type (record);
6521 /* The correct type is an array type of one element. */
6522 return build_array_type (record, build_index_type (size_zero_node));
6525 /* Setup the builtin va_list data type and for 64-bit the additional
6526 calling convention specific va_list data types. */
/* Builds the default va_list for the current ABI, then also records the
   other ABI's va_list type (sysv_va_list_type_node / ms_va_list_type_node)
   as a distinct variant so __builtin_ms_va_list and __builtin_sysv_va_list
   are different types.  */
6529 ix86_build_builtin_va_list (void)
6531 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6533 /* Initialize abi specific va_list builtin types. */
6537 if (ix86_abi == MS_ABI)
6539 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6540 if (TREE_CODE (t) != RECORD_TYPE)
6541 t = build_variant_type_copy (t);
6542 sysv_va_list_type_node = t;
6547 if (TREE_CODE (t) != RECORD_TYPE)
6548 t = build_variant_type_copy (t);
6549 sysv_va_list_type_node = t;
6551 if (ix86_abi != MS_ABI)
6553 t = ix86_build_builtin_va_list_abi (MS_ABI);
6554 if (TREE_CODE (t) != RECORD_TYPE)
6555 t = build_variant_type_copy (t);
6556 ms_va_list_type_node = t;
6561 if (TREE_CODE (t) != RECORD_TYPE)
6562 t = build_variant_type_copy (t);
6563 ms_va_list_type_node = t;
6570 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* SysV x86-64: dump the unconsumed integer argument registers into the
   register save area, then emit the sse_prologue_save computed-jump
   sequence that saves exactly the SSE registers the caller reported in AL. */
6573 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6582 int regparm = ix86_regparm;
6584 if (cum->call_abi != ix86_abi)
6585 regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6587 /* GPR size of varargs save area. */
6588 if (cfun->va_list_gpr_size)
6589 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6591 ix86_varargs_gpr_size = 0;
6593 /* FPR size of varargs save area. We don't need it if we don't pass
6594 anything in SSE registers. */
6595 if (cum->sse_nregs && cfun->va_list_fpr_size)
6596 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6598 ix86_varargs_fpr_size = 0;
6600 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6603 save_area = frame_pointer_rtx;
6604 set = get_varargs_alias_set ();
/* Save each remaining named-parameter GPR into its save-area slot.  */
6606 for (i = cum->regno;
6608 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6611 mem = gen_rtx_MEM (Pmode,
6612 plus_constant (save_area, i * UNITS_PER_WORD));
6613 MEM_NOTRAP_P (mem) = 1;
6614 set_mem_alias_set (mem, set);
6615 emit_move_insn (mem, gen_rtx_REG (Pmode,
6616 x86_64_int_parameter_registers[i]));
6619 if (ix86_varargs_fpr_size)
6621 /* Now emit code to save SSE registers. The AX parameter contains number
6622 of SSE parameter registers used to call this function. We use
6623 sse_prologue_save insn template that produces computed jump across
6624 SSE saves. We need some preparation work to get this working. */
6626 label = gen_label_rtx ();
6627 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6629 /* Compute address to jump to :
6630 label - eax*4 + nnamed_sse_arguments*4 Or
6631 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6632 tmp_reg = gen_reg_rtx (Pmode);
6633 nsse_reg = gen_reg_rtx (Pmode);
6634 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6635 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6636 gen_rtx_MULT (Pmode, nsse_reg,
6639 /* vmovaps is one byte longer than movaps. */
6641 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6642 gen_rtx_PLUS (Pmode, tmp_reg,
6648 gen_rtx_CONST (DImode,
6649 gen_rtx_PLUS (DImode,
6651 GEN_INT (cum->sse_regno
6652 * (TARGET_AVX ? 5 : 4)))));
6654 emit_move_insn (nsse_reg, label_ref);
6655 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6657 /* Compute address of memory block we save into. We always use pointer
6658 pointing 127 bytes after first byte to store - this is needed to keep
6659 instruction size limited by 4 bytes (5 bytes for AVX) with one
6660 byte displacement. */
6661 tmp_reg = gen_reg_rtx (Pmode);
6662 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6663 plus_constant (save_area,
6664 ix86_varargs_gpr_size + 127)));
6665 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6666 MEM_NOTRAP_P (mem) = 1;
6667 set_mem_alias_set (mem, set);
6668 set_mem_align (mem, BITS_PER_WORD);
6670 /* And finally do the dirty job! */
6671 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6672 GEN_INT (cum->sse_regno), label));
/* MS x64 varargs prologue: spill every not-yet-consumed parameter register
   into its caller-allocated home slot in the incoming argument area.  */
6677 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6679 alias_set_type set = get_varargs_alias_set ();
6682 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6686 mem = gen_rtx_MEM (Pmode,
6687 plus_constant (virtual_incoming_args_rtx,
6688 i * UNITS_PER_WORD));
6689 MEM_NOTRAP_P (mem) = 1;
6690 set_mem_alias_set (mem, set);
6692 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6693 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: advance past the last named argument
   (for stdarg functions) and dispatch to the MS or SysV 64-bit worker.  */
6698 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6699 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6702 CUMULATIVE_ARGS next_cum;
6705 /* This argument doesn't appear to be used anymore. Which is good,
6706 because the old code here didn't suppress rtl generation. */
6707 gcc_assert (!no_rtl);
6712 fntype = TREE_TYPE (current_function_decl);
6714 /* For varargs, we do not want to skip the dummy va_dcl argument.
6715 For stdargs, we do want to skip the last named argument. */
6717 if (stdarg_p (fntype))
6718 function_arg_advance (&next_cum, mode, type, 1);
6720 if (cum->call_abi == MS_ABI)
6721 setup_incoming_varargs_ms_64 (&next_cum);
6723 setup_incoming_varargs_64 (&next_cum);
6726 /* Checks if TYPE is of kind va_list char *. */
/* True when TYPE canonicalizes to the `char *` style va_list (always on
   ia32; on 64-bit, for the MS va_list or — under the MS default ABI —
   the generic va_list).  */
6729 is_va_list_char_pointer (tree type)
6733 /* For 32-bit it is always true. */
6736 canonic = ix86_canonical_va_list_type (type);
6737 return (canonic == ms_va_list_type_node
6738 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6741 /* Implement va_start. */
/* Initialize the four fields of the SysV x86-64 va_list struct
   (gp_offset, fp_offset, overflow_arg_area, reg_save_area) from the
   counts of named register arguments recorded in crtl->args.info.  */
6744 ix86_va_start (tree valist, rtx nextarg)
6746   HOST_WIDE_INT words, n_gpr, n_fpr;
6747   tree f_gpr, f_fpr, f_ovf, f_sav;
6748   tree gpr, fpr, ovf, sav, t;
6751   /* Only 64bit target needs something special. */
6752   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6754     std_expand_builtin_va_start (valist, nextarg);
/* Walk the field chain of the SysV va_list record type.  */
6758   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6759   f_fpr = TREE_CHAIN (f_gpr);
6760   f_ovf = TREE_CHAIN (f_fpr);
6761   f_sav = TREE_CHAIN (f_ovf);
6763   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6764   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6765   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6766   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6767   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6769   /* Count number of gp and fp argument registers used. */
6770   words = crtl->args.info.words;
6771   n_gpr = crtl->args.info.regno;
6772   n_fpr = crtl->args.info.sse_regno;
/* gp_offset: 8 bytes per integer register already consumed.  */
6774   if (cfun->va_list_gpr_size)
6776       type = TREE_TYPE (gpr);
6777       t = build2 (MODIFY_EXPR, type,
6778 		  gpr, build_int_cst (type, n_gpr * 8));
6779       TREE_SIDE_EFFECTS (t) = 1;
6780       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset: SSE slots are 16 bytes each and follow the 8*REGPARM_MAX
   bytes of integer slots in the register save area.  */
6783   if (TARGET_SSE && cfun->va_list_fpr_size)
6785       type = TREE_TYPE (fpr);
6786       t = build2 (MODIFY_EXPR, type, fpr,
6787 		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6788       TREE_SIDE_EFFECTS (t) = 1;
6789       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6792   /* Find the overflow area. */
6793   type = TREE_TYPE (ovf);
6794   t = make_tree (type, crtl->args.internal_arg_pointer);
6796   t = build2 (POINTER_PLUS_EXPR, type, t,
6797 	      size_int (words * UNITS_PER_WORD));
6798   t = build2 (MODIFY_EXPR, type, ovf, t);
6799   TREE_SIDE_EFFECTS (t) = 1;
6800   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6802   if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6804       /* Find the register save area.
6805 	 Prologue of the function save it right above stack frame. */
6806       type = TREE_TYPE (sav);
6807       t = make_tree (type, frame_pointer_rtx);
/* When no GPRs were saved, the save area starts at the SSE slots; back
   up over the (unsaved) integer-register portion.  */
6808       if (!ix86_varargs_gpr_size)
6809 	t = build2 (POINTER_PLUS_EXPR, type, t,
6810 		    size_int (-8 * X86_64_REGPARM_MAX));
6811       t = build2 (MODIFY_EXPR, type, sav, t);
6812       TREE_SIDE_EFFECTS (t) = 1;
6813       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6817 /* Implement va_arg. */
/* Gimplify a VA_ARG_EXPR for the SysV x86-64 ABI.  Emits GIMPLE that
   first tries to fetch the argument from the register save area
   (guarded by gp_offset/fp_offset range checks, with a temporary and
   piecewise copies when the value is not a consecutive register
   block), and otherwise pulls it from the overflow (stack) area,
   honoring the argument's alignment.  Returns the dereferenced
   value.  */
6820 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6823   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6824   tree f_gpr, f_fpr, f_ovf, f_sav;
6825   tree gpr, fpr, ovf, sav, t;
6827   tree lab_false, lab_over = NULL_TREE;
6832   enum machine_mode nat_mode;
6835   /* Only 64bit target needs something special. */
6836   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6837     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6839   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6840   f_fpr = TREE_CHAIN (f_gpr);
6841   f_ovf = TREE_CHAIN (f_fpr);
6842   f_sav = TREE_CHAIN (f_ovf);
6844   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6845 		build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6846   valist = build_va_arg_indirect_ref (valist);
6847   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6848   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6849   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Pass-by-reference arguments are fetched as a pointer and
   dereferenced at the end.  */
6851   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6853     type = build_pointer_type (type);
6854   size = int_size_in_bytes (type);
6855   rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6857   nat_mode = type_natural_mode (type, NULL);
6866       /* Unnamed 256bit vector mode parameters are passed on stack. */
6867       if (ix86_cfun_abi () == SYSV_ABI)
/* Classify how this argument would be passed; NULL container means it
   lives entirely in the overflow area.  */
6874   container = construct_container (nat_mode, TYPE_MODE (type),
6875 				  type, 0, X86_64_REGPARM_MAX,
6876 				  X86_64_SSE_REGPARM_MAX, intreg,
6881   /* Pull the value out of the saved registers. */
6883   addr = create_tmp_var (ptr_type_node, "addr");
6884   DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6888       int needed_intregs, needed_sseregs;
6890       tree int_addr, sse_addr;
6892       lab_false = create_artificial_label ();
6893       lab_over = create_artificial_label ();
6895       examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary is needed when the value is split across register
   classes or is over-aligned for the save-area slots.  */
6897       need_temp = (!REG_P (container)
6898 		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
6899 		       || TYPE_ALIGN (type) > 128));
6901       /* In case we are passing structure, verify that it is consecutive block
6902          on the register save area. If not we need to do moves. */
6903       if (!need_temp && !REG_P (container))
6905 	  /* Verify that all registers are strictly consecutive */
6906 	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6910 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6912 		  rtx slot = XVECEXP (container, 0, i);
6913 		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6914 		      || INTVAL (XEXP (slot, 1)) != i * 16)
6922 	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6924 		  rtx slot = XVECEXP (container, 0, i);
6925 		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6926 		      || INTVAL (XEXP (slot, 1)) != i * 8)
6938 	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
6939 	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6940 	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6941 	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6944       /* First ensure that we fit completely in registers. */
/* If gp_offset is past the last slot that leaves room for
   NEEDED_INTREGS registers, jump to the stack path.  */
6947 	  t = build_int_cst (TREE_TYPE (gpr),
6948 			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6949 	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6950 	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6951 	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6952 	  gimplify_and_add (t, pre_p);
/* Likewise for fp_offset and the SSE portion of the save area.  */
6956 	  t = build_int_cst (TREE_TYPE (fpr),
6957 			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6958 			     + X86_64_REGPARM_MAX * 8);
6959 	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6960 	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6961 	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6962 	  gimplify_and_add (t, pre_p);
6965       /* Compute index to start of area used for integer regs. */
6968 	  /* int_addr = gpr + sav; */
6969 	  t = fold_convert (sizetype, gpr);
6970 	  t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6971 	  gimplify_assign (int_addr, t, pre_p);
6975 	  /* sse_addr = fpr + sav; */
6976 	  t = fold_convert (sizetype, fpr);
6977 	  t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6978 	  gimplify_assign (sse_addr, t, pre_p);
/* Non-consecutive case: assemble the value piecewise into a stack
   temporary, one register-sized piece at a time.  */
6983 	  tree temp = create_tmp_var (type, "va_arg_tmp");
6986 	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6987 	  gimplify_assign (addr, t, pre_p);
6989 	  for (i = 0; i < XVECLEN (container, 0); i++)
6991 	      rtx slot = XVECEXP (container, 0, i);
6992 	      rtx reg = XEXP (slot, 0);
6993 	      enum machine_mode mode = GET_MODE (reg);
6994 	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6995 	      tree addr_type = build_pointer_type (piece_type);
6996 	      tree daddr_type = build_pointer_type_for_mode (piece_type,
7000 	      tree dest_addr, dest;
/* Source slot: SSE registers are 16 bytes apart, GPRs 8 bytes.  */
7002 	      if (SSE_REGNO_P (REGNO (reg)))
7004 		  src_addr = sse_addr;
7005 		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7009 		  src_addr = int_addr;
7010 		  src_offset = REGNO (reg) * 8;
7012 	      src_addr = fold_convert (addr_type, src_addr);
7013 	      src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7014 				      size_int (src_offset));
7015 	      src = build_va_arg_indirect_ref (src_addr);
7017 	      dest_addr = fold_convert (daddr_type, addr);
7018 	      dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7019 				       size_int (INTVAL (XEXP (slot, 1))));
7020 	      dest = build_va_arg_indirect_ref (dest_addr);
7022 	      gimplify_assign (dest, src, pre_p);
/* Advance the offsets past the registers just consumed.  */
7028 	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7029 		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7030 	  gimplify_assign (gpr, t, pre_p);
7035 	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7036 		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7037 	  gimplify_assign (fpr, t, pre_p);
7040       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7042       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7045   /* ... otherwise out of the overflow area. */
7047   /* When we align parameter on stack for caller, if the parameter
7048      alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7049      aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
7050      here with caller. */
7051   arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7052   if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7053     arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7055   /* Care for on-stack alignment if needed. */
7056   if (arg_boundary <= 64
7057       || integer_zerop (TYPE_SIZE (type)))
/* Round the overflow pointer up to the argument boundary:
   addr = (ovf + align-1) & -align.  */
7061       HOST_WIDE_INT align = arg_boundary / 8;
7062       t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7063 		  size_int (align - 1));
7064       t = fold_convert (sizetype, t);
7065       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7067       t = fold_convert (TREE_TYPE (ovf), t);
7069   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7070   gimplify_assign (addr, t, pre_p);
/* Bump the overflow pointer past the argument.  */
7072   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7073 	      size_int (rsize * UNITS_PER_WORD));
7074   gimplify_assign (unshare_expr (ovf), t, pre_p);
7077     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7079   ptrtype = build_pointer_type (type);
7080   addr = fold_convert (ptrtype, addr);
/* Extra dereference for pass-by-reference arguments.  */
7083     addr = build_va_arg_indirect_ref (addr);
7084   return build_va_arg_indirect_ref (addr);
7087 /* Return nonzero if OPNUM's MEM should be matched
7088    in movabs* patterns. */
7091 ix86_check_movabs (rtx insn, int opnum)
7095   set = PATTERN (insn);
/* Look through a PARALLEL to the primary SET of the pattern.  */
7096   if (GET_CODE (set) == PARALLEL)
7097     set = XVECEXP (set, 0, 0);
7098   gcc_assert (GET_CODE (set) == SET);
7099   mem = XEXP (set, opnum);
/* Strip SUBREGs to reach the underlying MEM.  */
7100   while (GET_CODE (mem) == SUBREG)
7101     mem = SUBREG_REG (mem);
7102   gcc_assert (MEM_P (mem));
/* Volatile memory only matches when volatile_ok permits it.  */
7103   return (volatile_ok || !MEM_VOLATILE_P (mem));
7106 /* Initialize the table of extra 80387 mathematical constants. */
/* Fills ext_80387_constants_table with the values loadable by the
   x87 fldlg2/fldln2/fldl2e/fldl2t/fldpi instructions, rounded to
   XFmode, and marks the table initialized.  */
7109 init_ext_80387_constants (void)
7111   static const char * cst[5] =
7113     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
7114     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
7115     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
7116     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
7117     "3.1415926535897932385128089594061862044",  /* 4: fldpi  */
7121   for (i = 0; i < 5; i++)
7123       real_from_string (&ext_80387_constants_table[i], cst[i]);
7124       /* Ensure each constant is rounded to XFmode precision. */
7125       real_convert (&ext_80387_constants_table[i],
7126 		    XFmode, &ext_80387_constants_table[i]);
7129   ext_80387_constants_init = 1;
7132 /* Return true if the constant is something that can be loaded with
7133    a special instruction. */
/* Classifies CONST_DOUBLE X for the x87: presumably returns a small
   positive index for fldz/fld1/the ext table, and negative codes for
   values loadable as fldz;fchs / fld1;fchs — exact return values are
   elided from this view; confirm against standard_80387_constant_opcode.  */
7136 standard_80387_constant_p (rtx x)
7138   enum machine_mode mode = GET_MODE (x);
7142   if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7145   if (x == CONST0_RTX (mode))
7147   if (x == CONST1_RTX (mode))
7150   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7152   /* For XFmode constants, try to find a special 80387 instruction when
7153      optimizing for size or on those CPUs that benefit from them. */
7155       && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7159       if (! ext_80387_constants_init)
7160 	init_ext_80387_constants ();
7162       for (i = 0; i < 5; i++)
7163 	if (real_identical (&r, &ext_80387_constants_table[i]))
7167   /* Load of the constant -0.0 or -1.0 will be split as
7168      fldz;fchs or fld1;fchs sequence. */
7169   if (real_isnegzero (&r))
7171   if (real_identical (&r, &dconstm1))
7177 /* Return the opcode of the special instruction to be used to load
/* Maps the classification from standard_80387_constant_p to the
   corresponding x87 load mnemonic (the switch cases are elided from
   this view).  */
7181 standard_80387_constant_opcode (rtx x)
7183   switch (standard_80387_constant_p (x))
7207 /* Return the CONST_DOUBLE representing the 80387 constant that is
7208    loaded by the specified special instruction.  The argument IDX
7209    matches the return value from standard_80387_constant_p. */
7212 standard_80387_constant_rtx (int idx)
/* Lazily build the constant table before indexing it.  */
7216   if (! ext_80387_constants_init)
7217     init_ext_80387_constants ();
7233   return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7237 /* Return 1 if mode is a valid mode for sse. */
/* Body elided from this view — presumably a switch over 128-bit SSE
   vector modes; confirm against the full source.  */
7239 standard_sse_mode_p (enum machine_mode mode)
7256 /* Return 1 if X is all 0s.  For all 1s, return 2 if X is in 128bit
7257    SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7258    modes and AVX is enabled. */
7261 standard_sse_constant_p (rtx x)
7263   enum machine_mode mode = GET_MODE (x);
7265   if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
/* All-ones vectors: the sign of the return distinguishes whether the
   required ISA extension (SSE2/AVX) is actually enabled.  */
7267   if (vector_all_ones_operand (x, mode))
7269       if (standard_sse_mode_p (mode))
7270 	return TARGET_SSE2 ? 2 : -2;
7271       else if (VALID_AVX256_REG_MODE (mode))
7272 	return TARGET_AVX ? 3 : -3;
7278 /* Return the opcode of the special instruction to be used to load
/* Returns the assembler template that materializes the standard SSE
   constant X for INSN: xor-style zeroing for all-0s, pcmpeqd-style for
   all-1s, with AVX (v-prefixed, three-operand) variants when AVX is
   enabled.  */
7282 standard_sse_constant_opcode (rtx insn, rtx x)
7284   switch (standard_sse_constant_p (x))
/* All-zeros: pick the xor flavor matching the insn's mode attr.  */
7287       switch (get_attr_mode (insn))
7290 	  return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7292 	  return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7294 	  return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7296 	  return "vxorps\t%x0, %x0, %x0";
7298 	  return "vxorpd\t%x0, %x0, %x0";
7300 	  return "vpxor\t%x0, %x0, %x0";
/* All-ones: compare-equal of a register with itself.  */
7306       switch (get_attr_mode (insn))
7311 	      return "vpcmpeqd\t%0, %0, %0";
7317 	    return "pcmpeqd\t%0, %0";
7322 /* Returns 1 if OP contains a symbol reference */
/* Recursive walk over the RTX structure of OP, looking for any
   SYMBOL_REF or LABEL_REF in sub-expressions or vectors.  */
7325 symbolic_reference_mentioned_p (rtx op)
7330   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7333   fmt = GET_RTX_FORMAT (GET_CODE (op));
7334   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
/* 'E' operands are rtx vectors; recurse into each element.  */
7340 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7341 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7345       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7352 /* Return 1 if it is appropriate to emit `ret' instructions in the
7353    body of a function.  Do this only if the epilogue is simple, needing a
7354    couple of insns.  Prior to reloading, we can't tell how many registers
7355    must be saved, so return 0 then.  Return 0 if there is no frame
7356    marker to de-allocate. */
7359 ix86_can_use_return_insn_p (void)
7361   struct ix86_frame frame;
7363   if (! reload_completed || frame_pointer_needed)
7366   /* Don't allow more than 32 pop, since that's all we can do
7367      with one instruction. */
7368   if (crtl->args.pops_args
7369       && crtl->args.size >= 32768)
/* A bare `ret' is only valid when nothing was allocated and no
   registers (integer or SSE) need restoring.  */
7372   ix86_compute_frame_layout (&frame);
7373   return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7376 /* Value should be nonzero if functions must have frame pointers.
7377    Zero means the frame pointer need not be set up (and parms may
7378    be accessed via the stack pointer) in functions that seem suitable. */
7381 ix86_frame_pointer_required (void)
7383   /* If we accessed previous frames, then the generated code expects
7384      to be able to access the saved ebp value in our frame. */
7385   if (cfun->machine->accesses_prev_frame)
7388   /* Several x86 os'es need a frame pointer for other reasons,
7389      usually pertaining to setjmp. */
7390   if (SUBTARGET_FRAME_POINTER_REQUIRED)
7393   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7394      the frame pointer by default.  Turn it back on now if we've not
7395      got a leaf function. */
7396   if (TARGET_OMIT_LEAF_FRAME_POINTER
7397       && (!current_function_is_leaf
/* TLS descriptor calls behave like calls for leaf-ness purposes.  */
7398 	  || ix86_current_function_calls_tls_descriptor))
7407 /* Record that the current function accesses previous call frames. */
/* Setting this flag forces ix86_frame_pointer_required to keep %ebp.  */
7410 ix86_setup_frame_addresses (void)
7412   cfun->machine->accesses_prev_frame = 1;
7415 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7416 # define USE_HIDDEN_LINKONCE 1
7418 # define USE_HIDDEN_LINKONCE 0
7421 static int pic_labels_used;
7423 /* Fills in the label name that should be used for a pc thunk for
7424    the given register. */
/* 32-bit only: either the hidden-linkonce "__i686.get_pc_thunk.REG"
   name or an internal "LPR" label keyed by REGNO.  NAME must hold at
   least 32 bytes.  */
7427 get_pc_thunk_name (char name[32], unsigned int regno)
7429   gcc_assert (!TARGET_64BIT);
7431   if (USE_HIDDEN_LINKONCE)
7432     sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7434     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7438 /* This function generates code for -fpic that loads %ebx with
7439    the return address of the caller and then returns. */
/* TARGET_ASM_FILE_END hook: emit one get-pc thunk per register that
   output_set_got recorded in pic_labels_used, choosing Mach-O coal
   sections, hidden-linkonce sections, or plain text as the platform
   dictates, then mark the executable-stack note if needed.  */
7442 ix86_file_end (void)
7447   for (regno = 0; regno < 8; ++regno)
/* Skip registers for which no thunk call was ever emitted.  */
7451       if (! ((pic_labels_used >> regno) & 1))
7454       get_pc_thunk_name (name, regno);
/* Darwin: weak definition in the text-coalesced section.  */
7459 	  switch_to_section (darwin_sections[text_coal_section]);
7460 	  fputs ("\t.weak_definition\t", asm_out_file);
7461 	  assemble_name (asm_out_file, name);
7462 	  fputs ("\n\t.private_extern\t", asm_out_file);
7463 	  assemble_name (asm_out_file, name);
7464 	  fputs ("\n", asm_out_file);
7465 	  ASM_OUTPUT_LABEL (asm_out_file, name);
7469       if (USE_HIDDEN_LINKONCE)
/* ELF: build a one-only, hidden function decl so the thunk is
   deduplicated across translation units.  */
7473 	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
7475 	  TREE_PUBLIC (decl) = 1;
7476 	  TREE_STATIC (decl) = 1;
7477 	  DECL_ONE_ONLY (decl) = 1;
7479 	  (*targetm.asm_out.unique_section) (decl, 0);
7480 	  switch_to_section (get_named_section (decl, NULL, 0));
7482 	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
7483 	  fputs ("\t.hidden\t", asm_out_file);
7484 	  assemble_name (asm_out_file, name);
7485 	  fputc ('\n', asm_out_file);
7486 	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7490 	  switch_to_section (text_section);
7491 	  ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address (top of stack) into the
   destination register and return.  */
7494       xops[0] = gen_rtx_REG (Pmode, regno);
7495       xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7496       output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7497       output_asm_insn ("ret", xops);
7500   if (NEED_INDICATE_EXEC_STACK)
7501     file_end_indicate_exec_stack ();
7504 /* Emit code for the SET_GOT patterns. */
/* Emits the assembly that loads the GOT base into DEST: a VxWorks RTP
   indirection, a call/pop (or mov) of a local label, or a call to the
   per-register pc thunk, followed by the add of _GLOBAL_OFFSET_TABLE_.
   LABEL, when given, names the reference point.  */
7507 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7513   if (TARGET_VXWORKS_RTP && flag_pic)
7515       /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
7516       xops[2] = gen_rtx_MEM (Pmode,
7517 			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7518       output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7520       /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7521 	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7522 	 an unadorned address.  */
7523       xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7524       SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7525       output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7529   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Without deep branch prediction (or without PIC), a call/pop pair to
   a local label is acceptable; otherwise use the shared pc thunk.  */
7531   if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7533       xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7536 	output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7538 	output_asm_insn ("call\t%a2", xops);
7541       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7542          is what will be referenced by the Mach-O PIC subsystem.  */
7544 	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7547       (*targetm.asm_out.internal_label) (asm_out_file, "L",
7548 				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7551 	output_asm_insn ("pop%z0\t%0", xops);
/* Thunk path: record that a thunk for DEST's register is needed so
   ix86_file_end emits it.  */
7556       get_pc_thunk_name (name, REGNO (dest));
7557       pic_labels_used |= 1 << REGNO (dest);
7559       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7560       xops[2] = gen_rtx_MEM (QImode, xops[2]);
7561       output_asm_insn ("call\t%X2", xops);
7562       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7563          is what will be referenced by the Mach-O PIC subsystem.  */
7566 	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7568 	targetm.asm_out.internal_label (asm_out_file, "L",
7569 					CODE_LABEL_NUMBER (label));
/* Finally add the GOT symbol (PC-relative when using call/pop).  */
7576   if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7577     output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7579     output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7584 /* Generate an "push" pattern for input ARG. */
/* Builds the SET of (mem (pre_dec sp)) from ARG — i.e. the RTL for a
   stack push.  The definition line itself is elided from this view.  */
7589   return gen_rtx_SET (VOIDmode,
7591 				  gen_rtx_PRE_DEC (Pmode,
7592 						   stack_pointer_rtx)),
7596 /* Return >= 0 if there is an unused call-clobbered register available
7597    for the entire function. */
7600 ix86_select_alt_pic_regnum (void)
/* Only leaf, unprofiled functions with no TLS-descriptor calls can
   repurpose a call-clobbered register for the whole function.  */
7602   if (current_function_is_leaf && !crtl->profile
7603       && !ix86_current_function_calls_tls_descriptor)
7606       /* Can't use the same register for both PIC and DRAP.  */
7608 	drap = REGNO (crtl->drap_reg);
/* Scan eax/ecx/edx (regnos 2..0) for one never live.  */
7611       for (i = 2; i >= 0; --i)
7612         if (i != drap && !df_regs_ever_live_p (i))
7616   return INVALID_REGNUM;
7619 /* Return 1 if we need to save REGNO. */
/* Decide whether REGNO must be saved in the prologue: the PIC
   register when it is genuinely used (unless an alternate register
   can carry the GOT pointer), EH return data registers when
   MAYBE_EH_RETURN, the DRAP register, and otherwise any live,
   call-saved, non-fixed register (excluding %ebp when it is the
   frame pointer).  */
7621 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7623   if (pic_offset_table_rtx
7624       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7625       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7627 	  || crtl->calls_eh_return
7628 	  || crtl->uses_const_pool))
/* If another register can serve as PIC base, no save is needed.  */
7630       if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7635   if (crtl->calls_eh_return && maybe_eh_return)
7640 	  unsigned test = EH_RETURN_DATA_REGNO (i);
7641 	  if (test == INVALID_REGNUM)
7649       && regno == REGNO (crtl->drap_reg))
7652   return (df_regs_ever_live_p (regno)
7653 	  && !call_used_regs[regno]
7654 	  && !fixed_regs[regno]
7655 	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7658 /* Return number of saved general prupose registers. */
7661 ix86_nsaved_regs (void)
/* Count the non-SSE hard registers that ix86_save_reg says must be
   preserved (including the maybe-EH-return ones).  */
7666   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7667     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7672 /* Return number of saved SSE registrers. */
7675 ix86_nsaved_sseregs (void)
/* Only the MS ABI has callee-saved SSE registers; elsewhere zero.  */
7680   if (ix86_cfun_abi () != MS_ABI)
7682   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7683     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7688 /* Given FROM and TO register numbers, say whether this elimination is
7689    allowed.  If stack alignment is needed, we can only replace argument
7690    pointer with hard frame pointer, or replace frame pointer with stack
7691    pointer.  Otherwise, frame pointer elimination is automatically
7692    handled and all other eliminations are valid. */
7695 ix86_can_eliminate (int from, int to)
/* Under frame-pointer-based realignment only these two pairs are
   meaningful; the others would cross the realignment gap.  */
7697   if (stack_realign_fp)
7698     return ((from == ARG_POINTER_REGNUM
7699              && to == HARD_FRAME_POINTER_REGNUM)
7700             || (from == FRAME_POINTER_REGNUM
7701                 && to == STACK_POINTER_REGNUM));
7703     return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7706 /* Return the offset between two registers, one to be eliminated, and the other
7707    its replacement, at the start of a routine.  */
7710 ix86_initial_elimination_offset (int from, int to)
7712   struct ix86_frame frame;
7713   ix86_compute_frame_layout (&frame);
7715   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7716     return frame.hard_frame_pointer_offset;
7717   else if (from == FRAME_POINTER_REGNUM
7718 	   && to == HARD_FRAME_POINTER_REGNUM)
7719     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases eliminate into the stack pointer.  */
7722       gcc_assert (to == STACK_POINTER_REGNUM);
7724       if (from == ARG_POINTER_REGNUM)
7725 	return frame.stack_pointer_offset;
7727       gcc_assert (from == FRAME_POINTER_REGNUM);
7728       return frame.stack_pointer_offset - frame.frame_pointer_offset;
7732 /* In a dynamically-aligned function, we can't know the offset from
7733    stack pointer to frame pointer, so we must ensure that setjmp
7734    eliminates fp against the hard fp (%ebp) rather than trying to
7735    index from %esp up to the top of the frame across a gap that is
7736    of unknown (at compile-time) size.  */
7738 ix86_builtin_setjmp_frame_value (void)
7740   return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7743 /* Fill structure ix86_frame about frame of currently computed function.  */
/* Computes the complete stack-frame layout for the current function:
   register counts, paddings, the varargs save area, outgoing args,
   the red zone, and the offsets of the frame/hard-frame/stack
   pointers.  It also decides between push-based and move-based
   register saving, and debug-dumps the layout at the end.  */
7746 ix86_compute_frame_layout (struct ix86_frame *frame)
7748   HOST_WIDE_INT total_size;
7749   unsigned int stack_alignment_needed;
7750   HOST_WIDE_INT offset;
7751   unsigned int preferred_alignment;
7752   HOST_WIDE_INT size = get_frame_size ();
7754   frame->nregs = ix86_nsaved_regs ();
7755   frame->nsseregs = ix86_nsaved_sseregs ();
7758   stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7759   preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7761   /* MS ABI seem to require stack alignment to be always 16 except for function
7763   if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7765       preferred_alignment = 16;
7766       stack_alignment_needed = 16;
7767       crtl->preferred_stack_boundary = 128;
7768       crtl->stack_alignment_needed = 128;
7771   gcc_assert (!size || stack_alignment_needed);
7772   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7773   gcc_assert (preferred_alignment <= stack_alignment_needed);
7775   /* During reload iteration the amount of registers saved can change.
7776      Recompute the value as needed.  Do not recompute when amount of registers
7777      didn't change as reload does multiple calls to the function and does not
7778      expect the decision to change within single iteration.  */
7779   if (!optimize_function_for_size_p (cfun)
7780       && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7782       int count = frame->nregs;
7784       cfun->machine->use_fast_prologue_epilogue_nregs = count;
7785       /* The fast prologue uses move instead of push to save registers.  This
7786          is significantly longer, but also executes faster as modern hardware
7787          can execute the moves in parallel, but can't do that for push/pop.
7789 	 Be careful about choosing what prologue to emit:  When function takes
7790 	 many instructions to execute we may use slow version as well as in
7791 	 case function is known to be outside hot spot (this is known with
7792 	 feedback only).  Weight the size of function by number of registers
7793 	 to save as it is cheap to use one or two push instructions but very
7794 	 slow to use many of them.  */
7796 	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7797       if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7798 	  || (flag_branch_probabilities
7799 	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7800         cfun->machine->use_fast_prologue_epilogue = false;
7802 	cfun->machine->use_fast_prologue_epilogue
7803 	   = !expensive_function_p (count);
7805   if (TARGET_PROLOGUE_USING_MOVE
7806       && cfun->machine->use_fast_prologue_epilogue)
7807     frame->save_regs_using_mov = true;
7809     frame->save_regs_using_mov = false;
7812   /* Skip return address and saved base pointer.  */
7813   offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7815   frame->hard_frame_pointer_offset = offset;
7817   /* Set offset to aligned because the realigned frame starts from
7819   if (stack_realign_fp)
7820     offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7822   /* Register save area */
7823   offset += frame->nregs * UNITS_PER_WORD;
7825   /* Align SSE reg save area.  */
/* SSE saves use aligned 16-byte slots, so pad up to 16 first.  */
7826   if (frame->nsseregs)
7827     frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7829     frame->padding0 = 0;
7831   /* SSE register save area.  */
7832   offset += frame->padding0 + frame->nsseregs * 16;
7835   frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7836   offset += frame->va_arg_size;
7838   /* Align start of frame for local function.  */
7839   frame->padding1 = ((offset + stack_alignment_needed - 1)
7840 		     & -stack_alignment_needed) - offset;
7842   offset += frame->padding1;
7844   /* Frame pointer points here.  */
7845   frame->frame_pointer_offset = offset;
7849   /* Add outgoing arguments area.  Can be skipped if we eliminated
7850      all the function calls as dead code.
7851      Skipping is however impossible when function calls alloca.  Alloca
7852      expander assumes that last crtl->outgoing_args_size
7853      of stack frame are unused.  */
7854   if (ACCUMULATE_OUTGOING_ARGS
7855       && (!current_function_is_leaf || cfun->calls_alloca
7856 	  || ix86_current_function_calls_tls_descriptor))
7858       offset += crtl->outgoing_args_size;
7859       frame->outgoing_arguments_size = crtl->outgoing_args_size;
7862     frame->outgoing_arguments_size = 0;
7864   /* Align stack boundary.  Only needed if we're calling another function
7866   if (!current_function_is_leaf || cfun->calls_alloca
7867       || ix86_current_function_calls_tls_descriptor)
7868     frame->padding2 = ((offset + preferred_alignment - 1)
7869 		       & -preferred_alignment) - offset;
7871     frame->padding2 = 0;
7873   offset += frame->padding2;
7875   /* We've reached end of stack frame.  */
7876   frame->stack_pointer_offset = offset;
7878   /* Size prologue needs to allocate.  */
7879   frame->to_allocate =
7880     (size + frame->padding1 + frame->padding2
7881      + frame->outgoing_arguments_size + frame->va_arg_size);
/* Small frames: plain pushes are cheaper; huge 64-bit frames cannot
   use mov-saving because displacements must fit in 32 bits.  */
7883   if ((!frame->to_allocate && frame->nregs <= 1)
7884       || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7885     frame->save_regs_using_mov = false;
/* Red zone: leaf functions on non-MS 64-bit targets may use the area
   below %rsp without allocating it.  */
7887   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7888       && current_function_is_leaf
7889       && !ix86_current_function_calls_tls_descriptor)
7891       frame->red_zone_size = frame->to_allocate;
7892       if (frame->save_regs_using_mov)
7893 	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7894       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7895 	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7898     frame->red_zone_size = 0;
7899   frame->to_allocate -= frame->red_zone_size;
7900   frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (guard elided from this view).  */
7902   fprintf (stderr, "\n");
7903   fprintf (stderr, "size: %ld\n", (long)size);
7904   fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7905   fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7906   fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7907   fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7908   fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7909   fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7910   fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7911   fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7912   fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7913   fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7914   fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7915 	   (long)frame->hard_frame_pointer_offset);
7916   fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7917   fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7918   fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7919   fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7923 /* Emit code to save registers in the prologue.  */
/* Push-based save: emit one push per callee-saved GPR, highest regno
   first, marking each insn frame-related for unwind info.  */
7926 ix86_emit_save_regs (void)
7931   for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7932     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7934 	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7935 	RTX_FRAME_RELATED_P (insn) = 1;
7939 /* Emit code to save registers using MOV insns.  First register
7940    is restored from POINTER + OFFSET.  */
7942 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
/* Store each callee-saved GPR into consecutive word slots starting
   at POINTER + OFFSET; each store is frame-related for unwinding.  */
7947   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7948     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7950 	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7952 			       gen_rtx_REG (Pmode, regno));
7953 	RTX_FRAME_RELATED_P (insn) = 1;
7954 	offset += UNITS_PER_WORD;
7958 /* Emit code to save registers using MOV insns.  First register
7959    is restored from POINTER + OFFSET.  */
/* SSE variant: 16-byte TImode stores into 128-bit-aligned slots.  */
7961 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7967   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7968     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7970 	mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
7971 	set_mem_align (mem, 128);
7972 	insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
7973 	RTX_FRAME_RELATED_P (insn) = 1;
7978 /* Expand prologue or epilogue stack adjustment.
7979    The pattern exist to put a dependency on all ebp-based memory accesses.
7980    STYLE should be negative if instructions should be marked as frame related,
7981    zero if %r11 register is live and cannot be freely used and positive
/* Emits DEST = SRC + OFFSET via the special adjust-stack patterns;
   when the 64-bit offset does not fit an immediate, it is first
   loaded into %r11 (see STYLE constraint above).  */
7985 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7990     insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7991   else if (x86_64_immediate_operand (offset, DImode))
7992     insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7996       /* r11 is used by indirect sibcall return as well, set before the
7997 	 epilogue and used after the epilogue.  ATM indirect sibcall
7998 	 shouldn't be used together with huge frame sizes in one
7999 	 function because of the frame_size check in sibcall.c.  */
8001       r11 = gen_rtx_REG (DImode, R11_REG);
8002       insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8004 	RTX_FRAME_RELATED_P (insn) = 1;
8005       insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8009     RTX_FRAME_RELATED_P (insn) = 1;
8012 /* Find an available register to be used as dynamic realign argument
8013 pointer register.  Such a register will be written in prologue and
8014 used in begin of body, so it must not be
8015 1. parameter passing register.
8017 We reuse static-chain register if it is available.  Otherwise, we
8018 use DI for i386 and R13 for x86-64.  We chose R13 since it has
8021 Return: the regno of chosen register.  */
8024 find_drap_reg (void)
8026 tree decl = cfun->decl;
/* 64-bit path (the excerpt elides the TARGET_64BIT test).  */
8030 /* Use R13 for nested functions or functions needing a static chain.
8031 Since a function with a tail call may use any caller-saved
8032 registers in its epilogue, DRAP must not use a caller-saved
8033 register in such case.  */
8034 if ((decl_function_context (decl)
8035 && !DECL_NO_STATIC_CHAIN (decl))
8036 || crtl->tail_call_emit)
/* 32-bit path.  */
8043 /* Use DI for nested functions or functions needing a static chain.
8044 Since a function with a tail call may use any caller-saved
8045 registers in its epilogue, DRAP must not use a caller-saved
8046 register in such case.  */
8047 if ((decl_function_context (decl)
8048 && !DECL_NO_STATIC_CHAIN (decl))
8049 || crtl->tail_call_emit)
8052 /* Reuse static chain register if it isn't used for parameter
   passing (regparm <= 2 and the function is not fastcall).  */
8054 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8055 && !lookup_attribute ("fastcall",
8056 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8063 /* Update incoming stack boundary and estimated stack alignment.
   Applies, in order: user/-default incoming boundary, the
   force_align_arg_pointer attribute, the parameter-area boundary,
   and the special case for main().  */
8066 ix86_update_stack_boundary (void)
8068 /* Prefer the one specified at command line. */
8069 ix86_incoming_stack_boundary
8070 = (ix86_user_incoming_stack_boundary
8071 ? ix86_user_incoming_stack_boundary
8072 : ix86_default_incoming_stack_boundary);
8074 /* Incoming stack alignment can be changed on individual functions
8075 via force_align_arg_pointer attribute.  We use the smallest
8076 incoming stack boundary.  */
8077 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8078 && lookup_attribute (ix86_force_align_arg_pointer_string,
8079 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8080 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8082 /* The incoming stack frame has to be aligned at least at
8083 parm_stack_boundary.  */
8084 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8085 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8087 /* Stack at entrance of main is aligned by runtime.  We use the
8088 smallest incoming stack boundary.  */
8089 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8090 && DECL_NAME (current_function_decl)
8091 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8092 && DECL_FILE_SCOPE_P (current_function_decl))
8093 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8095 /* x86_64 vararg needs 16byte stack alignment for register save
   area.  (NOTE(review): the guarding condition is elided here.)  */
8099 && crtl->stack_alignment_estimated < 128)
8100 crtl->stack_alignment_estimated = 128;
8103 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
8104 needed or an rtx for DRAP otherwise.  */
8107 ix86_get_drap_rtx (void)
8109 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8110 crtl->need_drap = true;
8112 if (stack_realign_drap)
8114 /* Assign DRAP to vDRAP and return vDRAP.  */
8115 unsigned int regno = find_drap_reg ();
/* Record the chosen hard register as the DRAP.  */
8120 arg_ptr = gen_rtx_REG (Pmode, regno);
8121 crtl->drap_reg = arg_ptr;
/* Copy DRAP into a pseudo; the copy is emitted at function entry.  */
8124 drap_vreg = copy_to_reg (arg_ptr);
8128 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8129 RTX_FRAME_RELATED_P (insn) = 1;
8136 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  Returns the rtx used
   to address incoming arguments; here always the virtual
   incoming-args pointer.  */
8139 ix86_internal_arg_pointer (void)
8141 return virtual_incoming_args_rtx;
8144 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8145 This is called from dwarf2out.c to emit call frame instructions
8146 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs.
   LABEL names the location; PATTERN is the frame-related SET.  */
8148 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
8150 rtx unspec = SET_SRC (pattern);
8151 gcc_assert (GET_CODE (unspec) == UNSPEC);
/* Dispatch on the unspec kind (the switch header and breaks are
   elided in this excerpt).  */
8155 case UNSPEC_REG_SAVE:
8156 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8157 SET_DEST (pattern));
8159 case UNSPEC_DEF_CFA:
8160 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8161 INTVAL (XVECEXP (unspec, 0, 0)));
8168 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8169 to be generated in correct form.  */
8171 ix86_finalize_stack_realign_flags (void)
8173 /* Check if stack realign is really needed after reload, and
8174 stores result in cfun */
8175 unsigned int incoming_stack_boundary
8176 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8177 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8178 unsigned int stack_realign = (incoming_stack_boundary
8179 < (current_function_is_leaf
8180 ? crtl->max_used_stack_slot_alignment
8181 : crtl->stack_alignment_needed));
8183 if (crtl->stack_realign_finalized)
8185 /* After stack_realign_needed is finalized, we can no longer
   change it; assert that nothing has drifted.  */
8187 gcc_assert (crtl->stack_realign_needed == stack_realign);
8191 crtl->stack_realign_needed = stack_realign;
8192 crtl->stack_realign_finalized = true;
8196 /* Expand the prologue into a bunch of separate insns.  Order:
   DRAP setup + stack realign, frame pointer push/set, fp-based
   realign, register saves (push or mov), stack allocation
   (immediate or probed), PIC register setup, DRAP-without-realign
   fixup, and final scheduling barriers.  */
8199 ix86_expand_prologue (void)
8203 struct ix86_frame frame;
8204 HOST_WIDE_INT allocate;
8206 ix86_finalize_stack_realign_flags ();
8208 /* DRAP should not coexist with stack_realign_fp */
8209 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8211 ix86_compute_frame_layout (&frame);
8213 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8214 of DRAP is needed and stack realignment is really needed after reload */
8215 if (crtl->drap_reg && crtl->stack_realign_needed)
8218 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8219 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8220 ? 0 : UNITS_PER_WORD);
8222 gcc_assert (stack_realign_drap);
8224 /* Grab the argument pointer. */
8225 x = plus_constant (stack_pointer_rtx,
8226 (UNITS_PER_WORD + param_ptr_offset));
8229 /* Only need to push parameter pointer reg if it is caller
   saved.  (The rest of this comment is elided in the excerpt.)  */
8231 if (!call_used_regs[REGNO (crtl->drap_reg)])
8233 /* Push arg pointer reg */
8234 insn = emit_insn (gen_push (y));
8235 RTX_FRAME_RELATED_P (insn) = 1;
8238 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8239 RTX_FRAME_RELATED_P (insn) = 1;
8241 /* Align the stack. */
8242 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8244 GEN_INT (-align_bytes)));
8245 RTX_FRAME_RELATED_P (insn) = 1;
8247 /* Replicate the return address on the stack so that return
8248 address can be reached via (argp - 1) slot.  This is needed
8249 to implement macro RETURN_ADDR_RTX and intrinsic function
8250 expand_builtin_return_addr etc. */
8252 x = gen_frame_mem (Pmode,
8253 plus_constant (x, -UNITS_PER_WORD));
8254 insn = emit_insn (gen_push (x));
8255 RTX_FRAME_RELATED_P (insn) = 1;
8258 /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8259 slower on all targets.  Also sdb doesn't like it. */
8261 if (frame_pointer_needed)
8263 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8264 RTX_FRAME_RELATED_P (insn) = 1;
8266 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8267 RTX_FRAME_RELATED_P (insn) = 1;
8270 if (stack_realign_fp)
8272 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8273 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8275 /* Align the stack. */
8276 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8278 GEN_INT (-align_bytes)));
8279 RTX_FRAME_RELATED_P (insn) = 1;
/* Bytes to allocate beyond the integer register save area.  */
8282 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8284 if (!frame.save_regs_using_mov)
8285 ix86_emit_save_regs ();
8287 allocate += frame.nregs * UNITS_PER_WORD;
8289 /* When using red zone we may start register saving before allocating
8290 the stack frame saving one cycle of the prologue.  However I will
8291 avoid doing this if I am going to have to probe the stack since
8292 at least on x86_64 the stack probe can turn into a call that clobbers
8293 a red zone location */
8294 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8295 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8296 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8297 && !crtl->stack_realign_needed)
8298 ? hard_frame_pointer_rtx
8299 : stack_pointer_rtx,
8300 -frame.nregs * UNITS_PER_WORD);
8304 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8305 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8306 GEN_INT (-allocate), -1);
/* Large allocation: go through the stack-probing worker.  */
8309 /* Only valid for Win32. */
8310 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8314 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8316 if (cfun->machine->call_abi == MS_ABI)
8319 eax_live = ix86_eax_live_at_start_p ();
/* If eax holds an incoming argument, preserve it around the probe.  */
8323 emit_insn (gen_push (eax));
8324 allocate -= UNITS_PER_WORD;
8327 emit_move_insn (eax, GEN_INT (allocate));
8330 insn = gen_allocate_stack_worker_64 (eax, eax);
8332 insn = gen_allocate_stack_worker_32 (eax, eax);
8333 insn = emit_insn (insn);
8334 RTX_FRAME_RELATED_P (insn) = 1;
8335 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8336 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8337 add_reg_note (insn, REG_FRAME_RELATED_EXPR, t);
/* Restore eax from its stack slot if it was live.  */
8341 if (frame_pointer_needed)
8342 t = plus_constant (hard_frame_pointer_rtx,
8345 - frame.nregs * UNITS_PER_WORD);
8347 t = plus_constant (stack_pointer_rtx, allocate);
8348 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* If saves were deferred past allocation, emit them now.  */
8352 if (frame.save_regs_using_mov
8353 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8354 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8356 if (!frame_pointer_needed
8357 || !frame.to_allocate
8358 || crtl->stack_realign_needed)
8359 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8361 + frame.nsseregs * 16 + frame.padding0);
8363 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8364 -frame.nregs * UNITS_PER_WORD);
8366 if (!frame_pointer_needed
8367 || !frame.to_allocate
8368 || crtl->stack_realign_needed)
8369 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8372 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8373 - frame.nregs * UNITS_PER_WORD
8374 - frame.nsseregs * 16
/* Set up the PIC register when the function references the GOT.  */
8377 pic_reg_used = false;
8378 if (pic_offset_table_rtx
8379 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8382 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8384 if (alt_pic_reg_used != INVALID_REGNUM)
8385 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8387 pic_reg_used = true;
8394 if (ix86_cmodel == CM_LARGE_PIC)
8396 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8397 rtx label = gen_label_rtx ();
8399 LABEL_PRESERVE_P (label) = 1;
8400 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8401 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8402 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8403 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8404 pic_offset_table_rtx, tmp_reg));
8407 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8410 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8413 /* In the pic_reg_used case, make sure that the got load isn't deleted
8414 when mcount needs it.  Blockage to avoid call movement across mcount
8415 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
   note.  (Tail of this comment elided in the excerpt.)  */
8417 if (crtl->profile && pic_reg_used)
8418 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8420 if (crtl->drap_reg && !crtl->stack_realign_needed)
8422 /* vDRAP is setup but after reload it turns out stack realign
8423 isn't necessary, here we will emit prologue to setup DRAP
8424 without stack realign adjustment */
8425 int drap_bp_offset = UNITS_PER_WORD * 2;
8426 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8427 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8430 /* Prevent instructions from being scheduled into register save push
8431 sequence when access to the redzone area is done through frame pointer.
8432 The offset between the frame pointer and the stack pointer is calculated
8433 relative to the value of the stack pointer at the end of the function
8434 prologue, and moving instructions that access redzone area via frame
8435 pointer inside push sequence violates this assumption. */
8436 if (frame_pointer_needed && frame.red_zone_size)
8437 emit_insn (gen_memory_blockage ());
8439 /* Emit cld instruction if stringops are used in the function. */
8440 if (TARGET_CLD && ix86_current_function_needs_cld)
8441 emit_insn (gen_cld ());
8444 /* Emit code to restore saved registers using MOV insns.  First register
8445 is restored from POINTER + OFFSET.  MAYBE_EH_RETURN is passed
   through to ix86_save_reg to include eh_return registers.  */
8447 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8448 int maybe_eh_return)
8451 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8453 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8454 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8456 /* Ensure that adjust_address won't be forced to produce pointer
8457 out of range allowed by x86-64 instruction set.  */
8458 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset does not fit in a 32-bit displacement: rebase via %r11.  */
8462 r11 = gen_rtx_REG (DImode, R11_REG);
8463 emit_move_insn (r11, GEN_INT (offset));
8464 emit_insn (gen_adddi3 (r11, r11, pointer));
8465 base_address = gen_rtx_MEM (Pmode, r11);
8468 emit_move_insn (gen_rtx_REG (Pmode, regno),
8469 adjust_address (base_address, Pmode, offset));
8470 offset += UNITS_PER_WORD;
8474 /* Emit code to restore saved SSE registers using MOV insns.  First
8475 register is restored from POINTER + OFFSET; slots are TImode and
   marked 128-bit aligned, mirroring the save path.  */
8477 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8478 int maybe_eh_return)
8481 rtx base_address = gen_rtx_MEM (TImode, pointer);
8484 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8485 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8487 /* Ensure that adjust_address won't be forced to produce pointer
8488 out of range allowed by x86-64 instruction set.  */
8489 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset does not fit in a 32-bit displacement: rebase via %r11.  */
8493 r11 = gen_rtx_REG (DImode, R11_REG);
8494 emit_move_insn (r11, GEN_INT (offset));
8495 emit_insn (gen_adddi3 (r11, r11, pointer));
8496 base_address = gen_rtx_MEM (TImode, r11);
8499 mem = adjust_address (base_address, TImode, offset);
8500 set_mem_align (mem, 128);
8501 emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8506 /* Restore function stack, frame, and registers.  STYLE is the same
   convention as pro_epilogue_adjust_stack; STYLE == 2 marks an
   eh_return epilogue, STYLE == 0 a sibcall epilogue (no return
   emitted).  */
8509 ix86_expand_epilogue (int style)
8513 struct ix86_frame frame;
8514 HOST_WIDE_INT offset;
8516 ix86_finalize_stack_realign_flags ();
8518 /* When stack is realigned, SP must be valid. */
8519 sp_valid = (!frame_pointer_needed
8520 || current_function_sp_is_unchanging
8521 || stack_realign_fp);
8523 ix86_compute_frame_layout (&frame);
8525 /* See the comment about red zone and frame
8526 pointer usage in ix86_expand_prologue.  */
8527 if (frame_pointer_needed && frame.red_zone_size)
8528 emit_insn (gen_memory_blockage ());
8530 /* Calculate start of saved registers relative to ebp.  Special care
8531 must be taken for the normal return case of a function using
8532 eh_return: the eax and edx registers are marked as saved, but not
8533 restored along this path.  */
8534 offset = frame.nregs;
8535 if (crtl->calls_eh_return && style != 2)
8537 offset *= -UNITS_PER_WORD;
8538 offset -= frame.nsseregs * 16 + frame.padding0;
8540 /* If we're only restoring one register and sp is not valid then
8541 use a move instruction to restore the register, since it's
8542 less work than reloading sp and popping the register.
8544 The default code results in stack adjustment using add/lea instruction,
8545 while this code results in LEAVE instruction (or discrete equivalent),
8546 so it is profitable in some other cases as well.  Especially when there
8547 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
8548 and there is exactly one register to pop.  This heuristic may need some
8549 tuning in future.  */
8550 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8551 || (TARGET_EPILOGUE_USING_MOVE
8552 && cfun->machine->use_fast_prologue_epilogue
8553 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8554 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
8555 || (frame_pointer_needed && TARGET_USE_LEAVE
8556 && cfun->machine->use_fast_prologue_epilogue
8557 && (frame.nregs + frame.nsseregs) == 1)
8558 || crtl->calls_eh_return)
8560 /* Restore registers.  We can use ebp or esp to address the memory
8561 locations.  If both are available, default to ebp, since offsets
8562 are known to be small.  Only exception is esp pointing directly
8563 to the end of block of saved registers, where we may simplify
   addressing.  (Tail of this comment elided in the excerpt.)
8566 If we are realigning stack with bp and sp, regs restore can't
8567 be addressed by bp.  sp must be used instead.  */
8569 if (!frame_pointer_needed
8570 || (sp_valid && !frame.to_allocate)
8571 || stack_realign_fp)
8573 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8574 frame.to_allocate, style == 2);
8575 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8577 + frame.nsseregs * 16
8578 + frame.padding0, style == 2);
8582 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8583 offset, style == 2);
8584 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8586 + frame.nsseregs * 16
8587 + frame.padding0, style == 2);
8590 /* eh_return epilogues need %ecx added to the stack pointer.  */
8593 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8595 /* Stack align doesn't work with eh_return.  */
8596 gcc_assert (!crtl->stack_realign_needed);
8598 if (frame_pointer_needed)
8600 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8601 tmp = plus_constant (tmp, UNITS_PER_WORD);
8602 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8604 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8605 emit_move_insn (hard_frame_pointer_rtx, tmp);
8607 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8612 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8613 tmp = plus_constant (tmp, (frame.to_allocate
8614 + frame.nregs * UNITS_PER_WORD
8615 + frame.nsseregs * 16
8617 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8620 else if (!frame_pointer_needed)
8621 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8622 GEN_INT (frame.to_allocate
8623 + frame.nregs * UNITS_PER_WORD
8624 + frame.nsseregs * 16
8627 /* If not an i386, mov & pop is faster than "leave".  */
8628 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8629 || !cfun->machine->use_fast_prologue_epilogue)
8630 emit_insn ((*ix86_gen_leave) ());
8633 pro_epilogue_adjust_stack (stack_pointer_rtx,
8634 hard_frame_pointer_rtx,
8637 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Pop-based path: deallocate, then pop saved registers.  */
8642 /* First step is to deallocate the stack frame so that we can
8645 If we realign stack with frame pointer, then stack pointer
8646 won't be able to recover via lea $offset(%bp), %sp, because
8647 there is a padding area between bp and sp for realign.
8648 "add $to_allocate, %sp" must be used instead.  */
8651 gcc_assert (frame_pointer_needed);
8652 gcc_assert (!stack_realign_fp);
8653 pro_epilogue_adjust_stack (stack_pointer_rtx,
8654 hard_frame_pointer_rtx,
8655 GEN_INT (offset), style);
8656 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8657 frame.to_allocate, style == 2);
8658 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8659 GEN_INT (frame.nsseregs * 16), style);
8661 else if (frame.to_allocate || frame.nsseregs)
8663 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8666 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8667 GEN_INT (frame.to_allocate
8668 + frame.nsseregs * 16
8669 + frame.padding0), style);
8672 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8673 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8674 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8675 if (frame_pointer_needed)
8677 /* Leave results in shorter dependency chains on CPUs that are
8678 able to grok it fast.  */
8679 if (TARGET_USE_LEAVE)
8680 emit_insn ((*ix86_gen_leave) ());
8683 /* If stack realignment really happened, recovering the stack
8684 pointer to hard frame pointer is a must, if not using
   leave.  (Tail of this comment elided in the excerpt.)  */
8686 if (stack_realign_fp)
8687 pro_epilogue_adjust_stack (stack_pointer_rtx,
8688 hard_frame_pointer_rtx,
8690 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Undo the DRAP stack adjustment and recover the pushed DRAP reg.  */
8695 if (crtl->drap_reg && crtl->stack_realign_needed)
8697 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8698 ? 0 : UNITS_PER_WORD);
8699 gcc_assert (stack_realign_drap);
8700 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8702 GEN_INT (-(UNITS_PER_WORD
8703 + param_ptr_offset))));
8704 if (!call_used_regs[REGNO (crtl->drap_reg)])
8705 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8709 /* Sibcall epilogues don't want a return instruction.  */
8713 if (crtl->args.pops_args && crtl->args.size)
8715 rtx popc = GEN_INT (crtl->args.pops_args);
8717 /* i386 can only pop 64K bytes.  If asked to pop more, pop
8718 return address, do explicit add, and jump indirectly to the
   caller.  (Tail of this comment elided in the excerpt.)  */
8721 if (crtl->args.pops_args >= 65536)
8723 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8725 /* There is no "pascal" calling convention in any 64bit ABI.  */
8726 gcc_assert (!TARGET_64BIT);
8728 emit_insn (gen_popsi1 (ecx));
8729 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8730 emit_jump_insn (gen_return_indirect_internal (ecx));
8733 emit_jump_insn (gen_return_pop_internal (popc));
8736 emit_jump_insn (gen_return_internal ());
8739 /* Reset from the function's potential modifications.  Restores the
   canonical PIC register number and, on Mach-O, pads with a nop so
   a trailing deleted label doesn't end the object.  */
8742 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8743 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8745 if (pic_offset_table_rtx)
8746 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8748 /* Mach-O doesn't support labels at the end of objects, so if
8749 it looks like we might want one, insert a NOP.  */
8751 rtx insn = get_last_insn ();
/* Walk backwards past notes other than deleted labels.  */
8754 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8755 insn = PREV_INSN (insn);
8759 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8760 fputs ("\tnop\n", file);
8766 /* Extract the parts of an RTL expression that is a valid memory address
8767 for an instruction.  Return 0 if the structure of the address is
8768 grossly off.  Return -1 if the address contains ASHIFT, so it is not
8769 strictly valid, but still used for computing length of lea instruction.
   On success fills *OUT with base, index, disp, scale, and segment.  */
8772 ix86_decompose_address (rtx addr, struct ix86_address *out)
8774 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8775 rtx base_reg, index_reg;
8776 HOST_WIDE_INT scale = 1;
8777 rtx scale_rtx = NULL_RTX;
8779 enum ix86_address_seg seg = SEG_DEFAULT;
8781 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8783 else if (GET_CODE (addr) == PLUS)
/* Flatten a nested PLUS chain into the addends array.  */
8793 addends[n++] = XEXP (op, 1);
8796 while (GET_CODE (op) == PLUS);
8801 for (i = n; i >= 0; --i)
8804 switch (GET_CODE (op))
8809 index = XEXP (op, 0);
8810 scale_rtx = XEXP (op, 1);
8814 if (XINT (op, 1) == UNSPEC_TP
8815 && TARGET_TLS_DIRECT_SEG_REFS
8816 && seg == SEG_DEFAULT)
8817 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8846 else if (GET_CODE (addr) == MULT)
8848 index = XEXP (addr, 0); /* index*scale */
8849 scale_rtx = XEXP (addr, 1);
8851 else if (GET_CODE (addr) == ASHIFT)
8855 /* We're called for lea too, which implements ashift on occasion.  */
8856 index = XEXP (addr, 0);
8857 tmp = XEXP (addr, 1);
8858 if (!CONST_INT_P (tmp))
8860 scale = INTVAL (tmp);
8861 if ((unsigned HOST_WIDE_INT) scale > 3)
8867 disp = addr; /* displacement */
8869 /* Extract the integral value of scale.  */
8872 if (!CONST_INT_P (scale_rtx))
8874 scale = INTVAL (scale_rtx);
8877 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8878 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8880 /* Allow arg pointer and stack pointer as index if there is no scaling.  */
8881 if (base_reg && index_reg && scale == 1
8882 && (index_reg == arg_pointer_rtx
8883 || index_reg == frame_pointer_rtx
8884 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* Swap base and index so the special register becomes the base.  */
8887 tmp = base, base = index, index = tmp;
8888 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8891 /* Special case: %ebp cannot be encoded as a base without a displacement.  */
8892 if ((base_reg == hard_frame_pointer_rtx
8893 || base_reg == frame_pointer_rtx
8894 || base_reg == arg_pointer_rtx) && !disp)
8897 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8898 Avoid this by transforming to [%esi+0].
8899 Reload calls address legitimization without cfun defined, so we need
8900 to test cfun for being non-NULL.  */
8901 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8902 && base_reg && !index_reg && !disp
8904 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8907 /* Special case: encode reg+reg instead of reg*2.  */
8908 if (!base && index && scale && scale == 2)
8909 base = index, base_reg = index_reg, scale = 1;
8911 /* Special case: scaling cannot be encoded without base or displacement.  */
8912 if (!base && !disp && index && scale != 1)
8924 /* Return cost of the memory address x.
8925 For i386, it is better to use a complex address than let gcc copy
8926 the address into a reg and make a new pseudo.  But not if the address
8927 requires two regs - that would mean more pseudos with longer
   lifetimes.  (Tail of this comment elided in the excerpt.)  */
8930 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8932 struct ix86_address parts;
8934 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so register tests below see the hard/pseudo reg.  */
8938 if (parts.base && GET_CODE (parts.base) == SUBREG)
8939 parts.base = SUBREG_REG (parts.base);
8940 if (parts.index && GET_CODE (parts.index) == SUBREG)
8941 parts.index = SUBREG_REG (parts.index);
8943 /* Attempt to minimize number of registers in the address.  */
8945 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8947 && (!REG_P (parts.index)
8948 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8952 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8954 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8955 && parts.base != parts.index)
8958 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
8959 since its predecode logic can't detect the length of instructions
8960 and it degenerates to vector decoded.  Increase cost of such
8961 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
8962 to split such addresses or even refuse such addresses at all.
8964 Following addressing modes are affected:
8969 The first and last case may be avoidable by explicitly coding the zero in
8970 memory address, but I don't have AMD-K6 machine handy to check this
   theory.  (Tail of this comment elided in the excerpt.)  */
8974 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8975 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8976 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8982 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8983 this is used to form addresses to local data when -fPIC is in
   effect.  Recognized via the UNSPEC_MACHOPIC_OFFSET wrapper.  */
8987 darwin_local_data_pic (rtx disp)
8989 return (GET_CODE (disp) == UNSPEC
8990 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
8993 /* Determine if a given RTX is a valid constant.  We already know this
8994 satisfies CONSTANT_P.  Rejects TLS and DLLIMPORT symbols and
   unspecs other than the whitelisted ones.  */
8997 legitimate_constant_p (rtx x)
8999 switch (GET_CODE (x))
/* CONST: strip an optional (plus sym int) wrapper first.  */
9004 if (GET_CODE (x) == PLUS)
9006 if (!CONST_INT_P (XEXP (x, 1)))
9011 if (TARGET_MACHO && darwin_local_data_pic (x))
9014 /* Only some unspecs are valid as "constants".  */
9015 if (GET_CODE (x) == UNSPEC)
9016 switch (XINT (x, 1))
9021 return TARGET_64BIT;
9024 x = XVECEXP (x, 0, 0);
9025 return (GET_CODE (x) == SYMBOL_REF
9026 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9028 x = XVECEXP (x, 0, 0);
9029 return (GET_CODE (x) == SYMBOL_REF
9030 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9035 /* We must have drilled down to a symbol.  */
9036 if (GET_CODE (x) == LABEL_REF)
9038 if (GET_CODE (x) != SYMBOL_REF)
9043 /* TLS symbols are never valid.  */
9044 if (SYMBOL_REF_TLS_MODEL (x))
9047 /* DLLIMPORT symbols are never valid.  */
9048 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9049 && SYMBOL_REF_DLLIMPORT_P (x))
/* CONST_DOUBLE / CONST_VECTOR cases (headers elided in excerpt).  */
9054 if (GET_MODE (x) == TImode
9055 && x != CONST0_RTX (TImode)
9061 if (!standard_sse_constant_p (x))
9068 /* Otherwise we handle everything else in the move patterns.  */
9072 /* Determine if it's legal to put X into the constant pool.  This
9073 is not possible for the address of thread-local symbols, which
9074 is checked above (in legitimate_constant_p).  */
9077 ix86_cannot_force_const_mem (rtx x)
9079 /* We can always put integral constants and vectors in memory.  */
9080 switch (GET_CODE (x))
/* (case labels elided in this excerpt)  */
9090 return !legitimate_constant_p (x);
9093 /* Determine if a given RTX is a valid constant address: constant and
   legitimate as a strict-checked Pmode address.  */
9096 constant_address_p (rtx x)
9098 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
9101 /* Nonzero if the constant value X is a legitimate general operand
9102 when generating PIC code.  It is given that flag_pic is on and
9103 that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
9106 legitimate_pic_operand_p (rtx x)
9110 switch (GET_CODE (x))
/* CONST: strip an optional (plus inner int) wrapper.  */
9113 inner = XEXP (x, 0);
9114 if (GET_CODE (inner) == PLUS
9115 && CONST_INT_P (XEXP (inner, 1)))
9116 inner = XEXP (inner, 0);
9118 /* Only some unspecs are valid as "constants".  */
9119 if (GET_CODE (inner) == UNSPEC)
9120 switch (XINT (inner, 1))
9125 return TARGET_64BIT;
9127 x = XVECEXP (inner, 0, 0);
9128 return (GET_CODE (x) == SYMBOL_REF
9129 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9130 case UNSPEC_MACHOPIC_OFFSET:
9131 return legitimate_pic_address_disp_p (x);
/* SYMBOL_REF / LABEL_REF fall through to the displacement check.  */
9139 return legitimate_pic_address_disp_p (x);
9146 /* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  (Tail of this comment elided in the excerpt.)  */
9150 legitimate_pic_address_disp_p (rtx disp)
9154 /* In 64bit mode we can allow direct addresses of symbols and labels
9155 when they are not dynamic symbols.  */
9158 rtx op0 = disp, op1;
9160 switch (GET_CODE (disp))
/* CONST: accept sym+offset when the offset fits in +/-16MB.  */
9166 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9168 op0 = XEXP (XEXP (disp, 0), 0);
9169 op1 = XEXP (XEXP (disp, 0), 1);
9170 if (!CONST_INT_P (op1)
9171 || INTVAL (op1) >= 16*1024*1024
9172 || INTVAL (op1) < -16*1024*1024)
9174 if (GET_CODE (op0) == LABEL_REF)
9176 if (GET_CODE (op0) != SYMBOL_REF)
9181 /* TLS references should always be enclosed in UNSPEC.  */
9182 if (SYMBOL_REF_TLS_MODEL (op0))
9184 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9185 && ix86_cmodel != CM_LARGE_PIC)
/* 64-bit RIP-relative unspecs: only GOTPCREL-style forms.  */
9193 if (GET_CODE (disp) != CONST)
9195 disp = XEXP (disp, 0);
9199 /* It is unsafe to allow PLUS expressions here; that would exceed
9200 the allowed distance of GOT tables.  We should not need these anyway.  */
9201 if (GET_CODE (disp) != UNSPEC
9202 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9203 && XINT (disp, 1) != UNSPEC_GOTOFF
9204 && XINT (disp, 1) != UNSPEC_PLTOFF))
9207 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9208 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip CONST / plus-int, then check the unspec kind.  */
9214 if (GET_CODE (disp) == PLUS)
9216 if (!CONST_INT_P (XEXP (disp, 1)))
9218 disp = XEXP (disp, 0);
9222 if (TARGET_MACHO && darwin_local_data_pic (disp))
9225 if (GET_CODE (disp) != UNSPEC)
9228 switch (XINT (disp, 1))
9233 /* We need to check for both symbols and labels because VxWorks loads
9234 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
   details.  (Tail of this comment elided in the excerpt.)  */
9236 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9237 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9239 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9240 The ABI also specifies a 32bit relocation, but we don't produce it
9241 in the small PIC model at all.  */
9242 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9243 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9245 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9247 case UNSPEC_GOTTPOFF:
9248 case UNSPEC_GOTNTPOFF:
9249 case UNSPEC_INDNTPOFF:
9252 disp = XVECEXP (disp, 0, 0);
9253 return (GET_CODE (disp) == SYMBOL_REF
9254 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9256 disp = XVECEXP (disp, 0, 0);
9257 return (GET_CODE (disp) == SYMBOL_REF
9258 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9260 disp = XVECEXP (disp, 0, 0);
9261 return (GET_CODE (disp) == SYMBOL_REF
9262 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
/* NOTE(review): sampled excerpt -- many original lines are missing here
   (e.g. the "reason"/"reason_rtx" failure path that presumably prints or
   records why validation failed -- TODO confirm).  */
9268 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
9269    memory address for an instruction.  The MODE argument is the machine mode
9270    for the MEM expression that wants to use this address.
9272    It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
9273    convert common non-canonical forms to canonical form so that they will
9277 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9278 		      rtx addr, int strict)
9280   struct ix86_address parts;
9281   rtx base, index, disp;
9282   HOST_WIDE_INT scale;
  /* On failure REASON names the check that rejected ADDR (for debugging),
     REASON_RTX the offending sub-rtx.  */
9283   const char *reason = NULL;
9284   rtx reason_rtx = NULL_RTX;
  /* Split ADDR into base + index*scale + disp; bail out if it does not
     decompose.  */
9286   if (ix86_decompose_address (addr, &parts) <= 0)
9288       reason = "decomposition failed";
9293   index = parts.index;
9295   scale = parts.scale;
9297   /* Validate base register.
9299      Don't allow SUBREG's that span more than a word here.  It can lead to spill
9300      failures when the base is one word out of a two word structure, which is
9301      represented internally as a DImode int.  */
9310       else if (GET_CODE (base) == SUBREG
9311 	       && REG_P (SUBREG_REG (base))
9312 	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9314 	reg = SUBREG_REG (base);
9317 	  reason = "base is not a register";
9321       if (GET_MODE (base) != Pmode)
9323 	  reason = "base is not in Pmode";
      /* Strict checking (after reload) requires a hard reg valid as base;
	 non-strict also accepts pseudos.  */
9327       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9328 	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9330 	  reason = "base is not valid";
9335   /* Validate index register.
9337      Don't allow SUBREG's that span more than a word here -- same as above.  */
9346       else if (GET_CODE (index) == SUBREG
9347 	       && REG_P (SUBREG_REG (index))
9348 	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9350 	reg = SUBREG_REG (index);
9353 	  reason = "index is not a register";
9357       if (GET_MODE (index) != Pmode)
9359 	  reason = "index is not in Pmode";
9363       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9364 	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9366 	  reason = "index is not valid";
9371   /* Validate scale factor.  */
9374       reason_rtx = GEN_INT (scale);
9377 	  reason = "scale without index";
      /* x86 addressing only supports scale 1, 2, 4, 8 (1 presumably
	 handled by a missing line -- TODO confirm).  */
9381       if (scale != 2 && scale != 4 && scale != 8)
9383 	  reason = "scale is not a valid multiplier";
9388   /* Validate displacement.  */
9393       if (GET_CODE (disp) == CONST
9394 	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
9395 	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9396 	switch (XINT (XEXP (disp, 0), 1))
9398 	    /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9399 	       used.  While ABI specify also 32bit relocations, we don't produce
9400 	       them at all and use IP relative instead.  */
9403 		gcc_assert (flag_pic);
9405 		goto is_legitimate_pic;
9406 	    reason = "64bit address unspec";
9409 	  case UNSPEC_GOTPCREL:
9410 	    gcc_assert (flag_pic);
9411 	    goto is_legitimate_pic;
9413 	  case UNSPEC_GOTTPOFF:
9414 	  case UNSPEC_GOTNTPOFF:
9415 	  case UNSPEC_INDNTPOFF:
9421 	    reason = "invalid address unspec";
9425       else if (SYMBOLIC_CONST (disp)
9429 		   && MACHOPIC_INDIRECT
9430 		   && !machopic_operand_p (disp)
9436 	  if (TARGET_64BIT && (index || base))
9438 	      /* foo@dtpoff(%rX) is ok.  */
9439 	      if (GET_CODE (disp) != CONST
9440 		  || GET_CODE (XEXP (disp, 0)) != PLUS
9441 		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9442 		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9443 		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9444 		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9446 		  reason = "non-constant pic memory reference";
9450 	  else if (! legitimate_pic_address_disp_p (disp))
9452 	      reason = "displacement is an invalid pic construct";
9456 	  /* This code used to verify that a symbolic pic displacement
9457 	     includes the pic_offset_table_rtx register.
9459 	     While this is good idea, unfortunately these constructs may
9460 	     be created by "adds using lea" optimization for incorrect
9469 	     This code is nonsensical, but results in addressing
9470 	     GOT table with pic_offset_table_rtx base.  We can't
9471 	     just refuse it easily, since it gets matched by
9472 	     "addsi3" pattern, that later gets split to lea in the
9473 	     case output register differs from input.  While this
9474 	     can be handled by separate addsi pattern for this case
9475 	     that never results in lea, this seems to be easier and
9476 	     correct fix for crash to disable this test.  */
9478       else if (GET_CODE (disp) != LABEL_REF
9479 	       && !CONST_INT_P (disp)
9480 	       && (GET_CODE (disp) != CONST
9481 		   || !legitimate_constant_p (disp))
9482 	       && (GET_CODE (disp) != SYMBOL_REF
9483 		   || !legitimate_constant_p (disp)))
9485 	  reason = "displacement is not constant";
9488       else if (TARGET_64BIT
9489 	       && !x86_64_immediate_operand (disp, VOIDmode))
9491 	  reason = "displacement is out of range";
9496   /* Everything looks valid.  */
9503 /* Return a unique alias set for the GOT.  */
9505 static alias_set_type
9506 ix86_GOT_alias_set (void)
  /* Lazily allocated on first call; -1 marks "not yet created".  */
9508   static alias_set_type set = -1;
9510     set = new_alias_set ();
/* NOTE(review): sampled excerpt -- several branches below lack their
   enclosing conditions/braces in this view.  */
9514 /* Return a legitimate reference for ORIG (an address) using the
9515    register REG.  If REG is 0, a new pseudo is generated.
9517    There are two types of references that must be handled:
9519    1. Global data references must load the address from the GOT, via
9520       the PIC reg.  An insn is emitted to do this load, and the reg is
9523    2. Static data references, constant pool addresses, and code labels
9524       compute the address as an offset from the GOT, whose base is in
9525       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
9526       differentiate them from global data objects.  The returned
9527       address is the PIC reg + an unspec constant.
9529    GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9530    reg also appears in the address.  */
9533 legitimize_pic_address (rtx orig, rtx reg)
  /* 32-bit Darwin has its own PIC scheme; delegate entirely.  */
9540   if (TARGET_MACHO && !TARGET_64BIT)
9543 	reg = gen_reg_rtx (Pmode);
9544       /* Use the generic Mach-O PIC machinery.  */
9545       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9549   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9551   else if (TARGET_64BIT
9552 	   && ix86_cmodel != CM_SMALL_PIC
9553 	   && gotoff_operand (addr, Pmode))
9556       /* This symbol may be referenced via a displacement from the PIC
9557 	 base address (@GOTOFF).  */
9559       if (reload_in_progress)
9560 	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9561       if (GET_CODE (addr) == CONST)
9562 	addr = XEXP (addr, 0);
9563       if (GET_CODE (addr) == PLUS)
9565 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9567 	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9570 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9571       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9573       tmpreg = gen_reg_rtx (Pmode);
9576 	  emit_move_insn (tmpreg, new_rtx);
9580 	    new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9581 					   tmpreg, 1, OPTAB_DIRECT);
9584       else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
  /* 32-bit @GOTOFF case: same construction but result is PIC reg + const.  */
9586   else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9588       /* This symbol may be referenced via a displacement from the PIC
9589 	 base address (@GOTOFF).  */
9591       if (reload_in_progress)
9592 	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9593       if (GET_CODE (addr) == CONST)
9594 	addr = XEXP (addr, 0);
9595       if (GET_CODE (addr) == PLUS)
9597 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9599 	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9602 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9603       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9604       new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9608 	  emit_move_insn (reg, new_rtx);
  /* Non-TLS symbols (and, on VxWorks, text labels) go through the GOT.  */
9612   else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9613 	   /* We can't use @GOTOFF for text labels on VxWorks;
9614 	      see gotoff_operand.  */
9615 	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9617       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9619 	  if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9620 	    return legitimize_dllimport_symbol (addr, true);
9621 	  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9622 	      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9623 	      && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9625 	      rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9626 	      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
      /* 64-bit small/medium models: RIP-relative GOT load (@GOTPCREL).  */
9630       if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9632 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9633 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9634 	  new_rtx = gen_const_mem (Pmode, new_rtx);
9635 	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9638 	    reg = gen_reg_rtx (Pmode);
9639 	  /* Use directly gen_movsi, otherwise the address is loaded
9640 	     into register for CSE.  We don't want to CSE this addresses,
9641 	     instead we CSE addresses from the GOT table, so skip this.  */
9642 	  emit_insn (gen_movsi (reg, new_rtx));
9647 	  /* This symbol must be referenced via a load from the
9648 	     Global Offset Table (@GOT).  */
9650 	  if (reload_in_progress)
9651 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9652 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9653 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9655 	    new_rtx = force_reg (Pmode, new_rtx);
9656 	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9657 	  new_rtx = gen_const_mem (Pmode, new_rtx);
9658 	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9661 	    reg = gen_reg_rtx (Pmode);
9662 	  emit_move_insn (reg, new_rtx);
  /* Fallback: constants / CONST PLUS trees that are not symbolic PIC refs.  */
9668 	  if (CONST_INT_P (addr)
9669 	      && !x86_64_immediate_operand (addr, VOIDmode))
9673 		  emit_move_insn (reg, addr);
9677 		new_rtx = force_reg (Pmode, addr);
9679       else if (GET_CODE (addr) == CONST)
9681 	  addr = XEXP (addr, 0);
9683 	  /* We must match stuff we generate before.  Assume the only
9684 	     unspecs that can get here are ours.  Not that we could do
9685 	     anything with them anyway....  */
9686 	  if (GET_CODE (addr) == UNSPEC
9687 	      || (GET_CODE (addr) == PLUS
9688 		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9690 	  gcc_assert (GET_CODE (addr) == PLUS);
9692       if (GET_CODE (addr) == PLUS)
9694 	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9696 	  /* Check first to see if this is a constant offset from a @GOTOFF
9697 	     symbol reference.  */
9698 	  if (gotoff_operand (op0, Pmode)
9699 	      && CONST_INT_P (op1))
9703 		  if (reload_in_progress)
9704 		    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9705 		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9707 		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9708 		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9709 		  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9713 		      emit_move_insn (reg, new_rtx);
	      /* Large offsets (outside +/-16MB) cannot be encoded directly;
		 force into registers.  */
9719 		  if (INTVAL (op1) < -16*1024*1024
9720 		      || INTVAL (op1) >= 16*1024*1024)
9722 		      if (!x86_64_immediate_operand (op1, Pmode))
9723 			op1 = force_reg (Pmode, op1);
9724 		      new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
	      /* Otherwise legitimize both halves recursively and re-combine,
		 keeping any constant term outermost.  */
9730 	      base = legitimize_pic_address (XEXP (addr, 0), reg);
9731 	      new_rtx  = legitimize_pic_address (XEXP (addr, 1),
9732 						 base == reg ? NULL_RTX : reg);
9734 	      if (CONST_INT_P (new_rtx))
9735 		new_rtx = plus_constant (base, INTVAL (new_rtx));
9738 		  if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9740 		      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9741 		      new_rtx = XEXP (new_rtx, 1);
9743 		  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9751 /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
9754 get_thread_pointer (int to_reg)
  /* The TP is represented as an UNSPEC_TP wrapping const0_rtx.  */
9758   tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9762   reg = gen_reg_rtx (Pmode);
9763   insn = gen_rtx_SET (VOIDmode, reg, tp);
9764   insn = emit_insn (insn);
/* NOTE(review): sampled excerpt -- case labels, returns and braces are
   partially missing; comments describe visible structure only.  */
9769 /* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
9770    false if we expect this to be used for a memory address and true if
9771    we expect to load the address into a register.  */
9774 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9776   rtx dest, base, off, pic, tp;
  /* One arm per ELF TLS access model.  */
9781     case TLS_MODEL_GLOBAL_DYNAMIC:
9782       dest = gen_reg_rtx (Pmode);
9783       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9785       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
	  /* 64-bit GD: __tls_get_addr-style call returning in %rax,
	     wrapped as a const libcall block for CSE.  */
9787 	  rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9790 	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9791 	  insns = get_insns ();
9794 	  RTL_CONST_CALL_P (insns) = 1;
9795 	  emit_libcall_block (insns, dest, rax, x);
9797       else if (TARGET_64BIT && TARGET_GNU2_TLS)
9798 	emit_insn (gen_tls_global_dynamic_64 (dest, x));
9800 	emit_insn (gen_tls_global_dynamic_32 (dest, x));
9802       if (TARGET_GNU2_TLS)
9804 	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9806 	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9810     case TLS_MODEL_LOCAL_DYNAMIC:
9811       base = gen_reg_rtx (Pmode);
9812       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9814       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9816 	  rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9819 	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9820 	  insns = get_insns ();
9823 	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9824 	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9825 	  RTL_CONST_CALL_P (insns) = 1;
9826 	  emit_libcall_block (insns, base, rax, note);
9828       else if (TARGET_64BIT && TARGET_GNU2_TLS)
9829 	emit_insn (gen_tls_local_dynamic_base_64 (base));
9831 	emit_insn (gen_tls_local_dynamic_base_32 (base));
9833       if (TARGET_GNU2_TLS)
9835 	  rtx x = ix86_tls_module_base ();
9837 	  set_unique_reg_note (get_last_insn (), REG_EQUIV,
9838 			       gen_rtx_MINUS (Pmode, x, tp));
      /* LD result = module base + @DTPOFF offset of X.  */
9841       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9842       off = gen_rtx_CONST (Pmode, off);
9844       dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9846       if (TARGET_GNU2_TLS)
9848 	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9850 	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9855     case TLS_MODEL_INITIAL_EXEC:
9859 	  type = UNSPEC_GOTNTPOFF;
      /* Pick the GOT-relative relocation flavor based on bitness,
	 GNU/GNU2 TLS dialect and whether the PIC reg is live.  */
9863 	  if (reload_in_progress)
9864 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9865 	  pic = pic_offset_table_rtx;
9866 	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9868       else if (!TARGET_ANY_GNU_TLS)
9870 	  pic = gen_reg_rtx (Pmode);
9871 	  emit_insn (gen_set_got (pic));
9872 	  type = UNSPEC_GOTTPOFF;
9877 	  type = UNSPEC_INDNTPOFF;
9880       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9881       off = gen_rtx_CONST (Pmode, off);
9883 	off = gen_rtx_PLUS (Pmode, pic, off);
9884       off = gen_const_mem (Pmode, off);
9885       set_mem_alias_set (off, ix86_GOT_alias_set ());
9887       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9889 	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9890 	  off = force_reg (Pmode, off);
9891 	  return gen_rtx_PLUS (Pmode, base, off);
9895 	  base = get_thread_pointer (true);
9896 	  dest = gen_reg_rtx (Pmode);
	  /* Non-GNU TLS: address = TP - offset.  */
9897 	  emit_insn (gen_subsi3 (dest, base, off));
9901     case TLS_MODEL_LOCAL_EXEC:
9902       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9903 			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9904 			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9905       off = gen_rtx_CONST (Pmode, off);
9907       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9909 	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9910 	  return gen_rtx_PLUS (Pmode, base, off);
9914 	  base = get_thread_pointer (true);
9915 	  dest = gen_reg_rtx (Pmode);
9916 	  emit_insn (gen_subsi3 (dest, base, off));
9927 /* Create or return the unique __imp_DECL dllimport symbol corresponding
  /* Cache mapping DECL -> synthesized __imp_ VAR_DECL, GC-managed.  */
9930 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9931   htab_t dllimport_map;
9934 get_dllimport_decl (tree decl)
9936   struct tree_map *h, in;
9940   size_t namelen, prefixlen;
9946     dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
  /* Look up DECL; on a hit return the cached mapping (return path is in
     missing lines -- TODO confirm).  */
9948   in.hash = htab_hash_pointer (decl);
9949   in.base.from = decl;
9950   loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9951   h = (struct tree_map *) *loc;
  /* Cache miss: build an artificial external read-only pointer VAR_DECL
     named "__imp_<name>" whose DECL_RTL is a GOT-like const mem load.  */
9955   *loc = h = GGC_NEW (struct tree_map);
9957   h->base.from = decl;
9958   h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9959   DECL_ARTIFICIAL (to) = 1;
9960   DECL_IGNORED_P (to) = 1;
9961   DECL_EXTERNAL (to) = 1;
9962   TREE_READONLY (to) = 1;
9964   name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9965   name = targetm.strip_name_encoding (name);
  /* Fastcall names and no-prefix targets omit the extra underscore.  */
9966   prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9967 	   ? "*__imp_" : "*__imp__";
9968   namelen = strlen (name);
9969   prefixlen = strlen (prefix);
9970   imp_name = (char *) alloca (namelen + prefixlen + 1);
9971   memcpy (imp_name, prefix, prefixlen);
9972   memcpy (imp_name + prefixlen, name, namelen + 1);
9974   name = ggc_alloc_string (imp_name, namelen + prefixlen);
9975   rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9976   SET_SYMBOL_REF_DECL (rtl, to);
9977   SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9979   rtl = gen_const_mem (Pmode, rtl);
9980   set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9982   SET_DECL_RTL (to, rtl);
9983   SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9988 /* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
9989    true if we require the result be a register.  */
9992 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
  /* SYMBOL must carry its decl so we can build/look up the __imp_ alias.  */
9997   gcc_assert (SYMBOL_REF_DECL (symbol));
9998   imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10000   x = DECL_RTL (imp_decl);
10002     x = force_reg (Pmode, x);
/* NOTE(review): sampled excerpt -- "changed" tracking, some conditions and
   return statements are in missing lines.  */
10006 /* Try machine-dependent ways of modifying an illegitimate address
10007    to be legitimate.  If we find one, return the new, valid address.
10008    This macro is used in only one place: `memory_address' in explow.c.
10010    OLDX is the address as it was before break_out_memory_refs was called.
10011    In some cases it is useful to look at this to decide what needs to be done.
10013    MODE and WIN are passed so that this macro can use
10014    GO_IF_LEGITIMATE_ADDRESS.
10016    It is always safe for this macro to do nothing.  It exists to recognize
10017    opportunities to optimize the output.
10019    For the 80386, we handle X+REG by loading X into a register R and
10020    using R+REG.  R will go in a general reg and indexing will be used.
10021    However, if REG is a broken-out memory address or multiplication,
10022    nothing needs to be done because REG can certainly go in a general reg.
10024    When -fpic is used, special handling is needed for symbolic references.
10025    See comments by legitimize_pic_address in i386.c for details.  */
10028 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10029 			 enum machine_mode mode)
  /* TLS symbols get dedicated sequences first.  */
10034   log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10036     return legitimize_tls_address (x, (enum tls_model) log, false);
10037   if (GET_CODE (x) == CONST
10038       && GET_CODE (XEXP (x, 0)) == PLUS
10039       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10040       && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10042       rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10043 				      (enum tls_model) log, false);
10044       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
  /* dllimport symbols are routed through their __imp_ indirection.  */
10047   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10049       if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10050 	return legitimize_dllimport_symbol (x, true);
10051       if (GET_CODE (x) == CONST
10052 	  && GET_CODE (XEXP (x, 0)) == PLUS
10053 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10054 	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10056 	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10057 	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10061   if (flag_pic && SYMBOLIC_CONST (x))
10062     return legitimize_pic_address (x, 0);
10064   /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10065   if (GET_CODE (x) == ASHIFT
10066       && CONST_INT_P (XEXP (x, 1))
10067       && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10070       log = INTVAL (XEXP (x, 1));
10071       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10072 			GEN_INT (1 << log));
10075   if (GET_CODE (x) == PLUS)
10077       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
10079       if (GET_CODE (XEXP (x, 0)) == ASHIFT
10080 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10081 	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10084 	  log = INTVAL (XEXP (XEXP (x, 0), 1));
10085 	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
10086 				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10087 				      GEN_INT (1 << log));
10090       if (GET_CODE (XEXP (x, 1)) == ASHIFT
10091 	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10092 	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10095 	  log = INTVAL (XEXP (XEXP (x, 1), 1));
10096 	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
10097 				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10098 				      GEN_INT (1 << log));
10101       /* Put multiply first if it isn't already.  */
10102       if (GET_CODE (XEXP (x, 1)) == MULT)
10104 	  rtx tmp = XEXP (x, 0);
10105 	  XEXP (x, 0) = XEXP (x, 1);
10110       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10111 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
10112 	 created by virtual register instantiation, register elimination, and
10113 	 similar optimizations.  */
10114       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10117 	  x = gen_rtx_PLUS (Pmode,
10118 			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
10119 					  XEXP (XEXP (x, 1), 0)),
10120 			    XEXP (XEXP (x, 1), 1));
10124 	   (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10125 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
10126       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10127 	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10128 	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10129 	       && CONSTANT_P (XEXP (x, 1)))
10132 	  rtx other = NULL_RTX;
	  /* Figure out which operand is the bare constant and which is the
	     remaining addend.  */
10134 	  if (CONST_INT_P (XEXP (x, 1)))
10136 	      constant = XEXP (x, 1);
10137 	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10139 	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10141 	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10142 	      other = XEXP (x, 1);
10150 	    x = gen_rtx_PLUS (Pmode,
10151 			      gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10152 					    XEXP (XEXP (x, 0), 1)),
10153 			      plus_constant (other, INTVAL (constant)));
      /* After each canonicalization round, stop early if the address is
	 already acceptable.  */
10157       if (changed && legitimate_address_p (mode, x, FALSE))
10160       if (GET_CODE (XEXP (x, 0)) == MULT)
10163 	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10166       if (GET_CODE (XEXP (x, 1)) == MULT)
10169 	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10173 	  && REG_P (XEXP (x, 1))
10174 	  && REG_P (XEXP (x, 0)))
10177       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10180 	  x = legitimize_pic_address (x, 0);
10183       if (changed && legitimate_address_p (mode, x, FALSE))
      /* Last resort: force the non-register operand into a fresh pseudo.  */
10186       if (REG_P (XEXP (x, 0)))
10188 	  rtx temp = gen_reg_rtx (Pmode);
10189 	  rtx val  = force_operand (XEXP (x, 1), temp);
10191 	    emit_move_insn (temp, val);
10193 	  XEXP (x, 1) = temp;
10197       else if (REG_P (XEXP (x, 1)))
10199 	  rtx temp = gen_reg_rtx (Pmode);
10200 	  rtx val  = force_operand (XEXP (x, 0), temp);
10202 	    emit_move_insn (temp, val);
10204 	  XEXP (x, 0) = temp;
/* NOTE(review): sampled excerpt -- several case labels (SYMBOL_REF,
   LABEL_REF, PLUS, MINUS, UNSPEC, ...) are implied but not all visible.  */
10212 /* Print an integer constant expression in assembler syntax.  Addition
10213    and subtraction are the only arithmetic that may appear in these
10214    expressions.  FILE is the stdio stream to write to, X is the rtx, and
10215    CODE is the operand print code from the output string.  */
10218 output_pic_addr_const (FILE *file, rtx x, int code)
10222   switch (GET_CODE (x))
10225       gcc_assert (flag_pic);
10230       if (! TARGET_MACHO || TARGET_64BIT)
10231 	output_addr_const (file, x);
10234 	  const char *name = XSTR (x, 0);
10236 	  /* Mark the decl as referenced so that cgraph will
10237 	     output the function.  */
10238 	  if (SYMBOL_REF_DECL (x))
10239 	    mark_decl_referenced (SYMBOL_REF_DECL (x));
10242 	  if (MACHOPIC_INDIRECT
10243 	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10244 	    name = machopic_indirection_name (x, /*stub_p=*/true);
10246 	  assemble_name (file, name);
      /* 'P' requests a PLT reference for non-local symbols (ELF only).  */
10248       if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10249 	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10250 	fputs ("@PLT", file);
10257       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10258       assemble_name (asm_out_file, buf);
10262       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10266       /* This used to output parentheses around the expression,
10267 	 but that does not work on the 386 (either ATT or BSD assembler).  */
10268       output_pic_addr_const (file, XEXP (x, 0), code);
10272       if (GET_MODE (x) == VOIDmode)
10274 	  /* We can use %d if the number is <32 bits and positive.  */
10275 	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10276 	    fprintf (file, "0x%lx%08lx",
10277 		     (unsigned long) CONST_DOUBLE_HIGH (x),
10278 		     (unsigned long) CONST_DOUBLE_LOW (x));
10280 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10283 	/* We can't handle floating point constants;
10284 	   PRINT_OPERAND must handle them.  */
10285 	output_operand_lossage ("floating constant misused");
10289       /* Some assemblers need integer constants to appear first.  */
10290       if (CONST_INT_P (XEXP (x, 0)))
10292 	  output_pic_addr_const (file, XEXP (x, 0), code);
10294 	  output_pic_addr_const (file, XEXP (x, 1), code);
10298 	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
10299 	  output_pic_addr_const (file, XEXP (x, 1), code);
10301 	  output_pic_addr_const (file, XEXP (x, 0), code);
      /* MINUS is bracketed: Intel dialect uses parens, AT&T brackets.  */
10307       putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10308       output_pic_addr_const (file, XEXP (x, 0), code);
10310       output_pic_addr_const (file, XEXP (x, 1), code);
10312       putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      /* UNSPEC: print the wrapped operand followed by its reloc suffix.  */
10316       gcc_assert (XVECLEN (x, 0) == 1);
10317       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10318       switch (XINT (x, 1))
10321 	  fputs ("@GOT", file);
10323 	case UNSPEC_GOTOFF:
10324 	  fputs ("@GOTOFF", file);
10326 	case UNSPEC_PLTOFF:
10327 	  fputs ("@PLTOFF", file);
10329 	case UNSPEC_GOTPCREL:
10330 	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10331 		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10333 	case UNSPEC_GOTTPOFF:
10334 	  /* FIXME: This might be @TPOFF in Sun ld too.  */
10335 	  fputs ("@GOTTPOFF", file);
10338 	  fputs ("@TPOFF", file);
10340 	case UNSPEC_NTPOFF:
10342 	    fputs ("@TPOFF", file);
10344 	    fputs ("@NTPOFF", file);
10346 	case UNSPEC_DTPOFF:
10347 	  fputs ("@DTPOFF", file);
10349 	case UNSPEC_GOTNTPOFF:
10351 	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10352 		   "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10354 	    fputs ("@GOTNTPOFF", file);
10356 	case UNSPEC_INDNTPOFF:
10357 	  fputs ("@INDNTPOFF", file);
10360 	case UNSPEC_MACHOPIC_OFFSET:
10362 	  machopic_output_function_base_name (file);
10366 	  output_operand_lossage ("invalid UNSPEC as operand");
10372       output_operand_lossage ("invalid expression as operand");
10376 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10377    We need to emit DTP-relative relocations.  */
10379 static void ATTRIBUTE_UNUSED
10380 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
  /* Emit "<directive> x@DTPOFF" (a SIZE switch with more cases appears to
     be in missing lines -- TODO confirm).  */
10382       fputs (ASM_LONG, file);
10383   output_addr_const (file, x);
10384   fputs ("@DTPOFF", file);
10390       fputs (", 0", file);
10393       gcc_unreachable ();
10397 /* Return true if X is a representation of the PIC register.  This copes
10398    with calls from ix86_find_base_term, where the register might have
10399    been replaced by a cselib value.  */
10402 ix86_pic_register_p (rtx x)
  /* A cselib VALUE stands in for the register during alias analysis;
     compare it against the real PIC reg.  */
10404   if (GET_CODE (x) == VALUE)
10405     return (pic_offset_table_rtx
10406 	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10408     return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10411 /* In the name of slightly smaller debug output, and to cater to
10412    general assembler lossage, recognize PIC+GOTOFF and turn it back
10413    into a direct symbol reference.
10415    On Darwin, this is necessary to avoid a crash, because Darwin
10416    has a different PIC label for each routine but the DWARF debugging
10417    information is not associated with any particular routine, so it's
10418    necessary to remove references to the PIC label from RTL stored by
10419    the DWARF output code.  */
10422 ix86_delegitimize_address (rtx orig_x)
10425   /* reg_addend is NULL or a multiple of some register.  */
10426   rtx reg_addend = NULL_RTX;
10427   /* const_addend is NULL or a const_int.  */
10428   rtx const_addend = NULL_RTX;
10429   /* This is the result, or NULL.  */
10430   rtx result = NULL_RTX;
  /* 64-bit: the only delegitimizable form is a mem of CONST GOTPCREL.  */
10437       if (GET_CODE (x) != CONST
10438 	  || GET_CODE (XEXP (x, 0)) != UNSPEC
10439 	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10440 	  || !MEM_P (orig_x))
10442       return XVECEXP (XEXP (x, 0), 0, 0);
  /* 32-bit: expect (plus <pic-ish base> (const ...)).  */
10445   if (GET_CODE (x) != PLUS
10446       || GET_CODE (XEXP (x, 1)) != CONST)
10449   if (ix86_pic_register_p (XEXP (x, 0)))
10450     /* %ebx + GOT/GOTOFF */
10452   else if (GET_CODE (XEXP (x, 0)) == PLUS)
10454       /* %ebx + %reg * scale + GOT/GOTOFF */
10455       reg_addend = XEXP (x, 0);
10456       if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10457 	reg_addend = XEXP (reg_addend, 1);
10458       else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10459 	reg_addend = XEXP (reg_addend, 0);
10462       if (!REG_P (reg_addend)
10463 	  && GET_CODE (reg_addend) != MULT
10464 	  && GET_CODE (reg_addend) != ASHIFT)
  /* Peel an optional constant offset off the CONST body.  */
10470   x = XEXP (XEXP (x, 1), 0);
10471   if (GET_CODE (x) == PLUS
10472       && CONST_INT_P (XEXP (x, 1)))
10474       const_addend = XEXP (x, 1);
  /* @GOT is only valid inside a mem; @GOTOFF only outside one.  */
10478   if (GET_CODE (x) == UNSPEC
10479       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10480 	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10481     result = XVECEXP (x, 0, 0);
10483   if (TARGET_MACHO && darwin_local_data_pic (x)
10484       && !MEM_P (orig_x))
10485     result = XVECEXP (x, 0, 0);
  /* Re-attach the peeled constant and register addends to the bare
     symbol.  */
10491     result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10493     result = gen_rtx_PLUS (Pmode, reg_addend, result);
10497 /* If X is a machine specific address (i.e. a symbol or label being
10498    referenced as a displacement from the GOT implemented using an
10499    UNSPEC), then return the base term.  Otherwise return X.  */
10502 ix86_find_base_term (rtx x)
  /* Fast path: CONST (UNSPEC_GOTPCREL ...) possibly plus a constant --
     the base term is the wrapped symbol.  */
10508       if (GET_CODE (x) != CONST)
10510       term = XEXP (x, 0);
10511       if (GET_CODE (term) == PLUS
10512 	  && (CONST_INT_P (XEXP (term, 1))
10513 	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10514 	term = XEXP (term, 0);
10515       if (GET_CODE (term) != UNSPEC
10516 	  || XINT (term, 1) != UNSPEC_GOTPCREL)
10519       return XVECEXP (term, 0, 0);
  /* Otherwise fall back to full delegitimization.  */
10522   return ix86_delegitimize_address (x);
/* Emit the condition-code suffix (e.g. "e", "a", "nbe") for CODE in MODE
   to FILE.  REVERSE inverts the condition; FP selects the fcmov-style
   spelling.  NOTE(review): sampled excerpt -- most case labels of the main
   switch are in missing lines.  */
10526 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10527 		    int fp, FILE *file)
10529   const char *suffix;
10531   if (mode == CCFPmode || mode == CCFPUmode)
      /* FP compares: translate to the equivalent integer condition first;
	 split codes must not reach here.  */
10533       enum rtx_code second_code, bypass_code;
10534       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10535       gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10536       code = ix86_fp_compare_code_to_integer (code);
10540     code = reverse_condition (code);
10591       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10595       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10596 	 Those same assemblers have the same but opposite lossage on cmov.  */
10597       if (mode == CCmode)
10598 	suffix = fp ? "nbe" : "a";
10599       else if (mode == CCCmode)
10602 	gcc_unreachable ();
10618 	  gcc_unreachable ();
10622       gcc_assert (mode == CCmode || mode == CCCmode);
10639 	  gcc_unreachable ();
10643       /* ??? As above.  */
10644       gcc_assert (mode == CCmode || mode == CCCmode);
10645       suffix = fp ? "nb" : "ae";
10648       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10652       /* ??? As above.  */
10653       if (mode == CCmode)
10655       else if (mode == CCCmode)
10656 	suffix = fp ? "nb" : "ae";
10658 	gcc_unreachable ();
10661       suffix = fp ? "u" : "p";
10664       suffix = fp ? "nu" : "np";
10667       gcc_unreachable ();
10669   fputs (suffix, file);
/* NOTE(review): sampled excerpt -- the mode-size switch's case labels are
   mostly in missing lines; CODE is reused to hold the byte size below.  */
10672 /* Print the name of register X to FILE based on its machine mode and number.
10673    If CODE is 'w', pretend the mode is HImode.
10674    If CODE is 'b', pretend the mode is QImode.
10675    If CODE is 'k', pretend the mode is SImode.
10676    If CODE is 'q', pretend the mode is DImode.
10677    If CODE is 'x', pretend the mode is V4SFmode.
10678    If CODE is 't', pretend the mode is V8SFmode.
10679    If CODE is 'h', pretend the reg is the 'high' byte register.
10680    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10681    If CODE is 'd', duplicate the operand for AVX instruction.
10685 print_reg (rtx x, int code, FILE *file)
10688   bool duplicated = code == 'd' && TARGET_AVX;
  /* Internal-only registers must never reach the assembler.  */
10690   gcc_assert (x == pc_rtx
10691 	      || (REGNO (x) != ARG_POINTER_REGNUM
10692 		  && REGNO (x) != FRAME_POINTER_REGNUM
10693 		  && REGNO (x) != FLAGS_REG
10694 		  && REGNO (x) != FPSR_REG
10695 		  && REGNO (x) != FPCR_REG));
10697   if (ASSEMBLER_DIALECT == ASM_ATT)
10702       gcc_assert (TARGET_64BIT);
10703       fputs ("rip", file);
  /* Map the print code to a byte size; default is the operand's mode.  */
10707   if (code == 'w' || MMX_REG_P (x))
10709   else if (code == 'b')
10711   else if (code == 'k')
10713   else if (code == 'q')
10715   else if (code == 'y')
10717   else if (code == 'h')
10719   else if (code == 'x')
10721   else if (code == 't')
10724     code = GET_MODE_SIZE (GET_MODE (x));
10726   /* Irritatingly, AMD extended registers use different naming convention
10727      from the normal registers.  */
10728   if (REX_INT_REG_P (x))
10730       gcc_assert (TARGET_64BIT);
10734 	  error ("extended registers have no high halves");
10737 	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10740 	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10743 	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10746 	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10749 	  error ("unsupported operand size for extended register");
10759       if (STACK_TOP_P (x))
      /* GPRs: 8-byte names get an 'r' prefix, 4-byte an 'e' prefix.  */
10768       if (! ANY_FP_REG_P (x))
10769 	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10774       reg = hi_reg_name[REGNO (x)];
10777       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10779       reg = qi_reg_name[REGNO (x)];
10782       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10784       reg = qi_high_reg_name[REGNO (x)];
10789       gcc_assert (!duplicated);
10791       fputs (hi_reg_name[REGNO (x)] + 1, file);
10796       gcc_unreachable ();
  /* AVX 'd': repeat the register as a second operand.  */
10802       if (ASSEMBLER_DIALECT == ASM_ATT)
10803 	fprintf (file, ", %%%s", reg);
10805 	fprintf (file, ", %s", reg);
10809 /* Locate some local-dynamic symbol still in use by this function
10810 so that we can print its name in some tls_local_dynamic_base
10814 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10818 if (GET_CODE (x) == SYMBOL_REF
10819 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10821 cfun->machine->some_ld_name = XSTR (x, 0);
10828 static const char *
10829 get_some_local_dynamic_name (void)
10833 if (cfun->machine->some_ld_name)
10834 return cfun->machine->some_ld_name;
10836 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10838 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10839 return cfun->machine->some_ld_name;
10841 gcc_unreachable ();
10844 /* Meaning of CODE:
10845 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10846 C -- print opcode suffix for set/cmov insn.
10847 c -- like C, but print reversed condition
10848 E,e -- likewise, but for compare-and-branch fused insn.
10849 F,f -- likewise, but for floating-point.
10850 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10852 R -- print the prefix for register names.
10853 z -- print the opcode suffix for the size of the current operand.
10854 Z -- likewise, with special suffixes for x87 instructions.
10855 * -- print a star (in certain assembler syntax)
10856 A -- print an absolute memory reference.
10857 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10858 s -- print a shift double count, followed by the assemblers argument
10860 b -- print the QImode name of the register for the indicated operand.
10861 %b0 would print %al if operands[0] is reg 0.
10862 w -- likewise, print the HImode name of the register.
10863 k -- likewise, print the SImode name of the register.
10864 q -- likewise, print the DImode name of the register.
10865 x -- likewise, print the V4SFmode name of the register.
10866 t -- likewise, print the V8SFmode name of the register.
10867 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10868 y -- print "st(0)" instead of "st" as a register.
10869 d -- print duplicated register operand for AVX instruction.
10870 D -- print condition for SSE cmp instruction.
10871 P -- if PIC, print an @PLT suffix.
10872 X -- don't print any sort of PIC '@' suffix for a symbol.
10873 & -- print some in-use local-dynamic symbol name.
10874 H -- print a memory address offset by 8; used for sse high-parts
10875 Y -- print condition for SSE5 com* instruction.
10876 + -- print a branch hint as 'cs' or 'ds' prefix
10877 ; -- print a semicolon (after prefixes due to bug in older gas).
10881 print_operand (FILE *file, rtx x, int code)
10888 if (ASSEMBLER_DIALECT == ASM_ATT)
10893 assemble_name (file, get_some_local_dynamic_name ());
10897 switch (ASSEMBLER_DIALECT)
10904 /* Intel syntax. For absolute addresses, registers should not
10905 be surrounded by braces. */
10909 PRINT_OPERAND (file, x, 0);
10916 gcc_unreachable ();
10919 PRINT_OPERAND (file, x, 0);
10924 if (ASSEMBLER_DIALECT == ASM_ATT)
10929 if (ASSEMBLER_DIALECT == ASM_ATT)
10934 if (ASSEMBLER_DIALECT == ASM_ATT)
10939 if (ASSEMBLER_DIALECT == ASM_ATT)
10944 if (ASSEMBLER_DIALECT == ASM_ATT)
10949 if (ASSEMBLER_DIALECT == ASM_ATT)
10954 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10956 /* Opcodes don't get size suffixes if using Intel opcodes. */
10957 if (ASSEMBLER_DIALECT == ASM_INTEL)
10960 switch (GET_MODE_SIZE (GET_MODE (x)))
10979 output_operand_lossage
10980 ("invalid operand size for operand code '%c'", code);
10985 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10987 (0, "non-integer operand used with operand code '%c'", code);
10991 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
10992 if (ASSEMBLER_DIALECT == ASM_INTEL)
10995 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
10997 switch (GET_MODE_SIZE (GET_MODE (x)))
11000 #ifdef HAVE_AS_IX86_FILDS
11010 #ifdef HAVE_AS_IX86_FILDQ
11013 fputs ("ll", file);
11021 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11023 /* 387 opcodes don't get size suffixes
11024 if the operands are registers. */
11025 if (STACK_REG_P (x))
11028 switch (GET_MODE_SIZE (GET_MODE (x)))
11049 output_operand_lossage
11050 ("invalid operand type used with operand code '%c'", code);
11054 output_operand_lossage
11055 ("invalid operand size for operand code '%c'", code);
11072 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11074 PRINT_OPERAND (file, x, 0);
11075 fputs (", ", file);
11080 /* Little bit of braindamage here. The SSE compare instructions
11081 does use completely different names for the comparisons that the
11082 fp conditional moves. */
11085 switch (GET_CODE (x))
11088 fputs ("eq", file);
11091 fputs ("eq_us", file);
11094 fputs ("lt", file);
11097 fputs ("nge", file);
11100 fputs ("le", file);
11103 fputs ("ngt", file);
11106 fputs ("unord", file);
11109 fputs ("neq", file);
11112 fputs ("neq_oq", file);
11115 fputs ("ge", file);
11118 fputs ("nlt", file);
11121 fputs ("gt", file);
11124 fputs ("nle", file);
11127 fputs ("ord", file);
11130 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11136 switch (GET_CODE (x))
11140 fputs ("eq", file);
11144 fputs ("lt", file);
11148 fputs ("le", file);
11151 fputs ("unord", file);
11155 fputs ("neq", file);
11159 fputs ("nlt", file);
11163 fputs ("nle", file);
11166 fputs ("ord", file);
11169 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11175 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11176 if (ASSEMBLER_DIALECT == ASM_ATT)
11178 switch (GET_MODE (x))
11180 case HImode: putc ('w', file); break;
11182 case SFmode: putc ('l', file); break;
11184 case DFmode: putc ('q', file); break;
11185 default: gcc_unreachable ();
11192 if (!COMPARISON_P (x))
11194 output_operand_lossage ("operand is neither a constant nor a "
11195 "condition code, invalid operand code "
11199 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11202 if (!COMPARISON_P (x))
11204 output_operand_lossage ("operand is neither a constant nor a "
11205 "condition code, invalid operand code "
11209 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11210 if (ASSEMBLER_DIALECT == ASM_ATT)
11213 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11216 /* Like above, but reverse condition */
11218 /* Check to see if argument to %c is really a constant
11219 and not a condition code which needs to be reversed. */
11220 if (!COMPARISON_P (x))
11222 output_operand_lossage ("operand is neither a constant nor a "
11223 "condition code, invalid operand "
11227 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11230 if (!COMPARISON_P (x))
11232 output_operand_lossage ("operand is neither a constant nor a "
11233 "condition code, invalid operand "
11237 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11238 if (ASSEMBLER_DIALECT == ASM_ATT)
11241 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
11245 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11249 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
11253 /* It doesn't actually matter what mode we use here, as we're
11254 only going to use this for printing. */
11255 x = adjust_address_nv (x, DImode, 8);
11263 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11266 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11269 int pred_val = INTVAL (XEXP (x, 0));
11271 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11272 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11274 int taken = pred_val > REG_BR_PROB_BASE / 2;
11275 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11277 /* Emit hints only in the case default branch prediction
11278 heuristics would fail. */
11279 if (taken != cputaken)
11281 /* We use 3e (DS) prefix for taken branches and
11282 2e (CS) prefix for not taken branches. */
11284 fputs ("ds ; ", file);
11286 fputs ("cs ; ", file);
11294 switch (GET_CODE (x))
11297 fputs ("neq", file);
11300 fputs ("eq", file);
11304 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11308 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11312 fputs ("le", file);
11316 fputs ("lt", file);
11319 fputs ("unord", file);
11322 fputs ("ord", file);
11325 fputs ("ueq", file);
11328 fputs ("nlt", file);
11331 fputs ("nle", file);
11334 fputs ("ule", file);
11337 fputs ("ult", file);
11340 fputs ("une", file);
11343 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11350 fputs (" ; ", file);
11357 output_operand_lossage ("invalid operand code '%c'", code);
11362 print_reg (x, code, file);
11364 else if (MEM_P (x))
11366 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11367 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11368 && GET_MODE (x) != BLKmode)
11371 switch (GET_MODE_SIZE (GET_MODE (x)))
11373 case 1: size = "BYTE"; break;
11374 case 2: size = "WORD"; break;
11375 case 4: size = "DWORD"; break;
11376 case 8: size = "QWORD"; break;
11377 case 12: size = "XWORD"; break;
11379 if (GET_MODE (x) == XFmode)
11385 gcc_unreachable ();
11388 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11391 else if (code == 'w')
11393 else if (code == 'k')
11396 fputs (size, file);
11397 fputs (" PTR ", file);
11401 /* Avoid (%rip) for call operands. */
11402 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11403 && !CONST_INT_P (x))
11404 output_addr_const (file, x);
11405 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11406 output_operand_lossage ("invalid constraints for operand");
11408 output_address (x);
11411 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11416 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11417 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11419 if (ASSEMBLER_DIALECT == ASM_ATT)
11421 fprintf (file, "0x%08lx", (long unsigned int) l);
11424 /* These float cases don't actually occur as immediate operands. */
11425 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11429 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11430 fprintf (file, "%s", dstr);
11433 else if (GET_CODE (x) == CONST_DOUBLE
11434 && GET_MODE (x) == XFmode)
11438 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11439 fprintf (file, "%s", dstr);
11444 /* We have patterns that allow zero sets of memory, for instance.
11445 In 64-bit mode, we should probably support all 8-byte vectors,
11446 since we can in fact encode that into an immediate. */
11447 if (GET_CODE (x) == CONST_VECTOR)
11449 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11455 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11457 if (ASSEMBLER_DIALECT == ASM_ATT)
11460 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11461 || GET_CODE (x) == LABEL_REF)
11463 if (ASSEMBLER_DIALECT == ASM_ATT)
11466 fputs ("OFFSET FLAT:", file);
11469 if (CONST_INT_P (x))
11470 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11472 output_pic_addr_const (file, x, code);
11474 output_addr_const (file, x);
11478 /* Print a memory operand whose address is ADDR. */
11481 print_operand_address (FILE *file, rtx addr)
11483 struct ix86_address parts;
11484 rtx base, index, disp;
11486 int ok = ix86_decompose_address (addr, &parts);
11491 index = parts.index;
11493 scale = parts.scale;
11501 if (ASSEMBLER_DIALECT == ASM_ATT)
11503 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11506 gcc_unreachable ();
11509 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11510 if (TARGET_64BIT && !base && !index)
11514 if (GET_CODE (disp) == CONST
11515 && GET_CODE (XEXP (disp, 0)) == PLUS
11516 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11517 symbol = XEXP (XEXP (disp, 0), 0);
11519 if (GET_CODE (symbol) == LABEL_REF
11520 || (GET_CODE (symbol) == SYMBOL_REF
11521 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11524 if (!base && !index)
11526 /* Displacement only requires special attention. */
11528 if (CONST_INT_P (disp))
11530 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11531 fputs ("ds:", file);
11532 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11535 output_pic_addr_const (file, disp, 0);
11537 output_addr_const (file, disp);
11541 if (ASSEMBLER_DIALECT == ASM_ATT)
11546 output_pic_addr_const (file, disp, 0);
11547 else if (GET_CODE (disp) == LABEL_REF)
11548 output_asm_label (disp);
11550 output_addr_const (file, disp);
11555 print_reg (base, 0, file);
11559 print_reg (index, 0, file);
11561 fprintf (file, ",%d", scale);
11567 rtx offset = NULL_RTX;
11571 /* Pull out the offset of a symbol; print any symbol itself. */
11572 if (GET_CODE (disp) == CONST
11573 && GET_CODE (XEXP (disp, 0)) == PLUS
11574 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11576 offset = XEXP (XEXP (disp, 0), 1);
11577 disp = gen_rtx_CONST (VOIDmode,
11578 XEXP (XEXP (disp, 0), 0));
11582 output_pic_addr_const (file, disp, 0);
11583 else if (GET_CODE (disp) == LABEL_REF)
11584 output_asm_label (disp);
11585 else if (CONST_INT_P (disp))
11588 output_addr_const (file, disp);
11594 print_reg (base, 0, file);
11597 if (INTVAL (offset) >= 0)
11599 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11603 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11610 print_reg (index, 0, file);
11612 fprintf (file, "*%d", scale);
11620 output_addr_const_extra (FILE *file, rtx x)
11624 if (GET_CODE (x) != UNSPEC)
11627 op = XVECEXP (x, 0, 0);
11628 switch (XINT (x, 1))
11630 case UNSPEC_GOTTPOFF:
11631 output_addr_const (file, op);
11632 /* FIXME: This might be @TPOFF in Sun ld. */
11633 fputs ("@GOTTPOFF", file);
11636 output_addr_const (file, op);
11637 fputs ("@TPOFF", file);
11639 case UNSPEC_NTPOFF:
11640 output_addr_const (file, op);
11642 fputs ("@TPOFF", file);
11644 fputs ("@NTPOFF", file);
11646 case UNSPEC_DTPOFF:
11647 output_addr_const (file, op);
11648 fputs ("@DTPOFF", file);
11650 case UNSPEC_GOTNTPOFF:
11651 output_addr_const (file, op);
11653 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11654 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11656 fputs ("@GOTNTPOFF", file);
11658 case UNSPEC_INDNTPOFF:
11659 output_addr_const (file, op);
11660 fputs ("@INDNTPOFF", file);
11663 case UNSPEC_MACHOPIC_OFFSET:
11664 output_addr_const (file, op);
11666 machopic_output_function_base_name (file);
11677 /* Split one or more DImode RTL references into pairs of SImode
11678 references. The RTL can be REG, offsettable MEM, integer constant, or
11679 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11680 split and "num" is its length. lo_half and hi_half are output arrays
11681 that parallel "operands". */
11684 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11688 rtx op = operands[num];
11690 /* simplify_subreg refuse to split volatile memory addresses,
11691 but we still have to handle it. */
11694 lo_half[num] = adjust_address (op, SImode, 0);
11695 hi_half[num] = adjust_address (op, SImode, 4);
11699 lo_half[num] = simplify_gen_subreg (SImode, op,
11700 GET_MODE (op) == VOIDmode
11701 ? DImode : GET_MODE (op), 0);
11702 hi_half[num] = simplify_gen_subreg (SImode, op,
11703 GET_MODE (op) == VOIDmode
11704 ? DImode : GET_MODE (op), 4);
11708 /* Split one or more TImode RTL references into pairs of DImode
11709 references. The RTL can be REG, offsettable MEM, integer constant, or
11710 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11711 split and "num" is its length. lo_half and hi_half are output arrays
11712 that parallel "operands". */
11715 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11719 rtx op = operands[num];
11721 /* simplify_subreg refuse to split volatile memory addresses, but we
11722 still have to handle it. */
11725 lo_half[num] = adjust_address (op, DImode, 0);
11726 hi_half[num] = adjust_address (op, DImode, 8);
11730 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11731 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11736 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11737 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11738 is the expression of the binary operation. The output may either be
11739 emitted here, or returned to the caller, like all output_* functions.
11741 There is no guarantee that the operands are the same mode, as they
11742 might be within FLOAT or FLOAT_EXTEND expressions. */
11744 #ifndef SYSV386_COMPAT
11745 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11746 wants to fix the assemblers because that causes incompatibility
11747 with gcc. No-one wants to fix gcc because that causes
11748 incompatibility with assemblers... You can use the option of
11749 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11750 #define SYSV386_COMPAT 1
11754 output_387_binary_op (rtx insn, rtx *operands)
11756 static char buf[40];
11759 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11761 #ifdef ENABLE_CHECKING
11762 /* Even if we do not want to check the inputs, this documents input
11763 constraints. Which helps in understanding the following code. */
11764 if (STACK_REG_P (operands[0])
11765 && ((REG_P (operands[1])
11766 && REGNO (operands[0]) == REGNO (operands[1])
11767 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11768 || (REG_P (operands[2])
11769 && REGNO (operands[0]) == REGNO (operands[2])
11770 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11771 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11774 gcc_assert (is_sse);
11777 switch (GET_CODE (operands[3]))
11780 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11781 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11789 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11790 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11798 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11799 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11807 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11808 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11816 gcc_unreachable ();
11823 strcpy (buf, ssep);
11824 if (GET_MODE (operands[0]) == SFmode)
11825 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11827 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11831 strcpy (buf, ssep + 1);
11832 if (GET_MODE (operands[0]) == SFmode)
11833 strcat (buf, "ss\t{%2, %0|%0, %2}");
11835 strcat (buf, "sd\t{%2, %0|%0, %2}");
11841 switch (GET_CODE (operands[3]))
11845 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11847 rtx temp = operands[2];
11848 operands[2] = operands[1];
11849 operands[1] = temp;
11852 /* know operands[0] == operands[1]. */
11854 if (MEM_P (operands[2]))
11860 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11862 if (STACK_TOP_P (operands[0]))
11863 /* How is it that we are storing to a dead operand[2]?
11864 Well, presumably operands[1] is dead too. We can't
11865 store the result to st(0) as st(0) gets popped on this
11866 instruction. Instead store to operands[2] (which I
11867 think has to be st(1)). st(1) will be popped later.
11868 gcc <= 2.8.1 didn't have this check and generated
11869 assembly code that the Unixware assembler rejected. */
11870 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11872 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11876 if (STACK_TOP_P (operands[0]))
11877 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11879 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11884 if (MEM_P (operands[1]))
11890 if (MEM_P (operands[2]))
11896 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11899 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11900 derived assemblers, confusingly reverse the direction of
11901 the operation for fsub{r} and fdiv{r} when the
11902 destination register is not st(0). The Intel assembler
11903 doesn't have this brain damage. Read !SYSV386_COMPAT to
11904 figure out what the hardware really does. */
11905 if (STACK_TOP_P (operands[0]))
11906 p = "{p\t%0, %2|rp\t%2, %0}";
11908 p = "{rp\t%2, %0|p\t%0, %2}";
11910 if (STACK_TOP_P (operands[0]))
11911 /* As above for fmul/fadd, we can't store to st(0). */
11912 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11914 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11919 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11922 if (STACK_TOP_P (operands[0]))
11923 p = "{rp\t%0, %1|p\t%1, %0}";
11925 p = "{p\t%1, %0|rp\t%0, %1}";
11927 if (STACK_TOP_P (operands[0]))
11928 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11930 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11935 if (STACK_TOP_P (operands[0]))
11937 if (STACK_TOP_P (operands[1]))
11938 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11940 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11943 else if (STACK_TOP_P (operands[1]))
11946 p = "{\t%1, %0|r\t%0, %1}";
11948 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11954 p = "{r\t%2, %0|\t%0, %2}";
11956 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11962 gcc_unreachable ();
11969 /* Return needed mode for entity in optimize_mode_switching pass. */
11972 ix86_mode_needed (int entity, rtx insn)
11974 enum attr_i387_cw mode;
11976 /* The mode UNINITIALIZED is used to store control word after a
11977 function call or ASM pattern. The mode ANY specify that function
11978 has no requirements on the control word and make no changes in the
11979 bits we are interested in. */
11982 || (NONJUMP_INSN_P (insn)
11983 && (asm_noperands (PATTERN (insn)) >= 0
11984 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11985 return I387_CW_UNINITIALIZED;
11987 if (recog_memoized (insn) < 0)
11988 return I387_CW_ANY;
11990 mode = get_attr_i387_cw (insn);
11995 if (mode == I387_CW_TRUNC)
12000 if (mode == I387_CW_FLOOR)
12005 if (mode == I387_CW_CEIL)
12010 if (mode == I387_CW_MASK_PM)
12015 gcc_unreachable ();
12018 return I387_CW_ANY;
12021 /* Output code to initialize control word copies used by trunc?f?i and
12022 rounding patterns. CURRENT_MODE is set to current control word,
12023 while NEW_MODE is set to new control word. */
12026 emit_i387_cw_initialization (int mode)
12028 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12031 enum ix86_stack_slot slot;
12033 rtx reg = gen_reg_rtx (HImode);
12035 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12036 emit_move_insn (reg, copy_rtx (stored_mode));
12038 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12039 || optimize_function_for_size_p (cfun))
12043 case I387_CW_TRUNC:
12044 /* round toward zero (truncate) */
12045 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12046 slot = SLOT_CW_TRUNC;
12049 case I387_CW_FLOOR:
12050 /* round down toward -oo */
12051 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12052 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12053 slot = SLOT_CW_FLOOR;
12057 /* round up toward +oo */
12058 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12059 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12060 slot = SLOT_CW_CEIL;
12063 case I387_CW_MASK_PM:
12064 /* mask precision exception for nearbyint() */
12065 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12066 slot = SLOT_CW_MASK_PM;
12070 gcc_unreachable ();
12077 case I387_CW_TRUNC:
12078 /* round toward zero (truncate) */
12079 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12080 slot = SLOT_CW_TRUNC;
12083 case I387_CW_FLOOR:
12084 /* round down toward -oo */
12085 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12086 slot = SLOT_CW_FLOOR;
12090 /* round up toward +oo */
12091 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12092 slot = SLOT_CW_CEIL;
12095 case I387_CW_MASK_PM:
12096 /* mask precision exception for nearbyint() */
12097 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12098 slot = SLOT_CW_MASK_PM;
12102 gcc_unreachable ();
12106 gcc_assert (slot < MAX_386_STACK_LOCALS);
12108 new_mode = assign_386_stack_local (HImode, slot);
12109 emit_move_insn (new_mode, reg);
12112 /* Output code for INSN to convert a float to a signed int. OPERANDS
12113 are the insn operands. The output may be [HSD]Imode and the input
12114 operand may be [SDX]Fmode. */
12117 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12119 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12120 int dimode_p = GET_MODE (operands[0]) == DImode;
12121 int round_mode = get_attr_i387_cw (insn);
12123 /* Jump through a hoop or two for DImode, since the hardware has no
12124 non-popping instruction. We used to do this a different way, but
12125 that was somewhat fragile and broke with post-reload splitters. */
12126 if ((dimode_p || fisttp) && !stack_top_dies)
12127 output_asm_insn ("fld\t%y1", operands);
12129 gcc_assert (STACK_TOP_P (operands[1]));
12130 gcc_assert (MEM_P (operands[0]));
12131 gcc_assert (GET_MODE (operands[1]) != TFmode);
12134 output_asm_insn ("fisttp%Z0\t%0", operands);
12137 if (round_mode != I387_CW_ANY)
12138 output_asm_insn ("fldcw\t%3", operands);
12139 if (stack_top_dies || dimode_p)
12140 output_asm_insn ("fistp%Z0\t%0", operands);
12142 output_asm_insn ("fist%Z0\t%0", operands);
12143 if (round_mode != I387_CW_ANY)
12144 output_asm_insn ("fldcw\t%2", operands);
12150 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12151 have the values zero or one, indicates the ffreep insn's operand
12152 from the OPERANDS array. */
12154 static const char *
12155 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12157 if (TARGET_USE_FFREEP)
12158 #if HAVE_AS_IX86_FFREEP
12159 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
12162 static char retval[] = ".word\t0xc_df";
12163 int regno = REGNO (operands[opno]);
12165 gcc_assert (FP_REGNO_P (regno));
12167 retval[9] = '0' + (regno - FIRST_STACK_REG);
12172 return opno ? "fstp\t%y1" : "fstp\t%y0";
12176 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12177 should be used. UNORDERED_P is true when fucom should be used. */
12180 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12182 int stack_top_dies;
12183 rtx cmp_op0, cmp_op1;
12184 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12188 cmp_op0 = operands[0];
12189 cmp_op1 = operands[1];
12193 cmp_op0 = operands[1];
12194 cmp_op1 = operands[2];
12199 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12200 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12201 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12202 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12204 if (GET_MODE (operands[0]) == SFmode)
12206 return &ucomiss[TARGET_AVX ? 0 : 1];
12208 return &comiss[TARGET_AVX ? 0 : 1];
12211 return &ucomisd[TARGET_AVX ? 0 : 1];
12213 return &comisd[TARGET_AVX ? 0 : 1];
12216 gcc_assert (STACK_TOP_P (cmp_op0));
12218 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12220 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12222 if (stack_top_dies)
12224 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12225 return output_387_ffreep (operands, 1);
12228 return "ftst\n\tfnstsw\t%0";
12231 if (STACK_REG_P (cmp_op1)
12233 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12234 && REGNO (cmp_op1) != FIRST_STACK_REG)
12236 /* If both the top of the 387 stack dies, and the other operand
12237 is also a stack register that dies, then this must be a
12238 `fcompp' float compare */
12242 /* There is no double popping fcomi variant. Fortunately,
12243 eflags is immune from the fstp's cc clobbering. */
12245 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12247 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12248 return output_387_ffreep (operands, 0);
12253 return "fucompp\n\tfnstsw\t%0";
12255 return "fcompp\n\tfnstsw\t%0";
12260 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12262 static const char * const alt[16] =
12264 "fcom%Z2\t%y2\n\tfnstsw\t%0",
12265 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12266 "fucom%Z2\t%y2\n\tfnstsw\t%0",
12267 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12269 "ficom%Z2\t%y2\n\tfnstsw\t%0",
12270 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12274 "fcomi\t{%y1, %0|%0, %y1}",
12275 "fcomip\t{%y1, %0|%0, %y1}",
12276 "fucomi\t{%y1, %0|%0, %y1}",
12277 "fucomip\t{%y1, %0|%0, %y1}",
12288 mask = eflags_p << 3;
12289 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12290 mask |= unordered_p << 1;
12291 mask |= stack_top_dies;
12293 gcc_assert (mask < 16);
12302 ix86_output_addr_vec_elt (FILE *file, int value)
12304 const char *directive = ASM_LONG;
12308 directive = ASM_QUAD;
12310 gcc_assert (!TARGET_64BIT);
12313 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
12317 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12319 const char *directive = ASM_LONG;
12322 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12323 directive = ASM_QUAD;
12325 gcc_assert (!TARGET_64BIT);
12327 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12328 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12329 fprintf (file, "%s%s%d-%s%d\n",
12330 directive, LPREFIX, value, LPREFIX, rel);
12331 else if (HAVE_AS_GOTOFF_IN_DATA)
12332 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12334 else if (TARGET_MACHO)
12336 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12337 machopic_output_function_base_name (file);
12338 fprintf(file, "\n");
12342 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12343 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12346 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12350 ix86_expand_clear (rtx dest)
12354 /* We play register width games, which are only valid after reload. */
12355 gcc_assert (reload_completed);
12357 /* Avoid HImode and its attendant prefix byte. */
12358 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12359 dest = gen_rtx_REG (SImode, REGNO (dest));
/* Base pattern: (set dest 0).  */
12360 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12362 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12363 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
/* XOR form clobbers the flags register, so wrap the set together
   with an explicit flags clobber in a PARALLEL.  */
12365 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12366 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12372 /* X is an unchanging MEM.  If it is a constant pool reference, return
12373 the constant pool rtx, else NULL. */
12376 maybe_get_pool_constant (rtx x)
/* Strip PIC/GOT wrapping from the MEM's address first.  */
12378 x = ix86_delegitimize_address (XEXP (x, 0));
12380 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12381 return get_pool_constant (x);
/* Expand a scalar move of mode MODE from operands[1] to operands[0],
   legitimizing TLS, dllimport, and PIC references and forcing awkward
   constants into registers or the constant pool as needed.
   NOTE(review): many interior lines (braces, some conditions) are
   elided in this copy; comments describe only the visible logic.  */
12387 ix86_expand_move (enum machine_mode mode, rtx operands[])
12390 enum tls_model model;
/* Bare SYMBOL_REF source: may need TLS or dllimport legitimization.  */
12395 if (GET_CODE (op1) == SYMBOL_REF)
12397 model = SYMBOL_REF_TLS_MODEL (op1);
12400 op1 = legitimize_tls_address (op1, model, true);
12401 op1 = force_operand (op1, op0);
12405 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12406 && SYMBOL_REF_DLLIMPORT_P (op1))
12407 op1 = legitimize_dllimport_symbol (op1, false);
/* (const (plus (symbol_ref ...) addend)): legitimize the symbol part,
   then re-add the addend.  */
12409 else if (GET_CODE (op1) == CONST
12410 && GET_CODE (XEXP (op1, 0)) == PLUS
12411 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12413 rtx addend = XEXP (XEXP (op1, 0), 1);
12414 rtx symbol = XEXP (XEXP (op1, 0), 0);
12417 model = SYMBOL_REF_TLS_MODEL (symbol);
12419 tmp = legitimize_tls_address (symbol, model, true);
12420 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12421 && SYMBOL_REF_DLLIMPORT_P (symbol))
12422 tmp = legitimize_dllimport_symbol (symbol, true);
12426 tmp = force_operand (tmp, NULL);
12427 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12428 op0, 1, OPTAB_DIRECT);
/* PIC: symbolic addresses must go through the PIC machinery.  */
12434 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12436 if (TARGET_MACHO && !TARGET_64BIT)
12441 rtx temp = ((reload_in_progress
12442 || ((op0 && REG_P (op0))
12444 ? op0 : gen_reg_rtx (Pmode));
12445 op1 = machopic_indirect_data_reference (op1, temp);
12446 op1 = machopic_legitimize_pic_address (op1, mode,
12447 temp == op1 ? 0 : temp);
12449 else if (MACHOPIC_INDIRECT)
12450 op1 = machopic_indirect_data_reference (op1, 0);
12458 op1 = force_reg (Pmode, op1);
12459 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
/* After reload we cannot make a pseudo; reuse op0 as scratch.  */
12461 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12462 op1 = legitimize_pic_address (op1, reg);
12471 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12472 || !push_operand (op0, mode))
12474 op1 = force_reg (mode, op1);
/* Pushes only accept a restricted operand class.  */
12476 if (push_operand (op0, mode)
12477 && ! general_no_elim_operand (op1, mode))
12478 op1 = copy_to_mode_reg (mode, op1);
12480 /* Force large constants in 64bit compilation into register
12481 to get them CSEed. */
12482 if (can_create_pseudo_p ()
12483 && (mode == DImode) && TARGET_64BIT
12484 && immediate_operand (op1, mode)
12485 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12486 && !register_operand (op0, mode)
12488 op1 = copy_to_mode_reg (mode, op1);
12490 if (can_create_pseudo_p ()
12491 && FLOAT_MODE_P (mode)
12492 && GET_CODE (op1) == CONST_DOUBLE)
12494 /* If we are loading a floating point constant to a register,
12495 force the value to memory now, since we'll get better code
12496 out the back end. */
12498 op1 = validize_mem (force_const_mem (mode, op1));
12499 if (!register_operand (op0, mode))
/* mem->mem is not a valid move; go through a temporary register.  */
12501 rtx temp = gen_reg_rtx (mode);
12502 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12503 emit_move_insn (op0, temp);
/* Finally emit the (possibly rewritten) move.  */
12509 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move, forcing non-trivial constants to memory
   and routing under-aligned SSE operands through the misaligned-move
   expander.  NOTE(review): some interior lines are elided in this
   copy.  */
12513 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12515 rtx op0 = operands[0], op1 = operands[1];
12516 unsigned int align = GET_MODE_ALIGNMENT (mode);
12518 /* Force constants other than zero into memory.  We do not know how
12519 the instructions used to build constants modify the upper 64 bits
12520 of the register, once we have that information we may be able
12521 to handle some of them more efficiently. */
12522 if (can_create_pseudo_p ()
12523 && register_operand (op0, mode)
12524 && (CONSTANT_P (op1)
12525 || (GET_CODE (op1) == SUBREG
12526 && CONSTANT_P (SUBREG_REG (op1))))
12527 && standard_sse_constant_p (op1) <= 0)
12528 op1 = validize_mem (force_const_mem (mode, op1));
12530 /* We need to check memory alignment for SSE mode since attribute
12531 can make operands unaligned. */
12532 if (can_create_pseudo_p ()
12533 && SSE_REG_MODE_P (mode)
12534 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12535 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12539 /* ix86_expand_vector_move_misalign() does not like constants ... */
12540 if (CONSTANT_P (op1)
12541 || (GET_CODE (op1) == SUBREG
12542 && CONSTANT_P (SUBREG_REG (op1))))
12543 op1 = validize_mem (force_const_mem (mode, op1));
12545 /* ... nor both arguments in memory. */
12546 if (!register_operand (op0, mode)
12547 && !register_operand (op1, mode))
12548 op1 = force_reg (mode, op1);
12550 tmp[0] = op0; tmp[1] = op1;
12551 ix86_expand_vector_move_misalign (mode, tmp);
12555 /* Make operand1 a register if it isn't already. */
12556 if (can_create_pseudo_p ()
12557 && !register_operand (op0, mode)
12558 && !register_operand (op1, mode))
12560 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
/* Aligned (or post-reload) case: emit the move directly.  */
12564 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12567 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
12568 straight to ix86_expand_vector_move. */
12569 /* Code generation for scalar reg-reg moves of single and double precision data:
12570 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12574 if (x86_sse_partial_reg_dependency == true)
12579 Code generation for scalar loads of double precision data:
12580 if (x86_sse_split_regs == true)
12581 movlpd mem, reg (gas syntax)
12585 Code generation for unaligned packed loads of single precision data
12586 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12587 if (x86_sse_unaligned_move_optimal)
12590 if (x86_sse_partial_reg_dependency == true)
12602 Code generation for unaligned packed loads of double precision data
12603 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12604 if (x86_sse_unaligned_move_optimal)
12607 if (x86_sse_split_regs == true)
/* Emit an unaligned vector move of mode MODE.  The AVX path uses the
   vmovdqu/vmovups/vmovupd family; the SSE paths below pick among
   movups/movdqu/movlpd+movhpd per the tuning flags documented above.
   NOTE(review): many interior lines (braces, case labels, some
   conditions) are elided in this copy.  */
12620 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
12629 switch (GET_MODE_CLASS (mode))
12631 case MODE_VECTOR_INT:
/* AVX integer vectors: unaligned full-width loads/stores by size.  */
12633 switch (GET_MODE_SIZE (mode))
12636 op0 = gen_lowpart (V16QImode, op0);
12637 op1 = gen_lowpart (V16QImode, op1);
12638 emit_insn (gen_avx_movdqu (op0, op1));
12641 op0 = gen_lowpart (V32QImode, op0);
12642 op1 = gen_lowpart (V32QImode, op1);
12643 emit_insn (gen_avx_movdqu256 (op0, op1));
12646 gcc_unreachable ();
12649 case MODE_VECTOR_FLOAT:
12650 op0 = gen_lowpart (mode, op0);
12651 op1 = gen_lowpart (mode, op1);
/* AVX float vectors, by element width and vector size.  */
12656 emit_insn (gen_avx_movups (op0, op1));
12659 emit_insn (gen_avx_movups256 (op0, op1));
12662 emit_insn (gen_avx_movupd (op0, op1));
12665 emit_insn (gen_avx_movupd256 (op0, op1));
12668 gcc_unreachable ();
12673 gcc_unreachable ();
/* Non-AVX SSE path: unaligned load (op1 in memory).  */
12681 /* If we're optimizing for size, movups is the smallest. */
12682 if (optimize_insn_for_size_p ())
12684 op0 = gen_lowpart (V4SFmode, op0);
12685 op1 = gen_lowpart (V4SFmode, op1);
12686 emit_insn (gen_sse_movups (op0, op1));
12690 /* ??? If we have typed data, then it would appear that using
12691 movdqu is the only way to get unaligned data loaded with
12693 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12695 op0 = gen_lowpart (V16QImode, op0);
12696 op1 = gen_lowpart (V16QImode, op1);
12697 emit_insn (gen_sse2_movdqu (op0, op1));
12701 if (TARGET_SSE2 && mode == V2DFmode)
12705 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12707 op0 = gen_lowpart (V2DFmode, op0);
12708 op1 = gen_lowpart (V2DFmode, op1);
12709 emit_insn (gen_sse2_movupd (op0, op1));
12713 /* When SSE registers are split into halves, we can avoid
12714 writing to the top half twice. */
12715 if (TARGET_SSE_SPLIT_REGS)
12717 emit_clobber (op0);
12722 /* ??? Not sure about the best option for the Intel chips.
12723 The following would seem to satisfy; the register is
12724 entirely cleared, breaking the dependency chain.  We
12725 then store to the upper half, with a dependency depth
12726 of one.  A rumor has it that Intel recommends two movsd
12727 followed by an unpacklpd, but this is unconfirmed.  And
12728 given that the dependency depth of the unpacklpd would
12729 still be one, I'm not sure why this would be better. */
12730 zero = CONST0_RTX (V2DFmode);
/* Load the two DFmode halves separately: low via loadlpd (zeroing
   the rest), then high via loadhpd.  */
12733 m = adjust_address (op1, DFmode, 0);
12734 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12735 m = adjust_address (op1, DFmode, 8);
12736 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12740 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12742 op0 = gen_lowpart (V4SFmode, op0);
12743 op1 = gen_lowpart (V4SFmode, op1);
12744 emit_insn (gen_sse_movups (op0, op1));
/* Break the false dependency on the destination before the split
   halfword loads: clear it or just clobber it.  */
12748 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12749 emit_move_insn (op0, CONST0_RTX (mode));
12751 emit_clobber (op0);
12753 if (mode != V4SFmode)
12754 op0 = gen_lowpart (V4SFmode, op0);
12755 m = adjust_address (op1, V2SFmode, 0);
12756 emit_insn (gen_sse_loadlps (op0, op0, m));
12757 m = adjust_address (op1, V2SFmode, 8);
12758 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Unaligned store (op0 in memory).  */
12761 else if (MEM_P (op0))
12763 /* If we're optimizing for size, movups is the smallest. */
12764 if (optimize_insn_for_size_p ())
12766 op0 = gen_lowpart (V4SFmode, op0);
12767 op1 = gen_lowpart (V4SFmode, op1);
12768 emit_insn (gen_sse_movups (op0, op1));
12772 /* ??? Similar to above, only less clear because of quote
12773 typeless stores unquote. */
12774 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12775 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12777 op0 = gen_lowpart (V16QImode, op0);
12778 op1 = gen_lowpart (V16QImode, op1);
12779 emit_insn (gen_sse2_movdqu (op0, op1));
12783 if (TARGET_SSE2 && mode == V2DFmode)
/* Store the two DFmode halves separately.  */
12785 m = adjust_address (op0, DFmode, 0);
12786 emit_insn (gen_sse2_storelpd (m, op1));
12787 m = adjust_address (op0, DFmode, 8);
12788 emit_insn (gen_sse2_storehpd (m, op1));
12792 if (mode != V4SFmode)
12793 op1 = gen_lowpart (V4SFmode, op1);
12794 m = adjust_address (op0, V2SFmode, 0);
12795 emit_insn (gen_sse_storelps (m, op1));
12796 m = adjust_address (op0, V2SFmode, 8);
12797 emit_insn (gen_sse_storehps (m, op1));
12801 gcc_unreachable ();
12804 /* Expand a push in MODE.  This is some mode for which we do not support
12805 proper push instructions, at least from the registers that we expect
12806 the value to live in. */
12809 ix86_expand_push (enum machine_mode mode, rtx x)
/* Manually adjust the stack pointer down by the operand size ...  */
12813 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12814 GEN_INT (-GET_MODE_SIZE (mode)),
12815 stack_pointer_rtx, 1, OPTAB_DIRECT);
12816 if (tmp != stack_pointer_rtx)
12817 emit_move_insn (stack_pointer_rtx, tmp);
/* ... then store X at the new stack top.  */
12819 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12821 /* When we push an operand onto stack, it has to be aligned at least
12822 at the function argument boundary.  However since we don't have
12823 the argument type, we can't determine the actual argument
12825 emit_move_insn (tmp, x);
12828 /* Helper function of ix86_fixup_binary_operands to canonicalize
12829 operand order.  Returns true if the operands should be swapped. */
12832 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12835 rtx dst = operands[0];
12836 rtx src1 = operands[1];
12837 rtx src2 = operands[2];
12839 /* If the operation is not commutative, we can't do anything. */
12840 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12843 /* Highest priority is that src1 should match dst. */
12844 if (rtx_equal_p (dst, src1))
12846 if (rtx_equal_p (dst, src2))
12849 /* Next highest priority is that immediate constants come second. */
12850 if (immediate_operand (src2, mode))
12852 if (immediate_operand (src1, mode))
12855 /* Lowest priority is that memory references should come second. */
12865 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
12866 destination to use for the operation.  If different from the true
12867 destination in operands[0], a copy operation will be required. */
12870 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12873 rtx dst = operands[0];
12874 rtx src1 = operands[1];
12875 rtx src2 = operands[2];
12877 /* Canonicalize operand order. */
12878 if (ix86_swap_binary_operands_p (code, mode, operands))
12882 /* It is invalid to swap operands of different modes. */
12883 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12890 /* Both source operands cannot be in memory. */
12891 if (MEM_P (src1) && MEM_P (src2))
12893 /* Optimization: Only read from memory once. */
12894 if (rtx_equal_p (src1, src2))
12896 src2 = force_reg (mode, src2);
12900 src2 = force_reg (mode, src2);
12903 /* If the destination is memory, and we do not have matching source
12904 operands, do things in registers. */
12905 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12906 dst = gen_reg_rtx (mode);
12908 /* Source 1 cannot be a constant. */
12909 if (CONSTANT_P (src1))
12910 src1 = force_reg (mode, src1);
12912 /* Source 1 cannot be a non-matching memory. */
12913 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12914 src1 = force_reg (mode, src1);
/* Write back the possibly-rewritten sources; DST is returned.  */
12916 operands[1] = src1;
12917 operands[2] = src2;
12921 /* Similarly, but assume that the destination has already been
12922 set up properly. */
12925 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12926 enum machine_mode mode, rtx operands[])
12928 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
/* Caller guarantees no copy is needed, so the fixed-up destination
   must be operands[0] itself.  */
12929 gcc_assert (dst == operands[0]);
12932 /* Attempt to expand a binary operator.  Make the expansion closer to the
12933 actual machine, then just general_operand, which will allow 3 separate
12934 memory references (one output, two input) in a single insn. */
12937 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12940 rtx src1, src2, dst, op, clob;
12942 dst = ix86_fixup_binary_operands (code, mode, operands);
12943 src1 = operands[1];
12944 src2 = operands[2];
12946 /* Emit the instruction. */
12948 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12949 if (reload_in_progress)
12951 /* Reload doesn't know about the flags register, and doesn't know that
12952 it doesn't want to clobber it.  We can only do this with PLUS. */
12953 gcc_assert (code == PLUS);
/* Normal case: attach an explicit flags clobber to the arithmetic.  */
12958 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12959 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12962 /* Fix up the destination if needed. */
12963 if (dst != operands[0])
12964 emit_move_insn (operands[0], dst);
12967 /* Return TRUE or FALSE depending on whether the binary operator meets the
12968 appropriate constraints. */
12971 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12974 rtx dst = operands[0];
12975 rtx src1 = operands[1];
12976 rtx src2 = operands[2];
12978 /* Both source operands cannot be in memory. */
12979 if (MEM_P (src1) && MEM_P (src2))
12982 /* Canonicalize operand order for commutative operators. */
12983 if (ix86_swap_binary_operands_p (code, mode, operands))
12990 /* If the destination is memory, we must have a matching source operand. */
12991 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12994 /* Source 1 cannot be a constant. */
12995 if (CONSTANT_P (src1))
12998 /* Source 1 cannot be a non-matching memory. */
12999 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13005 /* Attempt to expand a unary operator.  Make the expansion closer to the
13006 actual machine, then just general_operand, which will allow 2 separate
13007 memory references (one output, one input) in a single insn. */
13010 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13013 int matching_memory;
13014 rtx src, dst, op, clob;
13019 /* If the destination is memory, and we do not have matching source
13020 operands, do things in registers. */
13021 matching_memory = 0;
13024 if (rtx_equal_p (dst, src))
13025 matching_memory = 1;
13027 dst = gen_reg_rtx (mode);
13030 /* When source operand is memory, destination must match. */
13031 if (MEM_P (src) && !matching_memory)
13032 src = force_reg (mode, src);
13034 /* Emit the instruction. */
13036 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13037 if (reload_in_progress || code == NOT)
13039 /* Reload doesn't know about the flags register, and doesn't know that
13040 it doesn't want to clobber it. */
13041 gcc_assert (code == NOT);
/* Other unary codes (e.g. NEG) clobber the flags explicitly.  */
13046 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13047 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13050 /* Fix up the destination if needed. */
13051 if (dst != operands[0])
13052 emit_move_insn (operands[0], dst);
13055 #define LEA_SEARCH_THRESHOLD 12
13057 /* Search backward for non-agu definition of register number REGNO1
13058 or register number REGNO2 in INSN's basic block until
13059 1. Pass LEA_SEARCH_THRESHOLD instructions, or
13060 2. Reach BB boundary, or
13061 3. Reach agu definition.
13062 Returns the distance between the non-agu definition point and INSN.
13063 If no definition point, returns -1. */
13066 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13069 basic_block bb = BLOCK_FOR_INSN (insn);
13072 enum attr_type insn_type;
/* Phase 1: walk backward within the current basic block.  */
13074 if (insn != BB_HEAD (bb))
13076 rtx prev = PREV_INSN (insn);
13077 while (prev && distance < LEA_SEARCH_THRESHOLD)
/* Scan PREV's DF definitions for either register of interest.  */
13082 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13083 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13084 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13085 && (regno1 == DF_REF_REGNO (*def_rec)
13086 || regno2 == DF_REF_REGNO (*def_rec)))
13088 insn_type = get_attr_type (prev);
13089 if (insn_type != TYPE_LEA)
13093 if (prev == BB_HEAD (bb))
13095 prev = PREV_INSN (prev);
/* Phase 2: if the threshold was not exhausted, continue through a
   simple-loop predecessor (a block that loops back to BB).  */
13099 if (distance < LEA_SEARCH_THRESHOLD)
13103 bool simple_loop = false;
13105 FOR_EACH_EDGE (e, ei, bb->preds)
13108 simple_loop = true;
13114 rtx prev = BB_END (bb);
13117 && distance < LEA_SEARCH_THRESHOLD)
13122 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13123 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13124 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13125 && (regno1 == DF_REF_REGNO (*def_rec)
13126 || regno2 == DF_REF_REGNO (*def_rec)))
13128 insn_type = get_attr_type (prev);
13129 if (insn_type != TYPE_LEA)
13133 prev = PREV_INSN (prev);
13141 /* get_attr_type may modify recog data.  We want to make sure
13142 that recog data is valid for instruction INSN, on which
13143 distance_non_agu_define is called.  INSN is unchanged here. */
13144 extract_insn_cached (insn);
13148 /* Return the distance between INSN and the next insn that uses
13149 register number REGNO0 in memory address.  Return -1 if no such
13150 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
13153 distance_agu_use (unsigned int regno0, rtx insn)
13155 basic_block bb = BLOCK_FOR_INSN (insn);
/* Phase 1: walk forward within the current basic block.  */
13160 if (insn != BB_END (bb))
13162 rtx next = NEXT_INSN (insn);
13163 while (next && distance < LEA_SEARCH_THRESHOLD)
/* A memory-address use of REGNO0 ends the search successfully.  */
13169 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13170 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13171 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13172 && regno0 == DF_REF_REGNO (*use_rec))
13174 /* Return DISTANCE if OP0 is used in memory
13175 address in NEXT. */
/* A redefinition of REGNO0 ends the search with failure.  */
13179 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13180 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13181 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13182 && regno0 == DF_REF_REGNO (*def_rec))
13184 /* Return -1 if OP0 is set in NEXT. */
13188 if (next == BB_END (bb))
13190 next = NEXT_INSN (next);
/* Phase 2: continue through a simple-loop successor (a block that
   loops back to BB), mirroring distance_non_agu_define.  */
13194 if (distance < LEA_SEARCH_THRESHOLD)
13198 bool simple_loop = false;
13200 FOR_EACH_EDGE (e, ei, bb->succs)
13203 simple_loop = true;
13209 rtx next = BB_HEAD (bb);
13212 && distance < LEA_SEARCH_THRESHOLD)
13218 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13219 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13220 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13221 && regno0 == DF_REF_REGNO (*use_rec))
13223 /* Return DISTANCE if OP0 is used in memory
13224 address in NEXT. */
13228 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13229 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13230 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13231 && regno0 == DF_REF_REGNO (*def_rec))
13233 /* Return -1 if OP0 is set in NEXT. */
13238 next = NEXT_INSN (next);
13246 /* Define this macro to tune LEA priority vs ADD, it take effect when
13247 there is a dilemma of choicing LEA or ADD
13248 Negative value: ADD is more preferred than LEA
13250 Positive value: LEA is more preferred than ADD*/
13251 #define IX86_LEA_PRIORITY 2
13253 /* Return true if it is ok to optimize an ADD operation to LEA
13254 operation to avoid flag register consumation.  For the processors
13255 like ATOM, if the destination register of LEA holds an actual
13256 address which will be used soon, LEA is better and otherwise ADD
13260 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13261 rtx insn, rtx operands[])
13263 unsigned int regno0 = true_regnum (operands[0]);
13264 unsigned int regno1 = true_regnum (operands[1]);
13265 unsigned int regno2;
/* Without AGU tuning (or when optimizing for size) LEA is only
   needed when the destination differs from the first source.  */
13267 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13268 return regno0 != regno1;
13270 regno2 = true_regnum (operands[2]);
13272 /* If a = b + c, (a!=b && a!=c), must use lea form. */
13273 if (regno0 != regno1 && regno0 != regno2)
13277 int dist_define, dist_use;
13278 dist_define = distance_non_agu_define (regno1, regno2, insn);
13279 if (dist_define <= 0)
13282 /* If this insn has both backward non-agu dependence and forward
13283 agu dependence, the one with short distance take effect. */
13284 dist_use = distance_agu_use (regno0, insn);
13286 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13293 /* Return true if destination reg of SET_BODY is shift count of
13297 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13303 /* Retrieve destination of SET_BODY. */
13304 switch (GET_CODE (set_body))
13307 set_dest = SET_DEST (set_body);
13308 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: recurse over each element of SET_BODY.  */
13312 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13313 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13321 /* Retrieve shift count of USE_BODY. */
13322 switch (GET_CODE (use_body))
13325 shift_rtx = XEXP (use_body, 1);
/* PARALLEL: recurse over each element of USE_BODY.  */
13328 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13329 if (ix86_dep_by_shift_count_body (set_body,
13330 XVECEXP (use_body, 0, i)))
/* Only shift/rotate codes have a meaningful count operand.  */
13338 && (GET_CODE (shift_rtx) == ASHIFT
13339 || GET_CODE (shift_rtx) == LSHIFTRT
13340 || GET_CODE (shift_rtx) == ASHIFTRT
13341 || GET_CODE (shift_rtx) == ROTATE
13342 || GET_CODE (shift_rtx) == ROTATERT))
13344 rtx shift_count = XEXP (shift_rtx, 1);
13346 /* Return true if shift count is dest of SET_BODY. */
13347 if (REG_P (shift_count)
13348 && true_regnum (set_dest) == true_regnum (shift_count))
13355 /* Return true if destination reg of SET_INSN is shift count of
/* Thin wrapper: compare the two insns' PATTERNs via the _body helper.  */
13359 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13361 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13362 PATTERN (use_insn));
13365 /* Return TRUE or FALSE depending on whether the unary operator meets the
13366 appropriate constraints. */
13369 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13370 enum machine_mode mode ATTRIBUTE_UNUSED,
13371 rtx operands[2] ATTRIBUTE_UNUSED)
13373 /* If one of operands is memory, source and destination must match. */
13374 if ((MEM_P (operands[0])
13375 || MEM_P (operands[1]))
13376 && ! rtx_equal_p (operands[0], operands[1]))
13381 /* Post-reload splitter for converting an SF or DFmode value in an
13382 SSE register into an unsigned SImode. */
13385 ix86_split_convert_uns_si_sse (rtx operands[])
13387 enum machine_mode vecmode;
13388 rtx value, large, zero_or_two31, input, two31, x;
13390 large = operands[1];
13391 zero_or_two31 = operands[2];
13392 input = operands[3];
13393 two31 = operands[4];
13394 vecmode = GET_MODE (large);
13395 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13397 /* Load up the value into the low element.  We must ensure that the other
13398 elements are valid floats -- zero is the easiest such value. */
13401 if (vecmode == V4SFmode)
13402 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13404 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Alternate path: INPUT is already an SSE register; clear VALUE and
   merge the scalar in via movss/movsd.  */
13408 input = gen_rtx_REG (vecmode, REGNO (input));
13409 emit_move_insn (value, CONST0_RTX (vecmode));
13410 if (vecmode == V4SFmode)
13411 emit_insn (gen_sse_movss (value, value, input));
13413 emit_insn (gen_sse2_movsd (value, value, input));
13416 emit_move_insn (large, two31);
13417 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (2**31 <= value) as a mask ...  */
13419 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13420 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* ... zero_or_two31 = mask ? 2**31 : 0 ...  */
13422 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13423 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
/* ... value -= zero_or_two31, bringing it into signed range.  */
13425 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13426 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the FP mask into an integer 0x80000000 per lane.  */
13428 large = gen_rtx_REG (V4SImode, REGNO (large));
13429 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
/* Signed truncation to int ...  */
13431 x = gen_rtx_REG (V4SImode, REGNO (value));
13432 if (vecmode == V4SFmode)
13433 emit_insn (gen_sse2_cvttps2dq (x, value));
13435 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* ... and XOR the sign bit back in for values that were >= 2**31.  */
13438 emit_insn (gen_xorv4si3 (value, value, large));
13441 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13442 Expects the 64-bit DImode to be supplied in a pair of integral
13443 registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
13444 -mfpmath=sse, !optimize_size only. */
13447 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13449 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13450 rtx int_xmm, fp_xmm;
13451 rtx biases, exponents;
/* Get the 64-bit input into the low half of an XMM register, by
   whichever path the target tuning allows.  */
13454 int_xmm = gen_reg_rtx (V4SImode);
13455 if (TARGET_INTER_UNIT_MOVES)
13456 emit_insn (gen_movdi_to_sse (int_xmm, input));
13457 else if (TARGET_SSE_SPLIT_REGS)
13459 emit_clobber (int_xmm);
13460 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13464 x = gen_reg_rtx (V2DImode);
13465 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13466 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Constant vector of exponent words used to synthesize the biased
   doubles described in the comment below.  */
13469 x = gen_rtx_CONST_VECTOR (V4SImode,
13470 gen_rtvec (4, GEN_INT (0x43300000UL),
13471 GEN_INT (0x45300000UL),
13472 const0_rtx, const0_rtx));
13473 exponents = validize_mem (force_const_mem (V4SImode, x));
13475 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13476 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13478 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13479 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13480 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13481 (0x1.0p84 + double(fp_value_hi_xmm)).
13482 Note these exponents differ by 32. */
13484 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13486 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13487 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13488 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13489 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13490 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13491 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13492 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13493 biases = validize_mem (force_const_mem (V2DFmode, biases));
13494 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13496 /* Add the upper and lower DFmode values together. */
/* SSE3 horizontal add does it in one insn; otherwise unpack the
   high half and add.  */
13498 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13501 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13502 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13503 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
/* Extract the scalar result from lane 0.  */
13506 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13509 /* Not used, but eases macroization of patterns. */
13511 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13512 rtx input ATTRIBUTE_UNUSED)
/* Stub only: never reached at run time.  */
13514 gcc_unreachable ();
13517 /* Convert an unsigned SImode value into a DFmode.  Only currently used
13518 for SSE, but applicable anywhere. */
13521 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13523 REAL_VALUE_TYPE TWO31r;
/* Bias the input by -2**31 so it fits in signed SImode ...  */
13526 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13527 NULL, 1, OPTAB_DIRECT);
/* ... convert with the signed instruction ...  */
13529 fp = gen_reg_rtx (DFmode);
13530 emit_insn (gen_floatsidf2 (fp, x));
/* ... and add 2**31 back in DFmode, where it is exact.  */
13532 real_ldexp (&TWO31r, &dconst1, 31);
13533 x = const_double_from_real_value (TWO31r, DFmode);
13535 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13537 emit_move_insn (target, x);
13540 /* Convert a signed DImode value into a DFmode.  Only used for SSE in
13541 32-bit mode; otherwise we have a direct convert instruction. */
13544 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13546 REAL_VALUE_TYPE TWO32r;
13547 rtx fp_lo, fp_hi, x;
13549 fp_lo = gen_reg_rtx (DFmode);
13550 fp_hi = gen_reg_rtx (DFmode);
/* Signed-convert the high 32 bits and scale by 2**32 ...  */
13552 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13554 real_ldexp (&TWO32r, &dconst1, 32);
13555 x = const_double_from_real_value (TWO32r, DFmode);
13556 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* ... unsigned-convert the low 32 bits ...  */
13558 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
/* ... and sum the two halves.  */
13560 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13563 emit_move_insn (target, x);
13566 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13567 For x86_32, -mfpmath=sse, !optimize_size only. */
13569 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13571 REAL_VALUE_TYPE ONE16r;
13572 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* Split the 32-bit input into 16-bit halves, each of which converts
   exactly through the signed path, then recombine as
   fp_hi * 2**16 + fp_lo.  */
13574 real_ldexp (&ONE16r, &dconst1, 16);
13575 x = const_double_from_real_value (ONE16r, SFmode);
13576 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13577 NULL, 0, OPTAB_DIRECT);
13578 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13579 NULL, 0, OPTAB_DIRECT);
13580 fp_hi = gen_reg_rtx (SFmode);
13581 fp_lo = gen_reg_rtx (SFmode);
13582 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13583 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13584 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13586 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13588 if (!rtx_equal_p (target, fp_hi))
13589 emit_move_insn (target, fp_hi);
13592 /* A subroutine of ix86_build_signbit_mask.  If VECT is true,
13593 then replicate the value for all elements of the vector
/* Build a CONST_VECTOR for MODE holding VALUE.  With VECT false,
   only element 0 is VALUE and the rest are zero (float modes).
   NOTE(review): the switch labels selecting each case are elided
   in this copy.  */
13597 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13604 v = gen_rtvec (4, value, value, value, value);
13605 return gen_rtx_CONST_VECTOR (V4SImode, v);
13609 v = gen_rtvec (2, value, value);
13610 return gen_rtx_CONST_VECTOR (V2DImode, v);
13614 v = gen_rtvec (4, value, value, value, value);
13616 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13617 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13618 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13622 v = gen_rtvec (2, value, value);
13624 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13625 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13628 gcc_unreachable ();
13632 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13633 and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
13634 for an SSE register.  If VECT is true, then replicate the mask for
13635 all elements of the vector register.  If INVERT is true, then create
13636 a mask excluding the sign bit. */
13639 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13641 enum machine_mode vec_mode, imode;
13642 HOST_WIDE_INT hi, lo;
13647 /* Find the sign bit, sign extended to 2*HWI. */
/* SFmode/SImode: sign bit is bit 31.  */
13653 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13654 lo = 0x80000000, hi = lo < 0;
/* DFmode/DImode: sign bit is bit 63; may span two HWIs on a
   32-bit host.  */
13660 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13661 if (HOST_BITS_PER_WIDE_INT >= 64)
13662 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13664 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* TFmode case: no vector mode; the mask ends up in a scalar reg.  */
13669 vec_mode = VOIDmode;
13670 if (HOST_BITS_PER_WIDE_INT >= 64)
13673 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13680 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13684 lo = ~lo, hi = ~hi;
13690 mask = immed_double_const (lo, hi, imode);
13692 vec = gen_rtvec (2, v, mask);
13693 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13694 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13701 gcc_unreachable ();
/* INVERT: complement the mask so it excludes the sign bit.  */
13705 lo = ~lo, hi = ~hi;
13707 /* Force this value into the low part of a fp vector constant. */
13708 mask = immed_double_const (lo, hi, imode);
13709 mask = gen_lowpart (mode, mask);
13711 if (vec_mode == VOIDmode)
13712 return force_reg (mode, mask);
13714 v = ix86_build_const_vector (mode, vect, mask);
13715 return force_reg (vec_mode, v);
13718 /* Generate code for floating point ABS or NEG. */
/* NOTE(review): elided view -- the parameter list continuation, the
   operand setup and the use_sse decision branches are partly missing.  */
13721 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13724 rtx mask, set, use, clob, dst, src;
13725 bool use_sse = false;
13726 bool vector_mode = VECTOR_MODE_P (mode);
13727 enum machine_mode elt_mode = mode;
/* For vector modes operate on the element mode when building the mask.  */
13731 elt_mode = GET_MODE_INNER (mode);
13734 else if (mode == TFmode)
13736 else if (TARGET_SSE_MATH)
13737 use_sse = SSE_FLOAT_MODE_P (mode);
13739 /* NEG and ABS performed with SSE use bitwise mask operations.
13740 Create the appropriate mask now. */
/* ABS clears the sign bit (AND with inverted mask); NEG flips it (XOR).  */
13742 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13751 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13752 set = gen_rtx_SET (VOIDmode, dst, set);
/* Non-SSE path: emit the plain unary NEG/ABS rtx.  */
13757 set = gen_rtx_fmt_e (code, mode, src);
13758 set = gen_rtx_SET (VOIDmode, dst, set);
/* Wrap the set with a USE of the mask and a clobber of the flags so
   later passes keep the mask live and know FLAGS_REG is destroyed.  */
13761 use = gen_rtx_USE (VOIDmode, mask);
13762 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13763 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13764 gen_rtvec (3, set, use, clob)));
13771 /* Expand a copysign operation.  Special case operand 0 being a constant. */
/* NOTE(review): elided view -- operand extraction and some branch
   headers are missing between the visible lines.  */
13774 ix86_expand_copysign (rtx operands[])
13776 enum machine_mode mode;
13777 rtx dest, op0, op1, mask, nmask;
13779 dest = operands[0];
13783 mode = GET_MODE (dest);
/* Constant magnitude: strip its sign up front and use the _const
   expander, which needs only one mask.  */
13785 if (GET_CODE (op0) == CONST_DOUBLE)
13787 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
13789 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13790 op0 = simplify_unary_operation (ABS, mode, op0, mode);
/* SF/DF constants are widened into the low element of an SSE vector.  */
13792 if (mode == SFmode || mode == DFmode)
13794 enum machine_mode vmode;
13796 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13798 if (op0 == CONST0_RTX (mode))
13799 op0 = CONST0_RTX (vmode);
13804 if (mode == SFmode)
13805 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13806 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13808 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13810 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13813 else if (op0 != CONST0_RTX (mode))
13814 op0 = force_reg (mode, op0);
/* Sign-bit mask (not inverted, not replicated).  */
13816 mask = ix86_build_signbit_mask (mode, 0, 0);
13818 if (mode == SFmode)
13819 copysign_insn = gen_copysignsf3_const;
13820 else if (mode == DFmode)
13821 copysign_insn = gen_copysigndf3_const;
13823 copysign_insn = gen_copysigntf3_const;
13825 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: the _var expander needs both the sign mask and
   its complement, plus a scratch operand (NULL_RTX here).  */
13829 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13831 nmask = ix86_build_signbit_mask (mode, 0, 1);
13832 mask = ix86_build_signbit_mask (mode, 0, 0);
13834 if (mode == SFmode)
13835 copysign_insn = gen_copysignsf3_var;
13836 else if (mode == DFmode)
13837 copysign_insn = gen_copysigndf3_var;
13839 copysign_insn = gen_copysigntf3_var;
13841 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13845 /* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
13846 be a constant, and so has already been expanded into a vector constant. */
13849 ix86_split_copysign_const (rtx operands[])
13851 enum machine_mode mode, vmode;
13852 rtx dest, op0, op1, mask, x;
13854 dest = operands[0];
13857 mask = operands[3];
13859 mode = GET_MODE (dest);
13860 vmode = GET_MODE (mask);
/* Work in the vector mode of the mask: keep only the sign bit of the
   sign source ...  */
13862 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13863 x = gen_rtx_AND (vmode, dest, mask);
13864 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* ... then OR in the (already sign-stripped) constant magnitude,
   unless it is zero and the OR would be a no-op.  */
13866 if (op0 != CONST0_RTX (vmode))
13868 x = gen_rtx_IOR (vmode, dest, op0);
13869 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13873 /* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
13874 so we have to do two masks. */
/* NOTE(review): elided view -- register-alternative comments refer to
   the corresponding i386.md pattern constraints, not visible here.  */
13877 ix86_split_copysign_var (rtx operands[])
13879 enum machine_mode mode, vmode;
13880 rtx dest, scratch, op0, op1, mask, nmask, x;
13882 dest = operands[0];
13883 scratch = operands[1];
13886 nmask = operands[4];
13887 mask = operands[5];
13889 mode = GET_MODE (dest);
13890 vmode = GET_MODE (mask);
/* Degenerate case: copysign (x, x) is just x.  Continuing below would
   produce wrong code, per the original comment.  */
13892 if (rtx_equal_p (op0, op1))
13894 /* Shouldn't happen often (it's useless, obviously), but when it does
13895 we'd generate incorrect code if we continue below. */
13896 emit_move_insn (dest, op0);
/* Alternative 0: dest aliases the mask register.  */
13900 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13902 gcc_assert (REGNO (op1) == REGNO (scratch));
13904 x = gen_rtx_AND (vmode, scratch, mask);
13905 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* ANDN-style combine: dest = ~mask & op0.  */
13908 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13909 x = gen_rtx_NOT (vmode, dest);
13910 x = gen_rtx_AND (vmode, x, op0);
13911 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Other alternatives: isolate the sign of op1 into scratch ...  */
13915 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13917 x = gen_rtx_AND (vmode, scratch, mask);
13919 else /* alternative 2,4 */
13921 gcc_assert (REGNO (mask) == REGNO (scratch));
13922 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13923 x = gen_rtx_AND (vmode, scratch, op1);
13925 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* ... and strip the sign of op0 into dest.  */
13927 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13929 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13930 x = gen_rtx_AND (vmode, dest, nmask);
13932 else /* alternative 3,4 */
13934 gcc_assert (REGNO (nmask) == REGNO (dest));
13936 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13937 x = gen_rtx_AND (vmode, dest, op0);
13939 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Final combine: magnitude OR sign.  */
13942 x = gen_rtx_IOR (vmode, dest, scratch);
13943 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13946 /* Return TRUE or FALSE depending on whether the first SET in INSN
13947 has source and destination with matching CC modes, and that the
13948 CC mode is at least as constrained as REQ_MODE. */
/* NOTE(review): elided view -- the switch over SET_MODE and its case
   labels are missing between the visible lines.  */
13951 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13954 enum machine_mode set_mode;
/* The first SET may be wrapped in a PARALLEL (e.g. with clobbers).  */
13956 set = PATTERN (insn);
13957 if (GET_CODE (set) == PARALLEL)
13958 set = XVECEXP (set, 0, 0);
13959 gcc_assert (GET_CODE (set) == SET);
13960 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13962 set_mode = GET_MODE (SET_DEST (set));
/* CCmode is only acceptable for a full compare against a non-zero
   operand; CCNOmode requests relax that.  */
13966 if (req_mode != CCNOmode
13967 && (req_mode != CCmode
13968 || XEXP (SET_SRC (set), 1) != const0_rtx))
13972 if (req_mode == CCGCmode)
13976 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13980 if (req_mode == CCZmode)
13991 gcc_unreachable ();
/* Finally the COMPARE itself must carry the same CC mode.  */
13994 return (GET_MODE (SET_SRC (set)) == set_mode);
13997 /* Generate insn patterns to do an integer compare of OPERANDS. */
14000 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14002 enum machine_mode cmpmode;
/* Pick the least constrained CC mode this comparison needs.  */
14005 cmpmode = SELECT_CC_MODE (code, op0, op1);
14006 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14008 /* This is very simple, but making the interface the same as in the
14009 FP case makes the rest of the code easier. */
14010 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14011 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14013 /* Return the test that should be put into the flags user, i.e.
14014 the bcc, scc, or cmov instruction. */
14015 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
14018 /* Figure out whether to use ordered or unordered fp comparisons.
14019 Return the appropriate mode to use. */
14022 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14024 /* ??? In order to make all comparisons reversible, we do all comparisons
14025 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
14026 all forms trapping and nontrapping comparisons, we can make inequality
14027 comparisons trapping again, since it results in better code when using
14028 FCOM based compares. */
/* CCFPUmode = unordered (non-trapping) compare; CCFPmode = ordered.  */
14029 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode needed to implement comparison CODE on OP0/OP1.
   NOTE(review): elided view -- the switch statement header and the
   returned mode on each branch are missing between the visible lines.  */
14033 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14035 enum machine_mode mode = GET_MODE (op0);
/* Floating point comparisons get the FP compare mode; decimal FP is
   not supported by this backend.  */
14037 if (SCALAR_FLOAT_MODE_P (mode))
14039 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14040 return ix86_fp_compare_mode (code);
14045 /* Only zero flag is needed. */
14046 case EQ: /* ZF=0 */
14047 case NE: /* ZF!=0 */
14049 /* Codes needing carry flag. */
14050 case GEU: /* CF=0 */
14051 case LTU: /* CF=1 */
14052 /* Detect overflow checks.  They need just the carry flag. */
14053 if (GET_CODE (op0) == PLUS
14054 && rtx_equal_p (op1, XEXP (op0, 0)))
14058 case GTU: /* CF=0 & ZF=0 */
14059 case LEU: /* CF=1 | ZF=1 */
14060 /* Detect overflow checks.  They need just the carry flag. */
14061 if (GET_CODE (op0) == MINUS
14062 && rtx_equal_p (op1, XEXP (op0, 0)))
14066 /* Codes possibly doable only with sign flag when
14067 comparing against zero. */
14068 case GE: /* SF=OF or SF=0 */
14069 case LT: /* SF<>OF or SF=1 */
14070 if (op1 == const0_rtx)
14073 /* For other cases Carry flag is not required. */
14075 /* Codes doable only with sign flag when comparing
14076 against zero, but we miss jump instruction for it
14077 so we need to use relational tests against overflow
14078 that thus needs to be zero. */
14079 case GT: /* ZF=0 & SF=OF */
14080 case LE: /* ZF=1 | SF<>OF */
14081 if (op1 == const0_rtx)
14085 /* strcmp pattern do (use flags) and combine may ask us for proper
14090 gcc_unreachable ();
14094 /* Return the fixed registers used for condition codes. */
/* NOTE(review): the body of this function is not visible in this elided
   view; presumably it stores the flags register number(s) through P1/P2
   -- confirm against the full source.  */
14097 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14104 /* If two condition code modes are compatible, return a condition code
14105 mode which is compatible with both.  Otherwise, return
/* NOTE(review): elided view -- the M1 == M2 shortcut and the switch
   body with the remaining CC modes are missing here.  */
14108 static enum machine_mode
14109 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
14114 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGCmode and CCGOCmode are mutually compatible.  */
14117 if ((m1 == CCGCmode && m2 == CCGOCmode)
14118 || (m1 == CCGOCmode && m2 == CCGCmode))
14124 gcc_unreachable ();
14154 /* These are only compatible with themselves, which we already
14160 /* Split comparison code CODE into comparisons we can do using branch
14161 instructions.  BYPASS_CODE is comparison code for branch that will
14162 branch around FIRST_CODE and SECOND_CODE.  If some of branches
14163 is not required, set value to UNKNOWN.
14164 We never require more than two branches. */
14167 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
14168 enum rtx_code *first_code,
14169 enum rtx_code *second_code)
/* Default: a single branch on CODE itself.  */
14171 *first_code = code;
14172 *bypass_code = UNKNOWN;
14173 *second_code = UNKNOWN;
14175 /* The fcomi comparison sets flags as follows:
/* These codes map directly onto one fcomi-style branch.  */
14185 case GT: /* GTU - CF=0 & ZF=0 */
14186 case GE: /* GEU - CF=0 */
14187 case ORDERED: /* PF=0 */
14188 case UNORDERED: /* PF=1 */
14189 case UNEQ: /* EQ - ZF=1 */
14190 case UNLT: /* LTU - CF=1 */
14191 case UNLE: /* LEU - CF=1 | ZF=1 */
14192 case LTGT: /* EQ - ZF=0 */
/* Ordered comparisons that fail on unordered operands need a bypass
   branch around the main test ...  */
14194 case LT: /* LTU - CF=1 - fails on unordered */
14195 *first_code = UNLT;
14196 *bypass_code = UNORDERED;
14198 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
14199 *first_code = UNLE;
14200 *bypass_code = UNORDERED;
14202 case EQ: /* EQ - ZF=1 - fails on unordered */
14203 *first_code = UNEQ;
14204 *bypass_code = UNORDERED;
/* ... while these need a second branch taken on UNORDERED.  */
14206 case NE: /* NE - ZF=0 - fails on unordered */
14207 *first_code = LTGT;
14208 *second_code = UNORDERED;
14210 case UNGE: /* GEU - CF=0 - fails on unordered */
14212 *second_code = UNORDERED;
14214 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
14216 *second_code = UNORDERED;
14219 gcc_unreachable ();
/* Without IEEE conformance unordered handling can be dropped and a
   single branch always suffices.  */
14221 if (!TARGET_IEEE_FP)
14223 *second_code = UNKNOWN;
14224 *bypass_code = UNKNOWN;
14228 /* Return cost of comparison done fcom + arithmetics operations on AX.
14229 All following functions do use number of instructions as a cost metrics.
14230 In future this should be tweaked to compute bytes for optimize_size and
14231 take into account performance of various instructions on various CPUs. */
/* NOTE(review): the per-code switch (lines between the visible ones) is
   elided in this view.  */
14233 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
14235 if (!TARGET_IEEE_FP)
14237 /* The cost of code output by ix86_expand_fp_compare. */
14261 gcc_unreachable ();
14265 /* Return cost of comparison done using fcomi operation.
14266 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14268 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
14270 enum rtx_code bypass_code, first_code, second_code;
14271 /* Return arbitrarily high cost when instruction is not supported - this
14272 prevents gcc from using it. */
/* Base cost 2, plus 1 when an extra bypass/second branch is needed.  */
14275 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14276 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
14279 /* Return cost of comparison done using sahf operation.
14280 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14282 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14284 enum rtx_code bypass_code, first_code, second_code;
14285 /* Return arbitrarily high cost when instruction is not preferred - this
14286 avoids gcc from using it. */
14287 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
/* Base cost 3 (fnstsw + sahf + branch), plus 1 for an extra branch.  */
14289 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14290 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
14293 /* Compute cost of the comparison done using any method.
14294 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* NOTE(review): elided view -- the min-updates and the return statement
   are missing between the visible lines.  */
14296 ix86_fp_comparison_cost (enum rtx_code code)
14298 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
14301 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14302 sahf_cost = ix86_fp_comparison_sahf_cost (code);
/* Take the minimum of the three strategies.  */
14304 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14305 if (min > sahf_cost)
14307 if (min > fcomi_cost)
14312 /* Return true if we should use an FCOMI instruction for this
14316 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14318 enum rtx_code swapped_code = swap_condition (code);
/* FCOMI wins if it achieves the minimum cost for either the code as
   given or its operand-swapped form.  */
14320 return ((ix86_fp_comparison_cost (code)
14321 == ix86_fp_comparison_fcomi_cost (code))
14322 || (ix86_fp_comparison_cost (swapped_code)
14323 == ix86_fp_comparison_fcomi_cost (swapped_code)));
14326 /* Swap, force into registers, or otherwise massage the two operands
14327 to a fp comparison.  The operands are updated in place; the new
14328 comparison code is returned. */
/* NOTE(review): elided view -- several branch headers and the final
   stores through POP0/POP1 are missing between the visible lines.  */
14330 static enum rtx_code
14331 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14333 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14334 rtx op0 = *pop0, op1 = *pop1;
14335 enum machine_mode op_mode = GET_MODE (op0);
14336 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14338 /* All of the unordered compare instructions only work on registers.
14339 The same is true of the fcomi compare instructions.  The XFmode
14340 compare instructions require registers except when comparing
14341 against zero or when converting operand 1 from fixed point to
14345 && (fpcmp_mode == CCFPUmode
14346 || (op_mode == XFmode
14347 && ! (standard_80387_constant_p (op0) == 1
14348 || standard_80387_constant_p (op1) == 1)
14349 && GET_CODE (op1) != FLOAT)
14350 || ix86_use_fcomi_compare (code)))
14352 op0 = force_reg (op_mode, op0);
14353 op1 = force_reg (op_mode, op1);
14357 /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
14358 things around if they appear profitable, otherwise force op0
14359 into a register. */
/* Swap when op0 is a constant the x87 cannot load directly but op1 is
   fine as st(0).  */
14361 if (standard_80387_constant_p (op0) == 0
14363 && ! (standard_80387_constant_p (op1) == 0
14367 tmp = op0, op0 = op1, op1 = tmp;
14368 code = swap_condition (code);
14372 op0 = force_reg (op_mode, op0);
/* Constants that are not special x87 loads must be spilled to memory.  */
14374 if (CONSTANT_P (op1))
14376 int tmp = standard_80387_constant_p (op1);
14378 op1 = validize_mem (force_const_mem (op_mode, op1));
14382 op1 = force_reg (op_mode, op1);
14385 op1 = force_reg (op_mode, op1);
14389 /* Try to rearrange the comparison to make it cheaper. */
14390 if (ix86_fp_comparison_cost (code)
14391 > ix86_fp_comparison_cost (swap_condition (code))
14392 && (REG_P (op1) || can_create_pseudo_p ()))
14395 tmp = op0, op0 = op1, op1 = tmp;
14396 code = swap_condition (code);
14398 op0 = force_reg (op_mode, op0);
14406 /* Convert comparison codes we use to represent FP comparison to integer
14407 code that will result in proper branch.  Return UNKNOWN if no such code
/* NOTE(review): the switch body of this function is entirely elided in
   this view -- confirm mappings against the full source.  */
14411 ix86_fp_compare_code_to_integer (enum rtx_code code)
14440 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* NOTE(review): heavily elided view -- many branch headers, the switch
   on the comparison code and several statements are missing between the
   visible lines.  */
14443 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14444 rtx *second_test, rtx *bypass_test)
14446 enum machine_mode fpcmp_mode, intcmp_mode;
14448 int cost = ix86_fp_comparison_cost (code);
14449 enum rtx_code bypass_code, first_code, second_code;
14451 fpcmp_mode = ix86_fp_compare_mode (code);
14452 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
/* Callers that cannot handle extra tests pass NULL pointers.  */
14455 *second_test = NULL_RTX;
14457 *bypass_test = NULL_RTX;
14459 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14461 /* Do fcomi/sahf based test when profitable. */
14462 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14463 && (bypass_code == UNKNOWN || bypass_test)
14464 && (second_code == UNKNOWN || second_test))
14466 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14467 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: clobber a scratch HImode register for fnstsw.  */
14473 gcc_assert (TARGET_SAHF);
14476 scratch = gen_reg_rtx (HImode);
14477 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14479 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14482 /* The FP codes work out to act like unsigned. */
14483 intcmp_mode = fpcmp_mode;
/* Produce the auxiliary tests requested by ix86_fp_comparison_codes.  */
14485 if (bypass_code != UNKNOWN)
14486 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14487 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14489 if (second_code != UNKNOWN)
14490 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14491 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14496 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14497 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14498 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14500 scratch = gen_reg_rtx (HImode);
14501 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14503 /* In the unordered case, we have to check C2 for NaN's, which
14504 doesn't happen to work out to anything nice combination-wise.
14505 So do some bit twiddling on the value we've got in AH to come
14506 up with an appropriate set of condition codes. */
14508 intcmp_mode = CCNOmode;
/* Each group below tests a subset of the 0x45 status bits (C0/C2/C3)
   copied from the FPU status word into AH.  */
14513 if (code == GT || !TARGET_IEEE_FP)
14515 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14520 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14521 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14522 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14523 intcmp_mode = CCmode;
14529 if (code == LT && TARGET_IEEE_FP)
14531 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14532 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14533 intcmp_mode = CCmode;
14538 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14544 if (code == GE || !TARGET_IEEE_FP)
14546 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14551 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14552 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14559 if (code == LE && TARGET_IEEE_FP)
14561 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14562 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14563 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14564 intcmp_mode = CCmode;
14569 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14575 if (code == EQ && TARGET_IEEE_FP)
14577 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14578 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14579 intcmp_mode = CCmode;
14584 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14591 if (code == NE && TARGET_IEEE_FP)
14593 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14594 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14600 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
/* ORDERED/UNORDERED: only C2 (0x04) matters.  */
14606 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14610 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14615 gcc_unreachable ();
14619 /* Return the test that should be put into the flags user, i.e.
14620 the bcc, scc, or cmov instruction. */
14621 return gen_rtx_fmt_ee (code, VOIDmode,
14622 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the global ix86_compare_op0/op1 with code CODE,
   dispatching to the CC-mode, floating point or integer expanders.
   SECOND_TEST/BYPASS_TEST may be NULL when the caller cannot emit
   extra branches.  */
14627 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14630 op0 = ix86_compare_op0;
14631 op1 = ix86_compare_op1;
14634 *second_test = NULL_RTX;
14636 *bypass_test = NULL_RTX;
/* A compare already in a CC mode needs no further expansion.  */
14638 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14639 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14641 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14643 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14644 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14645 second_test, bypass_test);
14648 ret = ix86_expand_int_compare (code, op0, op1);
14653 /* Return true if the CODE will result in nontrivial jump sequence. */
14655 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14657 enum rtx_code bypass_code, first_code, second_code;
/* Nontrivial when the split requires a bypass or second branch.  */
14660 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14661 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Emit a conditional branch to LABEL on comparison CODE of the global
   ix86_compare_op0/op1, handling integer, FP and double-word modes.
   NOTE(review): heavily elided view -- the switch headers, several
   case labels and intermediate statements are missing.  */
14665 ix86_expand_branch (enum rtx_code code, rtx label)
14669 switch (GET_MODE (ix86_compare_op0))
/* Simple integer modes: expand the compare and one jump.  */
14675 tmp = ix86_expand_compare (code, NULL, NULL);
14676 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14677 gen_rtx_LABEL_REF (VOIDmode, label),
14679 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Floating point modes.  */
14688 enum rtx_code bypass_code, first_code, second_code;
14690 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14691 &ix86_compare_op1);
14693 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14695 /* Check whether we will use the natural sequence with one jump.  If
14696 so, we can expand jump early.  Otherwise delay expansion by
14697 creating compound insn to not confuse optimizers. */
14698 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14700 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14701 gen_rtx_LABEL_REF (VOIDmode, label),
14702 pc_rtx, NULL_RTX, NULL_RTX);
/* Multi-branch FP case: emit a compound jump insn with clobbers of
   FPSR, FLAGS and (for the sahf path) a HImode scratch.  */
14706 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14707 ix86_compare_op0, ix86_compare_op1);
14708 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14709 gen_rtx_LABEL_REF (VOIDmode, label),
14711 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14713 use_fcomi = ix86_use_fcomi_compare (code);
14714 vec = rtvec_alloc (3 + !use_fcomi);
14715 RTVEC_ELT (vec, 0) = tmp;
14717 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14719 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14722 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14724 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14733 /* Expand DImode branch into multiple compare+branch. */
14735 rtx lo[2], hi[2], label2;
14736 enum rtx_code code1, code2, code3;
14737 enum machine_mode submode;
/* Canonicalize: keep any constant as the second operand.  */
14739 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14741 tmp = ix86_compare_op0;
14742 ix86_compare_op0 = ix86_compare_op1;
14743 ix86_compare_op1 = tmp;
14744 code = swap_condition (code);
/* Split the double-word operands into low/high word pairs.  */
14746 if (GET_MODE (ix86_compare_op0) == DImode)
14748 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14749 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14754 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14755 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14759 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14760 avoid two branches.  This costs one extra insn, so disable when
14761 optimizing for size. */
14763 if ((code == EQ || code == NE)
14764 && (!optimize_insn_for_size_p ()
14765 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14770 if (hi[1] != const0_rtx)
14771 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14772 NULL_RTX, 0, OPTAB_WIDEN);
14775 if (lo[1] != const0_rtx)
14776 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14777 NULL_RTX, 0, OPTAB_WIDEN);
14779 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14780 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse on the single-word OR result compared against zero.  */
14782 ix86_compare_op0 = tmp;
14783 ix86_compare_op1 = const0_rtx;
14784 ix86_expand_branch (code, label);
14788 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14789 op1 is a constant and the low word is zero, then we can just
14790 examine the high word.  Similarly for low word -1 and
14791 less-or-equal-than or greater-than. */
14793 if (CONST_INT_P (hi[1]))
14796 case LT: case LTU: case GE: case GEU:
14797 if (lo[1] == const0_rtx)
14799 ix86_compare_op0 = hi[0];
14800 ix86_compare_op1 = hi[1];
14801 ix86_expand_branch (code, label);
14805 case LE: case LEU: case GT: case GTU:
14806 if (lo[1] == constm1_rtx)
14808 ix86_compare_op0 = hi[0];
14809 ix86_compare_op1 = hi[1];
14810 ix86_expand_branch (code, label);
14818 /* Otherwise, we need two or three jumps. */
14820 label2 = gen_label_rtx ();
14823 code2 = swap_condition (code);
14824 code3 = unsigned_condition (code);
14828 case LT: case GT: case LTU: case GTU:
14831 case LE: code1 = LT; code2 = GT; break;
14832 case GE: code1 = GT; code2 = LT; break;
14833 case LEU: code1 = LTU; code2 = GTU; break;
14834 case GEU: code1 = GTU; code2 = LTU; break;
14836 case EQ: code1 = UNKNOWN; code2 = NE; break;
14837 case NE: code2 = UNKNOWN; break;
14840 gcc_unreachable ();
14845 * if (hi(a) < hi(b)) goto true;
14846 * if (hi(a) > hi(b)) goto false;
14847 * if (lo(a) < lo(b)) goto true;
/* First branch(es) on the high words ...  */
14851 ix86_compare_op0 = hi[0];
14852 ix86_compare_op1 = hi[1];
14854 if (code1 != UNKNOWN)
14855 ix86_expand_branch (code1, label);
14856 if (code2 != UNKNOWN)
14857 ix86_expand_branch (code2, label2);
/* ... then decide on the low words (unsigned comparison).  */
14859 ix86_compare_op0 = lo[0];
14860 ix86_compare_op1 = lo[1];
14861 ix86_expand_branch (code3, label);
14863 if (code2 != UNKNOWN)
14864 emit_label (label2);
14869 /* If we have already emitted a compare insn, go straight to simple.
14870 ix86_expand_compare won't emit anything if ix86_compare_emitted
14872 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
14877 /* Split branch based on floating point condition. */
/* NOTE(review): elided view -- some declarations and branch headers are
   missing between the visible lines.  */
14879 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14880 rtx target1, rtx target2, rtx tmp, rtx pushed)
14882 rtx second, bypass;
14883 rtx label = NULL_RTX;
14885 int bypass_probability = -1, second_probability = -1, probability = -1;
/* Canonicalize so the fall-through edge is TARGET2 (pc_rtx).  */
14888 if (target2 != pc_rtx)
14891 code = reverse_condition_maybe_unordered (code);
14896 condition = ix86_expand_fp_compare (code, op1, op2,
14897 tmp, &second, &bypass);
14899 /* Remove pushed operand from stack. */
14901 ix86_free_from_memory (GET_MODE (pushed));
14903 if (split_branch_probability >= 0)
14905 /* Distribute the probabilities across the jumps.
14906 Assume the BYPASS and SECOND to be always test
14908 probability = split_branch_probability;
14910 /* Value of 1 is low enough to make no need for probability
14911 to be updated.  Later we may run some experiments and see
14912 if unordered values are more frequent in practice. */
14914 bypass_probability = 1;
14916 second_probability = 1;
/* Bypass branch jumps over the main comparison to a local label.  */
14918 if (bypass != NULL_RTX)
14920 label = gen_label_rtx ();
14921 i = emit_jump_insn (gen_rtx_SET
14923 gen_rtx_IF_THEN_ELSE (VOIDmode,
14925 gen_rtx_LABEL_REF (VOIDmode,
14928 if (bypass_probability >= 0)
14929 add_reg_note (i, REG_BR_PROB, GEN_INT (bypass_probability));
/* Main conditional jump.  */
14931 i = emit_jump_insn (gen_rtx_SET
14933 gen_rtx_IF_THEN_ELSE (VOIDmode,
14934 condition, target1, target2)));
14935 if (probability >= 0)
14936 add_reg_note (i, REG_BR_PROB, GEN_INT (probability));
/* Optional second jump for codes that need two tests.  */
14937 if (second != NULL_RTX)
14939 i = emit_jump_insn (gen_rtx_SET
14941 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14943 if (second_probability >= 0)
14944 add_reg_note (i, REG_BR_PROB, GEN_INT (second_probability));
14946 if (label != NULL_RTX)
14947 emit_label (label);
/* Expand a setcc of comparison CODE into QImode register DEST.
   Returns 1 on success, 0 for FAIL (double-word compares).
   NOTE(review): elided view -- tmp/tmpreg setup lines are missing.  */
14951 ix86_expand_setcc (enum rtx_code code, rtx dest)
14953 rtx ret, tmp, tmpreg, equiv;
14954 rtx second_test, bypass_test;
/* Double-word compares go through the branch splitter instead.  */
14956 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14957 return 0; /* FAIL */
14959 gcc_assert (GET_MODE (dest) == QImode);
14961 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14962 PUT_MODE (ret, QImode);
14967 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
/* Combine the extra unordered test with AND (bypass) or OR (second).  */
14968 if (bypass_test || second_test)
14970 rtx test = second_test;
14972 rtx tmp2 = gen_reg_rtx (QImode);
14975 gcc_assert (!second_test);
14976 test = bypass_test;
/* Bypass tests are combined inverted.  */
14978 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14980 PUT_MODE (test, QImode);
14981 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14984 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14986 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14989 /* Attach a REG_EQUAL note describing the comparison result. */
14990 if (ix86_compare_op0 && ix86_compare_op1)
14992 equiv = simplify_gen_relational (code, QImode,
14993 GET_MODE (ix86_compare_op0),
14994 ix86_compare_op0, ix86_compare_op1);
14995 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14998 return 1; /* DONE */
15001 /* Expand comparison setting or clearing carry flag.  Return true when
15002 successful and set pop for the operation. */
/* NOTE(review): elided view -- the integer-case switch headers and some
   early returns are missing between the visible lines.  */
15004 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
15006 enum machine_mode mode =
15007 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15009 /* Do not handle DImode compares that go through special path. */
15010 if (mode == (TARGET_64BIT ? TImode : DImode))
/* Floating point: try to expand and verify a carry-flag compare.  */
15013 if (SCALAR_FLOAT_MODE_P (mode))
15015 rtx second_test = NULL, bypass_test = NULL;
15016 rtx compare_op, compare_seq;
15018 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15020 /* Shortcut: following common codes never translate
15021 into carry flag compares. */
15022 if (code == EQ || code == NE || code == UNEQ || code == LTGT
15023 || code == ORDERED || code == UNORDERED)
15026 /* These comparisons require zero flag; swap operands so they won't. */
15027 if ((code == GT || code == UNLE || code == LE || code == UNGT)
15028 && !TARGET_IEEE_FP)
15033 code = swap_condition (code);
15036 /* Try to expand the comparison and verify that we end up with
15037 carry flag based comparison.  This fails to be true only when
15038 we decide to expand comparison using arithmetic that is not
15039 too common scenario. */
15041 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
15042 &second_test, &bypass_test);
15043 compare_seq = get_insns ();
/* Extra tests mean the compare is not a pure carry-flag one.  */
15046 if (second_test || bypass_test)
15049 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15050 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15051 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15053 code = GET_CODE (compare_op);
15055 if (code != LTU && code != GEU)
15058 emit_insn (compare_seq);
15063 if (!INTEGRAL_MODE_P (mode))
15072 /* Convert a==0 into (unsigned)a<1. */
15075 if (op1 != const0_rtx)
15078 code = (code == EQ ? LTU : GEU);
15081 /* Convert a>b into b<a or a>=b-1. */
15084 if (CONST_INT_P (op1))
15086 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15087 /* Bail out on overflow.  We still can swap operands but that
15088 would force loading of the constant into register. */
15089 if (op1 == const0_rtx
15090 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15092 code = (code == GTU ? GEU : LTU);
15099 code = (code == GTU ? LTU : GEU);
15103 /* Convert a>=0 into (unsigned)a<0x80000000. */
15106 if (mode == DImode || op1 != const0_rtx)
15108 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15109 code = (code == LT ? GEU : LTU);
15113 if (mode == DImode || op1 != constm1_rtx)
15115 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15116 code = (code == LE ? GEU : LTU);
15122 /* Swapping operands may cause constant to appear as first operand. */
15123 if (!nonimmediate_operand (op0, VOIDmode))
15125 if (!can_create_pseudo_p ())
15127 op0 = force_reg (mode, op0);
/* Expand via the globals and assert we really got LTU/GEU.  */
15129 ix86_compare_op0 = op0;
15130 ix86_compare_op1 = op1;
15131 *pop = ix86_expand_compare (code, NULL, NULL);
15132 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand a scalar integer conditional move:
     operands[0] = (operands[1] comparison) ? operands[2] : operands[3].
   Returns 1 ("DONE") when insns were emitted here, 0 ("FAIL") when the
   caller must fall back to a branch sequence.
   NOTE(review): this listing is elided -- the leading 1xxxx tokens are
   original line numbers fused in by extraction; code text is otherwise
   preserved.  Two in-view fixes applied: a stray ";;" and a tautological
   operand guard (see comments below).  */
15137 ix86_expand_int_movcc (rtx operands[])
15139 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15140 rtx compare_seq, compare_op;
15141 rtx second_test, bypass_test;
15142 enum machine_mode mode = GET_MODE (operands[0]);
/* Fixed: dropped duplicated semicolon after the initializer.  */
15143 bool sign_bit_compare_p = false;
15146 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15147 compare_seq = get_insns ();
15150 compare_code = GET_CODE (compare_op);
/* A compare against 0 (for GE/LT) or -1 (for GT/LE) only needs the
   sign bit of the operand.  */
15152 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15153 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15154 sign_bit_compare_p = true;
15156 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15157 HImode insns, we'd be swallowed in word prefix ops. */
/* Both arms constant: try branchless sbb/setcc arithmetic tricks.  */
15159 if ((mode != HImode || TARGET_FAST_PREFIX)
15160 && (mode != (TARGET_64BIT ? TImode : DImode))
15161 && CONST_INT_P (operands[2])
15162 && CONST_INT_P (operands[3]))
15164 rtx out = operands[0];
15165 HOST_WIDE_INT ct = INTVAL (operands[2]);
15166 HOST_WIDE_INT cf = INTVAL (operands[3]);
15167 HOST_WIDE_INT diff;
15170 /* Sign bit compares are better done using shifts than we do by using
15172 if (sign_bit_compare_p
15173 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15174 ix86_compare_op1, &compare_op))
15176 /* Detect overlap between destination and compare sources. */
15179 if (!sign_bit_compare_p)
15181 bool fpcmp = false;
15183 compare_code = GET_CODE (compare_op);
15185 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15186 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15189 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15192 /* To simplify rest of code, restrict to the GEU case. */
15193 if (compare_code == LTU)
15195 HOST_WIDE_INT tmp = ct;
15198 compare_code = reverse_condition (compare_code);
15199 code = reverse_condition (code);
15204 PUT_CODE (compare_op,
15205 reverse_condition_maybe_unordered
15206 (GET_CODE (compare_op)));
15208 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15212 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15213 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15214 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb reg,reg pattern).  */
15216 if (mode == DImode)
15217 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15219 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15223 if (code == GT || code == GE)
15224 code = reverse_condition (code);
15227 HOST_WIDE_INT tmp = ct;
15232 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15233 ix86_compare_op1, VOIDmode, 0, -1);
15246 tmp = expand_simple_binop (mode, PLUS,
15248 copy_rtx (tmp), 1, OPTAB_DIRECT);
15259 tmp = expand_simple_binop (mode, IOR,
15261 copy_rtx (tmp), 1, OPTAB_DIRECT);
15263 else if (diff == -1 && ct)
15273 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15275 tmp = expand_simple_binop (mode, PLUS,
15276 copy_rtx (tmp), GEN_INT (cf),
15277 copy_rtx (tmp), 1, OPTAB_DIRECT);
15285 * andl cf - ct, dest
15295 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15298 tmp = expand_simple_binop (mode, AND,
15300 gen_int_mode (cf - ct, mode),
15301 copy_rtx (tmp), 1, OPTAB_DIRECT);
15303 tmp = expand_simple_binop (mode, PLUS,
15304 copy_rtx (tmp), GEN_INT (ct),
15305 copy_rtx (tmp), 1, OPTAB_DIRECT);
15308 if (!rtx_equal_p (tmp, out))
15309 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15311 return 1; /* DONE */
15316 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15319 tmp = ct, ct = cf, cf = tmp;
15322 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15324 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15326 /* We may be reversing unordered compare to normal compare, that
15327 is not valid in general (we may convert non-trapping condition
15328 to trapping one), however on i386 we currently emit all
15329 comparisons unordered. */
15330 compare_code = reverse_condition_maybe_unordered (compare_code);
15331 code = reverse_condition_maybe_unordered (code);
15335 compare_code = reverse_condition (compare_code);
15336 code = reverse_condition (code);
15340 compare_code = UNKNOWN;
15341 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15342 && CONST_INT_P (ix86_compare_op1))
15344 if (ix86_compare_op1 == const0_rtx
15345 && (code == LT || code == GE))
15346 compare_code = code;
15347 else if (ix86_compare_op1 == constm1_rtx)
15351 else if (code == GT)
15356 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15357 if (compare_code != UNKNOWN
15358 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15359 && (cf == -1 || ct == -1))
15361 /* If lea code below could be used, only optimize
15362 if it results in a 2 insn sequence. */
15364 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15365 || diff == 3 || diff == 5 || diff == 9)
15366 || (compare_code == LT && ct == -1)
15367 || (compare_code == GE && cf == -1))
15370 * notl op1 (if necessary)
15378 code = reverse_condition (code);
15381 out = emit_store_flag (out, code, ix86_compare_op0,
15382 ix86_compare_op1, VOIDmode, 0, -1);
15384 out = expand_simple_binop (mode, IOR,
15386 out, 1, OPTAB_DIRECT);
15387 if (out != operands[0])
15388 emit_move_insn (operands[0], out);
15390 return 1; /* DONE */
/* diff is one of the scales/addends lea can encode: use a single
   setcc + lea to compute cf + setcc*(ct-cf).  */
15395 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15396 || diff == 3 || diff == 5 || diff == 9)
15397 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15399 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15405 * lea cf(dest*(ct-cf)),dest
15409 * This also catches the degenerate setcc-only case.
15415 out = emit_store_flag (out, code, ix86_compare_op0,
15416 ix86_compare_op1, VOIDmode, 0, 1);
15419 /* On x86_64 the lea instruction operates on Pmode, so we need
15420 to get arithmetics done in proper mode to match. */
15422 tmp = copy_rtx (out);
15426 out1 = copy_rtx (out);
15427 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15431 tmp = gen_rtx_PLUS (mode, tmp, out1);
15437 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15440 if (!rtx_equal_p (tmp, out))
15443 out = force_operand (tmp, copy_rtx (out));
15445 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15447 if (!rtx_equal_p (out, operands[0]))
15448 emit_move_insn (operands[0], copy_rtx (out));
15450 return 1; /* DONE */
15454 * General case: Jumpful:
15455 * xorl dest,dest cmpl op1, op2
15456 * cmpl op1, op2 movl ct, dest
15457 * setcc dest jcc 1f
15458 * decl dest movl cf, dest
15459 * andl (cf-ct),dest 1:
15462 * Size 20. Size 14.
15464 * This is reasonably steep, but branch mispredict costs are
15465 * high on modern cpus, so consider failing only if optimizing
15469 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15470 && BRANCH_COST (optimize_insn_for_speed_p (),
15475 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15480 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15482 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15484 /* We may be reversing unordered compare to normal compare,
15485 that is not valid in general (we may convert non-trapping
15486 condition to trapping one), however on i386 we currently
15487 emit all comparisons unordered. */
15488 code = reverse_condition_maybe_unordered (code);
15492 code = reverse_condition (code);
15493 if (compare_code != UNKNOWN)
15494 compare_code = reverse_condition (compare_code);
15498 if (compare_code != UNKNOWN)
15500 /* notl op1 (if needed)
15505 For x < 0 (resp. x <= -1) there will be no notl,
15506 so if possible swap the constants to get rid of the
15508 True/false will be -1/0 while code below (store flag
15509 followed by decrement) is 0/-1, so the constants need
15510 to be exchanged once more. */
15512 if (compare_code == GE || !cf)
15514 code = reverse_condition (code);
15519 HOST_WIDE_INT tmp = cf;
15524 out = emit_store_flag (out, code, ix86_compare_op0,
15525 ix86_compare_op1, VOIDmode, 0, -1);
15529 out = emit_store_flag (out, code, ix86_compare_op0,
15530 ix86_compare_op1, VOIDmode, 0, 1);
15532 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15533 copy_rtx (out), 1, OPTAB_DIRECT);
15536 out = expand_simple_binop (mode, AND, copy_rtx (out),
15537 gen_int_mode (cf - ct, mode),
15538 copy_rtx (out), 1, OPTAB_DIRECT);
15540 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15541 copy_rtx (out), 1, OPTAB_DIRECT);
15542 if (!rtx_equal_p (out, operands[0]))
15543 emit_move_insn (operands[0], copy_rtx (out));
15545 return 1; /* DONE */
15549 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15551 /* Try a few things more with specific constants and a variable. */
15554 rtx var, orig_out, out, tmp;
15556 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15557 return 0; /* FAIL */
15559 /* If one of the two operands is an interesting constant, load a
15560 constant with the above and mask it in with a logical operation. */
15562 if (CONST_INT_P (operands[2]))
15565 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15566 operands[3] = constm1_rtx, op = and_optab;
15567 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15568 operands[3] = const0_rtx, op = ior_optab;
15570 return 0; /* FAIL */
15572 else if (CONST_INT_P (operands[3]))
15575 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15576 operands[2] = constm1_rtx, op = and_optab;
/* Fixed: the guard must test the variable operand operands[2]; the
   original "operands[3] != const0_rtx" was always true here (its
   INTVAL is -1), making the check a no-op -- mirror the symmetric
   CONST_INT_P (operands[2]) branch above.  */
15577 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
15578 operands[2] = const0_rtx, op = ior_optab;
15580 return 0; /* FAIL */
15583 return 0; /* FAIL */
15585 orig_out = operands[0];
15586 tmp = gen_reg_rtx (mode);
15589 /* Recurse to get the constant loaded. */
15590 if (ix86_expand_int_movcc (operands) == 0)
15591 return 0; /* FAIL */
15593 /* Mask in the interesting variable. */
15594 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15596 if (!rtx_equal_p (out, orig_out))
15597 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15599 return 1; /* DONE */
15603 * For comparison with above,
/* Generic cmov path: force both arms to valid operands, break
   destination/source overlaps, then emit up to three IF_THEN_ELSE
   sets (main, bypass_test, second_test).  */
15613 if (! nonimmediate_operand (operands[2], mode))
15614 operands[2] = force_reg (mode, operands[2]);
15615 if (! nonimmediate_operand (operands[3], mode))
15616 operands[3] = force_reg (mode, operands[3]);
15618 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15620 rtx tmp = gen_reg_rtx (mode);
15621 emit_move_insn (tmp, operands[3]);
15624 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15626 rtx tmp = gen_reg_rtx (mode);
15627 emit_move_insn (tmp, operands[2]);
15631 if (! register_operand (operands[2], VOIDmode)
15633 || ! register_operand (operands[3], VOIDmode)))
15634 operands[2] = force_reg (mode, operands[2]);
15637 && ! register_operand (operands[3], VOIDmode))
15638 operands[3] = force_reg (mode, operands[3]);
15640 emit_insn (compare_seq);
15641 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15642 gen_rtx_IF_THEN_ELSE (mode,
15643 compare_op, operands[2],
15646 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15647 gen_rtx_IF_THEN_ELSE (mode,
15649 copy_rtx (operands[3]),
15650 copy_rtx (operands[0]))));
15652 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15653 gen_rtx_IF_THEN_ELSE (mode,
15655 copy_rtx (operands[2]),
15656 copy_rtx (operands[0]))));
15658 return 1; /* DONE */
15661 /* Swap, force into registers, or otherwise massage the two operands
15662 to an sse comparison with a mask result. Thus we differ a bit from
15663 ix86_prepare_fp_compare_args which expects to produce a flags result.
15665 The DEST operand exists to help determine whether to commute commutative
15666 operators. The POP0/POP1 operands are updated in place. The new
15667 comparison code is returned, or UNKNOWN if not implementable. */
15669 static enum rtx_code
15670 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15671 rtx *pop0, rtx *pop1)
/* NOTE(review): the body is a switch over CODE; several case labels are
   elided in this snapshot (the embedded 15xxx numbers are extraction
   artifacts).  */
15679 /* We have no LTGT as an operator. We could implement it with
15680 NE & ORDERED, but this requires an extra temporary. It's
15681 not clear that it's worth it. */
15688 /* These are supported directly. */
15695 /* For commutative operators, try to canonicalize the destination
15696 operand to be first in the comparison - this helps reload to
15697 avoid extra moves. */
15698 if (!dest || !rtx_equal_p (dest, *pop1))
15706 /* These are not supported directly. Swap the comparison operands
15707 to transform into something that is supported. */
15711 code = swap_condition (code);
/* Unhandled comparison codes cannot reach here.  */
15715 gcc_unreachable ();
15721 /* Detect conditional moves that exactly match min/max operational
15722 semantics. Note that this is IEEE safe, as long as we don't
15723 interchange the operands.
15725 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15726 and TRUE if the operation is successful and instructions are emitted. */
15729 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15730 rtx cmp_op1, rtx if_true, rtx if_false)
15732 enum machine_mode mode;
/* Normalize the comparison so the condition reads "op0 < op1";
   for UNGE the arms are swapped to match (elided lines set is_min).  */
15738 else if (code == UNGE)
15741 if_true = if_false;
/* The cmove matches MIN when the true arm is the smaller-compared
   operand, MAX when it is the larger one.  */
15747 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15749 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15754 mode = GET_MODE (dest);
15756 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15757 but MODE may be a vector mode and thus not appropriate. */
15758 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-safe path: wrap in an UNSPEC so the optimizers cannot
   commute the operands (minss/maxss are not commutative for NaN
   and signed zero).  */
15760 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15763 if_true = force_reg (mode, if_true);
15764 v = gen_rtvec (2, if_true, if_false);
15765 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math path: plain SMIN/SMAX rtx.  */
15769 code = is_min ? SMIN : SMAX;
15770 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15773 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15777 /* Expand an sse vector comparison. Return the register with the result.
   The returned register may differ from DEST when DEST is not a register
   or overlaps OP_TRUE/OP_FALSE.  OP_TRUE/OP_FALSE are only inspected for
   overlap here; the select itself is done by ix86_expand_sse_movcc.  */
15780 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15781 rtx op_true, rtx op_false)
15783 enum machine_mode mode = GET_MODE (dest);
15786 cmp_op0 = force_reg (mode, cmp_op0);
15787 if (!nonimmediate_operand (cmp_op1, mode))
15788 cmp_op1 = force_reg (mode, cmp_op1);
/* Use a scratch register if writing DEST directly would clobber an
   input still needed for the following cmov/select.  */
15791 || reg_overlap_mentioned_p (dest, op_true)
15792 || reg_overlap_mentioned_p (dest, op_false))
15793 dest = gen_reg_rtx (mode);
15795 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15796 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15801 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15802 operations. This is used for both scalar and vector conditional moves.
   CMP is assumed to be an all-ones/all-zeros mask (as produced by
   ix86_expand_sse_cmp).  */
15805 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15807 enum machine_mode mode = GET_MODE (dest);
/* Special case: false arm is zero -> single AND.  */
15810 if (op_false == CONST0_RTX (mode))
15812 op_true = force_reg (mode, op_true);
15813 x = gen_rtx_AND (mode, cmp, op_true);
15814 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Special case: true arm is zero -> ANDNOT.  */
15816 else if (op_true == CONST0_RTX (mode))
15818 op_false = force_reg (mode, op_false);
15819 x = gen_rtx_NOT (mode, cmp);
15820 x = gen_rtx_AND (mode, x, op_false);
15821 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a true vector conditional move (pcmov).  */
15823 else if (TARGET_SSE5)
15825 rtx pcmov = gen_rtx_SET (mode, dest,
15826 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* General case: dest = (cmp & op_true) | (~cmp & op_false).  */
15833 op_true = force_reg (mode, op_true);
15834 op_false = force_reg (mode, op_false);
15836 t2 = gen_reg_rtx (mode);
15838 t3 = gen_reg_rtx (mode);
15842 x = gen_rtx_AND (mode, op_true, cmp);
15843 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15845 x = gen_rtx_NOT (mode, cmp);
15846 x = gen_rtx_AND (mode, x, op_false);
15847 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15849 x = gen_rtx_IOR (mode, t3, t2);
15850 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15854 /* Expand a floating-point conditional move. Return true if successful.
   operands[0] = operands[1](comparison of the global ix86_compare_op0/1)
   ? operands[2] : operands[3].  */
15857 ix86_expand_fp_movcc (rtx operands[])
15859 enum machine_mode mode = GET_MODE (operands[0]);
15860 enum rtx_code code = GET_CODE (operands[1]);
15861 rtx tmp, compare_op, second_test, bypass_test;
/* SSE scalar math: use mask-based compare + logical select instead of
   fcmov, which only exists for x87 registers.  */
15863 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15865 enum machine_mode cmode;
15867 /* Since we've no cmove for sse registers, don't force bad register
15868 allocation just to gain access to it. Deny movcc when the
15869 comparison mode doesn't match the move mode. */
15870 cmode = GET_MODE (ix86_compare_op0);
15871 if (cmode == VOIDmode)
15872 cmode = GET_MODE (ix86_compare_op1);
15876 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15878 &ix86_compare_op1);
15879 if (code == UNKNOWN)
/* Prefer a single minss/maxss when the cmove matches min/max.  */
15882 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15883 ix86_compare_op1, operands[2],
15887 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15888 ix86_compare_op1, operands[2], operands[3]);
15889 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15893 /* The floating point conditional move instructions don't directly
15894 support conditions resulting from a signed integer comparison. */
15896 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15898 /* The floating point conditional move instructions don't directly
15899 support signed integer comparisons. */
/* Work around fcmov's limitation: materialize the condition with setcc
   into a QImode temp and re-compare that against zero.  */
15901 if (!fcmov_comparison_operator (compare_op, VOIDmode))
15903 gcc_assert (!second_test && !bypass_test);
15904 tmp = gen_reg_rtx (QImode);
15905 ix86_expand_setcc (code, tmp);
15907 ix86_compare_op0 = tmp;
15908 ix86_compare_op1 = const0_rtx;
15909 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy an arm into a temp if the extra tests below would read it after
   operands[0] has already been overwritten.  */
15911 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15913 tmp = gen_reg_rtx (mode);
15914 emit_move_insn (tmp, operands[3]);
15917 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15919 tmp = gen_reg_rtx (mode);
15920 emit_move_insn (tmp, operands[2]);
/* Main cmov, then optional corrective cmovs for the bypass/second
   tests produced by the FP compare expansion.  */
15924 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15925 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15926 operands[2], operands[3])));
15928 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15929 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15930 operands[3], operands[0])));
15932 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15933 gen_rtx_IF_THEN_ELSE (mode, second_test,
15934 operands[2], operands[0])));
15939 /* Expand a floating-point vector conditional move; a vcond operation
15940 rather than a movcc operation.
   operands[0] = (operands[4] <op3> operands[5]) ? operands[1]
   : operands[2], elementwise.  */
15943 ix86_expand_fp_vcond (rtx operands[])
15945 enum rtx_code code = GET_CODE (operands[3]);
15948 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15949 &operands[4], &operands[5]);
15950 if (code == UNKNOWN)
/* Single min/max insn if the pattern matches.  */
15953 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15954 operands[5], operands[1], operands[2]))
/* Otherwise compare into a mask, then blend with logical ops.  */
15957 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15958 operands[1], operands[2]);
15959 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15963 /* Expand a signed/unsigned integral vector conditional move.
   operands[0] = (operands[4] <op3> operands[5]) ? operands[1]
   : operands[2], elementwise.  NEGATE records when the canonicalized
   condition was inverted so the arms must be swapped.  */
15966 ix86_expand_int_vcond (rtx operands[])
15968 enum machine_mode mode = GET_MODE (operands[0]);
15969 enum rtx_code code = GET_CODE (operands[3]);
15970 bool negate = false;
15973 cop0 = operands[4];
15974 cop1 = operands[5];
15976 /* SSE5 supports all of the comparisons on all vector int types. */
15979 /* Canonicalize the comparison to EQ, GT, GTU. */
15990 code = reverse_condition (code);
15996 code = reverse_condition (code);
16002 code = swap_condition (code);
16003 x = cop0, cop0 = cop1, cop1 = x;
16007 gcc_unreachable ();
16010 /* Only SSE4.1/SSE4.2 supports V2DImode. */
16011 if (mode == V2DImode)
16016 /* SSE4.1 supports EQ. */
16017 if (!TARGET_SSE4_1)
16023 /* SSE4.2 supports GT/GTU. */
16024 if (!TARGET_SSE4_2)
16029 gcc_unreachable ();
16033 /* Unsigned parallel compare is not supported by the hardware. Play some
16034 tricks to turn this into a signed comparison against 0. */
16037 cop0 = force_reg (mode, cop0);
16046 /* Perform a parallel modulo subtraction. */
16047 t1 = gen_reg_rtx (mode);
16048 emit_insn ((mode == V4SImode
16050 : gen_subv2di3) (t1, cop0, cop1));
16052 /* Extract the original sign bit of op0. */
16053 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16055 t2 = gen_reg_rtx (mode);
16056 emit_insn ((mode == V4SImode
16058 : gen_andv2di3) (t2, cop0, mask));
16060 /* XOR it back into the result of the subtraction. This results
16061 in the sign bit set iff we saw unsigned underflow. */
16062 x = gen_reg_rtx (mode);
16063 emit_insn ((mode == V4SImode
16065 : gen_xorv2di3) (x, t1, t2));
16073 /* Perform a parallel unsigned saturating subtraction. */
16074 x = gen_reg_rtx (mode);
16075 emit_insn (gen_rtx_SET (VOIDmode, x,
16076 gen_rtx_US_MINUS (mode, cop0, cop1)));
16083 gcc_unreachable ();
/* After the tricks above the comparison collapses to a signed
   compare of X against zero.  */
16087 cop1 = CONST0_RTX (mode);
/* NEGATE swaps the two arms via the 1+negate / 2-negate indexing.  */
16091 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16092 operands[1+negate], operands[2-negate]);
16094 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16095 operands[2-negate]);
16099 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16100 true if we should do zero extension, else sign extension. HIGH_P is
16101 true if we want the N/2 high elements, else the low elements.
   Implemented via SSE2 interleave (punpckh/punpckl) with a second
   operand supplying the extension bits.  */
16104 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16106 enum machine_mode imode = GET_MODE (operands[1]);
16107 rtx (*unpack)(rtx, rtx, rtx);
/* Select the interleave generator by element width and half
   (case labels elided in this snapshot).  */
16114 unpack = gen_vec_interleave_highv16qi;
16116 unpack = gen_vec_interleave_lowv16qi;
16120 unpack = gen_vec_interleave_highv8hi;
16122 unpack = gen_vec_interleave_lowv8hi;
16126 unpack = gen_vec_interleave_highv4si;
16128 unpack = gen_vec_interleave_lowv4si;
16131 gcc_unreachable ();
16134 dest = gen_lowpart (imode, operands[0]);
/* Zero-extend: interleave with zero.  Sign-extend: interleave with a
   mask of the sign bits (0 > x yields all-ones where x is negative).  */
16137 se = force_reg (imode, CONST0_RTX (imode));
16139 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16140 operands[1], pc_rtx, pc_rtx);
16142 emit_insn (unpack (dest, operands[1], se));
16145 /* This function performs the same task as ix86_expand_sse_unpack,
16146 but with SSE4.1 instructions (pmovsx/pmovzx, which extend directly
   and only read the low half of the source).  */
16149 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16151 enum machine_mode imode = GET_MODE (operands[1]);
16152 rtx (*unpack)(rtx, rtx);
/* Select the extend generator by element width and signedness
   (case labels elided in this snapshot).  */
16159 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16161 unpack = gen_sse4_1_extendv8qiv8hi2;
16165 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16167 unpack = gen_sse4_1_extendv4hiv4si2;
16171 unpack = gen_sse4_1_zero_extendv2siv2di2;
16173 unpack = gen_sse4_1_extendv2siv2di2;
16176 gcc_unreachable ();
16179 dest = operands[0];
/* pmovsx/pmovzx read the low half, so for HIGH_P first move the high
   half down with a 64-bit logical right shift of the 128-bit value.  */
16182 /* Shift higher 8 bytes to lower 8 bytes. */
16183 src = gen_reg_rtx (imode);
16184 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16185 gen_lowpart (TImode, operands[1]),
16191 emit_insn (unpack (dest, src));
16194 /* This function performs the same task as ix86_expand_sse_unpack,
16195 but with sse5 instructions (PPERM byte permute with per-byte
   zero/sign selectors).  The pperm_bytes table is the 16-entry byte
   selector vector; H offsets source bytes for the high half.  */
16198 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16200 enum machine_mode imode = GET_MODE (operands[1]);
16201 int pperm_bytes[16];
16203 int h = (high_p) ? 8 : 0;
16206 rtvec v = rtvec_alloc (16);
16209 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: each result word is a source byte plus an
   extension byte (zero or sign, per UNSIGNED_P).  */
16214 vs = rtvec_alloc (8);
16215 h2 = (high_p) ? 8 : 0;
16216 for (i = 0; i < 8; i++)
16218 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16219 pperm_bytes[2*i+1] = ((unsigned_p)
16221 : PPERM_SIGN | PPERM_SRC2 | i | h);
16224 for (i = 0; i < 16; i++)
16225 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16227 for (i = 0; i < 8; i++)
16228 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16230 p = gen_rtx_PARALLEL (VOIDmode, vs);
16231 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16233 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16235 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two source bytes followed by two extension bytes.  */
16239 vs = rtvec_alloc (4);
16240 h2 = (high_p) ? 4 : 0;
16241 for (i = 0; i < 4; i++)
16243 sign_extend = ((unsigned_p)
16245 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16246 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16247 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16248 pperm_bytes[4*i+2] = sign_extend;
16249 pperm_bytes[4*i+3] = sign_extend;
16252 for (i = 0; i < 16; i++)
16253 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16255 for (i = 0; i < 4; i++)
16256 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16258 p = gen_rtx_PARALLEL (VOIDmode, vs);
16259 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16261 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16263 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four source bytes followed by four extension bytes.  */
16267 vs = rtvec_alloc (2);
16268 h2 = (high_p) ? 2 : 0;
16269 for (i = 0; i < 2; i++)
16271 sign_extend = ((unsigned_p)
16273 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16274 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16275 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16276 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16277 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16278 pperm_bytes[8*i+4] = sign_extend;
16279 pperm_bytes[8*i+5] = sign_extend;
16280 pperm_bytes[8*i+6] = sign_extend;
16281 pperm_bytes[8*i+7] = sign_extend;
16284 for (i = 0; i < 16; i++)
16285 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16287 for (i = 0; i < 2; i++)
16288 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16290 p = gen_rtx_PARALLEL (VOIDmode, vs);
16291 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16293 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16295 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16299 gcc_unreachable ();
16305 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16306 next narrower integer vector type, using the SSE5 PPERM byte permute.
   The selector vector picks the low bytes of each element of SRC1 into
   the low half of the result and of SRC2 into the high half.  */
16308 ix86_expand_sse5_pack (rtx operands[3])
16310 enum machine_mode imode = GET_MODE (operands[0]);
16311 int pperm_bytes[16];
16313 rtvec v = rtvec_alloc (16);
16315 rtx op0 = operands[0];
16316 rtx op1 = operands[1];
16317 rtx op2 = operands[2];
/* V16QI result: take byte 0 of each 16-bit element.  */
16322 for (i = 0; i < 8; i++)
16324 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16325 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16328 for (i = 0; i < 16; i++)
16329 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16331 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16332 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V8HI result: take bytes 0-1 of each 32-bit element.  */
16336 for (i = 0; i < 4; i++)
16338 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16339 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16340 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16341 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16344 for (i = 0; i < 16; i++)
16345 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16347 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16348 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V4SI result: take bytes 0-3 of each 64-bit element.  */
16352 for (i = 0; i < 2; i++)
16354 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16355 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16356 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16357 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16358 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16359 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16360 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16361 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16364 for (i = 0; i < 16; i++)
16365 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16367 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16368 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16372 gcc_unreachable ();
16378 /* Expand conditional increment or decrement using adc/sbb instructions.
16379 The default case using setcc followed by the conditional move can be
16380 done by generic code.
   operands[0] = operands[2] +/- (condition), where operands[3] is +1 or
   -1.  Returns 1 when expanded here, 0 to let generic code handle it.  */
16382 ix86_expand_int_addcc (rtx operands[])
16384 enum rtx_code code = GET_CODE (operands[1]);
16386 rtx val = const0_rtx;
16387 bool fpcmp = false;
16388 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +1/-1 adjustments can be folded into the carry.  */
16390 if (operands[3] != const1_rtx
16391 && operands[3] != constm1_rtx)
/* The comparison must reduce to a carry-flag test (LTU/GEU).  */
16393 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16394 ix86_compare_op1, &compare_op))
16396 code = GET_CODE (compare_op);
16398 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16399 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16402 code = ix86_fp_compare_code_to_integer (code);
16409 PUT_CODE (compare_op,
16410 reverse_condition_maybe_unordered
16411 (GET_CODE (compare_op)));
16413 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16415 PUT_MODE (compare_op, mode);
16417 /* Construct either adc or sbb insn. */
16418 if ((code == LTU) == (operands[3] == constm1_rtx))
16420 switch (GET_MODE (operands[0]))
16423 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16426 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16429 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16432 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16435 gcc_unreachable ();
16440 switch (GET_MODE (operands[0]))
16443 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16446 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16449 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16452 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16455 gcc_unreachable ();
16458 return 1; /* DONE */
16462 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16463 works for floating pointer parameters and nonoffsetable memories.
16464 For pushes, it returns just stack offsets; the values will be saved
16465 in the right order. Maximally three parts are generated.
   Returns the number of parts written to PARTS (2-4; the "maximally
   three" above predates 64-bit support -- see the size assert).  */
16468 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Part count: 32-bit target splits into SImode words (XFmode is
   3 words); 64-bit target splits into DImode words.  */
16473 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16475 size = (GET_MODE_SIZE (mode) + 4) / 8;
16477 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16478 gcc_assert (size >= 2 && size <= 4);
16480 /* Optimize constant pool reference to immediates. This is used by fp
16481 moves, that force all constants to memory to allow combining. */
16482 if (MEM_P (operand) && MEM_READONLY_P (operand))
16484 rtx tmp = maybe_get_pool_constant (operand);
16489 if (MEM_P (operand) && !offsettable_memref_p (operand))
16491 /* The only non-offsetable memories we handle are pushes. */
16492 int ok = push_operand (operand, VOIDmode);
/* For a push, every part is the same pre-dec/post-inc address.  */
16496 operand = copy_rtx (operand);
16497 PUT_MODE (operand, Pmode);
16498 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16502 if (GET_CODE (operand) == CONST_VECTOR)
16504 enum machine_mode imode = int_mode_for_mode (mode);
16505 /* Caution: if we looked through a constant pool memory above,
16506 the operand may actually have a different mode now. That's
16507 ok, since we want to pun this all the way back to an integer. */
16508 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16509 gcc_assert (operand != NULL);
/* 32-bit target: split into SImode pieces.  */
16515 if (mode == DImode)
16516 split_di (&operand, 1, &parts[0], &parts[1]);
16521 if (REG_P (operand))
16523 gcc_assert (reload_completed);
16524 for (i = 0; i < size; i++)
16525 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16527 else if (offsettable_memref_p (operand))
16529 operand = adjust_address (operand, SImode, 0);
16530 parts[0] = operand;
16531 for (i = 1; i < size; i++)
16532 parts[i] = adjust_address (operand, SImode, 4 * i);
16534 else if (GET_CODE (operand) == CONST_DOUBLE)
/* FP constant: convert to the target representation and emit the
   words as SImode immediates (high words first in the switch).  */
16539 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16543 real_to_target (l, &r, mode);
16544 parts[3] = gen_int_mode (l[3], SImode);
16545 parts[2] = gen_int_mode (l[2], SImode);
16548 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16549 parts[2] = gen_int_mode (l[2], SImode);
16552 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16555 gcc_unreachable ();
16557 parts[1] = gen_int_mode (l[1], SImode);
16558 parts[0] = gen_int_mode (l[0], SImode);
16561 gcc_unreachable ();
/* 64-bit target: split into DImode pieces (upper part of XFmode is
   only SImode wide).  */
16566 if (mode == TImode)
16567 split_ti (&operand, 1, &parts[0], &parts[1]);
16568 if (mode == XFmode || mode == TFmode)
16570 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16571 if (REG_P (operand))
16573 gcc_assert (reload_completed);
16574 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16575 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16577 else if (offsettable_memref_p (operand))
16579 operand = adjust_address (operand, DImode, 0);
16580 parts[0] = operand;
16581 parts[1] = adjust_address (operand, upper_mode, 8);
16583 else if (GET_CODE (operand) == CONST_DOUBLE)
16588 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16589 real_to_target (l, &r, mode);
16591 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16592 if (HOST_BITS_PER_WIDE_INT >= 64)
16595 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16596 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16599 parts[0] = immed_double_const (l[0], l[1], DImode);
16601 if (upper_mode == SImode)
16602 parts[1] = gen_int_mode (l[2], SImode);
16603 else if (HOST_BITS_PER_WIDE_INT >= 64)
16606 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16607 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16610 parts[1] = immed_double_const (l[2], l[3], DImode);
16613 gcc_unreachable ();
16620 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16621 Return false when normal moves are needed; true when all required
16622 insns have been emitted. Operands 2-4 contain the input values
16623 in the correct order; operands 5-7 contain the output values. */
/* NOTE(review): the copy loops at the bottom actually fill operands[2+i]
   with destination parts and operands[6+i] with source parts; the operand
   ranges quoted in the header comment above look stale -- confirm against
   the i386.md splitters that consume them.  */
16626 ix86_split_long_move (rtx operands[])
16631 int collisions = 0;
16632 enum machine_mode mode = GET_MODE (operands[0]);
16633 bool collisionparts[4];
16635 /* The DFmode expanders may ask us to move double.
16636 For 64bit target this is single move. By hiding the fact
16637 here we simplify i386.md splitters. */
16638 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16640 /* Optimize constant pool reference to immediates. This is used by
16641 fp moves, that force all constants to memory to allow combining. */
16643 if (MEM_P (operands[1])
16644 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16645 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16646 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16647 if (push_operand (operands[0], VOIDmode))
16649 operands[0] = copy_rtx (operands[0]);
16650 PUT_MODE (operands[0], Pmode);
/* 64-bit single move: retype both sides to DImode and emit one move.  */
16653 operands[0] = gen_lowpart (DImode, operands[0]);
16654 operands[1] = gen_lowpart (DImode, operands[1]);
16655 emit_move_insn (operands[0], operands[1]);
16659 /* The only non-offsettable memory we handle is push. */
16660 if (push_operand (operands[0], VOIDmode))
16663 gcc_assert (!MEM_P (operands[0])
16664 || offsettable_memref_p (operands[0]));
/* Decompose both operands into word-sized parts (see ix86_split_to_parts).  */
16666 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16667 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16669 /* When emitting push, take care for source operands on the stack. */
16670 if (push && MEM_P (operands[1])
16671 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16672 for (i = 0; i < nparts - 1; i++)
16673 part[1][i] = change_address (part[1][i],
16674 GET_MODE (part[1][i]),
16675 XEXP (part[1][i + 1], 0));
16677 /* We need to do copy in the right order in case an address register
16678 of the source overlaps the destination. */
16679 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
/* Record which destination parts are mentioned in the source address.  */
16683 for (i = 0; i < nparts; i++)
16686 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16687 if (collisionparts[i])
16691 /* Collision in the middle part can be handled by reordering. */
16692 if (collisions == 1 && nparts == 3 && collisionparts [1])
16694 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16695 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16697 else if (collisions == 1
16699 && (collisionparts [1] || collisionparts [2]))
16701 if (collisionparts [1])
16703 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16704 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16708 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16709 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16713 /* If there are more collisions, we can't handle it by reordering.
16714 Do an lea to the last part and use only one colliding move. */
16715 else if (collisions > 1)
16721 base = part[0][nparts - 1];
16723 /* Handle the case when the last part isn't valid for lea.
16724 Happens in 64-bit mode storing the 12-byte XFmode. */
16725 if (GET_MODE (base) != Pmode)
16726 base = gen_rtx_REG (Pmode, REGNO (base));
/* Materialize the source address in BASE once, then rewrite every
   source part as BASE + i * UNITS_PER_WORD.  */
16728 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16729 part[1][0] = replace_equiv_address (part[1][0], base);
16730 for (i = 1; i < nparts; i++)
16732 tmp = plus_constant (base, UNITS_PER_WORD * i);
16733 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path: parts are pushed from the highest one down.  The extra 4-byte
   stack adjustment presumably pads a 12-byte XFmode push out to a 16-byte
   slot when long double is 128-bit -- TODO confirm against the ABI.  */
16744 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16745 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16746 emit_move_insn (part[0][2], part[1][2]);
16748 else if (nparts == 4)
16750 emit_move_insn (part[0][3], part[1][3]);
16751 emit_move_insn (part[0][2], part[1][2]);
16756 /* In 64bit mode we don't have 32bit push available. In case this is
16757 register, it is OK - we will just use larger counterpart. We also
16758 retype memory - these comes from attempt to avoid REX prefix on
16759 moving of second half of TFmode value. */
16760 if (GET_MODE (part[1][1]) == SImode)
16762 switch (GET_CODE (part[1][1]))
16765 part[1][1] = adjust_address (part[1][1], DImode, 0);
16769 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16773 gcc_unreachable ();
16776 if (GET_MODE (part[1][0]) == SImode)
16777 part[1][0] = part[1][1];
16780 emit_move_insn (part[0][1], part[1][1]);
16781 emit_move_insn (part[0][0], part[1][0]);
16785 /* Choose correct order to not overwrite the source before it is copied. */
16786 if ((REG_P (part[0][0])
16787 && REG_P (part[1][1])
16788 && (REGNO (part[0][0]) == REGNO (part[1][1])
16790 && REGNO (part[0][0]) == REGNO (part[1][2]))
16792 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16794 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reversed order: copy highest part first.  */
16796 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16798 operands[2 + i] = part[0][j];
16799 operands[6 + i] = part[1][j];
/* Normal order: copy lowest part first.  */
16804 for (i = 0; i < nparts; i++)
16806 operands[2 + i] = part[0][i];
16807 operands[6 + i] = part[1][i];
16811 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16812 if (optimize_insn_for_size_p ())
16814 for (j = 0; j < nparts - 1; j++)
16815 if (CONST_INT_P (operands[6 + j])
16816 && operands[6 + j] != const0_rtx
16817 && REG_P (operands[2 + j]))
16818 for (i = j; i < nparts - 1; i++)
16819 if (CONST_INT_P (operands[7 + i])
16820 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
/* Reuse the register that already holds this constant as the source
   of the later part, saving an immediate re-load.  */
16821 operands[7 + i] = operands[2 + j];
16824 for (i = 0; i < nparts; i++)
16825 emit_move_insn (operands[2 + i], operands[6 + i]);
16830 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16831 left shift by a constant, either using a single shift or
16832 a sequence of add instructions. */
/* OPERAND is shifted left in place by COUNT bits.  Each add of the operand
   to itself is a shift by one; the add sequence is used only when it is
   cheaper than a single constant shift per the active cost table.  */
16835 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16839 emit_insn ((mode == DImode
16841 : gen_adddi3) (operand, operand, operand));
16843 else if (!optimize_insn_for_size_p ()
16844 && count * ix86_cost->add <= ix86_cost->shift_const)
16847 for (i=0; i<count; i++)
16849 emit_insn ((mode == DImode
16851 : gen_adddi3) (operand, operand, operand));
/* Fallback: emit one explicit shift-by-COUNT insn.  */
16855 emit_insn ((mode == DImode
16857 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode pair on 32-bit, TImode pair on
   64-bit) into single-word operations.  operands[0] is the destination,
   operands[1] the source, operands[2] the shift count; SCRATCH, when
   non-NULL together with TARGET_CMOVE, enables the cmove-based variable
   count adjustment.  SINGLE_WIDTH is the bit width of one half.  */
16861 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16863 rtx low[2], high[2];
16865 const int single_width = mode == DImode ? 32 : 64;
16867 if (CONST_INT_P (operands[2]))
16869 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16870 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Constant count >= one word: low word becomes zero, old low word
   (further shifted if needed) becomes the high word.  */
16872 if (count >= single_width)
16874 emit_move_insn (high[0], low[1]);
16875 emit_move_insn (low[0], const0_rtx);
16877 if (count > single_width)
16878 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Constant count < one word: shld the carried-out bits into the high
   word, then shift the low word.  */
16882 if (!rtx_equal_p (operands[0], operands[1]))
16883 emit_move_insn (operands[0], operands[1]);
16884 emit_insn ((mode == DImode
16886 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)))!;
16887 ix86_expand_ashl_const (low[0], count, mode);
16892 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16894 if (operands[1] == const1_rtx)
16896 /* Assuming we've chosen a QImode capable registers, then 1 << N
16897 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16898 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16900 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16902 ix86_expand_clear (low[0]);
16903 ix86_expand_clear (high[0]);
/* Test the "count >= single_width" bit of the count, then use sete/setne
   (via STRICT_LOW_PART QImode stores) to put the single 1 bit in the
   correct half before the word shifts below.  */
16904 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16906 d = gen_lowpart (QImode, low[0]);
16907 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16908 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16909 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16911 d = gen_lowpart (QImode, high[0]);
16912 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16913 s = gen_rtx_NE (QImode, flags, const0_rtx);
16914 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16917 /* Otherwise, we can get the same results by manually performing
16918 a bit extract operation on bit 5/6, and then performing the two
16919 shifts. The two methods of getting 0/1 into low/high are exactly
16920 the same size. Avoiding the shift in the bit extract case helps
16921 pentium4 a bit; no one else seems to care much either way. */
16926 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16927 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16929 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16930 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) / bit 6 (TImode) of the count into high[0],
   then derive low[0] as its complement (exactly one half gets the 1).  */
16932 emit_insn ((mode == DImode
16934 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16935 emit_insn ((mode == DImode
16937 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16938 emit_move_insn (low[0], high[0]);
16939 emit_insn ((mode == DImode
16941 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16944 emit_insn ((mode == DImode
16946 : gen_ashldi3) (low[0], low[0], operands[2]));
16947 emit_insn ((mode == DImode
16949 : gen_ashldi3) (high[0], high[0], operands[2]));
16953 if (operands[1] == constm1_rtx)
16955 /* For -1 << N, we can avoid the shld instruction, because we
16956 know that we're shifting 0...31/63 ones into a -1. */
16957 emit_move_insn (low[0], constm1_rtx)!;
16958 if (optimize_insn_for_size_p ())
16959 emit_move_insn (high[0], low[0]);
16961 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld into the high word, shift the low
   word, then fix up when the count is >= one word.  */
16965 if (!rtx_equal_p (operands[0], operands[1]))
16966 emit_move_insn (operands[0], operands[1]);
16968 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16969 emit_insn ((mode == DImode
16971 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16974 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
16976 if (TARGET_CMOVE && scratch)
16978 ix86_expand_clear (scratch);
16979 emit_insn ((mode == DImode
16980 ? gen_x86_shift_adj_1
16981 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16985 emit_insn ((mode == DImode
16986 ? gen_x86_shift_adj_2
16987 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into single-word operations.
   operands[0] = destination, operands[1] = source, operands[2] = count;
   SCRATCH enables the cmove-based fixup for variable counts.  */
16991 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16993 rtx low[2], high[2];
16995 const int single_width = mode == DImode ? 32 : 64;
16997 if (CONST_INT_P (operands[2]))
16999 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17000 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Full-width - 1 shift: both halves become the sign word.  */
17002 if (count == single_width * 2 - 1)
17004 emit_move_insn (high[0], high[1]);
17005 emit_insn ((mode == DImode
17007 : gen_ashrdi3) (high[0], high[0],
17008 GEN_INT (single_width - 1)));
17009 emit_move_insn (low[0], high[0]);
/* Count >= one word: old high word (shifted further if needed) becomes
   the low word; high word is filled with sign bits.  */
17012 else if (count >= single_width)
17014 emit_move_insn (low[0], high[1]);
17015 emit_move_insn (high[0], low[0]);
17016 emit_insn ((mode == DImode
17018 : gen_ashrdi3) (high[0], high[0],
17019 GEN_INT (single_width - 1)));
17020 if (count > single_width)
17021 emit_insn ((mode == DImode
17023 : gen_ashrdi3) (low[0], low[0],
17024 GEN_INT (count - single_width)));
/* Count < one word: shrd bits from the high word into the low word,
   then arithmetic-shift the high word.  */
17028 if (!rtx_equal_p (operands[0], operands[1]))
17029 emit_move_insn (operands[0], operands[1]);
17030 emit_insn ((mode == DImode
17032 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17033 emit_insn ((mode == DImode
17035 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + sar, then fix up counts >= one word.  */
17040 if (!rtx_equal_p (operands[0], operands[1]))
17041 emit_move_insn (operands[0], operands[1]);
17043 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17045 emit_insn ((mode == DImode
17047 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17048 emit_insn ((mode == DImode
17050 : gen_ashrdi3) (high[0], high[0], operands[2]));
17052 if (TARGET_CMOVE && scratch)
/* SCRATCH holds the sign-extension word used by the cmove fixup.  */
17054 emit_move_insn (scratch, high[0]);
17055 emit_insn ((mode == DImode
17057 : gen_ashrdi3) (scratch, scratch,
17058 GEN_INT (single_width - 1)));
17059 emit_insn ((mode == DImode
17060 ? gen_x86_shift_adj_1
17061 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17065 emit_insn ((mode == DImode
17066 ? gen_x86_shift_adj_3
17067 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into single-word operations.
   Same operand layout as ix86_split_ashr, but the vacated high bits are
   zero-filled instead of sign-filled.  */
17072 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17074 rtx low[2], high[2];
17076 const int single_width = mode == DImode ? 32 : 64;
17078 if (CONST_INT_P (operands[2]))
17080 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17081 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= one word: old high word (shifted further if needed) becomes
   the low word; the high word becomes zero.  */
17083 if (count >= single_width)
17085 emit_move_insn (low[0], high[1]);
17086 ix86_expand_clear (high[0]);
17088 if (count > single_width)
17089 emit_insn ((mode == DImode
17091 : gen_lshrdi3) (low[0], low[0],
17092 GEN_INT (count - single_width)));
/* Count < one word: shrd into the low word, logical-shift the high word.  */
17096 if (!rtx_equal_p (operands[0], operands[1]))
17097 emit_move_insn (operands[0], operands[1]);
17098 emit_insn ((mode == DImode
17100 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17101 emit_insn ((mode == DImode
17103 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + shr, then fix up counts >= one word.  */
17108 if (!rtx_equal_p (operands[0], operands[1]))
17109 emit_move_insn (operands[0], operands[1]);
17111 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17113 emit_insn ((mode == DImode
17115 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17116 emit_insn ((mode == DImode
17118 : gen_lshrdi3) (high[0], high[0], operands[2]));
17120 /* Heh. By reversing the arguments, we can reuse this pattern. */
17121 if (TARGET_CMOVE && scratch)
17123 ix86_expand_clear (scratch);
17124 emit_insn ((mode == DImode
17125 ? gen_x86_shift_adj_1
17126 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17130 emit_insn ((mode == DImode
17131 ? gen_x86_shift_adj_2
17132 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17136 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* PROB is expressed in units of REG_BR_PROB_BASE (callers pass e.g.
   REG_BR_PROB_BASE * 50 / 100).  The last emitted insn must be a jump.  */
17138 predict_jump (int prob)
17140 rtx insn = get_last_insn ();
17141 gcc_assert (JUMP_P (insn));
17142 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17145 /* Helper function for the string operations below. Dest VARIABLE whether
17146 it is aligned to VALUE bytes. If true, jump to the label. */
/* Clarification: emits (VARIABLE & VALUE) into a fresh temporary and a
   branch to the returned label taken when that masked value is zero,
   i.e. when the VALUE-alignment bit of VARIABLE is clear.  EPILOGUE
   only selects the branch-probability hint (50% vs 90% taken).  */
17148 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17150 rtx label = gen_label_rtx ();
17151 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17152 if (GET_MODE (variable) == DImode)
17153 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17155 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17156 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17159 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17161 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17165 /* Adjust COUNTER by the VALUE. */
/* Subtracts VALUE from COUNTREG in place (emits an add of -VALUE),
   using the add insn matching the register's mode.  */
17167 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17169 if (GET_MODE (countreg) == DImode)
17170 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17172 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17175 /* Zero extend possibly SImode EXP to Pmode register. */
/* Returns a fresh Pmode register holding EXP: constants (VOIDmode) are
   forced into a register, Pmode values are copied, and anything else is
   zero-extended via zero_extendsidi2 (so EXP is then expected to be
   SImode on a 64-bit target).  */
17177 ix86_zero_extend_to_Pmode (rtx exp)
17180 if (GET_MODE (exp) == VOIDmode)
17181 return force_reg (Pmode, exp);
17182 if (GET_MODE (exp) == Pmode)
17183 return copy_to_mode_reg (Pmode, exp);
17184 r = gen_reg_rtx (Pmode);
17185 emit_insn (gen_zero_extendsidi2 (r, exp));
17189 /* Divide COUNTREG by SCALE. */
/* Constant counts are divided at compile time; register counts are
   divided by a logical right shift, so SCALE must be a power of two
   (exact_log2).  */
17191 scale_counter (rtx countreg, int scale)
17194 rtx piece_size_mask;
17198 if (CONST_INT_P (countreg))
17199 return GEN_INT (INTVAL (countreg) / scale);
17200 gcc_assert (REG_P (countreg));
/* NOTE(review): piece_size_mask is computed but never used in the visible
   body -- looks like dead code; confirm before removing.  */
17202 piece_size_mask = GEN_INT (scale - 1);
17203 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17204 GEN_INT (exact_log2 (scale)),
17205 NULL, 1, OPTAB_DIRECT);
17209 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17210 DImode for constant loop counts. */
/* A count rtx with a real machine mode keeps it; otherwise (VOIDmode
   constants) DImode is chosen only when the 64-bit target actually needs
   bits above the low 32.  */
17212 static enum machine_mode
17213 counter_mode (rtx count_exp)
17215 if (GET_MODE (count_exp) != VOIDmode)
17216 return GET_MODE (count_exp);
17217 if (GET_CODE (count_exp) != CONST_INT)
17219 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17224 /* When SRCPTR is non-NULL, output simple loop to move memory
17225 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
17226 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
17227 equivalent loop to set memory by VALUE (supposed to be in MODE).
17229 The size is rounded down to whole number of chunk size moved at once.
17230 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
17234 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17235 rtx destptr, rtx srcptr, rtx value,
17236 rtx count, enum machine_mode mode, int unroll,
17239 rtx out_label, top_label, iter, tmp;
17240 enum machine_mode iter_mode = counter_mode (count);
17241 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17242 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17248 top_label = gen_label_rtx ();
17249 out_label = gen_label_rtx ();
17250 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the unrolled chunk size.  */
17252 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17253 NULL, 1, OPTAB_DIRECT);
17254 /* Those two should combine. */
17255 if (piece_size == const1_rtx)
/* Skip the whole loop when there is no full chunk to process.  */
17257 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17259 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17261 emit_move_insn (iter, const0_rtx);
17263 emit_label (top_label);
/* Address each chunk as DESTPTR + ITER (and SRCPTR + ITER for copies).  */
17265 tmp = convert_modes (Pmode, iter_mode, iter, true);
17266 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17267 destmem = change_address (destmem, mode, x_addr);
17271 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17272 srcmem = change_address (srcmem, mode, y_addr);
17274 /* When unrolling for chips that reorder memory reads and writes,
17275 we can save registers by using single temporary.
17276 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the "&& 0" below permanently disables this branch --
   the single-temporary path is dead code as written.  */
17277 if (!TARGET_64BIT && 0)
17279 for (i = 0; i < unroll; i++)
17284 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17286 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17288 emit_move_insn (destmem, srcmem);
/* Live copy path: load all UNROLL chunks into temporaries first, then
   store them, so loads and stores are not interleaved.  */
17294 gcc_assert (unroll <= 4);
17295 for (i = 0; i < unroll; i++)
17297 tmpreg[i] = gen_reg_rtx (mode);
17301 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17303 emit_move_insn (tmpreg[i], srcmem);
17305 for (i = 0; i < unroll; i++)
17310 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17312 emit_move_insn (destmem, tmpreg[i]);
/* Memset variant (SRCPTR == NULL): store VALUE into each chunk.  */
17317 for (i = 0; i < unroll; i++)
17321 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17322 emit_move_insn (destmem, value);
17325 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17326 true, OPTAB_LIB_WIDEN);
17328 emit_move_insn (iter, tmp);
/* Loop back while ITER < SIZE; hint the branch from EXPECTED_SIZE when
   the caller supplied one (in iterations of the unrolled body).  */
17330 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
17332 if (expected_size != -1)
17334 expected_size /= GET_MODE_SIZE (mode) * unroll;
17335 if (expected_size == 0)
17337 else if (expected_size > REG_BR_PROB_BASE)
17338 predict_jump (REG_BR_PROB_BASE - 1);
17340 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17343 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the caller's pointers past the bytes the loop consumed.  */
17344 iter = ix86_zero_extend_to_Pmode (iter);
17345 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17346 true, OPTAB_LIB_WIDEN);
17347 if (tmp != destptr)
17348 emit_move_insn (destptr, tmp);
17351 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17352 true, OPTAB_LIB_WIDEN);
17354 emit_move_insn (srcptr, tmp);
17356 emit_label (out_label);
17359 /* Output "rep; mov" instruction.
17360 Arguments have same meaning as for previous function */
17362 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17363 rtx destptr, rtx srcptr,
17365 enum machine_mode mode)
17371 /* If the size is known, it is shorter to use rep movs. */
17372 if (mode == QImode && CONST_INT_P (count)
17373 && !(INTVAL (count) & 3))
/* Normalize both MEMs to BLKmode based at their pointer registers so the
   rep_mov pattern sees plain block references.  */
17376 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17377 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17378 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17379 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
/* COUNTREG counts MODE-sized elements, not bytes.  */
17380 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17381 if (mode != QImode)
/* DESTEXP/SRCEXP describe the final pointer values:
   ptr + (countreg << log2 (element size)).  */
17383 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17384 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17385 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17386 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17387 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17388 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17392 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17393 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
/* Record the exact byte count on the MEMs when it is known, otherwise
   clear any stale size info.  */
17395 if (CONST_INT_P (count))
17397 count = GEN_INT (INTVAL (count)
17398 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17399 destmem = shallow_copy_rtx (destmem);
17400 srcmem = shallow_copy_rtx (srcmem);
17401 set_mem_size (destmem, count);
17402 set_mem_size (srcmem, count);
17406 if (MEM_SIZE (destmem))
17407 set_mem_size (destmem, NULL_RTX);
17408 if (MEM_SIZE (srcmem))
17409 set_mem_size (srcmem, NULL_RTX);
17411 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17415 /* Output "rep; stos" instruction.
17416 Arguments have same meaning as for previous function */
/* ORIG_VALUE is the pre-broadcast fill value; only a known zero fill with
   a constant count keeps precise MEM size info (see below).  */
17418 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17419 rtx count, enum machine_mode mode,
17425 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17426 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* Force the fill value into a MODE register for the stos pattern.  */
17427 value = force_reg (mode, gen_lowpart (mode, value));
/* COUNTREG counts MODE-sized elements, not bytes.  */
17428 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17429 if (mode != QImode)
17431 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17432 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17433 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17436 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17437 if (orig_value == const0_rtx && CONST_INT_P (count))
17439 count = GEN_INT (INTVAL (count)
17440 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17441 destmem = shallow_copy_rtx (destmem);
17442 set_mem_size (destmem, count);
17444 else if (MEM_SIZE (destmem))
17445 set_mem_size (destmem, NULL_RTX);
17446 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit one MODE-sized string-move (movs) of SRCMEM+OFFSET to
   DESTMEM+OFFSET; the strmov pattern also advances DESTPTR/SRCPTR.  */
17450 emit_strmov (rtx destmem, rtx srcmem,
17451 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17453 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17454 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17455 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17458 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
17460 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17461 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant count: emit straight-line moves for each set bit of the
   remaining byte count (16, 8, 4, 2, 1), largest first.  */
17464 if (CONST_INT_P (count))
17466 HOST_WIDE_INT countval = INTVAL (count);
17469 if ((countval & 0x10) && max_size > 16)
17473 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17474 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17477 gcc_unreachable ();
17480 if ((countval & 0x08) && max_size > 8)
17483 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
/* 32-bit target: two SImode moves instead of one DImode move.  */
17486 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17487 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17491 if ((countval & 0x04) && max_size > 4)
17493 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17496 if ((countval & 0x02) && max_size > 2)
17498 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17501 if ((countval & 0x01) && max_size > 1)
17503 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large variable remainder: mask the count and fall back to a byte loop.  */
17510 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17511 count, 1, OPTAB_DIRECT);
17512 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17513 count, QImode, 1, 4);
17517 /* When there are stringops, we can cheaply increase dest and src pointers.
17518 Otherwise we save code size by maintaining offset (zero is readily
17519 available from preceding rep operation) and using x86 addressing modes.
/* Variable count < max_size: test each power-of-two bit of COUNT and
   conditionally copy that many bytes, using movs when single stringops
   are cheap on this target.  */
17521 if (TARGET_SINGLE_STRINGOP)
17525 rtx label = ix86_expand_aligntest (count, 4, true);
17526 src = change_address (srcmem, SImode, srcptr);
17527 dest = change_address (destmem, SImode, destptr);
17528 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17529 emit_label (label);
17530 LABEL_NUSES (label) = 1;
17534 rtx label = ix86_expand_aligntest (count, 2, true);
17535 src = change_address (srcmem, HImode, srcptr);
17536 dest = change_address (destmem, HImode, destptr);
17537 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17538 emit_label (label);
17539 LABEL_NUSES (label) = 1;
17543 rtx label = ix86_expand_aligntest (count, 1, true);
17544 src = change_address (srcmem, QImode, srcptr);
17545 dest = change_address (destmem, QImode, destptr);
17546 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17547 emit_label (label);
17548 LABEL_NUSES (label) = 1;
/* No cheap stringops: keep a running OFFSET register and address the
   tails as ptr + offset with plain moves.  */
17553 rtx offset = force_reg (Pmode, const0_rtx);
17558 rtx label = ix86_expand_aligntest (count, 4, true);
17559 src = change_address (srcmem, SImode, srcptr);
17560 dest = change_address (destmem, SImode, destptr);
17561 emit_move_insn (dest, src);
17562 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17563 true, OPTAB_LIB_WIDEN);
17565 emit_move_insn (offset, tmp);
17566 emit_label (label);
17567 LABEL_NUSES (label) = 1;
17571 rtx label = ix86_expand_aligntest (count, 2, true);
17572 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17573 src = change_address (srcmem, HImode, tmp);
17574 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17575 dest = change_address (destmem, HImode, tmp);
17576 emit_move_insn (dest, src);
17577 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17578 true, OPTAB_LIB_WIDEN);
17580 emit_move_insn (offset, tmp);
17581 emit_label (label);
17582 LABEL_NUSES (label) = 1;
17586 rtx label = ix86_expand_aligntest (count, 1, true);
17587 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17588 src = change_address (srcmem, QImode, tmp);
17589 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17590 dest = change_address (destmem, QImode, tmp);
17591 emit_move_insn (dest, src);
17592 emit_label (label);
17593 LABEL_NUSES (label) = 1;
17598 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Masks COUNT down to the residual byte count and delegates to the
   generic byte-at-a-time set loop (srcmem/srcptr NULL selects memset).  */
17600 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17601 rtx count, int max_size)
17604 expand_simple_binop (counter_mode (count), AND, count,
17605 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17606 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17607 gen_lowpart (QImode, value), count, QImode,
17611 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* VALUE is assumed to be pre-broadcast so that its low SImode/DImode/
   HImode/QImode lowparts all contain the fill pattern -- TODO confirm
   against the callers that build VALUE.  */
17613 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
/* Constant count: straight-line stores for each set bit (16/8/4/2/1).  */
17617 if (CONST_INT_P (count))
17619 HOST_WIDE_INT countval = INTVAL (count);
17622 if ((countval & 0x10) && max_size > 16)
17626 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17627 emit_insn (gen_strset (destptr, dest, value));
17628 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17629 emit_insn (gen_strset (destptr, dest, value));
17632 gcc_unreachable ();
17635 if ((countval & 0x08) && max_size > 8)
17639 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17640 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit target: two SImode stores instead of one DImode store.  */
17644 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17645 emit_insn (gen_strset (destptr, dest, value));
17646 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17647 emit_insn (gen_strset (destptr, dest, value));
17651 if ((countval & 0x04) && max_size > 4)
17653 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17654 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17657 if ((countval & 0x02) && max_size > 2)
17659 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17660 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17663 if ((countval & 0x01) && max_size > 1)
17665 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17666 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Large variable remainder: fall back to the byte-set loop helper.  */
17673 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable count < max_size: test each power-of-two bit of COUNT and
   conditionally store that many bytes (largest chunk first).  */
17678 rtx label = ix86_expand_aligntest (count, 16, true);
17681 dest = change_address (destmem, DImode, destptr);
17682 emit_insn (gen_strset (destptr, dest, value));
17683 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit target: four SImode stos for the 16-byte chunk.  */
17687 dest = change_address (destmem, SImode, destptr);
17688 emit_insn (gen_strset (destptr, dest, value));
17689 emit_insn (gen_strset (destptr, dest, value));
17690 emit_insn (gen_strset (destptr, dest, value));
17691 emit_insn (gen_strset (destptr, dest, value));
17693 emit_label (label);
17694 LABEL_NUSES (label) = 1;
17698 rtx label = ix86_expand_aligntest (count, 8, true);
17701 dest = change_address (destmem, DImode, destptr);
17702 emit_insn (gen_strset (destptr, dest, value));
17706 dest = change_address (destmem, SImode, destptr);
17707 emit_insn (gen_strset (destptr, dest, value));
17708 emit_insn (gen_strset (destptr, dest, value));
17710 emit_label (label);
17711 LABEL_NUSES (label) = 1;
17715 rtx label = ix86_expand_aligntest (count, 4, true);
17716 dest = change_address (destmem, SImode, destptr);
17717 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17718 emit_label (label);
17719 LABEL_NUSES (label) = 1;
17723 rtx label = ix86_expand_aligntest (count, 2, true);
17724 dest = change_address (destmem, HImode, destptr);
17725 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17726 emit_label (label);
17727 LABEL_NUSES (label) = 1;
17731 rtx label = ix86_expand_aligntest (count, 1, true);
17732 dest = change_address (destmem, QImode, destptr);
17733 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17734 emit_label (label);
17735 LABEL_NUSES (label) = 1;
17739 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
17740 DESIRED_ALIGNMENT. */
/* For each alignment step (1 -> 2 -> 4 -> 8) a conditional single-unit
   copy is emitted: ix86_expand_aligntest branches over the copy when the
   corresponding low bit of DESTPTR is already clear.  COUNT is reduced by
   the bytes consumed.  Only SRC data is moved (the "from DEST to SRC"
   wording in the header comment above is reversed).  */
17742 expand_movmem_prologue (rtx destmem, rtx srcmem,
17743 rtx destptr, rtx srcptr, rtx count,
17744 int align, int desired_alignment)
17746 if (align <= 1 && desired_alignment > 1)
17748 rtx label = ix86_expand_aligntest (destptr, 1, false);
17749 srcmem = change_address (srcmem, QImode, srcptr);
17750 destmem = change_address (destmem, QImode, destptr);
17751 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17752 ix86_adjust_counter (count, 1);
17753 emit_label (label);
17754 LABEL_NUSES (label) = 1;
17756 if (align <= 2 && desired_alignment > 2)
17758 rtx label = ix86_expand_aligntest (destptr, 2, false);
17759 srcmem = change_address (srcmem, HImode, srcptr);
17760 destmem = change_address (destmem, HImode, destptr);
17761 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17762 ix86_adjust_counter (count, 2);
17763 emit_label (label);
17764 LABEL_NUSES (label) = 1;
17766 if (align <= 4 && desired_alignment > 4)
17768 rtx label = ix86_expand_aligntest (destptr, 4, false);
17769 srcmem = change_address (srcmem, SImode, srcptr);
17770 destmem = change_address (destmem, SImode, destptr);
17771 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17772 ix86_adjust_counter (count, 4);
17773 emit_label (label);
17774 LABEL_NUSES (label) = 1;
/* Larger alignments would need a loop; callers never request > 8 here.  */
17776 gcc_assert (desired_alignment <= 8);
17779 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
17780 ALIGN_BYTES is how many bytes need to be copied. */
17782 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17783 int desired_align, int align_bytes)
17786 rtx src_size, dst_size;
17788 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17789 if (src_align_bytes >= 0)
17790 src_align_bytes = desired_align - src_align_bytes;
17791 src_size = MEM_SIZE (src);
17792 dst_size = MEM_SIZE (dst);
17793 if (align_bytes & 1)
17795 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17796 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17798 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17800 if (align_bytes & 2)
17802 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17803 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17804 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17805 set_mem_align (dst, 2 * BITS_PER_UNIT);
17806 if (src_align_bytes >= 0
17807 && (src_align_bytes & 1) == (align_bytes & 1)
17808 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17809 set_mem_align (src, 2 * BITS_PER_UNIT);
17811 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17813 if (align_bytes & 4)
17815 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17816 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17817 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17818 set_mem_align (dst, 4 * BITS_PER_UNIT);
17819 if (src_align_bytes >= 0)
17821 unsigned int src_align = 0;
17822 if ((src_align_bytes & 3) == (align_bytes & 3))
17824 else if ((src_align_bytes & 1) == (align_bytes & 1))
17826 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17827 set_mem_align (src, src_align * BITS_PER_UNIT);
17830 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17832 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17833 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17834 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17835 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17836 if (src_align_bytes >= 0)
17838 unsigned int src_align = 0;
17839 if ((src_align_bytes & 7) == (align_bytes & 7))
17841 else if ((src_align_bytes & 3) == (align_bytes & 3))
17843 else if ((src_align_bytes & 1) == (align_bytes & 1))
17845 if (src_align > (unsigned int) desired_align)
17846 src_align = desired_align;
17847 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17848 set_mem_align (src, src_align * BITS_PER_UNIT);
17851 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17853 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
17858 /* Store enough bytes of VALUE at DEST to align DEST, known to be aligned
17859 by ALIGN, to DESIRED_ALIGNMENT. */
/* Emit a prologue storing 1, 2 and then 4 bytes of VALUE as needed so
   that DESTPTR becomes aligned to DESIRED_ALIGNMENT, decreasing COUNT by
   the bytes stored.  Each step is guarded by a runtime alignment test.
   NOTE(review): this extract elides some original lines (return type,
   braces); only comments were added here.  */
17861 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17862 int align, int desired_alignment)
/* Store one byte if the destination may be odd.  */
17864 if (align <= 1 && desired_alignment > 1)
17866 rtx label = ix86_expand_aligntest (destptr, 1, false);
17867 destmem = change_address (destmem, QImode, destptr);
17868 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17869 ix86_adjust_counter (count, 1);
17870 emit_label (label);
17871 LABEL_NUSES (label) = 1;
/* Store two bytes if the destination may be 2-byte misaligned.  */
17873 if (align <= 2 && desired_alignment > 2)
17875 rtx label = ix86_expand_aligntest (destptr, 2, false);
17876 destmem = change_address (destmem, HImode, destptr);
17877 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17878 ix86_adjust_counter (count, 2);
17879 emit_label (label);
17880 LABEL_NUSES (label) = 1;
/* Store four bytes if the destination may be 4-byte misaligned.  */
17882 if (align <= 4 && desired_alignment > 4)
17884 rtx label = ix86_expand_aligntest (destptr, 4, false);
17885 destmem = change_address (destmem, SImode, destptr);
17886 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17887 ix86_adjust_counter (count, 4);
17888 emit_label (label);
17889 LABEL_NUSES (label) = 1;
/* Alignments above 8 would need an additional 8-byte step.  */
17891 gcc_assert (desired_alignment <= 8);
17894 /* Store enough bytes of VALUE at DST to align DST, known to be aligned by
17895 ALIGN, to DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
/* Like expand_constant_movmem_prologue, but for memset: store ALIGN_BYTES
   bytes of VALUE (1, 2 and 4 bytes as encoded by ALIGN_BYTES' low bits)
   so DST becomes aligned to DESIRED_ALIGN, keeping MEM alignment and size
   info accurate.  Only destination alignment is tracked.
   NOTE(review): the "off" updates and braces of the original are elided
   in this extract; only comments were added here.  */
17897 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17898 int desired_align, int align_bytes)
17901 rtx dst_size = MEM_SIZE (dst);
17902 if (align_bytes & 1)
17904 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17906 emit_insn (gen_strset (destreg, dst,
17907 gen_lowpart (QImode, value)));
17909 if (align_bytes & 2)
17911 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
/* Record the 2-byte alignment now established.  */
17912 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17913 set_mem_align (dst, 2 * BITS_PER_UNIT);
17915 emit_insn (gen_strset (destreg, dst,
17916 gen_lowpart (HImode, value)));
17918 if (align_bytes & 4)
17920 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17921 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17922 set_mem_align (dst, 4 * BITS_PER_UNIT);
17924 emit_insn (gen_strset (destreg, dst,
17925 gen_lowpart (SImode, value)));
/* The remainder of the block is now aligned to DESIRED_ALIGN.  */
17927 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17928 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17929 set_mem_align (dst, desired_align * BITS_PER_UNIT);
/* Shrink the recorded size by the bytes already stored.  */
17931 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17935 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
17936 static enum stringop_alg
17937 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17938 int *dynamic_check)
17940 const struct stringop_algs * algs;
17941 bool optimize_for_speed;
17942 /* Algorithms using the rep prefix want at least edi and ecx;
17943 additionally, memset wants eax and memcpy wants esi. Don't
17944 consider such algorithms if the user has appropriated those
17945 registers for their own purposes. */
17946 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17948 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17950 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17951 || (alg != rep_prefix_1_byte \
17952 && alg != rep_prefix_4_byte \
17953 && alg != rep_prefix_8_byte))
17954 const struct processor_costs *cost;
17956 /* Even if the string operation call is cold, we still might spend a lot
17957 of time processing large blocks. */
17958 if (optimize_function_for_size_p (cfun)
17959 || (optimize_insn_for_size_p ()
17960 && expected_size != -1 && expected_size < 256))
17961 optimize_for_speed = false;
17963 optimize_for_speed = true;
17965 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17967 *dynamic_check = -1;
17969 algs = &cost->memset[TARGET_64BIT != 0];
17971 algs = &cost->memcpy[TARGET_64BIT != 0];
17972 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17973 return stringop_alg;
17974 /* rep; movq or rep; movl is the smallest variant. */
17975 else if (!optimize_for_speed)
17977 if (!count || (count & 3))
17978 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17980 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17982 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
17984 else if (expected_size != -1 && expected_size < 4)
17985 return loop_1_byte;
17986 else if (expected_size != -1)
17989 enum stringop_alg alg = libcall;
17990 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
17992 /* We get here if the algorithms that were not libcall-based
17993 were rep-prefix based and we are unable to use rep prefixes
17994 based on global register usage. Break out of the loop and
17995 use the heuristic below. */
17996 if (algs->size[i].max == 0)
17998 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18000 enum stringop_alg candidate = algs->size[i].alg;
18002 if (candidate != libcall && ALG_USABLE_P (candidate))
18004 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18005 last non-libcall inline algorithm. */
18006 if (TARGET_INLINE_ALL_STRINGOPS)
18008 /* When the current size is best to be copied by a libcall,
18009 but we are still forced to inline, run the heuristic below
18010 that will pick code for medium sized blocks. */
18011 if (alg != libcall)
18015 else if (ALG_USABLE_P (candidate))
18019 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18021 /* When asked to inline the call anyway, try to pick meaningful choice.
18022 We look for maximal size of block that is faster to copy by hand and
18023 take blocks of at most of that size guessing that average size will
18024 be roughly half of the block.
18026 If this turns out to be bad, we might simply specify the preferred
18027 choice in ix86_costs. */
18028 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18029 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18032 enum stringop_alg alg;
18034 bool any_alg_usable_p = true;
18036 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
18038 enum stringop_alg candidate = algs->size[i].alg;
18039 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18041 if (candidate != libcall && candidate
18042 && ALG_USABLE_P (candidate))
18043 max = algs->size[i].max;
18045 /* If there aren't any usable algorithms, then recursing on
18046 smaller sizes isn't going to find anything. Just return the
18047 simple byte-at-a-time copy loop. */
18048 if (!any_alg_usable_p)
18050 /* Pick something reasonable. */
18051 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18052 *dynamic_check = 128;
18053 return loop_1_byte;
18057 alg = decide_alg (count, max / 2, memset, dynamic_check);
18058 gcc_assert (*dynamic_check == -1);
18059 gcc_assert (alg != libcall);
18060 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18061 *dynamic_check = max;
18064 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18065 #undef ALG_USABLE_P
18068 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18069 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* Choose the destination alignment to establish before the main loop of
   string-operation algorithm ALG; EXPECTED_SIZE tempers the choice for
   tiny blocks.  NOTE(review): several case labels and assignments are
   elided in this extract; only comments were added here.  */
18071 decide_alignment (int align,
18072 enum stringop_alg alg,
18075 int desired_align = 0;
18079 gcc_unreachable ();
18081 case unrolled_loop:
18082 desired_align = GET_MODE_SIZE (Pmode);
18084 case rep_prefix_8_byte:
18087 case rep_prefix_4_byte:
18088 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18089 copying whole cacheline at once. */
18090 if (TARGET_PENTIUMPRO)
18095 case rep_prefix_1_byte:
18096 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18097 copying whole cacheline at once. */
18098 if (TARGET_PENTIUMPRO)
/* Never lower the alignment below what is already known.  */
18112 if (desired_align < align)
18113 desired_align = align;
/* Tiny blocks are not worth an alignment prologue.  */
18114 if (expected_size != -1 && expected_size < 4)
18115 desired_align = align;
18116 return desired_align;
18119 /* Return the smallest power of 2 greater than VAL. */
/* NOTE(review): the function body (original lines 18122-18128) is not
   visible in this extract.  */
18121 smallest_pow2_greater_than (int val)
18129 /* Expand string move (memcpy) operation. Use i386 string operations when
18130 profitable. expand_setmem contains similar code. The code depends upon
18131 architecture, block size and alignment, but always has the same
18134 1) Prologue guard: Conditional that jumps up to epilogues for small
18135 blocks that can be handled by epilogue alone. This is faster but
18136 also needed for correctness, since the prologue assumes the block is larger
18137 than the desired alignment.
18139 Optional dynamic check for size and libcall for large
18140 blocks is emitted here too, with -minline-stringops-dynamically.
18142 2) Prologue: copy first few bytes in order to get destination aligned
18143 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18144 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18145 We emit either a jump tree on power of two sized blocks, or a byte loop.
18147 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18148 with specified algorithm.
18150 4) Epilogue: code copying tail of the block that is too small to be
18151 handled by main body (or up to size guarded by prologue guard). */
/* Expand memcpy; the overview comment preceding this function describes
   the four-step structure (prologue guard, alignment prologue, main loop,
   epilogue).  NOTE(review): this extract elides many original lines
   (braces, case labels, some statements); only comments were
   added/improved here.  */
18154 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18155 rtx expected_align_exp, rtx expected_size_exp)
18161 rtx jump_around_label = NULL;
18162 HOST_WIDE_INT align = 1;
18163 unsigned HOST_WIDE_INT count = 0;
18164 HOST_WIDE_INT expected_size = -1;
18165 int size_needed = 0, epilogue_size_needed;
18166 int desired_align = 0, align_bytes = 0;
18167 enum stringop_alg alg;
18169 bool need_zero_guard = false;
18171 if (CONST_INT_P (align_exp))
18172 align = INTVAL (align_exp);
18173 /* i386 can do misaligned access on reasonably increased cost. */
18174 if (CONST_INT_P (expected_align_exp)
18175 && INTVAL (expected_align_exp) > align)
18176 align = INTVAL (expected_align_exp);
18177 /* ALIGN is the minimum of destination and source alignment, but we care here
18178 just about destination alignment. */
18179 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18180 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
/* A constant COUNT fixes both the count and the expected size.  */
18182 if (CONST_INT_P (count_exp))
18183 count = expected_size = INTVAL (count_exp);
18184 if (CONST_INT_P (expected_size_exp) && count == 0)
18185 expected_size = INTVAL (expected_size_exp);
18187 /* Make sure we don't need to care about overflow later on. */
18188 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18191 /* Step 0: Decide on preferred algorithm, desired alignment and
18192 size of chunks to be copied by main loop. */
18194 alg = decide_alg (count, expected_size, false, &dynamic_check);
18195 desired_align = decide_alignment (align, alg, expected_size);
18197 if (!TARGET_ALIGN_STRINGOPS)
18198 align = desired_align;
18200 if (alg == libcall)
18202 gcc_assert (alg != no_stringop);
18204 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18205 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18206 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Per-algorithm chunk size of the main loop (case labels partly elided
   in this extract).  */
18211 gcc_unreachable ();
18213 need_zero_guard = true;
18214 size_needed = GET_MODE_SIZE (Pmode);
18216 case unrolled_loop:
18217 need_zero_guard = true;
18218 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18220 case rep_prefix_8_byte:
18223 case rep_prefix_4_byte:
18226 case rep_prefix_1_byte:
18230 need_zero_guard = true;
18235 epilogue_size_needed = size_needed;
18237 /* Step 1: Prologue guard. */
18239 /* Alignment code needs count to be in register. */
18240 if (CONST_INT_P (count_exp) && desired_align > align)
18242 if (INTVAL (count_exp) > desired_align
18243 && INTVAL (count_exp) > size_needed)
18246 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18247 if (align_bytes <= 0)
18250 align_bytes = desired_align - align_bytes;
18252 if (align_bytes == 0)
18253 count_exp = force_reg (counter_mode (count_exp), count_exp);
18255 gcc_assert (desired_align >= 1 && align >= 1);
18257 /* Ensure that alignment prologue won't copy past end of block. */
18258 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18260 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18261 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
18262 Make sure it is power of 2. */
18263 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18267 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18269 /* If main algorithm works on QImode, no epilogue is needed.
18270 For small sizes just don't align anything. */
18271 if (size_needed == 1)
18272 desired_align = align;
/* Emit the guard jumping to the epilogue for blocks too small for the
   prologue + main loop.  */
18279 label = gen_label_rtx ();
18280 emit_cmp_and_jump_insns (count_exp,
18281 GEN_INT (epilogue_size_needed),
18282 LTU, 0, counter_mode (count_exp), 1, label);
18283 if (expected_size == -1 || expected_size < epilogue_size_needed)
18284 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18286 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18290 /* Emit code to decide on runtime whether library call or inline should be
18292 if (dynamic_check != -1)
18294 if (CONST_INT_P (count_exp))
18296 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18298 emit_block_move_via_libcall (dst, src, count_exp, false);
18299 count_exp = const0_rtx;
18305 rtx hot_label = gen_label_rtx ();
18306 jump_around_label = gen_label_rtx ();
18307 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18308 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18309 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18310 emit_block_move_via_libcall (dst, src, count_exp, false);
18311 emit_jump (jump_around_label);
18312 emit_label (hot_label);
18316 /* Step 2: Alignment prologue. */
18318 if (desired_align > align)
18320 if (align_bytes == 0)
18322 /* Except for the first move in epilogue, we no longer know
18323 constant offset in aliasing info.  It doesn't seem worth
18324 the pain to maintain it for the first move, so throw away
18326 src = change_address (src, BLKmode, srcreg);
18327 dst = change_address (dst, BLKmode, destreg);
18328 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18333 /* If we know how many bytes need to be stored before dst is
18334 sufficiently aligned, maintain aliasing info accurately. */
18335 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18336 desired_align, align_bytes);
18337 count_exp = plus_constant (count_exp, -align_bytes);
18338 count -= align_bytes;
18340 if (need_zero_guard
18341 && (count < (unsigned HOST_WIDE_INT) size_needed
18342 || (align_bytes == 0
18343 && count < ((unsigned HOST_WIDE_INT) size_needed
18344 + desired_align - align))))
18346 /* It is possible that we copied enough so the main loop will not
18348 gcc_assert (size_needed > 1);
18349 if (label == NULL_RTX)
18350 label = gen_label_rtx ();
18351 emit_cmp_and_jump_insns (count_exp,
18352 GEN_INT (size_needed),
18353 LTU, 0, counter_mode (count_exp), 1, label);
18354 if (expected_size == -1
18355 || expected_size < (desired_align - align) / 2 + size_needed)
18356 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18358 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18361 if (label && size_needed == 1)
18363 emit_label (label);
18364 LABEL_NUSES (label) = 1;
18366 epilogue_size_needed = 1;
18368 else if (label == NULL_RTX)
18369 epilogue_size_needed = size_needed;
18371 /* Step 3: Main loop. */
18377 gcc_unreachable ();
18379 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18380 count_exp, QImode, 1, expected_size);
18383 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18384 count_exp, Pmode, 1, expected_size);
18386 case unrolled_loop:
18387 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18388 registers for 4 temporaries anyway. */
18389 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18390 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18393 case rep_prefix_8_byte:
18394 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18397 case rep_prefix_4_byte:
18398 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18401 case rep_prefix_1_byte:
18402 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18406 /* Adjust properly the offset of src and dest memory for aliasing. */
18407 if (CONST_INT_P (count_exp))
18409 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18410 (count / size_needed) * size_needed);
18411 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18412 (count / size_needed) * size_needed);
18416 src = change_address (src, BLKmode, srcreg);
18417 dst = change_address (dst, BLKmode, destreg);
18420 /* Step 4: Epilogue to copy the remaining bytes. */
18424 /* When the main loop is done, COUNT_EXP might hold original count,
18425 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18426 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18427 bytes. Compensate if needed. */
18429 if (size_needed < epilogue_size_needed)
18432 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18433 GEN_INT (size_needed - 1), count_exp, 1,
18435 if (tmp != count_exp)
18436 emit_move_insn (count_exp, tmp);
18438 emit_label (label);
18439 LABEL_NUSES (label) = 1;
18442 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18443 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18444 epilogue_size_needed);
18445 if (jump_around_label)
18446 emit_label (jump_around_label);
18450 /* Helper function for memcpy. For QImode value 0xXY produce
18451 0xXYXYXYXY of the width specified by MODE. This is essentially
18452 a * 0x10101010, but we can do slightly better than
18453 synth_mult by unwinding the sequence by hand on CPUs with
/* Duplicate QImode VAL across all bytes of a MODE (SImode or DImode)
   register: for 0xXY produce 0xXYXYXYXY....  Chooses between a multiply
   by 0x0101... and a shift/or ladder based on the cost tables.
   NOTE(review): some original lines (braces, returns, a few statements)
   are elided in this extract; only comments were added here.  */
18456 promote_duplicated_reg (enum machine_mode mode, rtx val)
18458 enum machine_mode valmode = GET_MODE (val);
/* Shift/or steps needed: 3 for DImode, 2 for SImode.  */
18460 int nops = mode == DImode ? 3 : 2;
18462 gcc_assert (mode == SImode || mode == DImode);
18463 if (val == const0_rtx)
18464 return copy_to_mode_reg (mode, const0_rtx);
/* Constant values can be duplicated at compile time.  */
18465 if (CONST_INT_P (val))
18467 HOST_WIDE_INT v = INTVAL (val) & 255;
18471 if (mode == DImode)
18472 v |= (v << 16) << 16;
18473 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18476 if (valmode == VOIDmode)
18478 if (valmode != QImode)
18479 val = gen_lowpart (QImode, val);
18480 if (mode == QImode)
/* Compare modeled cost of multiply vs. the shift/or ladder.  */
18482 if (!TARGET_PARTIAL_REG_STALL)
18484 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18485 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18486 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18487 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18489 rtx reg = convert_modes (mode, QImode, val, true);
/* Multiply by the promoted 0x0101... pattern.  */
18490 tmp = promote_duplicated_reg (mode, const1_rtx);
18491 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18496 rtx reg = convert_modes (mode, QImode, val, true);
/* Otherwise build the pattern by doubling its width each step.  */
18498 if (!TARGET_PARTIAL_REG_STALL)
18499 if (mode == SImode)
18500 emit_insn (gen_movsi_insv_1 (reg, reg));
18502 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18505 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18506 NULL, 1, OPTAB_DIRECT);
18508 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* Widen 16 -> 32 bits...  */
18510 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18511 NULL, 1, OPTAB_DIRECT);
18512 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18513 if (mode == SImode)
/* ...and 32 -> 64 bits for DImode.  */
18515 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18516 NULL, 1, OPTAB_DIRECT);
18517 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18522 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18523 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18524 alignment from ALIGN to DESIRED_ALIGN. */
/* Promote VAL to the widest chunk width that either the main loop
   (SIZE_NEEDED) or the alignment prologue (DESIRED_ALIGN vs. ALIGN) will
   store.  NOTE(review): the leading condition of the first test (original
   line 18530, presumably a TARGET_64BIT check) is elided in this extract;
   only comments were added here.  */
18526 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18531 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18532 promoted_val = promote_duplicated_reg (DImode, val);
18533 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18534 promoted_val = promote_duplicated_reg (SImode, val);
18535 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18536 promoted_val = promote_duplicated_reg (HImode, val);
/* Single-byte stores can use VAL unchanged.  */
18538 promoted_val = val;
18540 return promoted_val;
18543 /* Expand string clear operation (bzero). Use i386 string operations when
18544 profitable. See expand_movmem comment for explanation of individual
18545 steps performed. */
/* Expand memset; mirrors ix86_expand_movmem's four-step structure (guard,
   alignment prologue, main loop, epilogue), plus promotion of VAL_EXP to
   a repeated-byte pattern.  NOTE(review): this extract elides many
   original lines (braces, case labels, some statements); only comments
   were added/improved here.  */
18547 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18548 rtx expected_align_exp, rtx expected_size_exp)
18553 rtx jump_around_label = NULL;
18554 HOST_WIDE_INT align = 1;
18555 unsigned HOST_WIDE_INT count = 0;
18556 HOST_WIDE_INT expected_size = -1;
18557 int size_needed = 0, epilogue_size_needed;
18558 int desired_align = 0, align_bytes = 0;
18559 enum stringop_alg alg;
18560 rtx promoted_val = NULL;
18561 bool force_loopy_epilogue = false;
18563 bool need_zero_guard = false;
18565 if (CONST_INT_P (align_exp))
18566 align = INTVAL (align_exp);
18567 /* i386 can do misaligned access on reasonably increased cost. */
18568 if (CONST_INT_P (expected_align_exp)
18569 && INTVAL (expected_align_exp) > align)
18570 align = INTVAL (expected_align_exp);
18571 if (CONST_INT_P (count_exp))
18572 count = expected_size = INTVAL (count_exp);
18573 if (CONST_INT_P (expected_size_exp) && count == 0)
18574 expected_size = INTVAL (expected_size_exp);
18576 /* Make sure we don't need to care about overflow later on. */
18577 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18580 /* Step 0: Decide on preferred algorithm, desired alignment and
18581 size of chunks to be copied by main loop. */
18583 alg = decide_alg (count, expected_size, true, &dynamic_check);
18584 desired_align = decide_alignment (align, alg, expected_size);
18586 if (!TARGET_ALIGN_STRINGOPS)
18587 align = desired_align;
18589 if (alg == libcall)
18591 gcc_assert (alg != no_stringop);
18593 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18594 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* Per-algorithm chunk size of the main loop (case labels partly elided
   in this extract).  */
18599 gcc_unreachable ();
18601 need_zero_guard = true;
18602 size_needed = GET_MODE_SIZE (Pmode);
18604 case unrolled_loop:
18605 need_zero_guard = true;
18606 size_needed = GET_MODE_SIZE (Pmode) * 4;
18608 case rep_prefix_8_byte:
18611 case rep_prefix_4_byte:
18614 case rep_prefix_1_byte:
18618 need_zero_guard = true;
18622 epilogue_size_needed = size_needed;
18624 /* Step 1: Prologue guard. */
18626 /* Alignment code needs count to be in register. */
18627 if (CONST_INT_P (count_exp) && desired_align > align)
18629 if (INTVAL (count_exp) > desired_align
18630 && INTVAL (count_exp) > size_needed)
18633 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18634 if (align_bytes <= 0)
18637 align_bytes = desired_align - align_bytes;
18639 if (align_bytes == 0)
18641 enum machine_mode mode = SImode;
18642 if (TARGET_64BIT && (count & ~0xffffffff))
18644 count_exp = force_reg (mode, count_exp);
18647 /* Do the cheap promotion to allow better CSE across the
18648 main loop and epilogue (i.e. one load of the big constant in the
18649 front of all code. */
18650 if (CONST_INT_P (val_exp))
18651 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18652 desired_align, align);
18653 /* Ensure that alignment prologue won't copy past end of block. */
18654 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18656 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18657 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18658 Make sure it is power of 2. */
18659 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18661 /* To improve performance of small blocks, we jump around the VAL
18662 promoting mode.  This means that if the promoted VAL is not constant,
18663 we might not use it in the epilogue and have to use byte
18665 if (epilogue_size_needed > 2 && !promoted_val)
18666 force_loopy_epilogue = true;
18669 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18671 /* If main algorithm works on QImode, no epilogue is needed.
18672 For small sizes just don't align anything. */
18673 if (size_needed == 1)
18674 desired_align = align;
/* Emit the guard jumping to the epilogue for blocks too small for the
   prologue + main loop.  */
18681 label = gen_label_rtx ();
18682 emit_cmp_and_jump_insns (count_exp,
18683 GEN_INT (epilogue_size_needed),
18684 LTU, 0, counter_mode (count_exp), 1, label);
18685 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18686 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18688 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Runtime size check: large blocks go to the library call.  */
18691 if (dynamic_check != -1)
18693 rtx hot_label = gen_label_rtx ();
18694 jump_around_label = gen_label_rtx ();
18695 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18696 LEU, 0, counter_mode (count_exp), 1, hot_label);
18697 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18698 set_storage_via_libcall (dst, count_exp, val_exp, false);
18699 emit_jump (jump_around_label);
18700 emit_label (hot_label);
18703 /* Step 2: Alignment prologue. */
18705 /* Do the expensive promotion once we branched off the small blocks. */
18707 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18708 desired_align, align);
18709 gcc_assert (desired_align >= 1 && align >= 1);
18711 if (desired_align > align)
18713 if (align_bytes == 0)
18715 /* Except for the first move in epilogue, we no longer know
18716 constant offset in aliasing info.  It doesn't seem worth
18717 the pain to maintain it for the first move, so throw away
18719 dst = change_address (dst, BLKmode, destreg);
18720 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18725 /* If we know how many bytes need to be stored before dst is
18726 sufficiently aligned, maintain aliasing info accurately. */
18727 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18728 desired_align, align_bytes);
18729 count_exp = plus_constant (count_exp, -align_bytes);
18730 count -= align_bytes;
18732 if (need_zero_guard
18733 && (count < (unsigned HOST_WIDE_INT) size_needed
18734 || (align_bytes == 0
18735 && count < ((unsigned HOST_WIDE_INT) size_needed
18736 + desired_align - align))))
18738 /* It is possible that we copied enough so the main loop will not
18740 gcc_assert (size_needed > 1);
18741 if (label == NULL_RTX)
18742 label = gen_label_rtx ();
18743 emit_cmp_and_jump_insns (count_exp,
18744 GEN_INT (size_needed),
18745 LTU, 0, counter_mode (count_exp), 1, label);
18746 if (expected_size == -1
18747 || expected_size < (desired_align - align) / 2 + size_needed)
18748 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18750 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18753 if (label && size_needed == 1)
18755 emit_label (label);
18756 LABEL_NUSES (label) = 1;
18758 promoted_val = val_exp;
18759 epilogue_size_needed = 1;
18761 else if (label == NULL_RTX)
18762 epilogue_size_needed = size_needed;
18764 /* Step 3: Main loop. */
18770 gcc_unreachable ();
18772 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18773 count_exp, QImode, 1, expected_size);
18776 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18777 count_exp, Pmode, 1, expected_size);
18779 case unrolled_loop:
18780 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18781 count_exp, Pmode, 4, expected_size);
18783 case rep_prefix_8_byte:
18784 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18787 case rep_prefix_4_byte:
18788 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18791 case rep_prefix_1_byte:
18792 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18796 /* Adjust properly the offset of src and dest memory for aliasing. */
18797 if (CONST_INT_P (count_exp))
18798 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18799 (count / size_needed) * size_needed);
18801 dst = change_address (dst, BLKmode, destreg);
18803 /* Step 4: Epilogue to copy the remaining bytes. */
18807 /* When the main loop is done, COUNT_EXP might hold original count,
18808 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18809 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18810 bytes. Compensate if needed. */
18812 if (size_needed < epilogue_size_needed)
18815 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18816 GEN_INT (size_needed - 1), count_exp, 1,
18818 if (tmp != count_exp)
18819 emit_move_insn (count_exp, tmp);
18821 emit_label (label);
18822 LABEL_NUSES (label) = 1;
18825 if (count_exp != const0_rtx && epilogue_size_needed > 1)
/* A non-constant, unpromoted VAL forces the byte-loop epilogue.  */
18827 if (force_loopy_epilogue)
18828 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18829 epilogue_size_needed);
18831 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18832 epilogue_size_needed);
18834 if (jump_around_label)
18835 emit_label (jump_around_label);
18839 /* Expand the appropriate insns for doing strlen if not just doing
18842    out = result, initialized with the start address
18843    align_rtx = alignment of the address.
18844    scratch = scratch register, initialized with the startaddress when
18845 not aligned, otherwise undefined
18847    This is just the body.  It needs the initializations mentioned above and
18848    some address computing at the end.  These things are done in i386.md.  */
/* NOTE(review): this excerpt is line-sampled; declarations and some closing
   braces of this function are not visible here.  */
18851 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18855   rtx align_2_label = NULL_RTX;
18856   rtx align_3_label = NULL_RTX;
18857   rtx align_4_label = gen_label_rtx ();
18858   rtx end_0_label = gen_label_rtx ();
18860   rtx tmpreg = gen_reg_rtx (SImode);
18861   rtx scratch = gen_reg_rtx (SImode);
/* A constant alignment lets us skip some of the byte-at-a-time prologue.  */
18865   if (CONST_INT_P (align_rtx))
18866     align = INTVAL (align_rtx);
18868   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
18870   /* Is there a known alignment and is it less than 4?  */
18873       rtx scratch1 = gen_reg_rtx (Pmode);
18874       emit_move_insn (scratch1, out);
18875       /* Is there a known alignment and is it not 2? */
18878 	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18879 	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18881 	  /* Leave just the 3 lower bits.  */
18882 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18883 				    NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (address & 3): 0 -> already aligned, 2 -> two bytes to check,
   3 -> one byte, otherwise fall through to the 1-byte check.  */
18885 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18886 				   Pmode, 1, align_4_label);
18887 	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18888 				   Pmode, 1, align_2_label);
18889 	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18890 				   Pmode, 1, align_3_label);
18894 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
18895 	     check if is aligned to 4 - byte.  */
18897 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18898 				    NULL_RTX, 0, OPTAB_WIDEN);
18900 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18901 				   Pmode, 1, align_4_label);
18904       mem = change_address (src, QImode, out);
18906       /* Now compare the bytes.  */
18908       /* Compare the first n unaligned byte on a byte per byte basis.  */
18909       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18910 			       QImode, 1, end_0_label);
18912       /* Increment the address.  */
18913       emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18915       /* Not needed with an alignment of 2 */
18918 	  emit_label (align_2_label);
18920 	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18923 	  emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18925 	  emit_label (align_3_label);
18928       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18931       emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18934   /* Generate loop to check 4 bytes at a time.  It is not a good idea to
18935      align this loop.  It gives only huge programs, but does not help to
18937   emit_label (align_4_label);
18939   mem = change_address (src, SImode, out);
18940   emit_move_insn (scratch, mem);
18941   emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18943   /* This formula yields a nonzero result iff one of the bytes is zero.
18944      This saves three branches inside loop and many cycles.  */
/* Classic SWAR zero-byte test: (x - 0x01010101) & ~x & 0x80808080.  */
18946   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18947   emit_insn (gen_one_cmplsi2 (scratch, scratch));
18948   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18949   emit_insn (gen_andsi3 (tmpreg, tmpreg,
18950 			 gen_int_mode (0x80808080, SImode)));
18951   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found in the word just loaded; locate which byte it is.
   The branch-free variant below uses conditional moves.  */
18956       rtx reg = gen_reg_rtx (SImode);
18957       rtx reg2 = gen_reg_rtx (Pmode);
18958       emit_move_insn (reg, tmpreg);
18959       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18961       /* If zero is not in the first two bytes, move two bytes forward.  */
18962       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18963       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18964       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18965       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18966 			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
18969       /* Emit lea manually to avoid clobbering of flags.  */
18970       emit_insn (gen_rtx_SET (SImode, reg2,
18971 			      gen_rtx_PLUS (Pmode, out, const2_rtx)));
18973       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18974       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18975       emit_insn (gen_rtx_SET (VOIDmode, out,
18976 			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Branchy variant: explicit jump around the two-byte advance.  */
18983       rtx end_2_label = gen_label_rtx ();
18984       /* Is zero in the first two bytes? */
18986       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18987       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18988       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18989       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18990 				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18992       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18993       JUMP_LABEL (tmp) = end_2_label;
18995       /* Not in the first two.  Move two bytes forward.  */
18996       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18997       emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18999       emit_label (end_2_label);
19003   /* Avoid branch in fixing the byte.  */
/* The carry from the low byte's 0x80 bit selects whether to back up
   OUT by 3 or by 2 bytes (via sbb), yielding the exact NUL address.  */
19004   tmpreg = gen_lowpart (QImode, tmpreg);
19005   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19006   cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
19007   emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
19009   emit_label (end_0_label);
19012 /* Expand strlen.  */
/* OUT receives the length of the string at SRC.  EOSCHAR is the terminator
   (only const0_rtx is special-cased), ALIGN the known source alignment.
   Two strategies: an unrolled word-at-a-time scan (above), or the
   rep-scasb sequence via UNSPEC_SCAS.  Return value not visible in this
   excerpt -- presumably nonzero on success; TODO confirm against i386.md.  */
19015 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19017   rtx addr, scratch1, scratch2, scratch3, scratch4;
19019   /* The generic case of strlen expander is long.  Avoid it's
19020      expanding unless TARGET_INLINE_ALL_STRINGOPS.  */
19022   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19023       && !TARGET_INLINE_ALL_STRINGOPS
19024       && !optimize_insn_for_size_p ()
19025       && (!CONST_INT_P (align) || INTVAL (align) < 4))
19028   addr = force_reg (Pmode, XEXP (src, 0));
19029   scratch1 = gen_reg_rtx (Pmode);
19031   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19032       && !optimize_insn_for_size_p ())
19034       /* Well it seems that some optimizer does not combine a call like
19035 	 foo(strlen(bar), strlen(bar));
19036 	 when the move and the subtraction is done here.  It does calculate
19037 	 the length just once when these instructions are done inside of
19038 	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
19039 	 often used and I use one fewer register for the lifetime of
19040 	 output_strlen_unroll() this is better.  */
19042       emit_move_insn (out, addr);
19044       ix86_expand_strlensi_unroll_1 (out, src, align);
19046       /* strlensi_unroll_1 returns the address of the zero at the end of
19047 	 the string, like memchr(), so compute the length by subtracting
19048 	 the start address.  */
19049       emit_insn ((*ix86_gen_sub3) (out, out, addr));
19055       /* Can't use this if the user has appropriated eax, ecx, or edi.  */
19056       if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
/* rep-scasb path: scratch4 = -1 (max count), scratch3 = address in EDI.
   The scan leaves one-past-the-NUL; ~x - 1 converts it to the length.  */
19059       scratch2 = gen_reg_rtx (Pmode);
19060       scratch3 = gen_reg_rtx (Pmode);
19061       scratch4 = force_reg (Pmode, constm1_rtx);
19063       emit_move_insn (scratch3, addr);
19064       eoschar = force_reg (QImode, eoschar);
19066       src = replace_equiv_address_nv (src, scratch3);
19068       /* If .md starts supporting :P, this can be done in .md.  */
19069       unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19070 						 scratch4), UNSPEC_SCAS);
19071       emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19072       emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19073       emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19078 /* For given symbol (function) construct code to compute address of its PLT
19079    entry in large x86-64 PIC model.  */
/* Returns a fresh pseudo holding PIC-register + @PLTOFF(symbol).
   Only valid in CM_LARGE_PIC (asserted below).  */
19081 construct_plt_address (rtx symbol)
19083   rtx tmp = gen_reg_rtx (Pmode);
19084   rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19086   gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19087   gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19089   emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19090   emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Emit a call to FNADDR (a MEM).  RETVAL, if non-null, receives the result;
   CALLARG1/CALLARG2 are the argument-size operands from the call expanders;
   POP is the callee-pop amount; SIBCALL is nonzero for tail calls.
   NOTE(review): excerpt is line-sampled, some statements are missing here.  */
19095 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19097 		  rtx pop, int sibcall)
19099   rtx use = NULL, call;
19101   if (pop == const0_rtx)
19103   gcc_assert (!TARGET_64BIT || !pop);
19105   if (TARGET_MACHO && !TARGET_64BIT)
19108   if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19109     fnaddr = machopic_indirect_call_target (fnaddr);
19114   /* Static functions and indirect calls don't need the pic register.  */
19115   if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19116       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19117       && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19118     use_reg (&use, pic_offset_table_rtx);
/* x86-64 varargs: AL carries the number of SSE registers used.  */
19121   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19123       rtx al = gen_rtx_REG (QImode, AX_REG);
19124       emit_move_insn (al, callarg2);
19125       use_reg (&use, al);
/* Large PIC model: route non-local symbolic calls through the PLT address
   computed by construct_plt_address.  */
19128   if (ix86_cmodel == CM_LARGE_PIC
19129       && GET_CODE (fnaddr) == MEM
19130       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19131       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19132     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19133   else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
19135       fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19136       fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls must use a call-clobbered, non-argument register;
   R11 is the designated static-chain-free scratch for this.  */
19138   if (sibcall && TARGET_64BIT
19139       && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
19142       addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19143       fnaddr = gen_rtx_REG (Pmode, R11_REG);
19144       emit_move_insn (fnaddr, addr);
19145       fnaddr = gen_rtx_MEM (QImode, fnaddr);
19148   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19150     call = gen_rtx_SET (VOIDmode, retval, call);
/* Callee-pop: attach a stack-pointer adjustment in a PARALLEL.  */
19153       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19154       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19155       call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19158       && ix86_cfun_abi () == MS_ABI
19159       && (!callarg2 || INTVAL (callarg2) != -2))
19161       /* We need to represent that SI and DI registers are clobbered
19163       static int clobbered_registers[] = {
19164 	XMM6_REG, XMM7_REG, XMM8_REG,
19165 	XMM9_REG, XMM10_REG, XMM11_REG,
19166 	XMM12_REG, XMM13_REG, XMM14_REG,
19167 	XMM15_REG, SI_REG, DI_REG
/* Calls from an MS-ABI function into a SysV-ABI callee clobber registers
   that MS ABI treats as call-saved; model them explicitly.  */
19170       rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19171       rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19172 				   UNSPEC_MS_TO_SYSV_CALL);
19176       for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19177 	vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19180 				      (SSE_REGNO_P (clobbered_registers[i])
19182 				       clobbered_registers[i]));
19184       call = gen_rtx_PARALLEL (VOIDmode,
19185 			       gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19189   call = emit_call_insn (call);
19191     CALL_INSN_FUNCTION_USAGE (call) = use;
19195 /* Clear stack slot assignments remembered from previous functions.
19196    This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and initialize the per-function machine_function record.
   GGC_CNEW zero-initializes; only the non-zero defaults are set below.  */
19199 static struct machine_function *
19200 ix86_init_machine_status (void)
19202   struct machine_function *f;
19204   f = GGC_CNEW (struct machine_function);
19205   f->use_fast_prologue_epilogue_nregs = -1;
19206   f->tls_descriptor_call_expanded_p = 0;
19207   f->call_abi = ix86_abi;
19212 /* Return a MEM corresponding to a stack slot with mode MODE.
19213    Allocate a new slot if necessary.
19215    The RTL for a function can have several slots available: N is
19216    which slot to use.  */
19219 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19221   struct stack_local_entry *s;
19223   gcc_assert (n < MAX_386_STACK_LOCALS);
19225   /* Virtual slot is valid only before vregs are instantiated.  */
19226   gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse an existing (mode, n) slot; copy_rtx so callers may modify it.  */
19228   for (s = ix86_stack_locals; s; s = s->next)
19229     if (s->mode == mode && s->n == n)
19230       return copy_rtx (s->rtl);
/* Not found: allocate a new entry and push it on the per-function list.  */
19232   s = (struct stack_local_entry *)
19233     ggc_alloc (sizeof (struct stack_local_entry));
19236   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19238   s->next = ix86_stack_locals;
19239   ix86_stack_locals = s;
19243 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
19245 static GTY(()) rtx ix86_tls_symbol;
/* Lazily build and cache the SYMBOL_REF; the leading-underscore variant
   is selected for GNU TLS (condition partially elided in this excerpt).  */
19247 ix86_tls_get_addr (void)
19250   if (!ix86_tls_symbol)
19252       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19253 					    (TARGET_ANY_GNU_TLS
19255 					     ? "___tls_get_addr"
19256 					     : "__tls_get_addr");
19259   return ix86_tls_symbol;
19262 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
19264 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily build and cache the symbol, marking it global-dynamic TLS so the
   rest of the backend treats references to it correctly.  */
19266 ix86_tls_module_base (void)
19269   if (!ix86_tls_module_base_symbol)
19271       ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19272 							"_TLS_MODULE_BASE_");
19273       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19274 	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19277   return ix86_tls_module_base_symbol;
19280 /* Calculate the length of the memory address in the instruction
19281    encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
/* Returns the number of extra bytes (SIB + displacement) needed to encode
   ADDR.  NOTE(review): excerpt is line-sampled; several return statements
   and length accumulations are not visible here.  */
19284 memory_address_length (rtx addr)
19286   struct ix86_address parts;
19287   rtx base, index, disp;
/* Autoincrement addresses encode as plain register indirect.  */
19291   if (GET_CODE (addr) == PRE_DEC
19292       || GET_CODE (addr) == POST_INC
19293       || GET_CODE (addr) == PRE_MODIFY
19294       || GET_CODE (addr) == POST_MODIFY)
19297   ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the register identity checks below work.  */
19300   if (parts.base && GET_CODE (parts.base) == SUBREG)
19301     parts.base = SUBREG_REG (parts.base);
19302   if (parts.index && GET_CODE (parts.index) == SUBREG)
19303     parts.index = SUBREG_REG (parts.index);
19306   index = parts.index;
19311      - esp as the base always wants an index,
19312      - ebp as the base always wants a displacement.  */
19314   /* Register Indirect.  */
19315   if (base && !index && !disp)
19317       /* esp (for its index) and ebp (for its displacement) need
19318 	 the two-byte modrm form.  */
19319       if (addr == stack_pointer_rtx
19320 	  || addr == arg_pointer_rtx
19321 	  || addr == frame_pointer_rtx
19322 	  || addr == hard_frame_pointer_rtx)
19326   /* Direct Addressing.  */
19327   else if (disp && !base && !index)
19332       /* Find the length of the displacement constant.  */
/* satisfies_constraint_K checks for a signed 8-bit immediate, which
   encodes as a one-byte displacement.  */
19335 	  if (base && satisfies_constraint_K (disp))
19340       /* ebp always wants a displacement.  */
19341       else if (base == hard_frame_pointer_rtx)
19344       /* An index requires the two-byte modrm form....  */
19346 	  /* ...like esp, which always wants an index.  */
19347 	  || base == stack_pointer_rtx
19348 	  || base == arg_pointer_rtx
19349 	  || base == frame_pointer_rtx)
19356 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
19357    is set, expect that insn have 8bit immediate alternative.  */
/* Scans INSN's operands for a constant; the immediate's encoded size is
   derived from the insn mode (switch body mostly elided in this excerpt).  */
19359 ix86_attr_length_immediate_default (rtx insn, int shortform)
19363   extract_insn_cached (insn);
19364   for (i = recog_data.n_operands - 1; i >= 0; --i)
19365     if (CONSTANT_P (recog_data.operand[i]))
/* Constraint K = signed 8-bit: short-form insns encode it in one byte.  */
19368 	if (shortform && satisfies_constraint_K (recog_data.operand[i]))
19372 	switch (get_attr_mode (insn))
19383 	    /* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
19388 	    fatal_insn ("unknown insn mode", insn);
19394 /* Compute default value for "length_address" attribute.  */
/* For LEA the address is the SET_SRC of its pattern; otherwise measure the
   first MEM operand found (an insn has at most one address to encode).  */
19396 ix86_attr_length_address_default (rtx insn)
19400   if (get_attr_type (insn) == TYPE_LEA)
19402       rtx set = PATTERN (insn);
19404       if (GET_CODE (set) == PARALLEL)
19405 	set = XVECEXP (set, 0, 0);
19407       gcc_assert (GET_CODE (set) == SET);
19409       return memory_address_length (SET_SRC (set));
19412   extract_insn_cached (insn);
19413   for (i = recog_data.n_operands - 1; i >= 0; --i)
19414     if (MEM_P (recog_data.operand[i]))
19416 	return memory_address_length (XEXP (recog_data.operand[i], 0));
19422 /* Compute default value for "length_vex" attribute.  It includes
19423    2 or 3 byte VEX prefix and 1 opcode byte.  */
/* HAS_0F_OPCODE: insn is in the 0F opcode map (only map eligible for the
   2-byte prefix).  HAS_VEX_W: insn sets VEX.W, forcing the 3-byte form.  */
19426 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19431   /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
19432      byte VEX prefix.  */
19433   if (!has_0f_opcode || has_vex_w)
19436   /* We can always use 2 byte VEX prefix in 32bit.  */
19440   extract_insn_cached (insn);
19442   for (i = recog_data.n_operands - 1; i >= 0; --i)
19443     if (REG_P (recog_data.operand[i]))
19445 	/* REX.W bit uses 3 byte VEX prefix.  */
19446 	if (GET_MODE (recog_data.operand[i]) == DImode)
19451 	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
19452 	if (MEM_P (recog_data.operand[i])
19453 	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19460 /* Return the maximum number of instructions a cpu can issue.  */
/* Used by the scheduler (TARGET_SCHED_ISSUE_RATE).  Return values for the
   individual cases are elided in this excerpt.  */
19463 ix86_issue_rate (void)
19467     case PROCESSOR_PENTIUM:
19468     case PROCESSOR_ATOM:
19472     case PROCESSOR_PENTIUMPRO:
19473     case PROCESSOR_PENTIUM4:
19474     case PROCESSOR_ATHLON:
19476     case PROCESSOR_AMDFAM10:
19477     case PROCESSOR_NOCONA:
19478     case PROCESSOR_GENERIC32:
19479     case PROCESSOR_GENERIC64:
19482     case PROCESSOR_CORE2:
19490 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19491    by DEP_INSN and nothing set by DEP_INSN.  */
/* SET receives the flags destination of DEP_INSN; SET2 the second
   destination when DEP_INSN is a two-SET PARALLEL.  */
19494 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19498   /* Simplify the test for uninteresting insns.  */
19499   if (insn_type != TYPE_SETCC
19500       && insn_type != TYPE_ICMOV
19501       && insn_type != TYPE_FCMOV
19502       && insn_type != TYPE_IBR)
19505   if ((set = single_set (dep_insn)) != 0)
19507       set = SET_DEST (set);
19510   else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19511 	   && XVECLEN (PATTERN (dep_insn), 0) == 2
19512 	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19513 	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19515       set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      /* Fixed: SET2 must come from element 1 of the PARALLEL -- the guard
	 above checks element 1 is a SET, and SET2 is tested independently
	 below.  Taking element 0 again made that test a duplicate of SET's.  */
19516       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
19521   if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19524   /* This test is true if the dependent insn reads the flags but
19525      not any other potentially set register.  */
19526   if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19529   if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19535 /* Return true iff USE_INSN has a memory address with operands set by
/* SET_INSN.  Models the address-generation interlock: only the first MEM
   operand found is examined.  */
19539 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19542   extract_insn_cached (use_insn);
19543   for (i = recog_data.n_operands - 1; i >= 0; --i)
19544     if (MEM_P (recog_data.operand[i]))
19546 	rtx addr = XEXP (recog_data.operand[i], 0);
19547 	return modified_in_p (addr, set_insn) != 0;
/* TARGET_SCHED_ADJUST_COST hook: refine the scheduling COST of the
   dependence LINK between INSN and the earlier DEP_INSN for the current
   processor.  NOTE(review): excerpt is line-sampled; several assignments
   and the final return are not visible here.  */
19553 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19555   enum attr_type insn_type, dep_insn_type;
19556   enum attr_memory memory;
19558   int dep_insn_code_number;
19560   /* Anti and output dependencies have zero cost on all CPUs.  */
19561   if (REG_NOTE_KIND (link) != 0)
19564   dep_insn_code_number = recog_memoized (dep_insn);
19566   /* If we can't recognize the insns, we can't really do anything.  */
19567   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19570   insn_type = get_attr_type (insn);
19571   dep_insn_type = get_attr_type (dep_insn);
19575     case PROCESSOR_PENTIUM:
19576       /* Address Generation Interlock adds a cycle of latency.  */
19577       if (insn_type == TYPE_LEA)
19579 	  rtx addr = PATTERN (insn);
19581 	  if (GET_CODE (addr) == PARALLEL)
19582 	    addr = XVECEXP (addr, 0, 0);
19584 	  gcc_assert (GET_CODE (addr) == SET);
19586 	  addr = SET_SRC (addr);
19587 	  if (modified_in_p (addr, dep_insn))
19590       else if (ix86_agi_dependent (dep_insn, insn))
19593       /* ??? Compares pair with jump/setcc.  */
19594       if (ix86_flags_dependent (insn, dep_insn, insn_type))
19597       /* Floating point stores require value to be ready one cycle earlier.  */
19598       if (insn_type == TYPE_FMOV
19599 	  && get_attr_memory (insn) == MEMORY_STORE
19600 	  && !ix86_agi_dependent (dep_insn, insn))
19604     case PROCESSOR_PENTIUMPRO:
19605       memory = get_attr_memory (insn);
19607       /* INT->FP conversion is expensive.  */
19608       if (get_attr_fp_int_src (dep_insn))
19611       /* There is one cycle extra latency between an FP op and a store.  */
19612       if (insn_type == TYPE_FMOV
19613 	  && (set = single_set (dep_insn)) != NULL_RTX
19614 	  && (set2 = single_set (insn)) != NULL_RTX
19615 	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19616 	  && MEM_P (SET_DEST (set2)))
19619       /* Show ability of reorder buffer to hide latency of load by executing
19620 	 in parallel with previous instruction in case
19621 	 previous instruction is not needed to compute the address.  */
19622       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19623 	  && !ix86_agi_dependent (dep_insn, insn))
19625 	  /* Claim moves to take one cycle, as core can issue one load
19626 	     at time and the next load can start cycle later.  */
19627 	  if (dep_insn_type == TYPE_IMOV
19628 	      || dep_insn_type == TYPE_FMOV)
19636       memory = get_attr_memory (insn);
19638       /* The esp dependency is resolved before the instruction is really
19640       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19641 	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19644       /* INT->FP conversion is expensive.  */
19645       if (get_attr_fp_int_src (dep_insn))
19648       /* Show ability of reorder buffer to hide latency of load by executing
19649 	 in parallel with previous instruction in case
19650 	 previous instruction is not needed to compute the address.  */
19651       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19652 	  && !ix86_agi_dependent (dep_insn, insn))
19654 	  /* Claim moves to take one cycle, as core can issue one load
19655 	     at time and the next load can start cycle later.  */
19656 	  if (dep_insn_type == TYPE_IMOV
19657 	      || dep_insn_type == TYPE_FMOV)
19666     case PROCESSOR_ATHLON:
19668     case PROCESSOR_AMDFAM10:
19669     case PROCESSOR_ATOM:
19670     case PROCESSOR_GENERIC32:
19671     case PROCESSOR_GENERIC64:
19672       memory = get_attr_memory (insn);
19674       /* Show ability of reorder buffer to hide latency of load by executing
19675 	 in parallel with previous instruction in case
19676 	 previous instruction is not needed to compute the address.  */
19677       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19678 	  && !ix86_agi_dependent (dep_insn, insn))
19680 	  enum attr_unit unit = get_attr_unit (insn);
19683 	  /* Because of the difference between the length of integer and
19684 	     floating unit pipeline preparation stages, the memory operands
19685 	     for floating point are cheaper.
19687 	     ??? For Athlon it the difference is most probably 2.  */
19688 	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19691 	    loadcost = TARGET_ATHLON ? 2 : 0;
19693 	  if (cost >= loadcost)
19706 /* How many alternative schedules to try.  This should be as wide as the
19707    scheduling freedom in the DFA, but no wider.  Making this value too
19708    large results extra work for the scheduler.  */
/* Per-processor return values are elided in this excerpt.  */
19711 ia32_multipass_dfa_lookahead (void)
19715     case PROCESSOR_PENTIUM:
19718     case PROCESSOR_PENTIUMPRO:
19728 /* Compute the alignment given to a constant that is being placed in memory.
19729    EXP is the constant and ALIGN is the alignment that the object would
19731    The value of this function is used instead of that alignment to align
/* Raise DFmode constants to 64 bits, 128-bit-mode constants to 128 bits,
   and (when not optimizing for size) long strings to word alignment.  */
19735 ix86_constant_alignment (tree exp, int align)
19737   if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19738       || TREE_CODE (exp) == INTEGER_CST)
19740       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19742       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19745   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19746 	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19747     return BITS_PER_WORD;
19752 /* Compute the alignment for a static variable.
19753    TYPE is the data type, and ALIGN is the alignment that
19754    the object would ordinarily have.  The value of this function is used
19755    instead of that alignment to align the object.  */
19758 ix86_data_alignment (tree type, int align)
/* Cap the boost at 256 bits (or the word size when optimizing for size).  */
19760   int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19762   if (AGGREGATE_TYPE_P (type)
19763       && TYPE_SIZE (type)
19764       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19765       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19766 	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19767       && align < max_align)
19770   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19771      to 16byte boundary.  */
19774   if (AGGREGATE_TYPE_P (type)
19775       && TYPE_SIZE (type)
19776       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19777       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19778 	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Below: per-kind boosts -- double-word for DFmode-based data, 128 bits
   for 128-bit modes.  Return statements are elided in this excerpt.  */
19782   if (TREE_CODE (type) == ARRAY_TYPE)
19784       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19786       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19789   else if (TREE_CODE (type) == COMPLEX_TYPE)
19792       if (TYPE_MODE (type) == DCmode && align < 64)
19794       if ((TYPE_MODE (type) == XCmode
19795 	   || TYPE_MODE (type) == TCmode) && align < 128)
19798   else if ((TREE_CODE (type) == RECORD_TYPE
19799 	    || TREE_CODE (type) == UNION_TYPE
19800 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
19801 	   && TYPE_FIELDS (type))
19803       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19805       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19808   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19809 	   || TREE_CODE (type) == INTEGER_TYPE)
19811       if (TYPE_MODE (type) == DFmode && align < 64)
19813       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19820 /* Compute the alignment for a local variable or a stack slot.  EXP is
19821    the data type or decl itself, MODE is the widest mode available and
19822    ALIGN is the alignment that the object would ordinarily have.  The
19823    value of this macro is used instead of that alignment to align the
19827 ix86_local_alignment (tree exp, enum machine_mode mode,
19828 		      unsigned int align)
19832   if (exp && DECL_P (exp))
19834       type = TREE_TYPE (exp);
19843   /* Don't do dynamic stack realignment for long long objects with
19844      -mpreferred-stack-boundary=2.  */
19847       && ix86_preferred_stack_boundary < 64
19848       && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19849       && (!type || !TYPE_USER_ALIGN (type))
19850       && (!decl || !DECL_USER_ALIGN (decl)))
19853   /* If TYPE is NULL, we are allocating a stack slot for caller-save
19854      register in MODE.  We will return the largest alignment of XF
19858       if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19859 	align = GET_MODE_ALIGNMENT (DFmode);
19863   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19864      to 16byte boundary.  */
19867   if (AGGREGATE_TYPE_P (type)
19868       && TYPE_SIZE (type)
19869       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19870       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19871 	  || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-kind boosts below mirror ix86_data_alignment; return statements are
   elided in this excerpt.  */
19874   if (TREE_CODE (type) == ARRAY_TYPE)
19876       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19878       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19881   else if (TREE_CODE (type) == COMPLEX_TYPE)
19883       if (TYPE_MODE (type) == DCmode && align < 64)
19885       if ((TYPE_MODE (type) == XCmode
19886 	   || TYPE_MODE (type) == TCmode) && align < 128)
19889   else if ((TREE_CODE (type) == RECORD_TYPE
19890 	    || TREE_CODE (type) == UNION_TYPE
19891 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
19892 	   && TYPE_FIELDS (type))
19894       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19896       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19899   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19900 	   || TREE_CODE (type) == INTEGER_TYPE)
19903       if (TYPE_MODE (type) == DFmode && align < 64)
19905       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19911 /* Emit RTL insns to initialize the variable parts of a trampoline.
19912    FNADDR is an RTX for the address of the function's pure code.
19913    CXT is an RTX for the static chain value for the function.  */
19915 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
/* 32-bit layout: "movl $cxt, %ecx" (0xb9 imm32) then "jmp rel32" (0xe9).  */
19919       /* Compute offset from the end of the jmp to the target function.  */
19920       rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19921 			       plus_constant (tramp, 10),
19922 			       NULL_RTX, 1, OPTAB_DIRECT);
19923       emit_move_insn (gen_rtx_MEM (QImode, tramp),
19924 		      gen_int_mode (0xb9, QImode));
19925       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19926       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19927 		      gen_int_mode (0xe9, QImode));
19928       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
/* 64-bit layout: load FNADDR into r11 (movl or movabs), CXT into r10,
   then "jmp *%r11".  The byte pairs below are little-endian opcode+REX.  */
19933       /* Try to load address using shorter movl instead of movabs.
19934 	 We may want to support movq for kernel mode, but kernel does not use
19935 	 trampolines at the moment.  */
19936       if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19938 	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
19939 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19940 			  gen_int_mode (0xbb41, HImode));
19941 	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19942 			  gen_lowpart (SImode, fnaddr));
19947 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19948 			  gen_int_mode (0xbb49, HImode));
19949 	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19953       /* Load static chain using movabs to r10.  */
19954       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19955 		      gen_int_mode (0xba49, HImode));
19956       emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19959       /* Jump to the r11 */
19960       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19961 		      gen_int_mode (0xff49, HImode));
19962       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
19963 		      gen_int_mode (0xe3, QImode));
19965       gcc_assert (offset <= TRAMPOLINE_SIZE);
/* On targets that need it, mark the trampoline's stack page executable.  */
19968 #ifdef ENABLE_EXECUTE_STACK
19969   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19970 		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19974 /* Codes for all the SSE/MMX builtins. */
19977 IX86_BUILTIN_ADDPS,
19978 IX86_BUILTIN_ADDSS,
19979 IX86_BUILTIN_DIVPS,
19980 IX86_BUILTIN_DIVSS,
19981 IX86_BUILTIN_MULPS,
19982 IX86_BUILTIN_MULSS,
19983 IX86_BUILTIN_SUBPS,
19984 IX86_BUILTIN_SUBSS,
19986 IX86_BUILTIN_CMPEQPS,
19987 IX86_BUILTIN_CMPLTPS,
19988 IX86_BUILTIN_CMPLEPS,
19989 IX86_BUILTIN_CMPGTPS,
19990 IX86_BUILTIN_CMPGEPS,
19991 IX86_BUILTIN_CMPNEQPS,
19992 IX86_BUILTIN_CMPNLTPS,
19993 IX86_BUILTIN_CMPNLEPS,
19994 IX86_BUILTIN_CMPNGTPS,
19995 IX86_BUILTIN_CMPNGEPS,
19996 IX86_BUILTIN_CMPORDPS,
19997 IX86_BUILTIN_CMPUNORDPS,
19998 IX86_BUILTIN_CMPEQSS,
19999 IX86_BUILTIN_CMPLTSS,
20000 IX86_BUILTIN_CMPLESS,
20001 IX86_BUILTIN_CMPNEQSS,
20002 IX86_BUILTIN_CMPNLTSS,
20003 IX86_BUILTIN_CMPNLESS,
20004 IX86_BUILTIN_CMPNGTSS,
20005 IX86_BUILTIN_CMPNGESS,
20006 IX86_BUILTIN_CMPORDSS,
20007 IX86_BUILTIN_CMPUNORDSS,
20009 IX86_BUILTIN_COMIEQSS,
20010 IX86_BUILTIN_COMILTSS,
20011 IX86_BUILTIN_COMILESS,
20012 IX86_BUILTIN_COMIGTSS,
20013 IX86_BUILTIN_COMIGESS,
20014 IX86_BUILTIN_COMINEQSS,
20015 IX86_BUILTIN_UCOMIEQSS,
20016 IX86_BUILTIN_UCOMILTSS,
20017 IX86_BUILTIN_UCOMILESS,
20018 IX86_BUILTIN_UCOMIGTSS,
20019 IX86_BUILTIN_UCOMIGESS,
20020 IX86_BUILTIN_UCOMINEQSS,
20022 IX86_BUILTIN_CVTPI2PS,
20023 IX86_BUILTIN_CVTPS2PI,
20024 IX86_BUILTIN_CVTSI2SS,
20025 IX86_BUILTIN_CVTSI642SS,
20026 IX86_BUILTIN_CVTSS2SI,
20027 IX86_BUILTIN_CVTSS2SI64,
20028 IX86_BUILTIN_CVTTPS2PI,
20029 IX86_BUILTIN_CVTTSS2SI,
20030 IX86_BUILTIN_CVTTSS2SI64,
20032 IX86_BUILTIN_MAXPS,
20033 IX86_BUILTIN_MAXSS,
20034 IX86_BUILTIN_MINPS,
20035 IX86_BUILTIN_MINSS,
20037 IX86_BUILTIN_LOADUPS,
20038 IX86_BUILTIN_STOREUPS,
20039 IX86_BUILTIN_MOVSS,
20041 IX86_BUILTIN_MOVHLPS,
20042 IX86_BUILTIN_MOVLHPS,
20043 IX86_BUILTIN_LOADHPS,
20044 IX86_BUILTIN_LOADLPS,
20045 IX86_BUILTIN_STOREHPS,
20046 IX86_BUILTIN_STORELPS,
20048 IX86_BUILTIN_MASKMOVQ,
20049 IX86_BUILTIN_MOVMSKPS,
20050 IX86_BUILTIN_PMOVMSKB,
20052 IX86_BUILTIN_MOVNTPS,
20053 IX86_BUILTIN_MOVNTQ,
20055 IX86_BUILTIN_LOADDQU,
20056 IX86_BUILTIN_STOREDQU,
20058 IX86_BUILTIN_PACKSSWB,
20059 IX86_BUILTIN_PACKSSDW,
20060 IX86_BUILTIN_PACKUSWB,
20062 IX86_BUILTIN_PADDB,
20063 IX86_BUILTIN_PADDW,
20064 IX86_BUILTIN_PADDD,
20065 IX86_BUILTIN_PADDQ,
20066 IX86_BUILTIN_PADDSB,
20067 IX86_BUILTIN_PADDSW,
20068 IX86_BUILTIN_PADDUSB,
20069 IX86_BUILTIN_PADDUSW,
20070 IX86_BUILTIN_PSUBB,
20071 IX86_BUILTIN_PSUBW,
20072 IX86_BUILTIN_PSUBD,
20073 IX86_BUILTIN_PSUBQ,
20074 IX86_BUILTIN_PSUBSB,
20075 IX86_BUILTIN_PSUBSW,
20076 IX86_BUILTIN_PSUBUSB,
20077 IX86_BUILTIN_PSUBUSW,
20080 IX86_BUILTIN_PANDN,
20084 IX86_BUILTIN_PAVGB,
20085 IX86_BUILTIN_PAVGW,
20087 IX86_BUILTIN_PCMPEQB,
20088 IX86_BUILTIN_PCMPEQW,
20089 IX86_BUILTIN_PCMPEQD,
20090 IX86_BUILTIN_PCMPGTB,
20091 IX86_BUILTIN_PCMPGTW,
20092 IX86_BUILTIN_PCMPGTD,
20094 IX86_BUILTIN_PMADDWD,
20096 IX86_BUILTIN_PMAXSW,
20097 IX86_BUILTIN_PMAXUB,
20098 IX86_BUILTIN_PMINSW,
20099 IX86_BUILTIN_PMINUB,
20101 IX86_BUILTIN_PMULHUW,
20102 IX86_BUILTIN_PMULHW,
20103 IX86_BUILTIN_PMULLW,
20105 IX86_BUILTIN_PSADBW,
20106 IX86_BUILTIN_PSHUFW,
20108 IX86_BUILTIN_PSLLW,
20109 IX86_BUILTIN_PSLLD,
20110 IX86_BUILTIN_PSLLQ,
20111 IX86_BUILTIN_PSRAW,
20112 IX86_BUILTIN_PSRAD,
20113 IX86_BUILTIN_PSRLW,
20114 IX86_BUILTIN_PSRLD,
20115 IX86_BUILTIN_PSRLQ,
20116 IX86_BUILTIN_PSLLWI,
20117 IX86_BUILTIN_PSLLDI,
20118 IX86_BUILTIN_PSLLQI,
20119 IX86_BUILTIN_PSRAWI,
20120 IX86_BUILTIN_PSRADI,
20121 IX86_BUILTIN_PSRLWI,
20122 IX86_BUILTIN_PSRLDI,
20123 IX86_BUILTIN_PSRLQI,
20125 IX86_BUILTIN_PUNPCKHBW,
20126 IX86_BUILTIN_PUNPCKHWD,
20127 IX86_BUILTIN_PUNPCKHDQ,
20128 IX86_BUILTIN_PUNPCKLBW,
20129 IX86_BUILTIN_PUNPCKLWD,
20130 IX86_BUILTIN_PUNPCKLDQ,
20132 IX86_BUILTIN_SHUFPS,
20134 IX86_BUILTIN_RCPPS,
20135 IX86_BUILTIN_RCPSS,
20136 IX86_BUILTIN_RSQRTPS,
20137 IX86_BUILTIN_RSQRTPS_NR,
20138 IX86_BUILTIN_RSQRTSS,
20139 IX86_BUILTIN_RSQRTF,
20140 IX86_BUILTIN_SQRTPS,
20141 IX86_BUILTIN_SQRTPS_NR,
20142 IX86_BUILTIN_SQRTSS,
20144 IX86_BUILTIN_UNPCKHPS,
20145 IX86_BUILTIN_UNPCKLPS,
20147 IX86_BUILTIN_ANDPS,
20148 IX86_BUILTIN_ANDNPS,
20150 IX86_BUILTIN_XORPS,
20153 IX86_BUILTIN_LDMXCSR,
20154 IX86_BUILTIN_STMXCSR,
20155 IX86_BUILTIN_SFENCE,
20157 /* 3DNow! Original */
20158 IX86_BUILTIN_FEMMS,
20159 IX86_BUILTIN_PAVGUSB,
20160 IX86_BUILTIN_PF2ID,
20161 IX86_BUILTIN_PFACC,
20162 IX86_BUILTIN_PFADD,
20163 IX86_BUILTIN_PFCMPEQ,
20164 IX86_BUILTIN_PFCMPGE,
20165 IX86_BUILTIN_PFCMPGT,
20166 IX86_BUILTIN_PFMAX,
20167 IX86_BUILTIN_PFMIN,
20168 IX86_BUILTIN_PFMUL,
20169 IX86_BUILTIN_PFRCP,
20170 IX86_BUILTIN_PFRCPIT1,
20171 IX86_BUILTIN_PFRCPIT2,
20172 IX86_BUILTIN_PFRSQIT1,
20173 IX86_BUILTIN_PFRSQRT,
20174 IX86_BUILTIN_PFSUB,
20175 IX86_BUILTIN_PFSUBR,
20176 IX86_BUILTIN_PI2FD,
20177 IX86_BUILTIN_PMULHRW,
20179 /* 3DNow! Athlon Extensions */
20180 IX86_BUILTIN_PF2IW,
20181 IX86_BUILTIN_PFNACC,
20182 IX86_BUILTIN_PFPNACC,
20183 IX86_BUILTIN_PI2FW,
20184 IX86_BUILTIN_PSWAPDSI,
20185 IX86_BUILTIN_PSWAPDSF,
20188 IX86_BUILTIN_ADDPD,
20189 IX86_BUILTIN_ADDSD,
20190 IX86_BUILTIN_DIVPD,
20191 IX86_BUILTIN_DIVSD,
20192 IX86_BUILTIN_MULPD,
20193 IX86_BUILTIN_MULSD,
20194 IX86_BUILTIN_SUBPD,
20195 IX86_BUILTIN_SUBSD,
20197 IX86_BUILTIN_CMPEQPD,
20198 IX86_BUILTIN_CMPLTPD,
20199 IX86_BUILTIN_CMPLEPD,
20200 IX86_BUILTIN_CMPGTPD,
20201 IX86_BUILTIN_CMPGEPD,
20202 IX86_BUILTIN_CMPNEQPD,
20203 IX86_BUILTIN_CMPNLTPD,
20204 IX86_BUILTIN_CMPNLEPD,
20205 IX86_BUILTIN_CMPNGTPD,
20206 IX86_BUILTIN_CMPNGEPD,
20207 IX86_BUILTIN_CMPORDPD,
20208 IX86_BUILTIN_CMPUNORDPD,
20209 IX86_BUILTIN_CMPEQSD,
20210 IX86_BUILTIN_CMPLTSD,
20211 IX86_BUILTIN_CMPLESD,
20212 IX86_BUILTIN_CMPNEQSD,
20213 IX86_BUILTIN_CMPNLTSD,
20214 IX86_BUILTIN_CMPNLESD,
20215 IX86_BUILTIN_CMPORDSD,
20216 IX86_BUILTIN_CMPUNORDSD,
20218 IX86_BUILTIN_COMIEQSD,
20219 IX86_BUILTIN_COMILTSD,
20220 IX86_BUILTIN_COMILESD,
20221 IX86_BUILTIN_COMIGTSD,
20222 IX86_BUILTIN_COMIGESD,
20223 IX86_BUILTIN_COMINEQSD,
20224 IX86_BUILTIN_UCOMIEQSD,
20225 IX86_BUILTIN_UCOMILTSD,
20226 IX86_BUILTIN_UCOMILESD,
20227 IX86_BUILTIN_UCOMIGTSD,
20228 IX86_BUILTIN_UCOMIGESD,
20229 IX86_BUILTIN_UCOMINEQSD,
20231 IX86_BUILTIN_MAXPD,
20232 IX86_BUILTIN_MAXSD,
20233 IX86_BUILTIN_MINPD,
20234 IX86_BUILTIN_MINSD,
20236 IX86_BUILTIN_ANDPD,
20237 IX86_BUILTIN_ANDNPD,
20239 IX86_BUILTIN_XORPD,
20241 IX86_BUILTIN_SQRTPD,
20242 IX86_BUILTIN_SQRTSD,
20244 IX86_BUILTIN_UNPCKHPD,
20245 IX86_BUILTIN_UNPCKLPD,
20247 IX86_BUILTIN_SHUFPD,
20249 IX86_BUILTIN_LOADUPD,
20250 IX86_BUILTIN_STOREUPD,
20251 IX86_BUILTIN_MOVSD,
20253 IX86_BUILTIN_LOADHPD,
20254 IX86_BUILTIN_LOADLPD,
20256 IX86_BUILTIN_CVTDQ2PD,
20257 IX86_BUILTIN_CVTDQ2PS,
20259 IX86_BUILTIN_CVTPD2DQ,
20260 IX86_BUILTIN_CVTPD2PI,
20261 IX86_BUILTIN_CVTPD2PS,
20262 IX86_BUILTIN_CVTTPD2DQ,
20263 IX86_BUILTIN_CVTTPD2PI,
20265 IX86_BUILTIN_CVTPI2PD,
20266 IX86_BUILTIN_CVTSI2SD,
20267 IX86_BUILTIN_CVTSI642SD,
20269 IX86_BUILTIN_CVTSD2SI,
20270 IX86_BUILTIN_CVTSD2SI64,
20271 IX86_BUILTIN_CVTSD2SS,
20272 IX86_BUILTIN_CVTSS2SD,
20273 IX86_BUILTIN_CVTTSD2SI,
20274 IX86_BUILTIN_CVTTSD2SI64,
20276 IX86_BUILTIN_CVTPS2DQ,
20277 IX86_BUILTIN_CVTPS2PD,
20278 IX86_BUILTIN_CVTTPS2DQ,
20280 IX86_BUILTIN_MOVNTI,
20281 IX86_BUILTIN_MOVNTPD,
20282 IX86_BUILTIN_MOVNTDQ,
20284 IX86_BUILTIN_MOVQ128,
20287 IX86_BUILTIN_MASKMOVDQU,
20288 IX86_BUILTIN_MOVMSKPD,
20289 IX86_BUILTIN_PMOVMSKB128,
20291 IX86_BUILTIN_PACKSSWB128,
20292 IX86_BUILTIN_PACKSSDW128,
20293 IX86_BUILTIN_PACKUSWB128,
20295 IX86_BUILTIN_PADDB128,
20296 IX86_BUILTIN_PADDW128,
20297 IX86_BUILTIN_PADDD128,
20298 IX86_BUILTIN_PADDQ128,
20299 IX86_BUILTIN_PADDSB128,
20300 IX86_BUILTIN_PADDSW128,
20301 IX86_BUILTIN_PADDUSB128,
20302 IX86_BUILTIN_PADDUSW128,
20303 IX86_BUILTIN_PSUBB128,
20304 IX86_BUILTIN_PSUBW128,
20305 IX86_BUILTIN_PSUBD128,
20306 IX86_BUILTIN_PSUBQ128,
20307 IX86_BUILTIN_PSUBSB128,
20308 IX86_BUILTIN_PSUBSW128,
20309 IX86_BUILTIN_PSUBUSB128,
20310 IX86_BUILTIN_PSUBUSW128,
20312 IX86_BUILTIN_PAND128,
20313 IX86_BUILTIN_PANDN128,
20314 IX86_BUILTIN_POR128,
20315 IX86_BUILTIN_PXOR128,
20317 IX86_BUILTIN_PAVGB128,
20318 IX86_BUILTIN_PAVGW128,
20320 IX86_BUILTIN_PCMPEQB128,
20321 IX86_BUILTIN_PCMPEQW128,
20322 IX86_BUILTIN_PCMPEQD128,
20323 IX86_BUILTIN_PCMPGTB128,
20324 IX86_BUILTIN_PCMPGTW128,
20325 IX86_BUILTIN_PCMPGTD128,
20327 IX86_BUILTIN_PMADDWD128,
20329 IX86_BUILTIN_PMAXSW128,
20330 IX86_BUILTIN_PMAXUB128,
20331 IX86_BUILTIN_PMINSW128,
20332 IX86_BUILTIN_PMINUB128,
20334 IX86_BUILTIN_PMULUDQ,
20335 IX86_BUILTIN_PMULUDQ128,
20336 IX86_BUILTIN_PMULHUW128,
20337 IX86_BUILTIN_PMULHW128,
20338 IX86_BUILTIN_PMULLW128,
20340 IX86_BUILTIN_PSADBW128,
20341 IX86_BUILTIN_PSHUFHW,
20342 IX86_BUILTIN_PSHUFLW,
20343 IX86_BUILTIN_PSHUFD,
20345 IX86_BUILTIN_PSLLDQI128,
20346 IX86_BUILTIN_PSLLWI128,
20347 IX86_BUILTIN_PSLLDI128,
20348 IX86_BUILTIN_PSLLQI128,
20349 IX86_BUILTIN_PSRAWI128,
20350 IX86_BUILTIN_PSRADI128,
20351 IX86_BUILTIN_PSRLDQI128,
20352 IX86_BUILTIN_PSRLWI128,
20353 IX86_BUILTIN_PSRLDI128,
20354 IX86_BUILTIN_PSRLQI128,
20356 IX86_BUILTIN_PSLLDQ128,
20357 IX86_BUILTIN_PSLLW128,
20358 IX86_BUILTIN_PSLLD128,
20359 IX86_BUILTIN_PSLLQ128,
20360 IX86_BUILTIN_PSRAW128,
20361 IX86_BUILTIN_PSRAD128,
20362 IX86_BUILTIN_PSRLW128,
20363 IX86_BUILTIN_PSRLD128,
20364 IX86_BUILTIN_PSRLQ128,
20366 IX86_BUILTIN_PUNPCKHBW128,
20367 IX86_BUILTIN_PUNPCKHWD128,
20368 IX86_BUILTIN_PUNPCKHDQ128,
20369 IX86_BUILTIN_PUNPCKHQDQ128,
20370 IX86_BUILTIN_PUNPCKLBW128,
20371 IX86_BUILTIN_PUNPCKLWD128,
20372 IX86_BUILTIN_PUNPCKLDQ128,
20373 IX86_BUILTIN_PUNPCKLQDQ128,
20375 IX86_BUILTIN_CLFLUSH,
20376 IX86_BUILTIN_MFENCE,
20377 IX86_BUILTIN_LFENCE,
20380 IX86_BUILTIN_ADDSUBPS,
20381 IX86_BUILTIN_HADDPS,
20382 IX86_BUILTIN_HSUBPS,
20383 IX86_BUILTIN_MOVSHDUP,
20384 IX86_BUILTIN_MOVSLDUP,
20385 IX86_BUILTIN_ADDSUBPD,
20386 IX86_BUILTIN_HADDPD,
20387 IX86_BUILTIN_HSUBPD,
20388 IX86_BUILTIN_LDDQU,
20390 IX86_BUILTIN_MONITOR,
20391 IX86_BUILTIN_MWAIT,
20394 IX86_BUILTIN_PHADDW,
20395 IX86_BUILTIN_PHADDD,
20396 IX86_BUILTIN_PHADDSW,
20397 IX86_BUILTIN_PHSUBW,
20398 IX86_BUILTIN_PHSUBD,
20399 IX86_BUILTIN_PHSUBSW,
20400 IX86_BUILTIN_PMADDUBSW,
20401 IX86_BUILTIN_PMULHRSW,
20402 IX86_BUILTIN_PSHUFB,
20403 IX86_BUILTIN_PSIGNB,
20404 IX86_BUILTIN_PSIGNW,
20405 IX86_BUILTIN_PSIGND,
20406 IX86_BUILTIN_PALIGNR,
20407 IX86_BUILTIN_PABSB,
20408 IX86_BUILTIN_PABSW,
20409 IX86_BUILTIN_PABSD,
20411 IX86_BUILTIN_PHADDW128,
20412 IX86_BUILTIN_PHADDD128,
20413 IX86_BUILTIN_PHADDSW128,
20414 IX86_BUILTIN_PHSUBW128,
20415 IX86_BUILTIN_PHSUBD128,
20416 IX86_BUILTIN_PHSUBSW128,
20417 IX86_BUILTIN_PMADDUBSW128,
20418 IX86_BUILTIN_PMULHRSW128,
20419 IX86_BUILTIN_PSHUFB128,
20420 IX86_BUILTIN_PSIGNB128,
20421 IX86_BUILTIN_PSIGNW128,
20422 IX86_BUILTIN_PSIGND128,
20423 IX86_BUILTIN_PALIGNR128,
20424 IX86_BUILTIN_PABSB128,
20425 IX86_BUILTIN_PABSW128,
20426 IX86_BUILTIN_PABSD128,
20428 /* AMDFAM10 - SSE4A New Instructions. */
20429 IX86_BUILTIN_MOVNTSD,
20430 IX86_BUILTIN_MOVNTSS,
20431 IX86_BUILTIN_EXTRQI,
20432 IX86_BUILTIN_EXTRQ,
20433 IX86_BUILTIN_INSERTQI,
20434 IX86_BUILTIN_INSERTQ,
20437 IX86_BUILTIN_BLENDPD,
20438 IX86_BUILTIN_BLENDPS,
20439 IX86_BUILTIN_BLENDVPD,
20440 IX86_BUILTIN_BLENDVPS,
20441 IX86_BUILTIN_PBLENDVB128,
20442 IX86_BUILTIN_PBLENDW128,
20447 IX86_BUILTIN_INSERTPS128,
20449 IX86_BUILTIN_MOVNTDQA,
20450 IX86_BUILTIN_MPSADBW128,
20451 IX86_BUILTIN_PACKUSDW128,
20452 IX86_BUILTIN_PCMPEQQ,
20453 IX86_BUILTIN_PHMINPOSUW128,
20455 IX86_BUILTIN_PMAXSB128,
20456 IX86_BUILTIN_PMAXSD128,
20457 IX86_BUILTIN_PMAXUD128,
20458 IX86_BUILTIN_PMAXUW128,
20460 IX86_BUILTIN_PMINSB128,
20461 IX86_BUILTIN_PMINSD128,
20462 IX86_BUILTIN_PMINUD128,
20463 IX86_BUILTIN_PMINUW128,
20465 IX86_BUILTIN_PMOVSXBW128,
20466 IX86_BUILTIN_PMOVSXBD128,
20467 IX86_BUILTIN_PMOVSXBQ128,
20468 IX86_BUILTIN_PMOVSXWD128,
20469 IX86_BUILTIN_PMOVSXWQ128,
20470 IX86_BUILTIN_PMOVSXDQ128,
20472 IX86_BUILTIN_PMOVZXBW128,
20473 IX86_BUILTIN_PMOVZXBD128,
20474 IX86_BUILTIN_PMOVZXBQ128,
20475 IX86_BUILTIN_PMOVZXWD128,
20476 IX86_BUILTIN_PMOVZXWQ128,
20477 IX86_BUILTIN_PMOVZXDQ128,
20479 IX86_BUILTIN_PMULDQ128,
20480 IX86_BUILTIN_PMULLD128,
20482 IX86_BUILTIN_ROUNDPD,
20483 IX86_BUILTIN_ROUNDPS,
20484 IX86_BUILTIN_ROUNDSD,
20485 IX86_BUILTIN_ROUNDSS,
20487 IX86_BUILTIN_PTESTZ,
20488 IX86_BUILTIN_PTESTC,
20489 IX86_BUILTIN_PTESTNZC,
20491 IX86_BUILTIN_VEC_INIT_V2SI,
20492 IX86_BUILTIN_VEC_INIT_V4HI,
20493 IX86_BUILTIN_VEC_INIT_V8QI,
20494 IX86_BUILTIN_VEC_EXT_V2DF,
20495 IX86_BUILTIN_VEC_EXT_V2DI,
20496 IX86_BUILTIN_VEC_EXT_V4SF,
20497 IX86_BUILTIN_VEC_EXT_V4SI,
20498 IX86_BUILTIN_VEC_EXT_V8HI,
20499 IX86_BUILTIN_VEC_EXT_V2SI,
20500 IX86_BUILTIN_VEC_EXT_V4HI,
20501 IX86_BUILTIN_VEC_EXT_V16QI,
20502 IX86_BUILTIN_VEC_SET_V2DI,
20503 IX86_BUILTIN_VEC_SET_V4SF,
20504 IX86_BUILTIN_VEC_SET_V4SI,
20505 IX86_BUILTIN_VEC_SET_V8HI,
20506 IX86_BUILTIN_VEC_SET_V4HI,
20507 IX86_BUILTIN_VEC_SET_V16QI,
20509 IX86_BUILTIN_VEC_PACK_SFIX,
20512 IX86_BUILTIN_CRC32QI,
20513 IX86_BUILTIN_CRC32HI,
20514 IX86_BUILTIN_CRC32SI,
20515 IX86_BUILTIN_CRC32DI,
20517 IX86_BUILTIN_PCMPESTRI128,
20518 IX86_BUILTIN_PCMPESTRM128,
20519 IX86_BUILTIN_PCMPESTRA128,
20520 IX86_BUILTIN_PCMPESTRC128,
20521 IX86_BUILTIN_PCMPESTRO128,
20522 IX86_BUILTIN_PCMPESTRS128,
20523 IX86_BUILTIN_PCMPESTRZ128,
20524 IX86_BUILTIN_PCMPISTRI128,
20525 IX86_BUILTIN_PCMPISTRM128,
20526 IX86_BUILTIN_PCMPISTRA128,
20527 IX86_BUILTIN_PCMPISTRC128,
20528 IX86_BUILTIN_PCMPISTRO128,
20529 IX86_BUILTIN_PCMPISTRS128,
20530 IX86_BUILTIN_PCMPISTRZ128,
20532 IX86_BUILTIN_PCMPGTQ,
20534 /* AES instructions */
20535 IX86_BUILTIN_AESENC128,
20536 IX86_BUILTIN_AESENCLAST128,
20537 IX86_BUILTIN_AESDEC128,
20538 IX86_BUILTIN_AESDECLAST128,
20539 IX86_BUILTIN_AESIMC128,
20540 IX86_BUILTIN_AESKEYGENASSIST128,
20542 /* PCLMUL instruction */
20543 IX86_BUILTIN_PCLMULQDQ128,
20546 IX86_BUILTIN_ADDPD256,
20547 IX86_BUILTIN_ADDPS256,
20548 IX86_BUILTIN_ADDSUBPD256,
20549 IX86_BUILTIN_ADDSUBPS256,
20550 IX86_BUILTIN_ANDPD256,
20551 IX86_BUILTIN_ANDPS256,
20552 IX86_BUILTIN_ANDNPD256,
20553 IX86_BUILTIN_ANDNPS256,
20554 IX86_BUILTIN_BLENDPD256,
20555 IX86_BUILTIN_BLENDPS256,
20556 IX86_BUILTIN_BLENDVPD256,
20557 IX86_BUILTIN_BLENDVPS256,
20558 IX86_BUILTIN_DIVPD256,
20559 IX86_BUILTIN_DIVPS256,
20560 IX86_BUILTIN_DPPS256,
20561 IX86_BUILTIN_HADDPD256,
20562 IX86_BUILTIN_HADDPS256,
20563 IX86_BUILTIN_HSUBPD256,
20564 IX86_BUILTIN_HSUBPS256,
20565 IX86_BUILTIN_MAXPD256,
20566 IX86_BUILTIN_MAXPS256,
20567 IX86_BUILTIN_MINPD256,
20568 IX86_BUILTIN_MINPS256,
20569 IX86_BUILTIN_MULPD256,
20570 IX86_BUILTIN_MULPS256,
20571 IX86_BUILTIN_ORPD256,
20572 IX86_BUILTIN_ORPS256,
20573 IX86_BUILTIN_SHUFPD256,
20574 IX86_BUILTIN_SHUFPS256,
20575 IX86_BUILTIN_SUBPD256,
20576 IX86_BUILTIN_SUBPS256,
20577 IX86_BUILTIN_XORPD256,
20578 IX86_BUILTIN_XORPS256,
20579 IX86_BUILTIN_CMPSD,
20580 IX86_BUILTIN_CMPSS,
20581 IX86_BUILTIN_CMPPD,
20582 IX86_BUILTIN_CMPPS,
20583 IX86_BUILTIN_CMPPD256,
20584 IX86_BUILTIN_CMPPS256,
20585 IX86_BUILTIN_CVTDQ2PD256,
20586 IX86_BUILTIN_CVTDQ2PS256,
20587 IX86_BUILTIN_CVTPD2PS256,
20588 IX86_BUILTIN_CVTPS2DQ256,
20589 IX86_BUILTIN_CVTPS2PD256,
20590 IX86_BUILTIN_CVTTPD2DQ256,
20591 IX86_BUILTIN_CVTPD2DQ256,
20592 IX86_BUILTIN_CVTTPS2DQ256,
20593 IX86_BUILTIN_EXTRACTF128PD256,
20594 IX86_BUILTIN_EXTRACTF128PS256,
20595 IX86_BUILTIN_EXTRACTF128SI256,
20596 IX86_BUILTIN_VZEROALL,
20597 IX86_BUILTIN_VZEROUPPER,
20598 IX86_BUILTIN_VZEROUPPER_REX64,
20599 IX86_BUILTIN_VPERMILVARPD,
20600 IX86_BUILTIN_VPERMILVARPS,
20601 IX86_BUILTIN_VPERMILVARPD256,
20602 IX86_BUILTIN_VPERMILVARPS256,
20603 IX86_BUILTIN_VPERMILPD,
20604 IX86_BUILTIN_VPERMILPS,
20605 IX86_BUILTIN_VPERMILPD256,
20606 IX86_BUILTIN_VPERMILPS256,
20607 IX86_BUILTIN_VPERM2F128PD256,
20608 IX86_BUILTIN_VPERM2F128PS256,
20609 IX86_BUILTIN_VPERM2F128SI256,
20610 IX86_BUILTIN_VBROADCASTSS,
20611 IX86_BUILTIN_VBROADCASTSD256,
20612 IX86_BUILTIN_VBROADCASTSS256,
20613 IX86_BUILTIN_VBROADCASTPD256,
20614 IX86_BUILTIN_VBROADCASTPS256,
20615 IX86_BUILTIN_VINSERTF128PD256,
20616 IX86_BUILTIN_VINSERTF128PS256,
20617 IX86_BUILTIN_VINSERTF128SI256,
20618 IX86_BUILTIN_LOADUPD256,
20619 IX86_BUILTIN_LOADUPS256,
20620 IX86_BUILTIN_STOREUPD256,
20621 IX86_BUILTIN_STOREUPS256,
20622 IX86_BUILTIN_LDDQU256,
20623 IX86_BUILTIN_MOVNTDQ256,
20624 IX86_BUILTIN_MOVNTPD256,
20625 IX86_BUILTIN_MOVNTPS256,
20626 IX86_BUILTIN_LOADDQU256,
20627 IX86_BUILTIN_STOREDQU256,
20628 IX86_BUILTIN_MASKLOADPD,
20629 IX86_BUILTIN_MASKLOADPS,
20630 IX86_BUILTIN_MASKSTOREPD,
20631 IX86_BUILTIN_MASKSTOREPS,
20632 IX86_BUILTIN_MASKLOADPD256,
20633 IX86_BUILTIN_MASKLOADPS256,
20634 IX86_BUILTIN_MASKSTOREPD256,
20635 IX86_BUILTIN_MASKSTOREPS256,
20636 IX86_BUILTIN_MOVSHDUP256,
20637 IX86_BUILTIN_MOVSLDUP256,
20638 IX86_BUILTIN_MOVDDUP256,
20640 IX86_BUILTIN_SQRTPD256,
20641 IX86_BUILTIN_SQRTPS256,
20642 IX86_BUILTIN_SQRTPS_NR256,
20643 IX86_BUILTIN_RSQRTPS256,
20644 IX86_BUILTIN_RSQRTPS_NR256,
20646 IX86_BUILTIN_RCPPS256,
20648 IX86_BUILTIN_ROUNDPD256,
20649 IX86_BUILTIN_ROUNDPS256,
20651 IX86_BUILTIN_UNPCKHPD256,
20652 IX86_BUILTIN_UNPCKLPD256,
20653 IX86_BUILTIN_UNPCKHPS256,
20654 IX86_BUILTIN_UNPCKLPS256,
20656 IX86_BUILTIN_SI256_SI,
20657 IX86_BUILTIN_PS256_PS,
20658 IX86_BUILTIN_PD256_PD,
20659 IX86_BUILTIN_SI_SI256,
20660 IX86_BUILTIN_PS_PS256,
20661 IX86_BUILTIN_PD_PD256,
20663 IX86_BUILTIN_VTESTZPD,
20664 IX86_BUILTIN_VTESTCPD,
20665 IX86_BUILTIN_VTESTNZCPD,
20666 IX86_BUILTIN_VTESTZPS,
20667 IX86_BUILTIN_VTESTCPS,
20668 IX86_BUILTIN_VTESTNZCPS,
20669 IX86_BUILTIN_VTESTZPD256,
20670 IX86_BUILTIN_VTESTCPD256,
20671 IX86_BUILTIN_VTESTNZCPD256,
20672 IX86_BUILTIN_VTESTZPS256,
20673 IX86_BUILTIN_VTESTCPS256,
20674 IX86_BUILTIN_VTESTNZCPS256,
20675 IX86_BUILTIN_PTESTZ256,
20676 IX86_BUILTIN_PTESTC256,
20677 IX86_BUILTIN_PTESTNZC256,
20679 IX86_BUILTIN_MOVMSKPD256,
20680 IX86_BUILTIN_MOVMSKPS256,
20682 /* TFmode support builtins. */
20684 IX86_BUILTIN_HUGE_VALQ,
20685 IX86_BUILTIN_FABSQ,
20686 IX86_BUILTIN_COPYSIGNQ,
20688 /* SSE5 instructions */
20689 IX86_BUILTIN_FMADDSS,
20690 IX86_BUILTIN_FMADDSD,
20691 IX86_BUILTIN_FMADDPS,
20692 IX86_BUILTIN_FMADDPD,
20693 IX86_BUILTIN_FMSUBSS,
20694 IX86_BUILTIN_FMSUBSD,
20695 IX86_BUILTIN_FMSUBPS,
20696 IX86_BUILTIN_FMSUBPD,
20697 IX86_BUILTIN_FNMADDSS,
20698 IX86_BUILTIN_FNMADDSD,
20699 IX86_BUILTIN_FNMADDPS,
20700 IX86_BUILTIN_FNMADDPD,
20701 IX86_BUILTIN_FNMSUBSS,
20702 IX86_BUILTIN_FNMSUBSD,
20703 IX86_BUILTIN_FNMSUBPS,
20704 IX86_BUILTIN_FNMSUBPD,
20705 IX86_BUILTIN_PCMOV,
20706 IX86_BUILTIN_PCMOV_V2DI,
20707 IX86_BUILTIN_PCMOV_V4SI,
20708 IX86_BUILTIN_PCMOV_V8HI,
20709 IX86_BUILTIN_PCMOV_V16QI,
20710 IX86_BUILTIN_PCMOV_V4SF,
20711 IX86_BUILTIN_PCMOV_V2DF,
20712 IX86_BUILTIN_PPERM,
20713 IX86_BUILTIN_PERMPS,
20714 IX86_BUILTIN_PERMPD,
20715 IX86_BUILTIN_PMACSSWW,
20716 IX86_BUILTIN_PMACSWW,
20717 IX86_BUILTIN_PMACSSWD,
20718 IX86_BUILTIN_PMACSWD,
20719 IX86_BUILTIN_PMACSSDD,
20720 IX86_BUILTIN_PMACSDD,
20721 IX86_BUILTIN_PMACSSDQL,
20722 IX86_BUILTIN_PMACSSDQH,
20723 IX86_BUILTIN_PMACSDQL,
20724 IX86_BUILTIN_PMACSDQH,
20725 IX86_BUILTIN_PMADCSSWD,
20726 IX86_BUILTIN_PMADCSWD,
20727 IX86_BUILTIN_PHADDBW,
20728 IX86_BUILTIN_PHADDBD,
20729 IX86_BUILTIN_PHADDBQ,
20730 IX86_BUILTIN_PHADDWD,
20731 IX86_BUILTIN_PHADDWQ,
20732 IX86_BUILTIN_PHADDDQ,
20733 IX86_BUILTIN_PHADDUBW,
20734 IX86_BUILTIN_PHADDUBD,
20735 IX86_BUILTIN_PHADDUBQ,
20736 IX86_BUILTIN_PHADDUWD,
20737 IX86_BUILTIN_PHADDUWQ,
20738 IX86_BUILTIN_PHADDUDQ,
20739 IX86_BUILTIN_PHSUBBW,
20740 IX86_BUILTIN_PHSUBWD,
20741 IX86_BUILTIN_PHSUBDQ,
20742 IX86_BUILTIN_PROTB,
20743 IX86_BUILTIN_PROTW,
20744 IX86_BUILTIN_PROTD,
20745 IX86_BUILTIN_PROTQ,
20746 IX86_BUILTIN_PROTB_IMM,
20747 IX86_BUILTIN_PROTW_IMM,
20748 IX86_BUILTIN_PROTD_IMM,
20749 IX86_BUILTIN_PROTQ_IMM,
20750 IX86_BUILTIN_PSHLB,
20751 IX86_BUILTIN_PSHLW,
20752 IX86_BUILTIN_PSHLD,
20753 IX86_BUILTIN_PSHLQ,
20754 IX86_BUILTIN_PSHAB,
20755 IX86_BUILTIN_PSHAW,
20756 IX86_BUILTIN_PSHAD,
20757 IX86_BUILTIN_PSHAQ,
20758 IX86_BUILTIN_FRCZSS,
20759 IX86_BUILTIN_FRCZSD,
20760 IX86_BUILTIN_FRCZPS,
20761 IX86_BUILTIN_FRCZPD,
20762 IX86_BUILTIN_CVTPH2PS,
20763 IX86_BUILTIN_CVTPS2PH,
20765 IX86_BUILTIN_COMEQSS,
20766 IX86_BUILTIN_COMNESS,
20767 IX86_BUILTIN_COMLTSS,
20768 IX86_BUILTIN_COMLESS,
20769 IX86_BUILTIN_COMGTSS,
20770 IX86_BUILTIN_COMGESS,
20771 IX86_BUILTIN_COMUEQSS,
20772 IX86_BUILTIN_COMUNESS,
20773 IX86_BUILTIN_COMULTSS,
20774 IX86_BUILTIN_COMULESS,
20775 IX86_BUILTIN_COMUGTSS,
20776 IX86_BUILTIN_COMUGESS,
20777 IX86_BUILTIN_COMORDSS,
20778 IX86_BUILTIN_COMUNORDSS,
20779 IX86_BUILTIN_COMFALSESS,
20780 IX86_BUILTIN_COMTRUESS,
20782 IX86_BUILTIN_COMEQSD,
20783 IX86_BUILTIN_COMNESD,
20784 IX86_BUILTIN_COMLTSD,
20785 IX86_BUILTIN_COMLESD,
20786 IX86_BUILTIN_COMGTSD,
20787 IX86_BUILTIN_COMGESD,
20788 IX86_BUILTIN_COMUEQSD,
20789 IX86_BUILTIN_COMUNESD,
20790 IX86_BUILTIN_COMULTSD,
20791 IX86_BUILTIN_COMULESD,
20792 IX86_BUILTIN_COMUGTSD,
20793 IX86_BUILTIN_COMUGESD,
20794 IX86_BUILTIN_COMORDSD,
20795 IX86_BUILTIN_COMUNORDSD,
20796 IX86_BUILTIN_COMFALSESD,
20797 IX86_BUILTIN_COMTRUESD,
20799 IX86_BUILTIN_COMEQPS,
20800 IX86_BUILTIN_COMNEPS,
20801 IX86_BUILTIN_COMLTPS,
20802 IX86_BUILTIN_COMLEPS,
20803 IX86_BUILTIN_COMGTPS,
20804 IX86_BUILTIN_COMGEPS,
20805 IX86_BUILTIN_COMUEQPS,
20806 IX86_BUILTIN_COMUNEPS,
20807 IX86_BUILTIN_COMULTPS,
20808 IX86_BUILTIN_COMULEPS,
20809 IX86_BUILTIN_COMUGTPS,
20810 IX86_BUILTIN_COMUGEPS,
20811 IX86_BUILTIN_COMORDPS,
20812 IX86_BUILTIN_COMUNORDPS,
20813 IX86_BUILTIN_COMFALSEPS,
20814 IX86_BUILTIN_COMTRUEPS,
20816 IX86_BUILTIN_COMEQPD,
20817 IX86_BUILTIN_COMNEPD,
20818 IX86_BUILTIN_COMLTPD,
20819 IX86_BUILTIN_COMLEPD,
20820 IX86_BUILTIN_COMGTPD,
20821 IX86_BUILTIN_COMGEPD,
20822 IX86_BUILTIN_COMUEQPD,
20823 IX86_BUILTIN_COMUNEPD,
20824 IX86_BUILTIN_COMULTPD,
20825 IX86_BUILTIN_COMULEPD,
20826 IX86_BUILTIN_COMUGTPD,
20827 IX86_BUILTIN_COMUGEPD,
20828 IX86_BUILTIN_COMORDPD,
20829 IX86_BUILTIN_COMUNORDPD,
20830 IX86_BUILTIN_COMFALSEPD,
20831 IX86_BUILTIN_COMTRUEPD,
20833 IX86_BUILTIN_PCOMEQUB,
20834 IX86_BUILTIN_PCOMNEUB,
20835 IX86_BUILTIN_PCOMLTUB,
20836 IX86_BUILTIN_PCOMLEUB,
20837 IX86_BUILTIN_PCOMGTUB,
20838 IX86_BUILTIN_PCOMGEUB,
20839 IX86_BUILTIN_PCOMFALSEUB,
20840 IX86_BUILTIN_PCOMTRUEUB,
20841 IX86_BUILTIN_PCOMEQUW,
20842 IX86_BUILTIN_PCOMNEUW,
20843 IX86_BUILTIN_PCOMLTUW,
20844 IX86_BUILTIN_PCOMLEUW,
20845 IX86_BUILTIN_PCOMGTUW,
20846 IX86_BUILTIN_PCOMGEUW,
20847 IX86_BUILTIN_PCOMFALSEUW,
20848 IX86_BUILTIN_PCOMTRUEUW,
20849 IX86_BUILTIN_PCOMEQUD,
20850 IX86_BUILTIN_PCOMNEUD,
20851 IX86_BUILTIN_PCOMLTUD,
20852 IX86_BUILTIN_PCOMLEUD,
20853 IX86_BUILTIN_PCOMGTUD,
20854 IX86_BUILTIN_PCOMGEUD,
20855 IX86_BUILTIN_PCOMFALSEUD,
20856 IX86_BUILTIN_PCOMTRUEUD,
20857 IX86_BUILTIN_PCOMEQUQ,
20858 IX86_BUILTIN_PCOMNEUQ,
20859 IX86_BUILTIN_PCOMLTUQ,
20860 IX86_BUILTIN_PCOMLEUQ,
20861 IX86_BUILTIN_PCOMGTUQ,
20862 IX86_BUILTIN_PCOMGEUQ,
20863 IX86_BUILTIN_PCOMFALSEUQ,
20864 IX86_BUILTIN_PCOMTRUEUQ,
20866 IX86_BUILTIN_PCOMEQB,
20867 IX86_BUILTIN_PCOMNEB,
20868 IX86_BUILTIN_PCOMLTB,
20869 IX86_BUILTIN_PCOMLEB,
20870 IX86_BUILTIN_PCOMGTB,
20871 IX86_BUILTIN_PCOMGEB,
20872 IX86_BUILTIN_PCOMFALSEB,
20873 IX86_BUILTIN_PCOMTRUEB,
20874 IX86_BUILTIN_PCOMEQW,
20875 IX86_BUILTIN_PCOMNEW,
20876 IX86_BUILTIN_PCOMLTW,
20877 IX86_BUILTIN_PCOMLEW,
20878 IX86_BUILTIN_PCOMGTW,
20879 IX86_BUILTIN_PCOMGEW,
20880 IX86_BUILTIN_PCOMFALSEW,
20881 IX86_BUILTIN_PCOMTRUEW,
20882 IX86_BUILTIN_PCOMEQD,
20883 IX86_BUILTIN_PCOMNED,
20884 IX86_BUILTIN_PCOMLTD,
20885 IX86_BUILTIN_PCOMLED,
20886 IX86_BUILTIN_PCOMGTD,
20887 IX86_BUILTIN_PCOMGED,
20888 IX86_BUILTIN_PCOMFALSED,
20889 IX86_BUILTIN_PCOMTRUED,
20890 IX86_BUILTIN_PCOMEQQ,
20891 IX86_BUILTIN_PCOMNEQ,
20892 IX86_BUILTIN_PCOMLTQ,
20893 IX86_BUILTIN_PCOMLEQ,
20894 IX86_BUILTIN_PCOMGTQ,
20895 IX86_BUILTIN_PCOMGEQ,
20896 IX86_BUILTIN_PCOMFALSEQ,
20897 IX86_BUILTIN_PCOMTRUEQ,
20902 /* Table for the ix86 builtin decls. */
/* Indexed by enum ix86_builtins; a slot is NULL_TREE until the builtin
   is actually declared (declaration may be deferred, see def_builtin).  */
20903 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
20905 /* Table of all of the builtin functions that are possible with different ISA's
20906    but are waiting to be built until a function is declared to use that
/* One deferred-declaration record per builtin code.  A non-NULL `type'
   marks an entry still waiting to be declared by ix86_add_new_builtins.  */
20908 struct GTY(()) builtin_isa {
20909   tree type;   /* builtin type to use in the declaration */
20910   const char *name;  /* function name */
20911   int isa;   /* isa_flags this builtin is defined for */
20912   bool const_p;  /* true if the declaration is constant */
20915 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20918 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
20919  * of which isa_flags to use in the ix86_builtins_isa array.  Stores the
20920  * function decl in the ix86_builtins array.  Returns the function decl or
20921  * NULL_TREE, if the builtin was not added.
20923  * If the front end has a special hook for builtin functions, delay adding
20924  * builtin functions that aren't in the current ISA until the ISA is changed
20925  * with function specific optimization.  Doing so, can save about 300K for the
20926  * default compiler.  When the builtin is expanded, check at that time whether
20929  * If the front end doesn't have a special hook, record all builtins, even if
20930  * it isn't an instruction set in the current ISA in case the user uses
20931  * function specific options for a different ISA, so that we don't get scope
20932  * errors if a builtin is added in the middle of a function scope. */
20935 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
20937   tree decl = NULL_TREE;
/* Builtins flagged OPTION_MASK_ISA_64BIT are dropped entirely on
   32-bit targets; decl stays NULL_TREE in that case.  */
20939   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
20941       ix86_builtins_isa[(int) code].isa = mask;
/* Declare immediately when the builtin's ISA is already enabled, or
   when the front end's normal builtin hook is the same as its
   extension-scope hook (i.e. no special hook, so deferring to
   ix86_add_new_builtins would gain nothing — see comment above).  */
20943       if ((mask & ix86_isa_flags) != 0
20944 	  || (lang_hooks.builtin_function
20945 	      == lang_hooks.builtin_function_ext_scope))
20948 	  decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20950 	  ix86_builtins[(int) code] = decl;
/* A NULL_TREE type marks this slot as already declared.  */
20951 	  ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Deferred path: record name/type so ix86_add_new_builtins can
   declare the builtin later when its ISA is enabled.  */
20955 	  ix86_builtins[(int) code] = NULL_TREE;
20956 	  ix86_builtins_isa[(int) code].const_p = false;
20957 	  ix86_builtins_isa[(int) code].type = type;
20958 	  ix86_builtins_isa[(int) code].name = name;
20965 /* Like def_builtin, but also marks the function decl "const". */
20968 def_builtin_const (int mask, const char *name, tree type,
20969 		   enum ix86_builtins code)
20971   tree decl = def_builtin (mask, name, type, code);
/* If def_builtin declared the function now, mark the decl readonly;
   if declaration was deferred, record const_p so the deferred
   declaration in ix86_add_new_builtins gets TREE_READONLY as well.  */
20973     TREE_READONLY (decl) = 1;
20975     ix86_builtins_isa[(int) code].const_p = true;
20980 /* Add any new builtin functions for a given ISA that may not have been
20981    declared.  This saves a bit of space compared to adding all of the
20982    declarations to the tree, even if we didn't use them. */
20985 ix86_add_new_builtins (int isa)
20990   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
/* A non-NULL type marks an entry recorded by def_builtin whose
   declaration was deferred; declare it now if ISA enables it.  */
20992       if ((ix86_builtins_isa[i].isa & isa) != 0
20993 	  && ix86_builtins_isa[i].type != NULL_TREE)
/* Declare at extension scope so the builtin becomes visible even in
   the middle of a function scope (see comment above def_builtin).  */
20995 	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
20996 						 ix86_builtins_isa[i].type,
20997 						 i, BUILT_IN_MD, NULL,
21000 	  ix86_builtins[i] = decl;
/* Clear type so this builtin is never declared a second time.  */
21001 	  ix86_builtins_isa[i].type = NULL_TREE;
/* Propagate const-ness recorded by def_builtin_const.  */
21002 	  if (ix86_builtins_isa[i].const_p)
21003 	    TREE_READONLY (decl) = 1;
21008 /* Bits for builtin_description.flag. */
21010 /* Set when we don't support the comparison natively, and should
21011    swap_comparison in order to support it. */
21012 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One table entry describing a machine-specific builtin: the ISA mask
   it requires, the insn pattern that implements it, its user-visible
   name, its ix86_builtins code, and the RTL comparison code to use
   (UNKNOWN when the builtin is not a comparison).  */
21014 struct builtin_description
21016   const unsigned int mask;
21017   const enum insn_code icode;
21018   const char *const name;
21019   const enum ix86_builtins code;
21020   const enum rtx_code comparison;
/* SSE/SSE2 scalar ordered/unordered compare builtins (COMISS/UCOMISS,
   COMISD/UCOMISD).  The trailing flag field is 0 (unused) here.  */
21024 static const struct builtin_description bdesc_comi[] =
21026   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21027   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21028   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21029   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21030   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21031   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21032   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21033   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21034   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21035   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21036   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21037   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
21038   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21039   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21040   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21041   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21042   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21043   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21044   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21045   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21046   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21047   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21048   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21049   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 PCMPESTR* (explicit-length string compare) builtins.  For the
   flag-extracting variants the flag field carries the CC mode whose
   condition bit the builtin returns.  */
21052 static const struct builtin_description bdesc_pcmpestr[] =
21055   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21056   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21057   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21058   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21059   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21060   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21061   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 PCMPISTR* (implicit-length string compare) builtins; same
   table layout as bdesc_pcmpestr above.  */
21064 static const struct builtin_description bdesc_pcmpistr[] =
21067   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21068   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21069   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21070   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21071   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21072   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21073   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21076 /* Special builtin types */
/* Naming convention appears to be RESULT_FTYPE_ARGS, where a PC prefix
   presumably denotes a pointer-to-const argument and P a plain pointer
   — NOTE(review): confirm against the type-building code that consumes
   these values.  */
21077 enum ix86_special_builtin_type
21079   SPECIAL_FTYPE_UNKNOWN,
21081   V32QI_FTYPE_PCCHAR,
21082   V16QI_FTYPE_PCCHAR,
21084   V8SF_FTYPE_PCFLOAT,
21086   V4DF_FTYPE_PCDOUBLE,
21087   V4SF_FTYPE_PCFLOAT,
21088   V2DF_FTYPE_PCDOUBLE,
21089   V8SF_FTYPE_PCV8SF_V8SF,
21090   V4DF_FTYPE_PCV4DF_V4DF,
21091   V4SF_FTYPE_V4SF_PCV2SF,
21092   V4SF_FTYPE_PCV4SF_V4SF,
21093   V2DF_FTYPE_V2DF_PCDOUBLE,
21094   V2DF_FTYPE_PCV2DF_V2DF,
21096   VOID_FTYPE_PV2SF_V4SF,
21097   VOID_FTYPE_PV4DI_V4DI,
21098   VOID_FTYPE_PV2DI_V2DI,
21099   VOID_FTYPE_PCHAR_V32QI,
21100   VOID_FTYPE_PCHAR_V16QI,
21101   VOID_FTYPE_PFLOAT_V8SF,
21102   VOID_FTYPE_PFLOAT_V4SF,
21103   VOID_FTYPE_PDOUBLE_V4DF,
21104   VOID_FTYPE_PDOUBLE_V2DF,
21106   VOID_FTYPE_PINT_INT,
21107   VOID_FTYPE_PV8SF_V8SF_V8SF,
21108   VOID_FTYPE_PV4DF_V4DF_V4DF,
21109   VOID_FTYPE_PV4SF_V4SF_V4SF,
21110   VOID_FTYPE_PV2DF_V2DF_V2DF
21113 /* Builtin types */
/* Function-prototype classes for the ordinary (value-only) builtins.
   Encoding is RET_FTYPE_ARG1[_ARG2...].  Trailing markers modify how
   the expander treats the last operand: _COUNT (shift count), _SWAP
   (operands are exchanged before emitting the insn), _VEC_MERGE
   (scalar op merged into the destination vector), _PTEST (flags-setting
   compare), INT/UINT tails (immediate or scalar integer operand).  */
21114 enum ix86_builtin_type
21117 FLOAT128_FTYPE_FLOAT128,
21119 FLOAT128_FTYPE_FLOAT128_FLOAT128,
/* SSE4.1/AVX ptest-style predicates returning a condition-code int.  */
21120 INT_FTYPE_V8SF_V8SF_PTEST,
21121 INT_FTYPE_V4DI_V4DI_PTEST,
21122 INT_FTYPE_V4DF_V4DF_PTEST,
21123 INT_FTYPE_V4SF_V4SF_PTEST,
21124 INT_FTYPE_V2DI_V2DI_PTEST,
21125 INT_FTYPE_V2DF_V2DF_PTEST,
21157 V4SF_FTYPE_V4SF_VEC_MERGE,
21166 V2DF_FTYPE_V2DF_VEC_MERGE,
/* Two-vector operations (plus their _COUNT shift variants).  */
21177 V16QI_FTYPE_V16QI_V16QI,
21178 V16QI_FTYPE_V8HI_V8HI,
21179 V8QI_FTYPE_V8QI_V8QI,
21180 V8QI_FTYPE_V4HI_V4HI,
21181 V8HI_FTYPE_V8HI_V8HI,
21182 V8HI_FTYPE_V8HI_V8HI_COUNT,
21183 V8HI_FTYPE_V16QI_V16QI,
21184 V8HI_FTYPE_V4SI_V4SI,
21185 V8HI_FTYPE_V8HI_SI_COUNT,
21186 V8SF_FTYPE_V8SF_V8SF,
21187 V8SF_FTYPE_V8SF_V8SI,
21188 V4SI_FTYPE_V4SI_V4SI,
21189 V4SI_FTYPE_V4SI_V4SI_COUNT,
21190 V4SI_FTYPE_V8HI_V8HI,
21191 V4SI_FTYPE_V4SF_V4SF,
21192 V4SI_FTYPE_V2DF_V2DF,
21193 V4SI_FTYPE_V4SI_SI_COUNT,
21194 V4HI_FTYPE_V4HI_V4HI,
21195 V4HI_FTYPE_V4HI_V4HI_COUNT,
21196 V4HI_FTYPE_V8QI_V8QI,
21197 V4HI_FTYPE_V2SI_V2SI,
21198 V4HI_FTYPE_V4HI_SI_COUNT,
21199 V4DF_FTYPE_V4DF_V4DF,
21200 V4DF_FTYPE_V4DF_V4DI,
21201 V4SF_FTYPE_V4SF_V4SF,
21202 V4SF_FTYPE_V4SF_V4SF_SWAP,
21203 V4SF_FTYPE_V4SF_V4SI,
21204 V4SF_FTYPE_V4SF_V2SI,
21205 V4SF_FTYPE_V4SF_V2DF,
21206 V4SF_FTYPE_V4SF_DI,
21207 V4SF_FTYPE_V4SF_SI,
21208 V2DI_FTYPE_V2DI_V2DI,
21209 V2DI_FTYPE_V2DI_V2DI_COUNT,
21210 V2DI_FTYPE_V16QI_V16QI,
21211 V2DI_FTYPE_V4SI_V4SI,
21212 V2DI_FTYPE_V2DI_V16QI,
21213 V2DI_FTYPE_V2DF_V2DF,
21214 V2DI_FTYPE_V2DI_SI_COUNT,
21215 V2SI_FTYPE_V2SI_V2SI,
21216 V2SI_FTYPE_V2SI_V2SI_COUNT,
21217 V2SI_FTYPE_V4HI_V4HI,
21218 V2SI_FTYPE_V2SF_V2SF,
21219 V2SI_FTYPE_V2SI_SI_COUNT,
21220 V2DF_FTYPE_V2DF_V2DF,
21221 V2DF_FTYPE_V2DF_V2DF_SWAP,
21222 V2DF_FTYPE_V2DF_V4SF,
21223 V2DF_FTYPE_V2DF_V2DI,
21224 V2DF_FTYPE_V2DF_DI,
21225 V2DF_FTYPE_V2DF_SI,
21226 V2SF_FTYPE_V2SF_V2SF,
21227 V1DI_FTYPE_V1DI_V1DI,
21228 V1DI_FTYPE_V1DI_V1DI_COUNT,
21229 V1DI_FTYPE_V8QI_V8QI,
21230 V1DI_FTYPE_V2SI_V2SI,
21231 V1DI_FTYPE_V1DI_SI_COUNT,
/* Scalar integer operations (e.g. crc32 family).  */
21232 UINT64_FTYPE_UINT64_UINT64,
21233 UINT_FTYPE_UINT_UINT,
21234 UINT_FTYPE_UINT_USHORT,
21235 UINT_FTYPE_UINT_UCHAR,
/* Vector-plus-immediate operations.  */
21236 V8HI_FTYPE_V8HI_INT,
21237 V4SI_FTYPE_V4SI_INT,
21238 V4HI_FTYPE_V4HI_INT,
21239 V8SF_FTYPE_V8SF_INT,
21240 V4SI_FTYPE_V8SI_INT,
21241 V4SF_FTYPE_V8SF_INT,
21242 V2DF_FTYPE_V4DF_INT,
21243 V4DF_FTYPE_V4DF_INT,
21244 V4SF_FTYPE_V4SF_INT,
21245 V2DI_FTYPE_V2DI_INT,
21246 V2DI2TI_FTYPE_V2DI_INT,
21247 V2DF_FTYPE_V2DF_INT,
/* Three-vector operations (blends, FMA-style forms).  */
21248 V16QI_FTYPE_V16QI_V16QI_V16QI,
21249 V8SF_FTYPE_V8SF_V8SF_V8SF,
21250 V4DF_FTYPE_V4DF_V4DF_V4DF,
21251 V4SF_FTYPE_V4SF_V4SF_V4SF,
21252 V2DF_FTYPE_V2DF_V2DF_V2DF,
/* Two vectors plus an immediate.  */
21253 V16QI_FTYPE_V16QI_V16QI_INT,
21254 V8SI_FTYPE_V8SI_V8SI_INT,
21255 V8SI_FTYPE_V8SI_V4SI_INT,
21256 V8HI_FTYPE_V8HI_V8HI_INT,
21257 V8SF_FTYPE_V8SF_V8SF_INT,
21258 V8SF_FTYPE_V8SF_V4SF_INT,
21259 V4SI_FTYPE_V4SI_V4SI_INT,
21260 V4DF_FTYPE_V4DF_V4DF_INT,
21261 V4DF_FTYPE_V4DF_V2DF_INT,
21262 V4SF_FTYPE_V4SF_V4SF_INT,
21263 V2DI_FTYPE_V2DI_V2DI_INT,
21264 V2DI2TI_FTYPE_V2DI_V2DI_INT,
21265 V1DI2DI_FTYPE_V1DI_V1DI_INT,
21266 V2DF_FTYPE_V2DF_V2DF_INT,
21267 V2DI_FTYPE_V2DI_UINT_UINT,
21268 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21271 /* Special builtins with variable number of arguments. */
/* Table of the "special" builtins (pointer-taking loads/stores, fences,
   state-clearing insns).  Each row gives: the ISA mask that must be
   enabled, the insn pattern, the user-visible builtin name (0 when the
   builtin is registered by hand elsewhere), the IX86_BUILTIN_* code,
   a comparison code (UNKNOWN when unused), and the prototype class.  */
21272 static const struct builtin_description bdesc_special_args[] =
/* MMX.  */
21275 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* 3DNow!.  */
21278 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE.  */
21281 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21283 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21290 /* SSE or 3DNow!A */
21291 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21292 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
/* SSE2.  Rows with a 0 name (e.g. mfence) are registered by hand.  */
21295 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21296 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21297 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21298 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21299 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21300 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21301 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21302 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21303 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
/* SSE3.  */
21309 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
/* SSE4.1.  */
21312 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
/* SSE4A.  */
21315 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21316 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
/* AVX.  */
21319 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21320 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21321 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21323 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21324 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21325 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21326 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21327 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21329 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21330 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21331 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21332 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21333 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21334 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21335 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21337 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21338 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21339 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21341 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21342 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21343 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21344 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21345 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21346 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21347 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21348 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21351 /* Builtins with variable number of arguments. */
/* Table of the ordinary (value-only) builtins.  Each row: required ISA
   mask, insn pattern, builtin name, IX86_BUILTIN_* code, comparison
   code (UNKNOWN when unused), and the int-encoded prototype class.
   Fix: the cvtsi642ss row previously omitted the "(int)" cast on its
   prototype-class flag that every other row applies; made consistent.  */
21352 static const struct builtin_description bdesc_args[] =
/* MMX.  */
21355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21357 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21358 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21359 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21364 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21365 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21366 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21368 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21369 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21371 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21372 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21374 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21376 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21377 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21379 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21380 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21381 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21382 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21383 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21384 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21386 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21387 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21388 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21389 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21390 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21391 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21393 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21394 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21395 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21397 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21399 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21400 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21401 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21402 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21403 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21404 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21406 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21407 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21408 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21409 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21410 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21411 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21413 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21414 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21415 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21416 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
/* 3DNow!.  */
21419 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21420 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21421 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21422 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21424 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21425 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21426 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21427 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21428 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21429 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21430 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21431 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21432 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21433 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21434 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21435 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21436 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21437 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21438 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
/* 3DNow!A.  */
21441 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21442 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21443 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21444 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21445 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21446 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
/* SSE.  */
21449 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21450 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21451 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21452 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21453 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21454 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21455 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21456 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21457 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21458 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21459 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21460 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21462 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21464 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21465 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21466 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21468 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21469 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21470 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
/* Compares: "gt"/"ge" forms reuse the "lt"/"le" codes with _SWAP.  */
21473 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21474 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21475 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21476 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21477 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21478 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21479 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21480 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21481 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21482 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21483 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21484 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21485 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21486 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21487 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21488 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21489 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21490 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21491 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21492 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21493 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21494 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21496 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21497 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21498 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21499 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21501 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21502 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21503 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21504 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21506 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21507 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21508 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21509 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21510 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21512 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21513 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21514 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
21516 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21518 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21519 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21520 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21522 /* SSE MMX or 3Dnow!A */
21523 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21524 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21525 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21527 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21528 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21529 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21530 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21532 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21533 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21535 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
/* SSE2.  */
21538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21542 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21543 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21546 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21556 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21557 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21563 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21564 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21565 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21566 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21591 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21594 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21598 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21600 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21601 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21607 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21609 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21610 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21611 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21612 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21613 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21614 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21615 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21616 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21618 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21619 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21623 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21624 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21625 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21627 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21628 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
21630 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21631 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21632 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21633 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21639 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21645 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21646 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21647 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21648 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21652 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21659 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21663 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21664 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21666 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21667 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21669 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21672 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21674 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21676 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21677 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21678 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21679 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21680 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21681 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21682 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21684 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21685 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21686 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21687 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21688 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21689 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21690 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21692 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21693 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21694 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21695 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21697 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21701 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21703 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21704 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21706 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21709 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21710 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21713 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
21714 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21716 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21717 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21718 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21719 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21720 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21721 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21724 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21725 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21726 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21727 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21728 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21729 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21731 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21732 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21733 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21734 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21735 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21736 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21737 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21738 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21739 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21740 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21741 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21742 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21743 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21744 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21745 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21746 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21747 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21748 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21749 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21750 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21751 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21752 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21753 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21754 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21757 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21758 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
21761 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21762 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21763 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21764 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21765 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21766 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21767 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21768 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21769 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21770 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21772 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21773 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21774 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21775 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21776 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21777 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21778 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21779 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21780 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21781 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21782 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21783 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21784 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21786 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21787 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21788 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21789 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21790 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21791 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21792 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21793 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21794 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21795 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21796 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21797 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21799 /* SSE4.1 and SSE5 */
21800 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21801 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21802 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21803 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21805 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21806 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21807 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21810 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21811 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21812 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21813 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21814 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
21817 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21818 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21819 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21820 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21823 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21824 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21826 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21827 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21828 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21829 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21832 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
21835 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21836 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21837 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21839 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21840 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21841 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21843 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21849 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21850 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21851 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21852 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21853 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21854 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21855 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21856 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21857 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21858 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21859 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21860 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21862 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21865 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
21867 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21870 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21871 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21881 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
21883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
21894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
21899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
21900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
21902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21906 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21907 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21908 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21910 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21915 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21919 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21920 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21922 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
21923 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
21924 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
21925 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
21926 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
21927 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
21929 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21930 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21931 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21932 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21933 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21934 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21935 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21936 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21937 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21938 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21939 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21940 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21941 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21942 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21943 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21945 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
21946 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
21950 enum multi_arg_type {
21960 MULTI_ARG_3_PERMPS,
21961 MULTI_ARG_3_PERMPD,
21968 MULTI_ARG_2_DI_IMM,
21969 MULTI_ARG_2_SI_IMM,
21970 MULTI_ARG_2_HI_IMM,
21971 MULTI_ARG_2_QI_IMM,
21972 MULTI_ARG_2_SF_CMP,
21973 MULTI_ARG_2_DF_CMP,
21974 MULTI_ARG_2_DI_CMP,
21975 MULTI_ARG_2_SI_CMP,
21976 MULTI_ARG_2_HI_CMP,
21977 MULTI_ARG_2_QI_CMP,
22000 static const struct builtin_description bdesc_multi_arg[] =
22002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
22019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
22020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
22021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
22022 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
22023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
22024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
22025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
22026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, UNKNOWN, (int)MULTI_ARG_3_PERMPS },
22027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, UNKNOWN, (int)MULTI_ARG_3_PERMPD },
22028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22030 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22038 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
22042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
22043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
22044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
22045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
22046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
22047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
22048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
22050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
22051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
22052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
22054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
22055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
22056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
22057 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
22058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
22059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
22060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)MULTI_ARG_1_PH2PS },
22061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int)MULTI_ARG_1_PS2PH },
22062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22067 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22079 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22084 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22086 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22088 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22091 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22095 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22096 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22097 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22098 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22099 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22100 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22101 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22102 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22103 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22104 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22105 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22106 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22107 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22108 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22109 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22110 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22112 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22113 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22114 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22115 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22116 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22117 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22118 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22119 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22120 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22121 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22122 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22123 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22124 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22125 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22126 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22127 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22129 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22130 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22131 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22132 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22133 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22134 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22135 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22136 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22137 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22138 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22139 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22140 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22141 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22142 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22143 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22144 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22146 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22147 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22148 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22149 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22150 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22151 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22152 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22154 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22157 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22160 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22162 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22165 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22167 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22170 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22174 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22176 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22178 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22184 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22186 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22187 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22188 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22189 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22190 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22191 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22192 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22194 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22195 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22196 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22197 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22198 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22199 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22200 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22202 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22203 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22204 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22205 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22206 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22207 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22208 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22210 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22211 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22212 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22213 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22214 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22215 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22216 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22217 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22219 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22220 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22221 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22222 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22223 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22224 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22225 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22226 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22228 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22229 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22230 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22231 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22232 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22233 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22234 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22235 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22238 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
22239 in the current target ISA to allow the user to compile particular modules
22240 with different target specific options that differ from the command line
22243 ix86_init_mmx_sse_builtins (void)
22245 const struct builtin_description * d;
22248 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22249 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22250 tree V1DI_type_node
22251 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22252 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22253 tree V2DI_type_node
22254 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22255 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22256 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22257 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22258 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22259 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22260 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22262 tree pchar_type_node = build_pointer_type (char_type_node);
22263 tree pcchar_type_node
22264 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22265 tree pfloat_type_node = build_pointer_type (float_type_node);
22266 tree pcfloat_type_node
22267 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22268 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22269 tree pcv2sf_type_node
22270 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22271 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22272 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22275 tree int_ftype_v4sf_v4sf
22276 = build_function_type_list (integer_type_node,
22277 V4SF_type_node, V4SF_type_node, NULL_TREE);
22278 tree v4si_ftype_v4sf_v4sf
22279 = build_function_type_list (V4SI_type_node,
22280 V4SF_type_node, V4SF_type_node, NULL_TREE);
22281 /* MMX/SSE/integer conversions. */
22282 tree int_ftype_v4sf
22283 = build_function_type_list (integer_type_node,
22284 V4SF_type_node, NULL_TREE);
22285 tree int64_ftype_v4sf
22286 = build_function_type_list (long_long_integer_type_node,
22287 V4SF_type_node, NULL_TREE);
22288 tree int_ftype_v8qi
22289 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22290 tree v4sf_ftype_v4sf_int
22291 = build_function_type_list (V4SF_type_node,
22292 V4SF_type_node, integer_type_node, NULL_TREE);
22293 tree v4sf_ftype_v4sf_int64
22294 = build_function_type_list (V4SF_type_node,
22295 V4SF_type_node, long_long_integer_type_node,
22297 tree v4sf_ftype_v4sf_v2si
22298 = build_function_type_list (V4SF_type_node,
22299 V4SF_type_node, V2SI_type_node, NULL_TREE);
22301 /* Miscellaneous. */
22302 tree v8qi_ftype_v4hi_v4hi
22303 = build_function_type_list (V8QI_type_node,
22304 V4HI_type_node, V4HI_type_node, NULL_TREE);
22305 tree v4hi_ftype_v2si_v2si
22306 = build_function_type_list (V4HI_type_node,
22307 V2SI_type_node, V2SI_type_node, NULL_TREE);
22308 tree v4sf_ftype_v4sf_v4sf_int
22309 = build_function_type_list (V4SF_type_node,
22310 V4SF_type_node, V4SF_type_node,
22311 integer_type_node, NULL_TREE);
22312 tree v2si_ftype_v4hi_v4hi
22313 = build_function_type_list (V2SI_type_node,
22314 V4HI_type_node, V4HI_type_node, NULL_TREE);
22315 tree v4hi_ftype_v4hi_int
22316 = build_function_type_list (V4HI_type_node,
22317 V4HI_type_node, integer_type_node, NULL_TREE);
22318 tree v2si_ftype_v2si_int
22319 = build_function_type_list (V2SI_type_node,
22320 V2SI_type_node, integer_type_node, NULL_TREE);
22321 tree v1di_ftype_v1di_int
22322 = build_function_type_list (V1DI_type_node,
22323 V1DI_type_node, integer_type_node, NULL_TREE);
22325 tree void_ftype_void
22326 = build_function_type (void_type_node, void_list_node);
22327 tree void_ftype_unsigned
22328 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22329 tree void_ftype_unsigned_unsigned
22330 = build_function_type_list (void_type_node, unsigned_type_node,
22331 unsigned_type_node, NULL_TREE);
22332 tree void_ftype_pcvoid_unsigned_unsigned
22333 = build_function_type_list (void_type_node, const_ptr_type_node,
22334 unsigned_type_node, unsigned_type_node,
22336 tree unsigned_ftype_void
22337 = build_function_type (unsigned_type_node, void_list_node);
22338 tree v2si_ftype_v4sf
22339 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22340 /* Loads/stores. */
22341 tree void_ftype_v8qi_v8qi_pchar
22342 = build_function_type_list (void_type_node,
22343 V8QI_type_node, V8QI_type_node,
22344 pchar_type_node, NULL_TREE);
22345 tree v4sf_ftype_pcfloat
22346 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22347 tree v4sf_ftype_v4sf_pcv2sf
22348 = build_function_type_list (V4SF_type_node,
22349 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22350 tree void_ftype_pv2sf_v4sf
22351 = build_function_type_list (void_type_node,
22352 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22353 tree void_ftype_pfloat_v4sf
22354 = build_function_type_list (void_type_node,
22355 pfloat_type_node, V4SF_type_node, NULL_TREE);
22356 tree void_ftype_pdi_di
22357 = build_function_type_list (void_type_node,
22358 pdi_type_node, long_long_unsigned_type_node,
22360 tree void_ftype_pv2di_v2di
22361 = build_function_type_list (void_type_node,
22362 pv2di_type_node, V2DI_type_node, NULL_TREE);
22363 /* Normal vector unops. */
22364 tree v4sf_ftype_v4sf
22365 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22366 tree v16qi_ftype_v16qi
22367 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22368 tree v8hi_ftype_v8hi
22369 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22370 tree v4si_ftype_v4si
22371 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22372 tree v8qi_ftype_v8qi
22373 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22374 tree v4hi_ftype_v4hi
22375 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22377 /* Normal vector binops. */
22378 tree v4sf_ftype_v4sf_v4sf
22379 = build_function_type_list (V4SF_type_node,
22380 V4SF_type_node, V4SF_type_node, NULL_TREE);
22381 tree v8qi_ftype_v8qi_v8qi
22382 = build_function_type_list (V8QI_type_node,
22383 V8QI_type_node, V8QI_type_node, NULL_TREE);
22384 tree v4hi_ftype_v4hi_v4hi
22385 = build_function_type_list (V4HI_type_node,
22386 V4HI_type_node, V4HI_type_node, NULL_TREE);
22387 tree v2si_ftype_v2si_v2si
22388 = build_function_type_list (V2SI_type_node,
22389 V2SI_type_node, V2SI_type_node, NULL_TREE);
22390 tree v1di_ftype_v1di_v1di
22391 = build_function_type_list (V1DI_type_node,
22392 V1DI_type_node, V1DI_type_node, NULL_TREE);
22393 tree v1di_ftype_v1di_v1di_int
22394 = build_function_type_list (V1DI_type_node,
22395 V1DI_type_node, V1DI_type_node,
22396 integer_type_node, NULL_TREE);
22397 tree v2si_ftype_v2sf
22398 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22399 tree v2sf_ftype_v2si
22400 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22401 tree v2si_ftype_v2si
22402 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22403 tree v2sf_ftype_v2sf
22404 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22405 tree v2sf_ftype_v2sf_v2sf
22406 = build_function_type_list (V2SF_type_node,
22407 V2SF_type_node, V2SF_type_node, NULL_TREE);
22408 tree v2si_ftype_v2sf_v2sf
22409 = build_function_type_list (V2SI_type_node,
22410 V2SF_type_node, V2SF_type_node, NULL_TREE);
22411 tree pint_type_node = build_pointer_type (integer_type_node);
22412 tree pdouble_type_node = build_pointer_type (double_type_node);
22413 tree pcdouble_type_node = build_pointer_type (
22414 build_type_variant (double_type_node, 1, 0));
22415 tree int_ftype_v2df_v2df
22416 = build_function_type_list (integer_type_node,
22417 V2DF_type_node, V2DF_type_node, NULL_TREE);
22419 tree void_ftype_pcvoid
22420 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22421 tree v4sf_ftype_v4si
22422 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22423 tree v4si_ftype_v4sf
22424 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22425 tree v2df_ftype_v4si
22426 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22427 tree v4si_ftype_v2df
22428 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22429 tree v4si_ftype_v2df_v2df
22430 = build_function_type_list (V4SI_type_node,
22431 V2DF_type_node, V2DF_type_node, NULL_TREE);
22432 tree v2si_ftype_v2df
22433 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22434 tree v4sf_ftype_v2df
22435 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22436 tree v2df_ftype_v2si
22437 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22438 tree v2df_ftype_v4sf
22439 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22440 tree int_ftype_v2df
22441 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22442 tree int64_ftype_v2df
22443 = build_function_type_list (long_long_integer_type_node,
22444 V2DF_type_node, NULL_TREE);
22445 tree v2df_ftype_v2df_int
22446 = build_function_type_list (V2DF_type_node,
22447 V2DF_type_node, integer_type_node, NULL_TREE);
22448 tree v2df_ftype_v2df_int64
22449 = build_function_type_list (V2DF_type_node,
22450 V2DF_type_node, long_long_integer_type_node,
22452 tree v4sf_ftype_v4sf_v2df
22453 = build_function_type_list (V4SF_type_node,
22454 V4SF_type_node, V2DF_type_node, NULL_TREE);
22455 tree v2df_ftype_v2df_v4sf
22456 = build_function_type_list (V2DF_type_node,
22457 V2DF_type_node, V4SF_type_node, NULL_TREE);
22458 tree v2df_ftype_v2df_v2df_int
22459 = build_function_type_list (V2DF_type_node,
22460 V2DF_type_node, V2DF_type_node,
22463 tree v2df_ftype_v2df_pcdouble
22464 = build_function_type_list (V2DF_type_node,
22465 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22466 tree void_ftype_pdouble_v2df
22467 = build_function_type_list (void_type_node,
22468 pdouble_type_node, V2DF_type_node, NULL_TREE);
22469 tree void_ftype_pint_int
22470 = build_function_type_list (void_type_node,
22471 pint_type_node, integer_type_node, NULL_TREE);
22472 tree void_ftype_v16qi_v16qi_pchar
22473 = build_function_type_list (void_type_node,
22474 V16QI_type_node, V16QI_type_node,
22475 pchar_type_node, NULL_TREE);
22476 tree v2df_ftype_pcdouble
22477 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22478 tree v2df_ftype_v2df_v2df
22479 = build_function_type_list (V2DF_type_node,
22480 V2DF_type_node, V2DF_type_node, NULL_TREE);
22481 tree v16qi_ftype_v16qi_v16qi
22482 = build_function_type_list (V16QI_type_node,
22483 V16QI_type_node, V16QI_type_node, NULL_TREE);
22484 tree v8hi_ftype_v8hi_v8hi
22485 = build_function_type_list (V8HI_type_node,
22486 V8HI_type_node, V8HI_type_node, NULL_TREE);
22487 tree v4si_ftype_v4si_v4si
22488 = build_function_type_list (V4SI_type_node,
22489 V4SI_type_node, V4SI_type_node, NULL_TREE);
22490 tree v2di_ftype_v2di_v2di
22491 = build_function_type_list (V2DI_type_node,
22492 V2DI_type_node, V2DI_type_node, NULL_TREE);
22493 tree v2di_ftype_v2df_v2df
22494 = build_function_type_list (V2DI_type_node,
22495 V2DF_type_node, V2DF_type_node, NULL_TREE);
22496 tree v2df_ftype_v2df
22497 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22498 tree v2di_ftype_v2di_int
22499 = build_function_type_list (V2DI_type_node,
22500 V2DI_type_node, integer_type_node, NULL_TREE);
22501 tree v2di_ftype_v2di_v2di_int
22502 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22503 V2DI_type_node, integer_type_node, NULL_TREE);
22504 tree v4si_ftype_v4si_int
22505 = build_function_type_list (V4SI_type_node,
22506 V4SI_type_node, integer_type_node, NULL_TREE);
22507 tree v8hi_ftype_v8hi_int
22508 = build_function_type_list (V8HI_type_node,
22509 V8HI_type_node, integer_type_node, NULL_TREE);
22510 tree v4si_ftype_v8hi_v8hi
22511 = build_function_type_list (V4SI_type_node,
22512 V8HI_type_node, V8HI_type_node, NULL_TREE);
22513 tree v1di_ftype_v8qi_v8qi
22514 = build_function_type_list (V1DI_type_node,
22515 V8QI_type_node, V8QI_type_node, NULL_TREE);
22516 tree v1di_ftype_v2si_v2si
22517 = build_function_type_list (V1DI_type_node,
22518 V2SI_type_node, V2SI_type_node, NULL_TREE);
22519 tree v2di_ftype_v16qi_v16qi
22520 = build_function_type_list (V2DI_type_node,
22521 V16QI_type_node, V16QI_type_node, NULL_TREE);
22522 tree v2di_ftype_v4si_v4si
22523 = build_function_type_list (V2DI_type_node,
22524 V4SI_type_node, V4SI_type_node, NULL_TREE);
22525 tree int_ftype_v16qi
22526 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22527 tree v16qi_ftype_pcchar
22528 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22529 tree void_ftype_pchar_v16qi
22530 = build_function_type_list (void_type_node,
22531 pchar_type_node, V16QI_type_node, NULL_TREE);
22533 tree v2di_ftype_v2di_unsigned_unsigned
22534 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22535 unsigned_type_node, unsigned_type_node,
22537 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22538 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22539 unsigned_type_node, unsigned_type_node,
22541 tree v2di_ftype_v2di_v16qi
22542 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22544 tree v2df_ftype_v2df_v2df_v2df
22545 = build_function_type_list (V2DF_type_node,
22546 V2DF_type_node, V2DF_type_node,
22547 V2DF_type_node, NULL_TREE);
22548 tree v4sf_ftype_v4sf_v4sf_v4sf
22549 = build_function_type_list (V4SF_type_node,
22550 V4SF_type_node, V4SF_type_node,
22551 V4SF_type_node, NULL_TREE);
22552 tree v8hi_ftype_v16qi
22553 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22555 tree v4si_ftype_v16qi
22556 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22558 tree v2di_ftype_v16qi
22559 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22561 tree v4si_ftype_v8hi
22562 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22564 tree v2di_ftype_v8hi
22565 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22567 tree v2di_ftype_v4si
22568 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22570 tree v2di_ftype_pv2di
22571 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22573 tree v16qi_ftype_v16qi_v16qi_int
22574 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22575 V16QI_type_node, integer_type_node,
22577 tree v16qi_ftype_v16qi_v16qi_v16qi
22578 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22579 V16QI_type_node, V16QI_type_node,
22581 tree v8hi_ftype_v8hi_v8hi_int
22582 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22583 V8HI_type_node, integer_type_node,
22585 tree v4si_ftype_v4si_v4si_int
22586 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22587 V4SI_type_node, integer_type_node,
22589 tree int_ftype_v2di_v2di
22590 = build_function_type_list (integer_type_node,
22591 V2DI_type_node, V2DI_type_node,
22593 tree int_ftype_v16qi_int_v16qi_int_int
22594 = build_function_type_list (integer_type_node,
22601 tree v16qi_ftype_v16qi_int_v16qi_int_int
22602 = build_function_type_list (V16QI_type_node,
22609 tree int_ftype_v16qi_v16qi_int
22610 = build_function_type_list (integer_type_node,
22616 /* SSE5 instructions */
22617 tree v2di_ftype_v2di_v2di_v2di
22618 = build_function_type_list (V2DI_type_node,
22624 tree v4si_ftype_v4si_v4si_v4si
22625 = build_function_type_list (V4SI_type_node,
22631 tree v4si_ftype_v4si_v4si_v2di
22632 = build_function_type_list (V4SI_type_node,
22638 tree v8hi_ftype_v8hi_v8hi_v8hi
22639 = build_function_type_list (V8HI_type_node,
22645 tree v8hi_ftype_v8hi_v8hi_v4si
22646 = build_function_type_list (V8HI_type_node,
22652 tree v2df_ftype_v2df_v2df_v16qi
22653 = build_function_type_list (V2DF_type_node,
22659 tree v4sf_ftype_v4sf_v4sf_v16qi
22660 = build_function_type_list (V4SF_type_node,
22666 tree v2di_ftype_v2di_si
22667 = build_function_type_list (V2DI_type_node,
22672 tree v4si_ftype_v4si_si
22673 = build_function_type_list (V4SI_type_node,
22678 tree v8hi_ftype_v8hi_si
22679 = build_function_type_list (V8HI_type_node,
22684 tree v16qi_ftype_v16qi_si
22685 = build_function_type_list (V16QI_type_node,
22689 tree v4sf_ftype_v4hi
22690 = build_function_type_list (V4SF_type_node,
22694 tree v4hi_ftype_v4sf
22695 = build_function_type_list (V4HI_type_node,
22699 tree v2di_ftype_v2di
22700 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22702 tree v16qi_ftype_v8hi_v8hi
22703 = build_function_type_list (V16QI_type_node,
22704 V8HI_type_node, V8HI_type_node,
22706 tree v8hi_ftype_v4si_v4si
22707 = build_function_type_list (V8HI_type_node,
22708 V4SI_type_node, V4SI_type_node,
22710 tree v8hi_ftype_v16qi_v16qi
22711 = build_function_type_list (V8HI_type_node,
22712 V16QI_type_node, V16QI_type_node,
22714 tree v4hi_ftype_v8qi_v8qi
22715 = build_function_type_list (V4HI_type_node,
22716 V8QI_type_node, V8QI_type_node,
22718 tree unsigned_ftype_unsigned_uchar
22719 = build_function_type_list (unsigned_type_node,
22720 unsigned_type_node,
22721 unsigned_char_type_node,
22723 tree unsigned_ftype_unsigned_ushort
22724 = build_function_type_list (unsigned_type_node,
22725 unsigned_type_node,
22726 short_unsigned_type_node,
22728 tree unsigned_ftype_unsigned_unsigned
22729 = build_function_type_list (unsigned_type_node,
22730 unsigned_type_node,
22731 unsigned_type_node,
22733 tree uint64_ftype_uint64_uint64
22734 = build_function_type_list (long_long_unsigned_type_node,
22735 long_long_unsigned_type_node,
22736 long_long_unsigned_type_node,
22738 tree float_ftype_float
22739 = build_function_type_list (float_type_node,
22744 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22746 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22748 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22750 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22752 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22754 tree v8sf_ftype_v8sf
22755 = build_function_type_list (V8SF_type_node,
22758 tree v8si_ftype_v8sf
22759 = build_function_type_list (V8SI_type_node,
22762 tree v8sf_ftype_v8si
22763 = build_function_type_list (V8SF_type_node,
22766 tree v4si_ftype_v4df
22767 = build_function_type_list (V4SI_type_node,
22770 tree v4df_ftype_v4df
22771 = build_function_type_list (V4DF_type_node,
22774 tree v4df_ftype_v4si
22775 = build_function_type_list (V4DF_type_node,
22778 tree v4df_ftype_v4sf
22779 = build_function_type_list (V4DF_type_node,
22782 tree v4sf_ftype_v4df
22783 = build_function_type_list (V4SF_type_node,
22786 tree v8sf_ftype_v8sf_v8sf
22787 = build_function_type_list (V8SF_type_node,
22788 V8SF_type_node, V8SF_type_node,
22790 tree v4df_ftype_v4df_v4df
22791 = build_function_type_list (V4DF_type_node,
22792 V4DF_type_node, V4DF_type_node,
22794 tree v8sf_ftype_v8sf_int
22795 = build_function_type_list (V8SF_type_node,
22796 V8SF_type_node, integer_type_node,
22798 tree v4si_ftype_v8si_int
22799 = build_function_type_list (V4SI_type_node,
22800 V8SI_type_node, integer_type_node,
22802 tree v4df_ftype_v4df_int
22803 = build_function_type_list (V4DF_type_node,
22804 V4DF_type_node, integer_type_node,
22806 tree v4sf_ftype_v8sf_int
22807 = build_function_type_list (V4SF_type_node,
22808 V8SF_type_node, integer_type_node,
22810 tree v2df_ftype_v4df_int
22811 = build_function_type_list (V2DF_type_node,
22812 V4DF_type_node, integer_type_node,
22814 tree v8sf_ftype_v8sf_v8sf_int
22815 = build_function_type_list (V8SF_type_node,
22816 V8SF_type_node, V8SF_type_node,
22819 tree v8sf_ftype_v8sf_v8sf_v8sf
22820 = build_function_type_list (V8SF_type_node,
22821 V8SF_type_node, V8SF_type_node,
22824 tree v4df_ftype_v4df_v4df_v4df
22825 = build_function_type_list (V4DF_type_node,
22826 V4DF_type_node, V4DF_type_node,
22829 tree v8si_ftype_v8si_v8si_int
22830 = build_function_type_list (V8SI_type_node,
22831 V8SI_type_node, V8SI_type_node,
22834 tree v4df_ftype_v4df_v4df_int
22835 = build_function_type_list (V4DF_type_node,
22836 V4DF_type_node, V4DF_type_node,
22839 tree v8sf_ftype_pcfloat
22840 = build_function_type_list (V8SF_type_node,
22843 tree v4df_ftype_pcdouble
22844 = build_function_type_list (V4DF_type_node,
22845 pcdouble_type_node,
22847 tree pcv4sf_type_node
22848 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22849 tree pcv2df_type_node
22850 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22851 tree v8sf_ftype_pcv4sf
22852 = build_function_type_list (V8SF_type_node,
22855 tree v4df_ftype_pcv2df
22856 = build_function_type_list (V4DF_type_node,
22859 tree v32qi_ftype_pcchar
22860 = build_function_type_list (V32QI_type_node,
22863 tree void_ftype_pchar_v32qi
22864 = build_function_type_list (void_type_node,
22865 pchar_type_node, V32QI_type_node,
22867 tree v8si_ftype_v8si_v4si_int
22868 = build_function_type_list (V8SI_type_node,
22869 V8SI_type_node, V4SI_type_node,
22872 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22873 tree void_ftype_pv4di_v4di
22874 = build_function_type_list (void_type_node,
22875 pv4di_type_node, V4DI_type_node,
22877 tree v8sf_ftype_v8sf_v4sf_int
22878 = build_function_type_list (V8SF_type_node,
22879 V8SF_type_node, V4SF_type_node,
22882 tree v4df_ftype_v4df_v2df_int
22883 = build_function_type_list (V4DF_type_node,
22884 V4DF_type_node, V2DF_type_node,
22887 tree void_ftype_pfloat_v8sf
22888 = build_function_type_list (void_type_node,
22889 pfloat_type_node, V8SF_type_node,
22891 tree void_ftype_pdouble_v4df
22892 = build_function_type_list (void_type_node,
22893 pdouble_type_node, V4DF_type_node,
22895 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
22896 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
22897 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
22898 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
22899 tree pcv8sf_type_node
22900 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
22901 tree pcv4df_type_node
22902 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
22903 tree v8sf_ftype_pcv8sf_v8sf
22904 = build_function_type_list (V8SF_type_node,
22905 pcv8sf_type_node, V8SF_type_node,
22907 tree v4df_ftype_pcv4df_v4df
22908 = build_function_type_list (V4DF_type_node,
22909 pcv4df_type_node, V4DF_type_node,
22911 tree v4sf_ftype_pcv4sf_v4sf
22912 = build_function_type_list (V4SF_type_node,
22913 pcv4sf_type_node, V4SF_type_node,
22915 tree v2df_ftype_pcv2df_v2df
22916 = build_function_type_list (V2DF_type_node,
22917 pcv2df_type_node, V2DF_type_node,
22919 tree void_ftype_pv8sf_v8sf_v8sf
22920 = build_function_type_list (void_type_node,
22921 pv8sf_type_node, V8SF_type_node,
22924 tree void_ftype_pv4df_v4df_v4df
22925 = build_function_type_list (void_type_node,
22926 pv4df_type_node, V4DF_type_node,
22929 tree void_ftype_pv4sf_v4sf_v4sf
22930 = build_function_type_list (void_type_node,
22931 pv4sf_type_node, V4SF_type_node,
22934 tree void_ftype_pv2df_v2df_v2df
22935 = build_function_type_list (void_type_node,
22936 pv2df_type_node, V2DF_type_node,
22939 tree v4df_ftype_v2df
22940 = build_function_type_list (V4DF_type_node,
22943 tree v8sf_ftype_v4sf
22944 = build_function_type_list (V8SF_type_node,
22947 tree v8si_ftype_v4si
22948 = build_function_type_list (V8SI_type_node,
22951 tree v2df_ftype_v4df
22952 = build_function_type_list (V2DF_type_node,
22955 tree v4sf_ftype_v8sf
22956 = build_function_type_list (V4SF_type_node,
22959 tree v4si_ftype_v8si
22960 = build_function_type_list (V4SI_type_node,
22963 tree int_ftype_v4df
22964 = build_function_type_list (integer_type_node,
22967 tree int_ftype_v8sf
22968 = build_function_type_list (integer_type_node,
22971 tree int_ftype_v8sf_v8sf
22972 = build_function_type_list (integer_type_node,
22973 V8SF_type_node, V8SF_type_node,
22975 tree int_ftype_v4di_v4di
22976 = build_function_type_list (integer_type_node,
22977 V4DI_type_node, V4DI_type_node,
22979 tree int_ftype_v4df_v4df
22980 = build_function_type_list (integer_type_node,
22981 V4DF_type_node, V4DF_type_node,
22983 tree v8sf_ftype_v8sf_v8si
22984 = build_function_type_list (V8SF_type_node,
22985 V8SF_type_node, V8SI_type_node,
22987 tree v4df_ftype_v4df_v4di
22988 = build_function_type_list (V4DF_type_node,
22989 V4DF_type_node, V4DI_type_node,
22991 tree v4sf_ftype_v4sf_v4si
22992 = build_function_type_list (V4SF_type_node,
22993 V4SF_type_node, V4SI_type_node, NULL_TREE);
22994 tree v2df_ftype_v2df_v2di
22995 = build_function_type_list (V2DF_type_node,
22996 V2DF_type_node, V2DI_type_node, NULL_TREE);
23000 /* Add all special builtins with variable number of operands. */
23001 for (i = 0, d = bdesc_special_args;
23002 i < ARRAY_SIZE (bdesc_special_args);
23010 switch ((enum ix86_special_builtin_type) d->flag)
23012 case VOID_FTYPE_VOID:
23013 type = void_ftype_void;
23015 case V32QI_FTYPE_PCCHAR:
23016 type = v32qi_ftype_pcchar;
23018 case V16QI_FTYPE_PCCHAR:
23019 type = v16qi_ftype_pcchar;
23021 case V8SF_FTYPE_PCV4SF:
23022 type = v8sf_ftype_pcv4sf;
23024 case V8SF_FTYPE_PCFLOAT:
23025 type = v8sf_ftype_pcfloat;
23027 case V4DF_FTYPE_PCV2DF:
23028 type = v4df_ftype_pcv2df;
23030 case V4DF_FTYPE_PCDOUBLE:
23031 type = v4df_ftype_pcdouble;
23033 case V4SF_FTYPE_PCFLOAT:
23034 type = v4sf_ftype_pcfloat;
23036 case V2DI_FTYPE_PV2DI:
23037 type = v2di_ftype_pv2di;
23039 case V2DF_FTYPE_PCDOUBLE:
23040 type = v2df_ftype_pcdouble;
23042 case V8SF_FTYPE_PCV8SF_V8SF:
23043 type = v8sf_ftype_pcv8sf_v8sf;
23045 case V4DF_FTYPE_PCV4DF_V4DF:
23046 type = v4df_ftype_pcv4df_v4df;
23048 case V4SF_FTYPE_V4SF_PCV2SF:
23049 type = v4sf_ftype_v4sf_pcv2sf;
23051 case V4SF_FTYPE_PCV4SF_V4SF:
23052 type = v4sf_ftype_pcv4sf_v4sf;
23054 case V2DF_FTYPE_V2DF_PCDOUBLE:
23055 type = v2df_ftype_v2df_pcdouble;
23057 case V2DF_FTYPE_PCV2DF_V2DF:
23058 type = v2df_ftype_pcv2df_v2df;
23060 case VOID_FTYPE_PV2SF_V4SF:
23061 type = void_ftype_pv2sf_v4sf;
23063 case VOID_FTYPE_PV4DI_V4DI:
23064 type = void_ftype_pv4di_v4di;
23066 case VOID_FTYPE_PV2DI_V2DI:
23067 type = void_ftype_pv2di_v2di;
23069 case VOID_FTYPE_PCHAR_V32QI:
23070 type = void_ftype_pchar_v32qi;
23072 case VOID_FTYPE_PCHAR_V16QI:
23073 type = void_ftype_pchar_v16qi;
23075 case VOID_FTYPE_PFLOAT_V8SF:
23076 type = void_ftype_pfloat_v8sf;
23078 case VOID_FTYPE_PFLOAT_V4SF:
23079 type = void_ftype_pfloat_v4sf;
23081 case VOID_FTYPE_PDOUBLE_V4DF:
23082 type = void_ftype_pdouble_v4df;
23084 case VOID_FTYPE_PDOUBLE_V2DF:
23085 type = void_ftype_pdouble_v2df;
23087 case VOID_FTYPE_PDI_DI:
23088 type = void_ftype_pdi_di;
23090 case VOID_FTYPE_PINT_INT:
23091 type = void_ftype_pint_int;
23093 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23094 type = void_ftype_pv8sf_v8sf_v8sf;
23096 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23097 type = void_ftype_pv4df_v4df_v4df;
23099 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23100 type = void_ftype_pv4sf_v4sf_v4sf;
23102 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23103 type = void_ftype_pv2df_v2df_v2df;
23106 gcc_unreachable ();
23109 def_builtin (d->mask, d->name, type, d->code);
23112 /* Add all builtins with variable number of operands. */
23113 for (i = 0, d = bdesc_args;
23114 i < ARRAY_SIZE (bdesc_args);
23122 switch ((enum ix86_builtin_type) d->flag)
23124 case FLOAT_FTYPE_FLOAT:
23125 type = float_ftype_float;
23127 case INT_FTYPE_V8SF_V8SF_PTEST:
23128 type = int_ftype_v8sf_v8sf;
23130 case INT_FTYPE_V4DI_V4DI_PTEST:
23131 type = int_ftype_v4di_v4di;
23133 case INT_FTYPE_V4DF_V4DF_PTEST:
23134 type = int_ftype_v4df_v4df;
23136 case INT_FTYPE_V4SF_V4SF_PTEST:
23137 type = int_ftype_v4sf_v4sf;
23139 case INT_FTYPE_V2DI_V2DI_PTEST:
23140 type = int_ftype_v2di_v2di;
23142 case INT_FTYPE_V2DF_V2DF_PTEST:
23143 type = int_ftype_v2df_v2df;
23145 case INT64_FTYPE_V4SF:
23146 type = int64_ftype_v4sf;
23148 case INT64_FTYPE_V2DF:
23149 type = int64_ftype_v2df;
23151 case INT_FTYPE_V16QI:
23152 type = int_ftype_v16qi;
23154 case INT_FTYPE_V8QI:
23155 type = int_ftype_v8qi;
23157 case INT_FTYPE_V8SF:
23158 type = int_ftype_v8sf;
23160 case INT_FTYPE_V4DF:
23161 type = int_ftype_v4df;
23163 case INT_FTYPE_V4SF:
23164 type = int_ftype_v4sf;
23166 case INT_FTYPE_V2DF:
23167 type = int_ftype_v2df;
23169 case V16QI_FTYPE_V16QI:
23170 type = v16qi_ftype_v16qi;
23172 case V8SI_FTYPE_V8SF:
23173 type = v8si_ftype_v8sf;
23175 case V8SI_FTYPE_V4SI:
23176 type = v8si_ftype_v4si;
23178 case V8HI_FTYPE_V8HI:
23179 type = v8hi_ftype_v8hi;
23181 case V8HI_FTYPE_V16QI:
23182 type = v8hi_ftype_v16qi;
23184 case V8QI_FTYPE_V8QI:
23185 type = v8qi_ftype_v8qi;
23187 case V8SF_FTYPE_V8SF:
23188 type = v8sf_ftype_v8sf;
23190 case V8SF_FTYPE_V8SI:
23191 type = v8sf_ftype_v8si;
23193 case V8SF_FTYPE_V4SF:
23194 type = v8sf_ftype_v4sf;
23196 case V4SI_FTYPE_V4DF:
23197 type = v4si_ftype_v4df;
23199 case V4SI_FTYPE_V4SI:
23200 type = v4si_ftype_v4si;
23202 case V4SI_FTYPE_V16QI:
23203 type = v4si_ftype_v16qi;
23205 case V4SI_FTYPE_V8SI:
23206 type = v4si_ftype_v8si;
23208 case V4SI_FTYPE_V8HI:
23209 type = v4si_ftype_v8hi;
23211 case V4SI_FTYPE_V4SF:
23212 type = v4si_ftype_v4sf;
23214 case V4SI_FTYPE_V2DF:
23215 type = v4si_ftype_v2df;
23217 case V4HI_FTYPE_V4HI:
23218 type = v4hi_ftype_v4hi;
23220 case V4DF_FTYPE_V4DF:
23221 type = v4df_ftype_v4df;
23223 case V4DF_FTYPE_V4SI:
23224 type = v4df_ftype_v4si;
23226 case V4DF_FTYPE_V4SF:
23227 type = v4df_ftype_v4sf;
23229 case V4DF_FTYPE_V2DF:
23230 type = v4df_ftype_v2df;
23232 case V4SF_FTYPE_V4SF:
23233 case V4SF_FTYPE_V4SF_VEC_MERGE:
23234 type = v4sf_ftype_v4sf;
23236 case V4SF_FTYPE_V8SF:
23237 type = v4sf_ftype_v8sf;
23239 case V4SF_FTYPE_V4SI:
23240 type = v4sf_ftype_v4si;
23242 case V4SF_FTYPE_V4DF:
23243 type = v4sf_ftype_v4df;
23245 case V4SF_FTYPE_V2DF:
23246 type = v4sf_ftype_v2df;
23248 case V2DI_FTYPE_V2DI:
23249 type = v2di_ftype_v2di;
23251 case V2DI_FTYPE_V16QI:
23252 type = v2di_ftype_v16qi;
23254 case V2DI_FTYPE_V8HI:
23255 type = v2di_ftype_v8hi;
23257 case V2DI_FTYPE_V4SI:
23258 type = v2di_ftype_v4si;
23260 case V2SI_FTYPE_V2SI:
23261 type = v2si_ftype_v2si;
23263 case V2SI_FTYPE_V4SF:
23264 type = v2si_ftype_v4sf;
23266 case V2SI_FTYPE_V2DF:
23267 type = v2si_ftype_v2df;
23269 case V2SI_FTYPE_V2SF:
23270 type = v2si_ftype_v2sf;
23272 case V2DF_FTYPE_V4DF:
23273 type = v2df_ftype_v4df;
23275 case V2DF_FTYPE_V4SF:
23276 type = v2df_ftype_v4sf;
23278 case V2DF_FTYPE_V2DF:
23279 case V2DF_FTYPE_V2DF_VEC_MERGE:
23280 type = v2df_ftype_v2df;
23282 case V2DF_FTYPE_V2SI:
23283 type = v2df_ftype_v2si;
23285 case V2DF_FTYPE_V4SI:
23286 type = v2df_ftype_v4si;
23288 case V2SF_FTYPE_V2SF:
23289 type = v2sf_ftype_v2sf;
23291 case V2SF_FTYPE_V2SI:
23292 type = v2sf_ftype_v2si;
23294 case V16QI_FTYPE_V16QI_V16QI:
23295 type = v16qi_ftype_v16qi_v16qi;
23297 case V16QI_FTYPE_V8HI_V8HI:
23298 type = v16qi_ftype_v8hi_v8hi;
23300 case V8QI_FTYPE_V8QI_V8QI:
23301 type = v8qi_ftype_v8qi_v8qi;
23303 case V8QI_FTYPE_V4HI_V4HI:
23304 type = v8qi_ftype_v4hi_v4hi;
23306 case V8HI_FTYPE_V8HI_V8HI:
23307 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23308 type = v8hi_ftype_v8hi_v8hi;
23310 case V8HI_FTYPE_V16QI_V16QI:
23311 type = v8hi_ftype_v16qi_v16qi;
23313 case V8HI_FTYPE_V4SI_V4SI:
23314 type = v8hi_ftype_v4si_v4si;
23316 case V8HI_FTYPE_V8HI_SI_COUNT:
23317 type = v8hi_ftype_v8hi_int;
23319 case V8SF_FTYPE_V8SF_V8SF:
23320 type = v8sf_ftype_v8sf_v8sf;
23322 case V8SF_FTYPE_V8SF_V8SI:
23323 type = v8sf_ftype_v8sf_v8si;
23325 case V4SI_FTYPE_V4SI_V4SI:
23326 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23327 type = v4si_ftype_v4si_v4si;
23329 case V4SI_FTYPE_V8HI_V8HI:
23330 type = v4si_ftype_v8hi_v8hi;
23332 case V4SI_FTYPE_V4SF_V4SF:
23333 type = v4si_ftype_v4sf_v4sf;
23335 case V4SI_FTYPE_V2DF_V2DF:
23336 type = v4si_ftype_v2df_v2df;
23338 case V4SI_FTYPE_V4SI_SI_COUNT:
23339 type = v4si_ftype_v4si_int;
23341 case V4HI_FTYPE_V4HI_V4HI:
23342 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23343 type = v4hi_ftype_v4hi_v4hi;
23345 case V4HI_FTYPE_V8QI_V8QI:
23346 type = v4hi_ftype_v8qi_v8qi;
23348 case V4HI_FTYPE_V2SI_V2SI:
23349 type = v4hi_ftype_v2si_v2si;
23351 case V4HI_FTYPE_V4HI_SI_COUNT:
23352 type = v4hi_ftype_v4hi_int;
23354 case V4DF_FTYPE_V4DF_V4DF:
23355 type = v4df_ftype_v4df_v4df;
23357 case V4DF_FTYPE_V4DF_V4DI:
23358 type = v4df_ftype_v4df_v4di;
23360 case V4SF_FTYPE_V4SF_V4SF:
23361 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23362 type = v4sf_ftype_v4sf_v4sf;
23364 case V4SF_FTYPE_V4SF_V4SI:
23365 type = v4sf_ftype_v4sf_v4si;
23367 case V4SF_FTYPE_V4SF_V2SI:
23368 type = v4sf_ftype_v4sf_v2si;
23370 case V4SF_FTYPE_V4SF_V2DF:
23371 type = v4sf_ftype_v4sf_v2df;
23373 case V4SF_FTYPE_V4SF_DI:
23374 type = v4sf_ftype_v4sf_int64;
23376 case V4SF_FTYPE_V4SF_SI:
23377 type = v4sf_ftype_v4sf_int;
23379 case V2DI_FTYPE_V2DI_V2DI:
23380 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23381 type = v2di_ftype_v2di_v2di;
23383 case V2DI_FTYPE_V16QI_V16QI:
23384 type = v2di_ftype_v16qi_v16qi;
23386 case V2DI_FTYPE_V4SI_V4SI:
23387 type = v2di_ftype_v4si_v4si;
23389 case V2DI_FTYPE_V2DI_V16QI:
23390 type = v2di_ftype_v2di_v16qi;
23392 case V2DI_FTYPE_V2DF_V2DF:
23393 type = v2di_ftype_v2df_v2df;
23395 case V2DI_FTYPE_V2DI_SI_COUNT:
23396 type = v2di_ftype_v2di_int;
23398 case V2SI_FTYPE_V2SI_V2SI:
23399 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23400 type = v2si_ftype_v2si_v2si;
23402 case V2SI_FTYPE_V4HI_V4HI:
23403 type = v2si_ftype_v4hi_v4hi;
23405 case V2SI_FTYPE_V2SF_V2SF:
23406 type = v2si_ftype_v2sf_v2sf;
23408 case V2SI_FTYPE_V2SI_SI_COUNT:
23409 type = v2si_ftype_v2si_int;
23411 case V2DF_FTYPE_V2DF_V2DF:
23412 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23413 type = v2df_ftype_v2df_v2df;
23415 case V2DF_FTYPE_V2DF_V4SF:
23416 type = v2df_ftype_v2df_v4sf;
23418 case V2DF_FTYPE_V2DF_V2DI:
23419 type = v2df_ftype_v2df_v2di;
23421 case V2DF_FTYPE_V2DF_DI:
23422 type = v2df_ftype_v2df_int64;
23424 case V2DF_FTYPE_V2DF_SI:
23425 type = v2df_ftype_v2df_int;
23427 case V2SF_FTYPE_V2SF_V2SF:
23428 type = v2sf_ftype_v2sf_v2sf;
23430 case V1DI_FTYPE_V1DI_V1DI:
23431 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23432 type = v1di_ftype_v1di_v1di;
23434 case V1DI_FTYPE_V8QI_V8QI:
23435 type = v1di_ftype_v8qi_v8qi;
23437 case V1DI_FTYPE_V2SI_V2SI:
23438 type = v1di_ftype_v2si_v2si;
23440 case V1DI_FTYPE_V1DI_SI_COUNT:
23441 type = v1di_ftype_v1di_int;
23443 case UINT64_FTYPE_UINT64_UINT64:
23444 type = uint64_ftype_uint64_uint64;
23446 case UINT_FTYPE_UINT_UINT:
23447 type = unsigned_ftype_unsigned_unsigned;
23449 case UINT_FTYPE_UINT_USHORT:
23450 type = unsigned_ftype_unsigned_ushort;
23452 case UINT_FTYPE_UINT_UCHAR:
23453 type = unsigned_ftype_unsigned_uchar;
23455 case V8HI_FTYPE_V8HI_INT:
23456 type = v8hi_ftype_v8hi_int;
23458 case V8SF_FTYPE_V8SF_INT:
23459 type = v8sf_ftype_v8sf_int;
23461 case V4SI_FTYPE_V4SI_INT:
23462 type = v4si_ftype_v4si_int;
23464 case V4SI_FTYPE_V8SI_INT:
23465 type = v4si_ftype_v8si_int;
23467 case V4HI_FTYPE_V4HI_INT:
23468 type = v4hi_ftype_v4hi_int;
23470 case V4DF_FTYPE_V4DF_INT:
23471 type = v4df_ftype_v4df_int;
23473 case V4SF_FTYPE_V4SF_INT:
23474 type = v4sf_ftype_v4sf_int;
23476 case V4SF_FTYPE_V8SF_INT:
23477 type = v4sf_ftype_v8sf_int;
23479 case V2DI_FTYPE_V2DI_INT:
23480 case V2DI2TI_FTYPE_V2DI_INT:
23481 type = v2di_ftype_v2di_int;
23483 case V2DF_FTYPE_V2DF_INT:
23484 type = v2df_ftype_v2df_int;
23486 case V2DF_FTYPE_V4DF_INT:
23487 type = v2df_ftype_v4df_int;
23489 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23490 type = v16qi_ftype_v16qi_v16qi_v16qi;
23492 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23493 type = v8sf_ftype_v8sf_v8sf_v8sf;
23495 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23496 type = v4df_ftype_v4df_v4df_v4df;
23498 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23499 type = v4sf_ftype_v4sf_v4sf_v4sf;
23501 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23502 type = v2df_ftype_v2df_v2df_v2df;
23504 case V16QI_FTYPE_V16QI_V16QI_INT:
23505 type = v16qi_ftype_v16qi_v16qi_int;
23507 case V8SI_FTYPE_V8SI_V8SI_INT:
23508 type = v8si_ftype_v8si_v8si_int;
23510 case V8SI_FTYPE_V8SI_V4SI_INT:
23511 type = v8si_ftype_v8si_v4si_int;
23513 case V8HI_FTYPE_V8HI_V8HI_INT:
23514 type = v8hi_ftype_v8hi_v8hi_int;
23516 case V8SF_FTYPE_V8SF_V8SF_INT:
23517 type = v8sf_ftype_v8sf_v8sf_int;
23519 case V8SF_FTYPE_V8SF_V4SF_INT:
23520 type = v8sf_ftype_v8sf_v4sf_int;
23522 case V4SI_FTYPE_V4SI_V4SI_INT:
23523 type = v4si_ftype_v4si_v4si_int;
23525 case V4DF_FTYPE_V4DF_V4DF_INT:
23526 type = v4df_ftype_v4df_v4df_int;
23528 case V4DF_FTYPE_V4DF_V2DF_INT:
23529 type = v4df_ftype_v4df_v2df_int;
23531 case V4SF_FTYPE_V4SF_V4SF_INT:
23532 type = v4sf_ftype_v4sf_v4sf_int;
23534 case V2DI_FTYPE_V2DI_V2DI_INT:
23535 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23536 type = v2di_ftype_v2di_v2di_int;
23538 case V2DF_FTYPE_V2DF_V2DF_INT:
23539 type = v2df_ftype_v2df_v2df_int;
23541 case V2DI_FTYPE_V2DI_UINT_UINT:
23542 type = v2di_ftype_v2di_unsigned_unsigned;
23544 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23545 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23547 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23548 type = v1di_ftype_v1di_v1di_int;
23551 gcc_unreachable ();
23554 def_builtin_const (d->mask, d->name, type, d->code);
23557 /* pcmpestr[im] insns. */
23558 for (i = 0, d = bdesc_pcmpestr;
23559 i < ARRAY_SIZE (bdesc_pcmpestr);
23562 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23563 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23565 ftype = int_ftype_v16qi_int_v16qi_int_int;
23566 def_builtin_const (d->mask, d->name, ftype, d->code);
23569 /* pcmpistr[im] insns. */
23570 for (i = 0, d = bdesc_pcmpistr;
23571 i < ARRAY_SIZE (bdesc_pcmpistr);
23574 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23575 ftype = v16qi_ftype_v16qi_v16qi_int;
23577 ftype = int_ftype_v16qi_v16qi_int;
23578 def_builtin_const (d->mask, d->name, ftype, d->code);
23581 /* comi/ucomi insns. */
23582 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23583 if (d->mask == OPTION_MASK_ISA_SSE2)
23584 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23586 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23589 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23590 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23592 /* SSE or 3DNow!A */
23593 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23596 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23598 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23599 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23602 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23603 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23606 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23607 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23608 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23609 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23610 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23611 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23614 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23617 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23618 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23620 /* Access to the vec_init patterns. */
23621 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23622 integer_type_node, NULL_TREE);
23623 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23625 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23626 short_integer_type_node,
23627 short_integer_type_node,
23628 short_integer_type_node, NULL_TREE);
23629 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23631 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23632 char_type_node, char_type_node,
23633 char_type_node, char_type_node,
23634 char_type_node, char_type_node,
23635 char_type_node, NULL_TREE);
23636 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23638 /* Access to the vec_extract patterns. */
23639 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23640 integer_type_node, NULL_TREE);
23641 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23643 ftype = build_function_type_list (long_long_integer_type_node,
23644 V2DI_type_node, integer_type_node,
23646 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23648 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23649 integer_type_node, NULL_TREE);
23650 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23652 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23653 integer_type_node, NULL_TREE);
23654 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23656 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23657 integer_type_node, NULL_TREE);
23658 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23660 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23661 integer_type_node, NULL_TREE);
23662 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23664 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23665 integer_type_node, NULL_TREE);
23666 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23668 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23669 integer_type_node, NULL_TREE);
23670 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23672 /* Access to the vec_set patterns. */
23673 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23675 integer_type_node, NULL_TREE);
23676 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23678 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23680 integer_type_node, NULL_TREE);
23681 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23683 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23685 integer_type_node, NULL_TREE);
23686 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23688 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23690 integer_type_node, NULL_TREE);
23691 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23693 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23695 integer_type_node, NULL_TREE);
23696 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23698 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23700 integer_type_node, NULL_TREE);
23701 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23703 /* Add SSE5 multi-arg argument instructions */
23704 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23706 tree mtype = NULL_TREE;
23711 switch ((enum multi_arg_type)d->flag)
23713 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23714 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23715 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23716 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23717 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23718 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23719 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23720 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23721 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23722 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23723 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23724 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23725 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23726 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23727 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23728 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23729 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23730 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23731 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23732 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23733 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23734 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23735 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23736 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23737 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23738 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23739 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23740 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23741 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23742 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23743 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23744 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23745 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23746 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23747 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23748 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23749 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23750 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23751 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23752 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23753 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23754 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23755 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23756 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23757 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23758 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23759 case MULTI_ARG_UNKNOWN:
23761 gcc_unreachable ();
23765 def_builtin_const (d->mask, d->name, mtype, d->code);
23769 /* Internal method for ix86_init_builtins. */
/* Register both ABI flavors of the stdarg builtins:
   __builtin_ms_va_{start,end,copy} and __builtin_sysv_va_{start,end,copy}.
   Each set carries the matching "ms_abi"/"sysv_abi" function attribute so
   the correct va_list representation is used regardless of the calling
   convention of the current function.  */
23772 ix86_init_builtins_va_builtins_abi (void)
23774 tree ms_va_ref, sysv_va_ref;
23775 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23776 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23777 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23778 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists naming the ABI each builtin variant follows.  */
23782 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23783 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* The MS va_list is passed by reference; the SYSV one via a pointer to
   its element type (sysv va_list is presumably an array type — note the
   TREE_TYPE below).  */
23784 ms_va_ref = build_reference_type (ms_va_list_type_node)
23786 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Function types: va_end takes just the list, va_start is varargs,
   va_copy takes destination and source.  */
23789 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23790 fnvoid_va_start_ms =
23791 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23792 fnvoid_va_end_sysv =
23793 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23794 fnvoid_va_start_sysv =
23795 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23797 fnvoid_va_copy_ms =
23798 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23800 fnvoid_va_copy_sysv =
23801 build_function_type_list (void_type_node, sysv_va_ref,
23802 sysv_va_ref, NULL_TREE);
/* Register the six builtins, mapping them onto the generic
   BUILT_IN_VA_START/END/COPY codes with the ABI attribute attached.  */
23804 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23805 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23806 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23807 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23808 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23809 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23810 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23811 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23812 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23813 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23814 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23815 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level builtin initialization for the i386 backend: registers the
   __float80 and __float128 types, the TFmode builtins (__builtin_infq,
   __builtin_huge_valq, __builtin_fabsq, __builtin_copysignq), the MMX/SSE
   builtins, and the per-ABI va_* builtins.  */
23819 ix86_init_builtins (void)
23821 tree float128_type_node = make_node (REAL_TYPE);
23824 /* The __float80 type. */
/* If long double is already the 80-bit extended type, just alias it.  */
23825 if (TYPE_MODE (long_double_type_node) == XFmode)
23826 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23830 /* The __float80 type. */
/* Otherwise build a distinct 80-bit REAL_TYPE and register it.  */
23831 tree float80_type_node = make_node (REAL_TYPE);
23833 TYPE_PRECISION (float80_type_node) = 80;
23834 layout_type (float80_type_node);
23835 (*lang_hooks.types.register_builtin_type) (float80_type_node,
23839 /* The __float128 type. */
23840 TYPE_PRECISION (float128_type_node) = 128;
23841 layout_type (float128_type_node);
23842 (*lang_hooks.types.register_builtin_type) (float128_type_node,
23845 /* TFmode support builtins. */
23846 ftype = build_function_type (float128_type_node, void_list_node);
23847 decl = add_builtin_function ("__builtin_infq", ftype,
23848 IX86_BUILTIN_INFQ, BUILT_IN_MD,
23850 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23852 decl = add_builtin_function ("__builtin_huge_valq", ftype,
23853 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
23855 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
23857 /* We will expand them to normal call if SSE2 isn't available since
23858 they are used by libgcc. */
/* __builtin_fabsq maps onto the libgcc entry point __fabstf2.  */
23859 ftype = build_function_type_list (float128_type_node,
23860 float128_type_node,
23862 decl = add_builtin_function ("__builtin_fabsq", ftype,
23863 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23864 "__fabstf2", NULL_TREE);
23865 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
/* Pure function: result depends only on its argument.  */
23866 TREE_READONLY (decl) = 1;
/* __builtin_copysignq maps onto the libgcc entry point __copysigntf3.  */
23868 ftype = build_function_type_list (float128_type_node,
23869 float128_type_node,
23870 float128_type_node,
23872 decl = add_builtin_function ("__builtin_copysignq", ftype,
23873 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23874 "__copysigntf3", NULL_TREE);
23875 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23876 TREE_READONLY (decl) = 1;
23878 ix86_init_mmx_sse_builtins ();
23880 ix86_init_builtins_va_builtins_abi ();
23883 /* Errors in the source file can cause expand_expr to return const0_rtx
23884 where we expect a vector. To avoid crashing, use one of the vector
23885 clear instructions. */
/* X: operand rtx that should be a vector of MODE; returns X unchanged
   unless it is the scalar const0_rtx, in which case the canonical zero
   vector CONST0_RTX (MODE) is substituted.  */
23887 safe_vector_operand (rtx x, enum machine_mode mode)
23889 if (x == const0_rtx)
23890 x = CONST0_RTX (mode);
23894 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand the two-operand builtin call EXP using insn pattern ICODE,
   placing the result in TARGET (or a fresh register when TARGET is
   unsuitable).  Returns the result rtx via code outside this view.  */
23897 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23900 tree arg0 = CALL_EXPR_ARG (exp, 0);
23901 tree arg1 = CALL_EXPR_ARG (exp, 1);
23902 rtx op0 = expand_normal (arg0);
23903 rtx op1 = expand_normal (arg1);
23904 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23905 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23906 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx leaking in where a vector is expected.  */
23908 if (VECTOR_MODE_P (mode0))
23909 op0 = safe_vector_operand (op0, mode0);
23910 if (VECTOR_MODE_P (mode1))
23911 op1 = safe_vector_operand (op1, mode1);
/* Always use a fresh pseudo when optimizing or TARGET cannot hold the
   result directly.  */
23913 if (optimize || !target
23914 || GET_MODE (target) != tmode
23915 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23916 target = gen_reg_rtx (tmode);
/* Special case: a SImode operand feeding a TImode insn operand is
   widened by loading it into a V4SI register and taking the TImode
   lowpart.  */
23918 if (GET_MODE (op1) == SImode && mode1 == TImode)
23920 rtx x = gen_reg_rtx (V4SImode);
23921 emit_insn (gen_sse2_loadd (x, op1));
23922 op1 = gen_lowpart (TImode, x);
/* Force operands into registers when the insn predicates reject them.  */
23925 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23926 op0 = copy_to_mode_reg (mode0, op0);
23927 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23928 op1 = copy_to_mode_reg (mode1, op1);
23930 pat = GEN_FCN (icode) (target, op0, op1);
23939 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expand builtin call EXP via insn ICODE.  M_TYPE classifies the
   argument/operand shape (arity, immediate last argument, comparison,
   test-form); SUB_CODE supplies the rtx comparison code for the
   comparison and TF forms.  Result goes into TARGET or a new pseudo.  */
23942 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23943 enum multi_arg_type m_type,
23944 enum rtx_code sub_code)
23949 bool comparison_p = false;
23951 bool last_arg_constant = false;
23952 int num_memory = 0;
23955 enum machine_mode mode;
23958 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Decode M_TYPE into nargs / comparison_p / last_arg_constant.  */
23962 case MULTI_ARG_3_SF:
23963 case MULTI_ARG_3_DF:
23964 case MULTI_ARG_3_DI:
23965 case MULTI_ARG_3_SI:
23966 case MULTI_ARG_3_SI_DI:
23967 case MULTI_ARG_3_HI:
23968 case MULTI_ARG_3_HI_SI:
23969 case MULTI_ARG_3_QI:
23970 case MULTI_ARG_3_PERMPS:
23971 case MULTI_ARG_3_PERMPD:
23975 case MULTI_ARG_2_SF:
23976 case MULTI_ARG_2_DF:
23977 case MULTI_ARG_2_DI:
23978 case MULTI_ARG_2_SI:
23979 case MULTI_ARG_2_HI:
23980 case MULTI_ARG_2_QI:
/* Two-operand forms whose last argument must be an immediate.  */
23984 case MULTI_ARG_2_DI_IMM:
23985 case MULTI_ARG_2_SI_IMM:
23986 case MULTI_ARG_2_HI_IMM:
23987 case MULTI_ARG_2_QI_IMM:
23989 last_arg_constant = true;
23992 case MULTI_ARG_1_SF:
23993 case MULTI_ARG_1_DF:
23994 case MULTI_ARG_1_DI:
23995 case MULTI_ARG_1_SI:
23996 case MULTI_ARG_1_HI:
23997 case MULTI_ARG_1_QI:
23998 case MULTI_ARG_1_SI_DI:
23999 case MULTI_ARG_1_HI_DI:
24000 case MULTI_ARG_1_HI_SI:
24001 case MULTI_ARG_1_QI_DI:
24002 case MULTI_ARG_1_QI_SI:
24003 case MULTI_ARG_1_QI_HI:
24004 case MULTI_ARG_1_PH2PS:
24005 case MULTI_ARG_1_PS2PH:
/* Comparison forms: the insn takes an explicit comparison rtx.  */
24009 case MULTI_ARG_2_SF_CMP:
24010 case MULTI_ARG_2_DF_CMP:
24011 case MULTI_ARG_2_DI_CMP:
24012 case MULTI_ARG_2_SI_CMP:
24013 case MULTI_ARG_2_HI_CMP:
24014 case MULTI_ARG_2_QI_CMP:
24016 comparison_p = true;
24019 case MULTI_ARG_2_SF_TF:
24020 case MULTI_ARG_2_DF_TF:
24021 case MULTI_ARG_2_DI_TF:
24022 case MULTI_ARG_2_SI_TF:
24023 case MULTI_ARG_2_HI_TF:
24024 case MULTI_ARG_2_QI_TF:
24029 case MULTI_ARG_UNKNOWN:
24031 gcc_unreachable ();
24034 if (optimize || !target
24035 || GET_MODE (target) != tmode
24036 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24037 target = gen_reg_rtx (tmode);
24039 gcc_assert (nargs <= 4);
/* Expand and legitimize each call argument.  A comparison insn has the
   comparison rtx as operand 1, shifting the argument operands by one
   (the ADJUST below).  */
24041 for (i = 0; i < nargs; i++)
24043 tree arg = CALL_EXPR_ARG (exp, i);
24044 rtx op = expand_normal (arg);
24045 int adjust = (comparison_p) ? 1 : 0;
24046 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24048 if (last_arg_constant && i == nargs-1)
24050 if (GET_CODE (op) != CONST_INT)
24052 error ("last argument must be an immediate");
/* Error recovery: return a dummy register of the right mode.  */
24053 return gen_reg_rtx (tmode);
24058 if (VECTOR_MODE_P (mode))
24059 op = safe_vector_operand (op, mode);
24061 /* If we aren't optimizing, only allow one memory operand to be
24063 if (memory_operand (op, mode))
24066 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24069 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24071 op = force_reg (mode, op);
24075 args[i].mode = mode;
/* Emit the pattern; the shape depends on arity and comparison form.  */
24081 pat = GEN_FCN (icode) (target, args[0].op);
24086 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24087 GEN_INT ((int)sub_code));
24088 else if (! comparison_p)
24089 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24092 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24096 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24101 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24105 gcc_unreachable ();
24115 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24116 insns with vec_merge. */
/* Expand a one-argument builtin EXP through pattern ICODE.  The insn
   takes the source twice (op0 both as input and as the vec_merge
   background operand op1), so the scalar result is merged back into the
   untouched upper elements.  */
24119 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24123 tree arg0 = CALL_EXPR_ARG (exp, 0);
24124 rtx op1, op0 = expand_normal (arg0);
24125 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24126 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24128 if (optimize || !target
24129 || GET_MODE (target) != tmode
24130 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24131 target = gen_reg_rtx (tmode);
24133 if (VECTOR_MODE_P (mode0))
24134 op0 = safe_vector_operand (op0, mode0);
24136 if ((optimize && !register_operand (op0, mode0))
24137 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24138 op0 = copy_to_mode_reg (mode0, op0);
/* op1 duplicates op0 (assignment elided from this view) and must also
   satisfy the insn's operand-2 predicate.  */
24141 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24142 op1 = copy_to_mode_reg (mode0, op1);
24144 pat = GEN_FCN (icode) (target, op0, op1);
24151 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand the SSE comparison builtin described by D for call EXP into
   TARGET.  SWAP requests exchanging the two operands, used for
   comparisons the hardware only provides in one direction.  */
24154 ix86_expand_sse_compare (const struct builtin_description *d,
24155 tree exp, rtx target, bool swap)
24158 tree arg0 = CALL_EXPR_ARG (exp, 0);
24159 tree arg1 = CALL_EXPR_ARG (exp, 1);
24160 rtx op0 = expand_normal (arg0);
24161 rtx op1 = expand_normal (arg1);
24163 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24164 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24165 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24166 enum rtx_code comparison = d->comparison;
24168 if (VECTOR_MODE_P (mode0))
24169 op0 = safe_vector_operand (op0, mode0);
24170 if (VECTOR_MODE_P (mode1))
24171 op1 = safe_vector_operand (op1, mode1);
24173 /* Swap operands if we have a comparison that isn't available in
/* Copy op1 through a scratch register as part of the operand swap.  */
24177 rtx tmp = gen_reg_rtx (mode1);
24178 emit_move_insn (tmp, op1);
24183 if (optimize || !target
24184 || GET_MODE (target) != tmode
24185 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24186 target = gen_reg_rtx (tmode);
24188 if ((optimize && !register_operand (op0, mode0))
24189 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24190 op0 = copy_to_mode_reg (mode0, op0);
24191 if ((optimize && !register_operand (op1, mode1))
24192 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24193 op1 = copy_to_mode_reg (mode1, op1);
/* The insn wants the comparison rtx as an explicit operand.  */
24195 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24196 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24203 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a COMI/UCOMI-style builtin D for call EXP: run the compare,
   then materialize the flag result by setting the low QImode part of an
   SImode pseudo from the comparison of the flags register.  Returns the
   SImode register holding 0/1.  */
24206 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24210 tree arg0 = CALL_EXPR_ARG (exp, 0);
24211 tree arg1 = CALL_EXPR_ARG (exp, 1);
24212 rtx op0 = expand_normal (arg0);
24213 rtx op1 = expand_normal (arg1);
24214 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24215 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24216 enum rtx_code comparison = d->comparison;
24218 if (VECTOR_MODE_P (mode0))
24219 op0 = safe_vector_operand (op0, mode0);
24220 if (VECTOR_MODE_P (mode1))
24221 op1 = safe_vector_operand (op1, mode1);
24223 /* Swap operands if we have a comparison that isn't available in
24225 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Zero the whole SImode result first, then write only its low byte.  */
24232 target = gen_reg_rtx (SImode);
24233 emit_move_insn (target, const0_rtx);
24234 target = gen_rtx_SUBREG (QImode, target, 0);
24236 if ((optimize && !register_operand (op0, mode0))
24237 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24238 op0 = copy_to_mode_reg (mode0, op0);
24239 if ((optimize && !register_operand (op1, mode1))
24240 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24241 op1 = copy_to_mode_reg (mode1, op1);
24243 pat = GEN_FCN (d->icode) (op0, op1);
/* Set the low byte from the flags comparison via STRICT_LOW_PART.  */
24247 emit_insn (gen_rtx_SET (VOIDmode,
24248 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24249 gen_rtx_fmt_ee (comparison, QImode,
24253 return SUBREG_REG (target);
24256 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a PTEST-style builtin D for call EXP.  Structure mirrors
   ix86_expand_sse_comi: emit the test insn, then set the low byte of a
   zeroed SImode pseudo from the resulting flags comparison and return
   that SImode register.  */
24259 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24263 tree arg0 = CALL_EXPR_ARG (exp, 0);
24264 tree arg1 = CALL_EXPR_ARG (exp, 1);
24265 rtx op0 = expand_normal (arg0);
24266 rtx op1 = expand_normal (arg1);
24267 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24268 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24269 enum rtx_code comparison = d->comparison;
24271 if (VECTOR_MODE_P (mode0))
24272 op0 = safe_vector_operand (op0, mode0);
24273 if (VECTOR_MODE_P (mode1))
24274 op1 = safe_vector_operand (op1, mode1);
/* Zero the SImode result, then write only its low byte below.  */
24276 target = gen_reg_rtx (SImode);
24277 emit_move_insn (target, const0_rtx);
24278 target = gen_rtx_SUBREG (QImode, target, 0);
24280 if ((optimize && !register_operand (op0, mode0))
24281 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24282 op0 = copy_to_mode_reg (mode0, op0);
24283 if ((optimize && !register_operand (op1, mode1))
24284 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24285 op1 = copy_to_mode_reg (mode1, op1);
24287 pat = GEN_FCN (d->icode) (op0, op1);
24291 emit_insn (gen_rtx_SET (VOIDmode,
24292 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24293 gen_rtx_fmt_ee (comparison, QImode,
24297 return SUBREG_REG (target);
24300 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expand an SSE4.2 PCMPESTRI/PCMPESTRM builtin D for call EXP.  The
   five call arguments map to insn operands 2..6 (two vectors, two
   explicit lengths, one 8-bit immediate control byte).  Depending on
   D->code the index result (tmode0), the mask result (tmode1), or a
   flag bit extracted from the flags register is returned.  */
24303 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24304 tree exp, rtx target)
24307 tree arg0 = CALL_EXPR_ARG (exp, 0);
24308 tree arg1 = CALL_EXPR_ARG (exp, 1);
24309 tree arg2 = CALL_EXPR_ARG (exp, 2);
24310 tree arg3 = CALL_EXPR_ARG (exp, 3);
24311 tree arg4 = CALL_EXPR_ARG (exp, 4);
24312 rtx scratch0, scratch1;
24313 rtx op0 = expand_normal (arg0);
24314 rtx op1 = expand_normal (arg1);
24315 rtx op2 = expand_normal (arg2);
24316 rtx op3 = expand_normal (arg3);
24317 rtx op4 = expand_normal (arg4);
24318 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24320 tmode0 = insn_data[d->icode].operand[0].mode;
24321 tmode1 = insn_data[d->icode].operand[1].mode;
24322 modev2 = insn_data[d->icode].operand[2].mode;
24323 modei3 = insn_data[d->icode].operand[3].mode;
24324 modev4 = insn_data[d->icode].operand[4].mode;
24325 modei5 = insn_data[d->icode].operand[5].mode;
24326 modeimm = insn_data[d->icode].operand[6].mode;
24328 if (VECTOR_MODE_P (modev2))
24329 op0 = safe_vector_operand (op0, modev2);
24330 if (VECTOR_MODE_P (modev4))
24331 op2 = safe_vector_operand (op2, modev4);
/* Legitimize each operand against its insn predicate.  */
24333 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24334 op0 = copy_to_mode_reg (modev2, op0);
24335 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24336 op1 = copy_to_mode_reg (modei3, op1);
24337 if ((optimize && !register_operand (op2, modev4))
24338 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24339 op2 = copy_to_mode_reg (modev4, op2);
24340 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24341 op3 = copy_to_mode_reg (modei5, op3);
24343 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
/* Fixed grammar ("a 8-bit" -> "an 8-bit") to match the other immediate
   diagnostics in this file.  */
24345 error ("the fifth argument must be an 8-bit immediate");
/* PCMPESTRI: the index result is wanted; the mask goes to a scratch.  */
24349 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24351 if (optimize || !target
24352 || GET_MODE (target) != tmode0
24353 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24354 target = gen_reg_rtx (tmode0);
24356 scratch1 = gen_reg_rtx (tmode1);
24358 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* PCMPESTRM: the mask result is wanted; the index goes to a scratch.  */
24360 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24362 if (optimize || !target
24363 || GET_MODE (target) != tmode1
24364 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24365 target = gen_reg_rtx (tmode1);
24367 scratch0 = gen_reg_rtx (tmode0);
24369 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag variants: both results are scratch; D->flag names the flags
   register to test.  */
24373 gcc_assert (d->flag);
24375 scratch0 = gen_reg_rtx (tmode0);
24376 scratch1 = gen_reg_rtx (tmode1);
24378 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Materialize the flag bit: zero an SImode pseudo, set its low byte
   from an EQ test of the flags register, return the SImode result.  */
24388 target = gen_reg_rtx (SImode);
24389 emit_move_insn (target, const0_rtx);
24390 target = gen_rtx_SUBREG (QImode, target, 0);
24393 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24394 gen_rtx_fmt_ee (EQ, QImode,
24395 gen_rtx_REG ((enum machine_mode) d->flag,
24398 return SUBREG_REG (target);
24405 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expand an SSE4.2 PCMPISTRI/PCMPISTRM builtin D for call EXP.  The
   implicit-length variant of ix86_expand_sse_pcmpestr: three call
   arguments map to insn operands 2..4 (two vectors and one 8-bit
   immediate control byte).  Returns the index result, the mask result,
   or an extracted flag bit depending on D->code.  */
24408 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24409 tree exp, rtx target)
24412 tree arg0 = CALL_EXPR_ARG (exp, 0);
24413 tree arg1 = CALL_EXPR_ARG (exp, 1);
24414 tree arg2 = CALL_EXPR_ARG (exp, 2);
24415 rtx scratch0, scratch1;
24416 rtx op0 = expand_normal (arg0);
24417 rtx op1 = expand_normal (arg1);
24418 rtx op2 = expand_normal (arg2);
24419 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24421 tmode0 = insn_data[d->icode].operand[0].mode;
24422 tmode1 = insn_data[d->icode].operand[1].mode;
24423 modev2 = insn_data[d->icode].operand[2].mode;
24424 modev3 = insn_data[d->icode].operand[3].mode;
24425 modeimm = insn_data[d->icode].operand[4].mode;
24427 if (VECTOR_MODE_P (modev2))
24428 op0 = safe_vector_operand (op0, modev2);
24429 if (VECTOR_MODE_P (modev3))
24430 op1 = safe_vector_operand (op1, modev3);
/* Legitimize the two vector operands against their predicates.  */
24432 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24433 op0 = copy_to_mode_reg (modev2, op0);
24434 if ((optimize && !register_operand (op1, modev3))
24435 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24436 op1 = copy_to_mode_reg (modev3, op1);
24438 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
/* Fixed grammar ("a 8-bit" -> "an 8-bit") to match the other immediate
   diagnostics in this file.  */
24440 error ("the third argument must be an 8-bit immediate");
/* PCMPISTRI: index result wanted; mask goes to a scratch register.  */
24444 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24446 if (optimize || !target
24447 || GET_MODE (target) != tmode0
24448 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24449 target = gen_reg_rtx (tmode0);
24451 scratch1 = gen_reg_rtx (tmode1);
24453 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* PCMPISTRM: mask result wanted; index goes to a scratch register.  */
24455 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24457 if (optimize || !target
24458 || GET_MODE (target) != tmode1
24459 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24460 target = gen_reg_rtx (tmode1);
24462 scratch0 = gen_reg_rtx (tmode0);
24464 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag variants: both results scratch; D->flag names the flags
   register to test.  */
24468 gcc_assert (d->flag);
24470 scratch0 = gen_reg_rtx (tmode0);
24471 scratch1 = gen_reg_rtx (tmode1);
24473 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Materialize the flag bit exactly as in the pcmpestr expander.  */
24483 target = gen_reg_rtx (SImode);
24484 emit_move_insn (target, const0_rtx);
24485 target = gen_rtx_SUBREG (QImode, target, 0);
24488 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24489 gen_rtx_fmt_ee (EQ, QImode,
24490 gen_rtx_REG ((enum machine_mode) d->flag,
24493 return SUBREG_REG (target);
24499 /* Subroutine of ix86_expand_builtin to take care of insns with
24500 variable number of operands. */
/* Generic expander: D->flag (an ix86_builtin_type) classifies the
   builtin's prototype, which determines the argument count, whether the
   trailing argument(s) must be immediates, whether the insn is a shift
   taking a count operand, and whether to delegate to one of the
   specialized expanders (ptest / vec_merge / binop / compare).  */
24503 ix86_expand_args_builtin (const struct builtin_description *d,
24504 tree exp, rtx target)
24506 rtx pat, real_target;
24507 unsigned int i, nargs;
24508 unsigned int nargs_constant = 0;
24509 int num_memory = 0;
24513 enum machine_mode mode;
24515 bool last_arg_count = false;
24516 enum insn_code icode = d->icode;
24517 const struct insn_data *insn_p = &insn_data[icode];
24518 enum machine_mode tmode = insn_p->operand[0].mode;
/* RMODE, when set, is a wider mode the insn produces; the real insn
   target is then a subreg of the user-visible TARGET.  */
24519 enum machine_mode rmode = VOIDmode;
24521 enum rtx_code comparison = d->comparison;
24523 switch ((enum ix86_builtin_type) d->flag)
/* PTEST forms delegate entirely to the ptest expander.  */
24525 case INT_FTYPE_V8SF_V8SF_PTEST:
24526 case INT_FTYPE_V4DI_V4DI_PTEST:
24527 case INT_FTYPE_V4DF_V4DF_PTEST:
24528 case INT_FTYPE_V4SF_V4SF_PTEST:
24529 case INT_FTYPE_V2DI_V2DI_PTEST:
24530 case INT_FTYPE_V2DF_V2DF_PTEST:
24531 return ix86_expand_sse_ptest (d, exp, target);
/* One-argument (unary) prototypes.  */
24532 case FLOAT128_FTYPE_FLOAT128:
24533 case FLOAT_FTYPE_FLOAT:
24534 case INT64_FTYPE_V4SF:
24535 case INT64_FTYPE_V2DF:
24536 case INT_FTYPE_V16QI:
24537 case INT_FTYPE_V8QI:
24538 case INT_FTYPE_V8SF:
24539 case INT_FTYPE_V4DF:
24540 case INT_FTYPE_V4SF:
24541 case INT_FTYPE_V2DF:
24542 case V16QI_FTYPE_V16QI:
24543 case V8SI_FTYPE_V8SF:
24544 case V8SI_FTYPE_V4SI:
24545 case V8HI_FTYPE_V8HI:
24546 case V8HI_FTYPE_V16QI:
24547 case V8QI_FTYPE_V8QI:
24548 case V8SF_FTYPE_V8SF:
24549 case V8SF_FTYPE_V8SI:
24550 case V8SF_FTYPE_V4SF:
24551 case V4SI_FTYPE_V4SI:
24552 case V4SI_FTYPE_V16QI:
24553 case V4SI_FTYPE_V4SF:
24554 case V4SI_FTYPE_V8SI:
24555 case V4SI_FTYPE_V8HI:
24556 case V4SI_FTYPE_V4DF:
24557 case V4SI_FTYPE_V2DF:
24558 case V4HI_FTYPE_V4HI:
24559 case V4DF_FTYPE_V4DF:
24560 case V4DF_FTYPE_V4SI:
24561 case V4DF_FTYPE_V4SF:
24562 case V4DF_FTYPE_V2DF:
24563 case V4SF_FTYPE_V4SF:
24564 case V4SF_FTYPE_V4SI:
24565 case V4SF_FTYPE_V8SF:
24566 case V4SF_FTYPE_V4DF:
24567 case V4SF_FTYPE_V2DF:
24568 case V2DI_FTYPE_V2DI:
24569 case V2DI_FTYPE_V16QI:
24570 case V2DI_FTYPE_V8HI:
24571 case V2DI_FTYPE_V4SI:
24572 case V2DF_FTYPE_V2DF:
24573 case V2DF_FTYPE_V4SI:
24574 case V2DF_FTYPE_V4DF:
24575 case V2DF_FTYPE_V4SF:
24576 case V2DF_FTYPE_V2SI:
24577 case V2SI_FTYPE_V2SI:
24578 case V2SI_FTYPE_V4SF:
24579 case V2SI_FTYPE_V2SF:
24580 case V2SI_FTYPE_V2DF:
24581 case V2SF_FTYPE_V2SF:
24582 case V2SF_FTYPE_V2SI:
/* Scalar unops implemented via vec_merge delegate to that expander.  */
24585 case V4SF_FTYPE_V4SF_VEC_MERGE:
24586 case V2DF_FTYPE_V2DF_VEC_MERGE:
24587 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Two-argument prototypes; plain binops go to the binop expander,
   comparisons fall through to ix86_expand_sse_compare below.  */
24588 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24589 case V16QI_FTYPE_V16QI_V16QI:
24590 case V16QI_FTYPE_V8HI_V8HI:
24591 case V8QI_FTYPE_V8QI_V8QI:
24592 case V8QI_FTYPE_V4HI_V4HI:
24593 case V8HI_FTYPE_V8HI_V8HI:
24594 case V8HI_FTYPE_V16QI_V16QI:
24595 case V8HI_FTYPE_V4SI_V4SI:
24596 case V8SF_FTYPE_V8SF_V8SF:
24597 case V8SF_FTYPE_V8SF_V8SI:
24598 case V4SI_FTYPE_V4SI_V4SI:
24599 case V4SI_FTYPE_V8HI_V8HI:
24600 case V4SI_FTYPE_V4SF_V4SF:
24601 case V4SI_FTYPE_V2DF_V2DF:
24602 case V4HI_FTYPE_V4HI_V4HI:
24603 case V4HI_FTYPE_V8QI_V8QI:
24604 case V4HI_FTYPE_V2SI_V2SI:
24605 case V4DF_FTYPE_V4DF_V4DF:
24606 case V4DF_FTYPE_V4DF_V4DI:
24607 case V4SF_FTYPE_V4SF_V4SF:
24608 case V4SF_FTYPE_V4SF_V4SI:
24609 case V4SF_FTYPE_V4SF_V2SI:
24610 case V4SF_FTYPE_V4SF_V2DF:
24611 case V4SF_FTYPE_V4SF_DI:
24612 case V4SF_FTYPE_V4SF_SI:
24613 case V2DI_FTYPE_V2DI_V2DI:
24614 case V2DI_FTYPE_V16QI_V16QI:
24615 case V2DI_FTYPE_V4SI_V4SI:
24616 case V2DI_FTYPE_V2DI_V16QI:
24617 case V2DI_FTYPE_V2DF_V2DF:
24618 case V2SI_FTYPE_V2SI_V2SI:
24619 case V2SI_FTYPE_V4HI_V4HI:
24620 case V2SI_FTYPE_V2SF_V2SF:
24621 case V2DF_FTYPE_V2DF_V2DF:
24622 case V2DF_FTYPE_V2DF_V4SF:
24623 case V2DF_FTYPE_V2DF_V2DI:
24624 case V2DF_FTYPE_V2DF_DI:
24625 case V2DF_FTYPE_V2DF_SI:
24626 case V2SF_FTYPE_V2SF_V2SF:
24627 case V1DI_FTYPE_V1DI_V1DI:
24628 case V1DI_FTYPE_V8QI_V8QI:
24629 case V1DI_FTYPE_V2SI_V2SI:
24630 if (comparison == UNKNOWN)
24631 return ix86_expand_binop_builtin (icode, exp, target);
/* SWAP forms are comparisons with operands exchanged.  */
24634 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24635 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24636 gcc_assert (comparison != UNKNOWN);
/* Shift forms: the last argument is a shift count, not a vector.  */
24640 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24641 case V8HI_FTYPE_V8HI_SI_COUNT:
24642 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24643 case V4SI_FTYPE_V4SI_SI_COUNT:
24644 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24645 case V4HI_FTYPE_V4HI_SI_COUNT:
24646 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24647 case V2DI_FTYPE_V2DI_SI_COUNT:
24648 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24649 case V2SI_FTYPE_V2SI_SI_COUNT:
24650 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24651 case V1DI_FTYPE_V1DI_SI_COUNT:
24653 last_arg_count = true;
24655 case UINT64_FTYPE_UINT64_UINT64:
24656 case UINT_FTYPE_UINT_UINT:
24657 case UINT_FTYPE_UINT_USHORT:
24658 case UINT_FTYPE_UINT_UCHAR:
/* 2TI/2DI forms produce a wider mode (RMODE) than the user sees.  */
24661 case V2DI2TI_FTYPE_V2DI_INT:
24664 nargs_constant = 1;
/* Two arguments, the last an immediate.  */
24666 case V8HI_FTYPE_V8HI_INT:
24667 case V8SF_FTYPE_V8SF_INT:
24668 case V4SI_FTYPE_V4SI_INT:
24669 case V4SI_FTYPE_V8SI_INT:
24670 case V4HI_FTYPE_V4HI_INT:
24671 case V4DF_FTYPE_V4DF_INT:
24672 case V4SF_FTYPE_V4SF_INT:
24673 case V4SF_FTYPE_V8SF_INT:
24674 case V2DI_FTYPE_V2DI_INT:
24675 case V2DF_FTYPE_V2DF_INT:
24676 case V2DF_FTYPE_V4DF_INT:
24678 nargs_constant = 1;
/* Three register arguments.  */
24680 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24681 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24682 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24683 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24684 case V2DF_FTYPE_V2DF_V2DF_V2DF:
/* Three arguments, the last an immediate.  */
24687 case V16QI_FTYPE_V16QI_V16QI_INT:
24688 case V8HI_FTYPE_V8HI_V8HI_INT:
24689 case V8SI_FTYPE_V8SI_V8SI_INT:
24690 case V8SI_FTYPE_V8SI_V4SI_INT:
24691 case V8SF_FTYPE_V8SF_V8SF_INT:
24692 case V8SF_FTYPE_V8SF_V4SF_INT:
24693 case V4SI_FTYPE_V4SI_V4SI_INT:
24694 case V4DF_FTYPE_V4DF_V4DF_INT:
24695 case V4DF_FTYPE_V4DF_V2DF_INT:
24696 case V4SF_FTYPE_V4SF_V4SF_INT:
24697 case V2DI_FTYPE_V2DI_V2DI_INT:
24698 case V2DF_FTYPE_V2DF_V2DF_INT:
24700 nargs_constant = 1;
24702 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24705 nargs_constant = 1;
24707 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24710 nargs_constant = 1;
24712 case V2DI_FTYPE_V2DI_UINT_UINT:
24714 nargs_constant = 2;
24716 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24718 nargs_constant = 2;
24721 gcc_unreachable ();
24724 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Comparisons are two-operand and handled by the compare expander.  */
24726 if (comparison != UNKNOWN)
24728 gcc_assert (nargs == 2);
24729 return ix86_expand_sse_compare (d, exp, target, swap);
/* Choose the real insn target: TARGET itself, or a wider RMODE pseudo
   whose TMODE subreg the insn writes.  */
24732 if (rmode == VOIDmode || rmode == tmode)
24736 || GET_MODE (target) != tmode
24737 || ! (*insn_p->operand[0].predicate) (target, tmode))
24738 target = gen_reg_rtx (tmode);
24739 real_target = target;
24743 target = gen_reg_rtx (rmode);
24744 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24747 for (i = 0; i < nargs; i++)
24749 tree arg = CALL_EXPR_ARG (exp, i);
24750 rtx op = expand_normal (arg);
24751 enum machine_mode mode = insn_p->operand[i + 1].mode;
24752 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24754 if (last_arg_count && (i + 1) == nargs)
24756 /* SIMD shift insns take either an 8-bit immediate or
24757 register as count. But builtin functions take int as
24758 count. If count doesn't match, we put it in register. */
24761 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24762 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24763 op = copy_to_reg (op);
/* Trailing immediate argument(s): diagnose with the exact bit width
   the insn accepts, keyed off the insn code.  */
24766 else if ((nargs - i) <= nargs_constant)
24771 case CODE_FOR_sse4_1_roundpd:
24772 case CODE_FOR_sse4_1_roundps:
24773 case CODE_FOR_sse4_1_roundsd:
24774 case CODE_FOR_sse4_1_roundss:
24775 case CODE_FOR_sse4_1_blendps:
24776 case CODE_FOR_avx_blendpd256:
24777 case CODE_FOR_avx_vpermilv4df:
24778 case CODE_FOR_avx_roundpd256:
24779 case CODE_FOR_avx_roundps256:
24780 error ("the last argument must be a 4-bit immediate");
24783 case CODE_FOR_sse4_1_blendpd:
24784 case CODE_FOR_avx_vpermilv2df:
24785 error ("the last argument must be a 2-bit immediate");
24788 case CODE_FOR_avx_vextractf128v4df:
24789 case CODE_FOR_avx_vextractf128v8sf:
24790 case CODE_FOR_avx_vextractf128v8si:
24791 case CODE_FOR_avx_vinsertf128v4df:
24792 case CODE_FOR_avx_vinsertf128v8sf:
24793 case CODE_FOR_avx_vinsertf128v8si:
24794 error ("the last argument must be a 1-bit immediate")
24797 case CODE_FOR_avx_cmpsdv2df3:
24798 case CODE_FOR_avx_cmpssv4sf3:
24799 case CODE_FOR_avx_cmppdv2df3:
24800 case CODE_FOR_avx_cmppsv4sf3:
24801 case CODE_FOR_avx_cmppdv4df3:
24802 case CODE_FOR_avx_cmppsv8sf3:
24803 error ("the last argument must be a 5-bit immediate")
/* Default: 8-bit immediate, distinguishing last from next-to-last.  */
24807 switch (nargs_constant)
24810 if ((nargs - i) == nargs_constant)
24812 error ("the next to last argument must be an 8-bit immediate");
24816 error ("the last argument must be an 8-bit immediate");
24819 gcc_unreachable ();
24826 if (VECTOR_MODE_P (mode))
24827 op = safe_vector_operand (op, mode);
24829 /* If we aren't optimizing, only allow one memory operand to
24831 if (memory_operand (op, mode))
24834 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24836 if (optimize || !match || num_memory > 1)
24837 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy to a register, then view it in MODE.  */
24841 op = copy_to_reg (op);
24842 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24847 args[i].mode = mode;
/* Emit the pattern with the legitimized operands (1 to 4 of them).  */
24853 pat = GEN_FCN (icode) (real_target, args[0].op);
24856 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24859 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24863 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24864 args[2].op, args[3].op);
24867 gcc_unreachable ();
24877 /* Subroutine of ix86_expand_builtin to take care of special insns
24878 with variable number of operands. */
/* Expander for builtins with memory semantics: loads, stores, and
   masked load/store forms.  D->flag (an ix86_special_builtin_type)
   determines the argument count, whether this is a load or a store
   (KLASS), and which operand index is the memory operand (MEMORY).
   For stores the function returns 0; for loads it returns TARGET.  */
24881 ix86_expand_special_args_builtin (const struct builtin_description *d,
24882 tree exp, rtx target)
24886 unsigned int i, nargs, arg_adjust, memory;
24890 enum machine_mode mode;
24892 enum insn_code icode = d->icode;
24893 bool last_arg_constant = false;
24894 const struct insn_data *insn_p = &insn_data[icode];
24895 enum machine_mode tmode = insn_p->operand[0].mode;
24896 enum { load, store } klass;
24898 switch ((enum ix86_special_builtin_type) d->flag)
/* No operands at all: just emit the insn.  */
24900 case VOID_FTYPE_VOID:
24901 emit_insn (GEN_FCN (icode) (target));
/* Plain loads: one pointer argument.  */
24903 case V2DI_FTYPE_PV2DI:
24904 case V32QI_FTYPE_PCCHAR:
24905 case V16QI_FTYPE_PCCHAR:
24906 case V8SF_FTYPE_PCV4SF:
24907 case V8SF_FTYPE_PCFLOAT:
24908 case V4SF_FTYPE_PCFLOAT:
24909 case V4DF_FTYPE_PCV2DF:
24910 case V4DF_FTYPE_PCDOUBLE:
24911 case V2DF_FTYPE_PCDOUBLE:
/* Plain stores: pointer destination plus one value.  */
24916 case VOID_FTYPE_PV2SF_V4SF:
24917 case VOID_FTYPE_PV4DI_V4DI:
24918 case VOID_FTYPE_PV2DI_V2DI:
24919 case VOID_FTYPE_PCHAR_V32QI:
24920 case VOID_FTYPE_PCHAR_V16QI:
24921 case VOID_FTYPE_PFLOAT_V8SF:
24922 case VOID_FTYPE_PFLOAT_V4SF:
24923 case VOID_FTYPE_PDOUBLE_V4DF:
24924 case VOID_FTYPE_PDOUBLE_V2DF:
24925 case VOID_FTYPE_PDI_DI:
24926 case VOID_FTYPE_PINT_INT:
24929 /* Reserve memory operand for target. */
24930 memory = ARRAY_SIZE (args);
/* Loads combining a register operand with a memory operand.  */
24932 case V4SF_FTYPE_V4SF_PCV2SF:
24933 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Masked loads: mask register plus memory source.  */
24938 case V8SF_FTYPE_PCV8SF_V8SF:
24939 case V4DF_FTYPE_PCV4DF_V4DF:
24940 case V4SF_FTYPE_PCV4SF_V4SF:
24941 case V2DF_FTYPE_PCV2DF_V2DF:
/* Masked stores: destination pointer, mask, and value.  */
24946 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24947 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24948 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24949 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24952 /* Reserve memory operand for target. */
24953 memory = ARRAY_SIZE (args);
24956 gcc_unreachable ();
24959 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, the first call argument is the destination address;
   turn it into the insn's (MEM ...) target.  */
24961 if (klass == store)
24963 arg = CALL_EXPR_ARG (exp, 0);
24964 op = expand_normal (arg);
24965 gcc_assert (target == 0);
24966 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24974 || GET_MODE (target) != tmode
24975 || ! (*insn_p->operand[0].predicate) (target, tmode))
24976 target = gen_reg_rtx (tmode);
/* Legitimize the remaining arguments (ARG_ADJUST skips the store
   destination already consumed above).  */
24979 for (i = 0; i < nargs; i++)
24981 enum machine_mode mode = insn_p->operand[i + 1].mode;
24984 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24985 op = expand_normal (arg);
24986 match = (*insn_p->operand[i + 1].predicate) (op, mode);
24988 if (last_arg_constant && (i + 1) == nargs)
24994 error ("the last argument must be an 8-bit immediate");
25002 /* This must be the memory operand. */
25003 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25004 gcc_assert (GET_MODE (op) == mode
25005 || GET_MODE (op) == VOIDmode);
25009 /* This must be register. */
25010 if (VECTOR_MODE_P (mode))
25011 op = safe_vector_operand (op, mode);
25013 gcc_assert (GET_MODE (op) == mode
25014 || GET_MODE (op) == VOIDmode);
25015 op = copy_to_mode_reg (mode, op);
25020 args[i].mode = mode;
25026 pat = GEN_FCN (icode) (target, args[0].op);
25029 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25032 gcc_unreachable ();
/* Stores produce no value for the caller.  */
25038 return klass == store ? 0 : target;
25041 /* Return the integer constant in ARG. Constrain it to be in the range
25042 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): this listing is elided -- the return type line, braces,
   the error-path return value and the final `return elt;` are missing
   from this excerpt.  Do not infer control flow beyond what is shown.  */
25045 get_element_number (tree vec_type, tree arg)
/* MAX is the highest valid selector: subpart count minus one.  */
25047 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject ARG unless it is a host-representable unsigned integer constant
   (host_integerp with pos=1) whose value is <= MAX.  Note the comma
   expression: ELT is assigned as a side effect of the second test.  */
25049 if (!host_integerp (arg, 1)
25050 || (elt = tree_low_cst (arg, 1), elt > max))
25052 error ("selector must be an integer constant in the range 0..%wi", max);
25059 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25060 ix86_expand_vector_init. We DO have language-level syntax for this, in
25061 the form of (type){ init-list }. Except that since we can't place emms
25062 instructions from inside the compiler, we can't allow the use of MMX
25063 registers unless the user explicitly asks for it. So we do *not* define
25064 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25065 we have builtins invoked by mmintrin.h that gives us license to emit
25066 these sorts of instructions. */
/* NOTE(review): elided listing -- return type, braces and the trailing
   `return target;` are not visible here.  */
25069 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25071 enum machine_mode tmode = TYPE_MODE (type);
25072 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25073 int i, n_elt = GET_MODE_NUNITS (tmode);
/* One rtvec slot per vector element; filled from the call's arguments.  */
25074 rtvec v = rtvec_alloc (n_elt);
25076 gcc_assert (VECTOR_MODE_P (tmode));
/* The builtin must be called with exactly one argument per element.  */
25077 gcc_assert (call_expr_nargs (exp) == n_elt);
25079 for (i = 0; i < n_elt; ++i)
/* Expand each initializer and narrow it to the element mode.  */
25081 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25082 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
/* vec_init requires a register target; allocate one if needed.  */
25085 if (!target || !register_operand (target, tmode))
25086 target = gen_reg_rtx (tmode);
25088 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25092 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25093 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25094 had a language-level syntax for referencing vector elements. */
/* NOTE(review): elided listing -- return type, local declarations for
   arg0/arg1/op0/elt, braces and the final `return target;` are missing
   from this excerpt.  */
25097 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25099 enum machine_mode tmode, mode0;
25104 arg0 = CALL_EXPR_ARG (exp, 0);
25105 arg1 = CALL_EXPR_ARG (exp, 1);
25107 op0 = expand_normal (arg0);
/* ARG1 is the element selector; get_element_number range-checks it.  */
25108 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element mode, MODE0 the full vector mode.  */
25110 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25111 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25112 gcc_assert (VECTOR_MODE_P (mode0));
25114 op0 = force_reg (mode0, op0);
/* When optimizing, always use a fresh pseudo for the result.  */
25116 if (optimize || !target || !register_operand (target, tmode))
25117 target = gen_reg_rtx (tmode);
25119 ix86_expand_vector_extract (true, target, op0, elt);
25124 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25125 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25126 a language-level syntax for referencing vector elements. */
/* NOTE(review): elided listing -- return type, the `elt` declaration,
   braces and the trailing `return target;` are not visible here.  */
25129 ix86_expand_vec_set_builtin (tree exp)
25131 enum machine_mode tmode, mode1;
25132 tree arg0, arg1, arg2;
25134 rtx op0, op1, target;
/* arg0 = source vector, arg1 = new element value, arg2 = selector.  */
25136 arg0 = CALL_EXPR_ARG (exp, 0);
25137 arg1 = CALL_EXPR_ARG (exp, 1);
25138 arg2 = CALL_EXPR_ARG (exp, 2);
/* TMODE is the vector mode, MODE1 the element mode.  */
25140 tmode = TYPE_MODE (TREE_TYPE (arg0));
25141 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25142 gcc_assert (VECTOR_MODE_P (tmode));
25144 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25145 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25146 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Sign-extend the element value into MODE1 if expansion used another mode.  */
25148 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25149 op1 = convert_modes (mode1, GET_MODE (op1), op1, true)
25151 op0 = force_reg (tmode, op0);
25152 op1 = force_reg (mode1, op1);
25154 /* OP0 is the source of these builtin functions and shouldn't be
25155 modified. Create a copy, use it and return it as target. */
25156 target = gen_reg_rtx (tmode);
25157 emit_move_insn (target, op0);
25158 ix86_expand_vector_set (true, target, op1, elt);
25163 /* Expand an expression EXP that calls a built-in function,
25164 with result going to TARGET if that's convenient
25165 (and in mode MODE if that's convenient).
25166 SUBTARGET may be used as the target for computing one of EXP's operands.
25167 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): this listing is heavily elided -- the return type, the
   enclosing switch statement header, braces, `break`s and several whole
   case bodies are missing from this excerpt.  The comments below describe
   only what the visible lines establish.  */
25170 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25171 enum machine_mode mode ATTRIBUTE_UNUSED,
25172 int ignore ATTRIBUTE_UNUSED)
25174 const struct builtin_description *d;
25176 enum insn_code icode;
25177 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25178 tree arg0, arg1, arg2;
25179 rtx op0, op1, op2, pat;
25180 enum machine_mode mode0, mode1, mode2;
25181 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25183 /* Determine whether the builtin function is available under the current ISA.
25184 Originally the builtin was not created if it wasn't applicable to the
25185 current ISA based on the command line switches. With function specific
25186 options, we need to check in the context of the function making the call
25187 whether it is supported. */
25188 if (ix86_builtins_isa[fcode].isa
25189 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags)
/* Builtin needs an ISA the current function doesn't enable: report which
   option the user must add (or "unknown" if no option maps to it).  */
25191 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25192 NULL, NULL, false);
25195 error ("%qE needs unknown isa option", fndecl);
25198 gcc_assert (opts != NULL);
25199 error ("%qE needs isa option %s", fndecl, opts);
/* MASKMOVQ/MASKMOVDQU take their arguments in a different order than the
   underlying insn's operands; note the arg1/arg2/arg0 shuffle below.  */
25207 case IX86_BUILTIN_MASKMOVQ:
25208 case IX86_BUILTIN_MASKMOVDQU:
25209 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25210 ? CODE_FOR_mmx_maskmovq
25211 : CODE_FOR_sse2_maskmovdqu);
25212 /* Note the arg order is different from the operand order. */
25213 arg1 = CALL_EXPR_ARG (exp, 0);
25214 arg2 = CALL_EXPR_ARG (exp, 1);
25215 arg0 = CALL_EXPR_ARG (exp, 2);
25216 op0 = expand_normal (arg0);
25217 op1 = expand_normal (arg1);
25218 op2 = expand_normal (arg2);
25219 mode0 = insn_data[icode].operand[0].mode;
25220 mode1 = insn_data[icode].operand[1].mode;
25221 mode2 = insn_data[icode].operand[2].mode;
/* Operand 0 is the destination memory addressed by op0.  */
25223 op0 = force_reg (Pmode, op0);
25224 op0 = gen_rtx_MEM (mode1, op0);
25226 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25227 op0 = copy_to_mode_reg (mode0, op0);
25228 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25229 op1 = copy_to_mode_reg (mode1, op1);
25230 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25231 op2 = copy_to_mode_reg (mode2, op2);
25232 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR loads the SSE control/status register from a stack slot.  */
25238 case IX86_BUILTIN_LDMXCSR:
25239 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25240 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25241 emit_move_insn (target, op0);
25242 emit_insn (gen_sse_ldmxcsr (target));
/* STMXCSR stores MXCSR to a stack slot and returns its value.  */
25245 case IX86_BUILTIN_STMXCSR:
25246 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25247 emit_insn (gen_sse_stmxcsr (target));
25248 return copy_to_mode_reg (SImode, target);
25250 case IX86_BUILTIN_CLFLUSH:
25251 arg0 = CALL_EXPR_ARG (exp, 0);
25252 op0 = expand_normal (arg0);
25253 icode = CODE_FOR_sse2_clflush;
25254 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25255 op0 = copy_to_mode_reg (Pmode, op0);
25257 emit_insn (gen_sse2_clflush (op0));
/* MONITOR takes a pointer plus two SImode hint operands; the insn is
   generated via the target-specific hook *ix86_gen_monitor.  */
25260 case IX86_BUILTIN_MONITOR:
25261 arg0 = CALL_EXPR_ARG (exp, 0);
25262 arg1 = CALL_EXPR_ARG (exp, 1);
25263 arg2 = CALL_EXPR_ARG (exp, 2);
25264 op0 = expand_normal (arg0);
25265 op1 = expand_normal (arg1);
25266 op2 = expand_normal (arg2);
25268 op0 = copy_to_mode_reg (Pmode, op0);
25270 op1 = copy_to_mode_reg (SImode, op1);
25272 op2 = copy_to_mode_reg (SImode, op2);
25273 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25276 case IX86_BUILTIN_MWAIT:
25277 arg0 = CALL_EXPR_ARG (exp, 0);
25278 arg1 = CALL_EXPR_ARG (exp, 1);
25279 op0 = expand_normal (arg0);
25280 op1 = expand_normal (arg1);
25282 op0 = copy_to_mode_reg (SImode, op0);
25284 op1 = copy_to_mode_reg (SImode, op1);
25285 emit_insn (gen_sse3_mwait (op0, op1));
/* Vector init/extract/set builtins delegate to the helpers above.  */
25288 case IX86_BUILTIN_VEC_INIT_V2SI:
25289 case IX86_BUILTIN_VEC_INIT_V4HI:
25290 case IX86_BUILTIN_VEC_INIT_V8QI:
25291 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25293 case IX86_BUILTIN_VEC_EXT_V2DF:
25294 case IX86_BUILTIN_VEC_EXT_V2DI:
25295 case IX86_BUILTIN_VEC_EXT_V4SF:
25296 case IX86_BUILTIN_VEC_EXT_V4SI:
25297 case IX86_BUILTIN_VEC_EXT_V8HI:
25298 case IX86_BUILTIN_VEC_EXT_V2SI:
25299 case IX86_BUILTIN_VEC_EXT_V4HI:
25300 case IX86_BUILTIN_VEC_EXT_V16QI:
25301 return ix86_expand_vec_ext_builtin (exp, target);
25303 case IX86_BUILTIN_VEC_SET_V2DI:
25304 case IX86_BUILTIN_VEC_SET_V4SF:
25305 case IX86_BUILTIN_VEC_SET_V4SI:
25306 case IX86_BUILTIN_VEC_SET_V8HI:
25307 case IX86_BUILTIN_VEC_SET_V4HI:
25308 case IX86_BUILTIN_VEC_SET_V16QI:
25309 return ix86_expand_vec_set_builtin (exp);
/* __float128 infinity constants: materialize via the constant pool.  */
25311 case IX86_BUILTIN_INFQ:
25312 case IX86_BUILTIN_HUGE_VALQ:
25314 REAL_VALUE_TYPE inf;
25318 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25320 tmp = validize_mem (force_const_mem (mode, tmp));
25323 target = gen_reg_rtx (mode);
25325 emit_move_insn (target, tmp);
/* Fall through to table-driven expansion: scan each builtin-description
   table in turn for a matching function code.  */
25333 for (i = 0, d = bdesc_special_args;
25334 i < ARRAY_SIZE (bdesc_special_args);
25336 if (d->code == fcode)
25337 return ix86_expand_special_args_builtin (d, exp, target);
25339 for (i = 0, d = bdesc_args;
25340 i < ARRAY_SIZE (bdesc_args);
25342 if (d->code == fcode)
25345 case IX86_BUILTIN_FABSQ:
25346 case IX86_BUILTIN_COPYSIGNQ:
25348 /* Emit a normal call if SSE2 isn't available. */
25349 return expand_call (exp, target, ignore);
25351 return ix86_expand_args_builtin (d, exp, target);
25354 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25355 if (d->code == fcode)
25356 return ix86_expand_sse_comi (d, exp, target);
25358 for (i = 0, d = bdesc_pcmpestr;
25359 i < ARRAY_SIZE (bdesc_pcmpestr);
25361 if (d->code == fcode)
25362 return ix86_expand_sse_pcmpestr (d, exp, target);
25364 for (i = 0, d = bdesc_pcmpistr;
25365 i < ARRAY_SIZE (bdesc_pcmpistr);
25367 if (d->code == fcode)
25368 return ix86_expand_sse_pcmpistr (d, exp, target);
25370 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25371 if (d->code == fcode)
25372 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25373 (enum multi_arg_type)d->flag,
/* Every builtin must be matched by some case or table above.  */
25376 gcc_unreachable ();
25379 /* Returns a function decl for a vectorized version of the builtin function
25380 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25381 if it is not available. */
/* NOTE(review): elided listing -- return type, the `type_in` parameter
   line, the switch header, `break`s and the final NULL_TREE return are
   missing from this excerpt.  */
25384 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25387 enum machine_mode in_mode, out_mode;
/* Only vector-to-vector mappings are handled.  */
25390 if (TREE_CODE (type_out) != VECTOR_TYPE
25391 || TREE_CODE (type_in) != VECTOR_TYPE)
25394 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25395 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25396 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25397 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* sqrt(v2df) -> SQRTPD, sqrtf(v4sf) -> SQRTPS (Newton-Raphson variant).  */
25401 case BUILT_IN_SQRT:
25402 if (out_mode == DFmode && out_n == 2
25403 && in_mode == DFmode && in_n == 2)
25404 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25407 case BUILT_IN_SQRTF:
25408 if (out_mode == SFmode && out_n == 4
25409 && in_mode == SFmode && in_n == 4)
25410 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
/* lrint on v2df produces v4si via pack; lrintf maps to CVTPS2DQ.  */
25413 case BUILT_IN_LRINT:
25414 if (out_mode == SImode && out_n == 4
25415 && in_mode == DFmode && in_n == 2)
25416 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25419 case BUILT_IN_LRINTF:
25420 if (out_mode == SImode && out_n == 4
25421 && in_mode == SFmode && in_n == 4)
25422 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25429 /* Dispatch to a handler for a vectorization library. */
25430 if (ix86_veclib_handler)
25431 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
25437 /* Handler for an SVML-style interface to
25438 a library with vectorized intrinsics. */
/* NOTE(review): elided listing -- the `name` buffer declaration, return
   type, NULL_TREE early returns, the switch header and the final
   `return new_fndecl;` are missing from this excerpt.  */
25441 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25444 tree fntype, new_fndecl, args;
25447 enum machine_mode el_mode, in_mode;
25450 /* The SVML is suitable for unsafe math only. */
25451 if (!flag_unsafe_math_optimizations)
25454 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25455 n = TYPE_VECTOR_SUBPARTS (type_out);
25456 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25457 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode / count must agree.  */
25458 if (el_mode != in_mode
/* Double-precision group: vectorized as 2 x DFmode.  */
25466 case BUILT_IN_LOG10:
25468 case BUILT_IN_TANH:
25470 case BUILT_IN_ATAN:
25471 case BUILT_IN_ATAN2:
25472 case BUILT_IN_ATANH:
25473 case BUILT_IN_CBRT:
25474 case BUILT_IN_SINH:
25476 case BUILT_IN_ASINH:
25477 case BUILT_IN_ASIN:
25478 case BUILT_IN_COSH:
25480 case BUILT_IN_ACOSH:
25481 case BUILT_IN_ACOS:
25482 if (el_mode != DFmode || n != 2)
/* Single-precision group: vectorized as 4 x SFmode.  */
25486 case BUILT_IN_EXPF:
25487 case BUILT_IN_LOGF:
25488 case BUILT_IN_LOG10F:
25489 case BUILT_IN_POWF:
25490 case BUILT_IN_TANHF:
25491 case BUILT_IN_TANF:
25492 case BUILT_IN_ATANF:
25493 case BUILT_IN_ATAN2F:
25494 case BUILT_IN_ATANHF:
25495 case BUILT_IN_CBRTF:
25496 case BUILT_IN_SINHF:
25497 case BUILT_IN_SINF:
25498 case BUILT_IN_ASINHF:
25499 case BUILT_IN_ASINF:
25500 case BUILT_IN_COSHF:
25501 case BUILT_IN_COSF:
25502 case BUILT_IN_ACOSHF:
25503 case BUILT_IN_ACOSF:
25504 if (el_mode != SFmode || n != 4)
/* Build the SVML entry-point name from the builtin's own name.
   "log" is special-cased as Ln; `bname+10` skips the "__builtin_" prefix.  */
25512 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25514 if (fn == BUILT_IN_LOGF)
25515 strcpy (name, "vmlsLn4");
25516 else if (fn == BUILT_IN_LOG)
25517 strcpy (name, "vmldLn2");
25520 sprintf (name, "vmls%s", bname+10);
25521 name[strlen (name)-1] = '4';
25524 sprintf (name, "vmld%s2", bname+10);
25526 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a 1- or 2-operand
   vector function type.  */
25530 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25531 args = TREE_CHAIN (args))
25535 fntype = build_function_type_list (type_out, type_in, NULL);
25537 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25539 /* Build a function declaration for the vectorized function. */
25540 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25541 TREE_PUBLIC (new_fndecl) = 1;
25542 DECL_EXTERNAL (new_fndecl) = 1;
25543 DECL_IS_NOVOPS (new_fndecl) = 1;
25544 TREE_READONLY (new_fndecl) = 1;
25549 /* Handler for an ACML-style interface to
25550 a library with vectorized intrinsics. */
/* NOTE(review): elided listing -- return type, NULL_TREE early returns,
   the switch header, `break`s and the final `return new_fndecl;` are
   missing from this excerpt.  */
25553 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Name template: the ".." is overwritten with the vectorized routine's
   name at offset 7 below (e.g. "__vrd2_sin" style names).  */
25555 char name[20] = "__vr.._";
25556 tree fntype, new_fndecl, args;
25559 enum machine_mode el_mode, in_mode;
25562 /* The ACML is 64bits only and suitable for unsafe math only as
25563 it does not correctly support parts of IEEE with the required
25564 precision such as denormals. */
25566 || !flag_unsafe_math_optimizations)
25569 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25570 n = TYPE_VECTOR_SUBPARTS (type_out);
25571 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25572 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode / count must agree.  */
25573 if (el_mode != in_mode
/* Double-precision group: requires DFmode elements.  */
25583 case BUILT_IN_LOG2:
25584 case BUILT_IN_LOG10:
25587 if (el_mode != DFmode
/* Single-precision group: requires SFmode elements.  */
25592 case BUILT_IN_SINF:
25593 case BUILT_IN_COSF:
25594 case BUILT_IN_EXPF:
25595 case BUILT_IN_POWF:
25596 case BUILT_IN_LOGF:
25597 case BUILT_IN_LOG2F:
25598 case BUILT_IN_LOG10F:
25601 if (el_mode != SFmode
/* Splice the builtin name (minus "__builtin_") into the template.  */
25610 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25611 sprintf (name + 7, "%s", bname+10);
/* Count arguments to pick a 1- or 2-operand vector function type.  */
25614 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25615 args = TREE_CHAIN (args))
25619 fntype = build_function_type_list (type_out, type_in, NULL);
25621 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25623 /* Build a function declaration for the vectorized function. */
25624 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25625 TREE_PUBLIC (new_fndecl) = 1;
25626 DECL_EXTERNAL (new_fndecl) = 1;
25627 DECL_IS_NOVOPS (new_fndecl) = 1;
25628 TREE_READONLY (new_fndecl) = 1;
25634 /* Returns a decl of a function that implements conversion of an integer vector
25635 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25636 side of the conversion.
25637 Return NULL_TREE if it is not available. */
/* NOTE(review): elided listing -- return type, the outer switch on CODE,
   default cases and NULL_TREE returns are missing from this excerpt.  */
25640 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25642 if (TREE_CODE (type) != VECTOR_TYPE
25643 /* There are only conversions from/to signed integers. */
25644 || TYPE_UNSIGNED (TREE_TYPE (type)))
/* int -> float direction: v4si -> v4sf via CVTDQ2PS.  */
25650 switch (TYPE_MODE (type))
25653 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int direction (truncating): v4sf -> v4si via CVTTPS2DQ.  */
25658 case FIX_TRUNC_EXPR:
25659 switch (TYPE_MODE (type))
25662 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25672 /* Returns a code for a target-specific builtin that implements
25673 reciprocal of the function, or NULL_TREE if not available. */
/* NOTE(review): elided listing -- return type, the md_fn branch
   structure, switch headers and NULL_TREE returns are missing here.  */
25676 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25677 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations are only valid under -mrecip with
   finite/non-trapping unsafe math, and not when optimizing for size.  */
25679 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25680 && flag_finite_math_only && !flag_trapping_math
25681 && flag_unsafe_math_optimizations))
25685 /* Machine dependent builtins. */
25688 /* Vectorized version of sqrt to rsqrt conversion. */
25689 case IX86_BUILTIN_SQRTPS_NR:
25690 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25696 /* Normal builtins. */
25699 /* Sqrt to rsqrt conversion. */
25700 case BUILT_IN_SQRTF:
25701 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25708 /* Store OPERAND to the memory after reload is completed. This means
25709 that we can't easily use assign_stack_local. */
/* NOTE(review): elided listing -- return type, the `result`/`operands`
   declarations, the mode switches and most emit_insn wrappers are
   missing from this excerpt.  Returns a MEM holding OPERAND.  */
25711 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25715 gcc_assert (reload_completed);
/* With a red zone (and not the MS ABI, which has none), store below
   the stack pointer without adjusting it.  */
25716 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25718 result = gen_rtx_MEM (mode,
25719 gen_rtx_PLUS (Pmode,
25721 GEN_INT (-RED_ZONE_SIZE)));
25722 emit_move_insn (result, operand);
/* 64-bit without red zone: push the value via PRE_DEC of %rsp.  */
25724 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25730 operand = gen_lowpart (DImode, operand);
25734 gen_rtx_SET (VOIDmode,
25735 gen_rtx_MEM (DImode,
25736 gen_rtx_PRE_DEC (DImode,
25737 stack_pointer_rtx)),
25741 gcc_unreachable ();
25743 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit DImode: split into two SImode pushes (high word first,
   presumably -- order elided from this excerpt).  */
25752 split_di (&operand, 1, operands, operands + 1);
25754 gen_rtx_SET (VOIDmode,
25755 gen_rtx_MEM (SImode,
25756 gen_rtx_PRE_DEC (Pmode,
25757 stack_pointer_rtx)),
25760 gen_rtx_SET (VOIDmode,
25761 gen_rtx_MEM (SImode,
25762 gen_rtx_PRE_DEC (Pmode,
25763 stack_pointer_rtx)),
25768 /* Store HImodes as SImodes. */
25769 operand = gen_lowpart (SImode, operand);
25773 gen_rtx_SET (VOIDmode,
25774 gen_rtx_MEM (GET_MODE (operand),
25775 gen_rtx_PRE_DEC (SImode,
25776 stack_pointer_rtx)),
25780 gcc_unreachable ();
25782 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25787 /* Free operand from the memory. */
/* Undoes ix86_force_to_memory: pops the temporary back off the stack
   when no red zone was used.  NOTE(review): elided listing -- the `size`
   computation, return type and braces are missing from this excerpt.  */
25789 ix86_free_from_memory (enum machine_mode mode)
/* Only needed when the value was actually pushed (no red zone, or the
   red-zone-less MS ABI).  */
25791 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25795 if (mode == DImode || TARGET_64BIT)
25799 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25800 to pop or add instruction if registers are available. */
25801 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25802 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25807 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25808 QImode must go into class Q_REGS.
25809 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25810 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): elided listing -- return type, several `return` values
   (NO_REGS paths, GENERAL_REGS narrowing, class returns) and braces are
   missing from this excerpt.  */
25812 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25814 enum machine_mode mode = GET_MODE (x);
25816 /* We're only allowed to return a subclass of CLASS. Many of the
25817 following checks fail for NO_REGS, so eliminate that early. */
25818 if (regclass == NO_REGS)
25821 /* All classes can load zeros. */
25822 if (x == CONST0_RTX (mode))
25825 /* Force constants into memory if we are loading a (nonzero) constant into
25826 an MMX or SSE register. This is because there are no MMX/SSE instructions
25827 to load from a constant. */
25829 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass))
25832 /* Prefer SSE regs only, if we can use them for math. */
25833 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25834 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25836 /* Floating-point constants need more complex checks. */
25837 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25839 /* General regs can load everything. */
25840 if (reg_class_subset_p (regclass, GENERAL_REGS))
25843 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25844 zero above. We only want to wind up preferring 80387 registers if
25845 we plan on doing computation with them. */
25847 && standard_80387_constant_p (x)
25849 /* Limit class to non-sse. */
25850 if (regclass == FLOAT_SSE_REGS)
25852 if (regclass == FP_TOP_SSE_REGS)
25854 if (regclass == FP_SECOND_SSE_REGS)
25855 return FP_SECOND_REG;
25856 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25863 /* Generally when we see PLUS here, it's the function invariant
25864 (plus soft-fp const_int). Which can only be computed into general
25866 if (GET_CODE (x) == PLUS)
25867 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25869 /* QImode constants are easy to load, but non-constant QImode data
25870 must go into Q_REGS. */
25871 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25873 if (reg_class_subset_p (regclass, Q_REGS))
25875 if (reg_class_subset_p (Q_REGS, regclass))
25883 /* Discourage putting floating-point values in SSE registers unless
25884 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): elided listing -- return type, the FP_TOP_REG return on
   the FP_TOP_SSE_REGS branch and the fall-through `return regclass;`
   are missing from this excerpt.  */
25886 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25888 enum machine_mode mode = GET_MODE (x);
25890 /* Restrict the output reload class to the register bank that we are doing
25891 math on. If we would like not to return a subset of CLASS, reject this
25892 alternative: if reload cannot do this, it will still use its choice. */
25893 mode = GET_MODE (x);
25894 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25895 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25897 if (X87_FLOAT_MODE_P (mode))
25899 if (regclass == FP_TOP_SSE_REGS)
25901 else if (regclass == FP_SECOND_SSE_REGS)
25902 return FP_SECOND_REG;
25904 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* TARGET_SECONDARY_RELOAD hook.  NOTE(review): elided listing -- the
   Q_REGS return for the QImode-spill case, the `regno` declaration and
   the rest of the function body are missing from this excerpt.  */
25910 static enum reg_class
25911 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25912 enum machine_mode mode,
25913 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25915 /* QImode spills from non-QI registers require
25916 intermediate register on 32bit targets. */
25917 if (!in_p && mode == QImode && !TARGET_64BIT
25918 && (rclass == GENERAL_REGS
25919 || rclass == LEGACY_REGS
25920 || rclass == INDEX_REGS)
/* Resolve pseudos/subregs to a hard register number when possible.  */
25929 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25930 regno = true_regnum (x);
25932 /* Return Q_REGS if the operand is in memory. */
25940 /* If we are copying between general and FP registers, we need a memory
25941 location. The same is true for SSE and MMX registers.
25943 To optimize register_move_cost performance, allow inline variant.
25945 The macro can't work reliably when one of the CLASSES is class containing
25946 registers from multiple units (SSE, MMX, integer). We avoid this by never
25947 combining those units in single alternative in the machine description.
25948 Ensure that this constraint holds to avoid unexpected surprises.
25950 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25951 enforce these sanity checks. */
/* NOTE(review): elided listing -- return type (bool), `return true/false`
   lines after each test and braces are missing from this excerpt.  */
25954 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25955 enum machine_mode mode, int strict)
/* Sanity check: classes must not mix units (see block comment above).  */
25957 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25958 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25959 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25960 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25961 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25962 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
25964 gcc_assert (!strict);
/* x87 <-> anything else always goes through memory.  */
25968 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
25971 /* ??? This is a lie. We do have moves between mmx/general, and for
25972 mmx/sse2. But by saying we need secondary memory we discourage the
25973 register allocator from using the mmx registers unless needed. */
25974 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25977 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
25979 /* SSE1 doesn't have any direct moves from other classes. */
25983 /* If the target says that inter-unit moves are more expensive
25984 than moving through memory, then don't generate them. */
25985 if (!TARGET_INTER_UNIT_MOVES)
25988 /* Between SSE and general, we have moves no larger than word size. */
25989 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper for inline_secondary_memory_needed; this is the
   entry point used by the SECONDARY_MEMORY_NEEDED macro.  NOTE(review):
   return type line and braces are elided from this excerpt.  */
25997 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25998 enum machine_mode mode, int strict)
26000 return inline_secondary_memory_needed (class1, class2, mode, strict);
26003 /* Return true if the registers in CLASS cannot represent the change from
26004 modes FROM to TO. */
/* NOTE(review): elided listing -- return type, the same-size early
   return, `return true/false` lines and the final return are missing
   from this excerpt.  */
26007 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26008 enum reg_class regclass)
26013 /* x87 registers can't do subreg at all, as all values are reformatted
26014 to extended precision. */
26015 if (MAYBE_FLOAT_CLASS_P (regclass)
26018 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass)
26020 /* Vector registers do not support QI or HImode loads. If we don't
26021 disallow a change to these modes, reload will assume it's ok to
26022 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26023 the vec_dupv4hi pattern. */
26024 if (GET_MODE_SIZE (from) < 4)
26027 /* Vector registers do not support subreg with nonzero offsets, which
26028 are otherwise valid for integer registers. Since we can't see
26029 whether we have a nonzero offset from here, prohibit all
26030 nonparadoxical subregs changing size. */
26031 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26038 /* Return the cost of moving data of mode M between a
26039 register and memory. A value of 2 is the default; this cost is
26040 relative to those in `REGISTER_MOVE_COST'.
26042 This function is used extensively by register_move_cost that is used to
26043 build tables at startup. Make it inline in this case.
26044 When IN is 2, return maximum of in and out move cost.
26046 If moving between registers and memory is more expensive than
26047 between two registers, you should define this macro to express the
26050 Model also increased moving costs of QImode registers in non
26054 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* NOTE(review): elided listing -- the `in` parameter line, the `index`
   computations, mode-size switch cases and several returns are missing
   from this excerpt.  Costs come from the active ix86_cost table.  */
/* x87 classes: use the fp load/store cost tables indexed by mode.  */
26058 if (FLOAT_CLASS_P (regclass)
26076 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26077 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: index chosen by GET_MODE_SIZE (details elided).  */
26079 if (SSE_CLASS_P (regclass)
26082 switch (GET_MODE_SIZE (mode))
26097 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26098 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX classes: analogous, with the mmx cost tables.  */
26100 if (MMX_CLASS_P (regclass)
26103 switch (GET_MODE_SIZE (mode))
26115 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26116 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: QImode is penalized on non-Q regs in 32-bit mode
   (movzbl vs. plain load; see TARGET_PARTIAL_REG_DEPENDENCY below).  */
26118 switch (GET_MODE_SIZE (mode))
26121 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26124 return ix86_cost->int_store[0];
26125 if (TARGET_PARTIAL_REG_DEPENDENCY
26126 && optimize_function_for_speed_p (cfun))
26127 cost = ix86_cost->movzbl_load;
26129 cost = ix86_cost->int_load[0];
26131 return MAX (cost, ix86_cost->int_store[0]);
/* Non-Q classes: QImode store needs an extra move, hence the +4.  */
26137 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26139 return ix86_cost->movzbl_load;
26141 return ix86_cost->int_store[0] + 4;
26146 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26147 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26149 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26150 if (mode == TFmode)
26153 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26155 cost = ix86_cost->int_load[2];
26157 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words moved.  */
26158 return (cost * (((int) GET_MODE_SIZE (mode)
26159 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper for inline_memory_move_cost; entry point for the
   MEMORY_MOVE_COST macro.  NOTE(review): return type line and braces
   are elided from this excerpt.  */
26164 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26166 return inline_memory_move_cost (mode, regclass, in);
26170 /* Return the cost of moving data from a register in class CLASS1 to
26171 one in class CLASS2.
26173 It is not required that the cost always equal 2 when FROM is the same as TO;
26174 on some machines it is expensive to move between registers if they are not
26175 general registers. */
/* NOTE(review): elided listing -- return type, the `cost` declaration/
   initialization, some return statements and the final default return
   are missing from this excerpt.  */
26178 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26179 enum reg_class class2)
26181 /* In case we require secondary memory, compute cost of the store followed
26182 by load. In order to avoid bad register allocation choices, we need
26183 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26185 if (inline_secondary_memory_needed (class1, class2, mode, 0)
/* in=2 asks for MAX(load, store) cost in each direction.  */
26189 cost += inline_memory_move_cost (mode, class1, 2);
26190 cost += inline_memory_move_cost (mode, class2, 2);
26192 /* In case of copying from general_purpose_register we may emit multiple
26193 stores followed by single load causing memory size mismatch stall.
26194 Count this as arbitrarily high cost of 20. */
26195 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26198 /* In the case of FP/MMX moves, the registers actually overlap, and we
26199 have to switch modes in order to treat them differently. */
26200 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26201 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))
26207 /* Moves between SSE/MMX and integer unit are expensive. */
26208 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26209 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
26211 /* ??? By keeping returned value relatively high, we limit the number
26212 of moves between integer and MMX/SSE registers for all targets.
26213 Additionally, high value prevents problem with x86_modes_tieable_p(),
26214 where integer modes in MMX/SSE registers are not tieable
26215 because of missing QImode and HImode moves to, from or between
26216 MMX/SSE registers. */
26217 return MAX (8, ix86_cost->mmxsse_to_integer);
26219 if (MAYBE_FLOAT_CLASS_P (class1))
26220 return ix86_cost->fp_move;
26221 if (MAYBE_SSE_CLASS_P (class1))
26222 return ix86_cost->sse_move;
26223 if (MAYBE_MMX_CLASS_P (class1))
26224 return ix86_cost->mmx_move;
26228 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): elided listing -- return type, braces and several
   `return 0/1` lines are missing from this excerpt.  */
26231 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26233 /* Flags and only flags can only hold CCmode values. */
26234 if (CC_REGNO_P (regno))
26235 return GET_MODE_CLASS (mode) == MODE_CC;
26236 if (GET_MODE_CLASS (mode) == MODE_CC
26237 || GET_MODE_CLASS (mode) == MODE_RANDOM
26238 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26240 if (FP_REGNO_P (regno))
26241 return VALID_FP_MODE_P (mode);
26242 if (SSE_REGNO_P (regno))
26244 /* We implement the move patterns for all vector modes into and
26245 out of SSE registers, even when no operation instructions
26246 are available. OImode move is available only when AVX is
26248 return ((TARGET_AVX && mode == OImode)
26249 || VALID_AVX256_REG_MODE (mode)
26250 || VALID_SSE_REG_MODE (mode)
26251 || VALID_SSE2_REG_MODE (mode)
26252 || VALID_MMX_REG_MODE (mode)
26253 || VALID_MMX_REG_MODE_3DNOW (mode));
26255 if (MMX_REGNO_P (regno))
26257 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26258 so if the register is available at all, then we can move data of
26259 the given mode into or out of it. */
26260 return (VALID_MMX_REG_MODE (mode)
26261 || VALID_MMX_REG_MODE_3DNOW (mode));
26264 if (mode == QImode)
26266 /* Take care for QImode values - they can be in non-QI regs,
26267 but then they do cause partial register stalls. */
/* a/b/c/d (and all regs in 64-bit mode) have true QImode subregisters.  */
26268 if (regno <= BX_REG || TARGET_64BIT)
26270 if (!TARGET_PARTIAL_REG_STALL)
26272 return reload_in_progress || reload_completed;
26274 /* We handle both integer and floats in the general purpose registers. */
26275 else if (VALID_INT_MODE_P (mode))
26277 else if (VALID_FP_MODE_P (mode))
26279 else if (VALID_DFP_MODE_P (mode))
26281 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26282 on to use that value in smaller contexts, this can easily force a
26283 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26284 supporting DImode, allow it. */
26285 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26291 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26292 tieable integer mode. */
26295 ix86_tieable_integer_mode_p (enum machine_mode mode)
/* Small integer sizes: tieable on 64-bit targets or whenever partial
   register stalls are not a concern (presumably the HI/QI-sized case
   of the surrounding switch — confirm against full source).  */
26304       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
/* Eight-byte integers tie only when DImode is the native word size.  */
26307       return TARGET_64BIT;
26314 /* Return true if MODE1 is accessible in a register that can hold MODE2
26315 without copying. That is, all register classes that can hold MODE2
26316 can also hold MODE1. */
26319 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
/* A mode is trivially tieable with itself.  */
26321   if (mode1 == mode2)
/* Two tieable integer modes always tie (see
   ix86_tieable_integer_mode_p above).  */
26324   if (ix86_tieable_integer_mode_p (mode1)
26325       && ix86_tieable_integer_mode_p (mode2))
26328   /* MODE2 being XFmode implies fp stack or general regs, which means we
26329 can tie any smaller floating point modes to it. Note that we do not
26330 tie this with TFmode. */
26331   if (mode2 == XFmode)
26332     return mode1 == SFmode || mode1 == DFmode;
26334   /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26335 that we can tie it with SFmode. */
26336   if (mode2 == DFmode)
26337     return mode1 == SFmode;
26339   /* If MODE2 is only appropriate for an SSE register, then tie with
26340 any other mode acceptable to SSE registers. */
26341   if (GET_MODE_SIZE (mode2) == 16
26342       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26343     return (GET_MODE_SIZE (mode1) == 16
26344     && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26346   /* If MODE2 is appropriate for an MMX register, then tie
26347 with any other mode acceptable to MMX registers. */
26348   if (GET_MODE_SIZE (mode2) == 8
26349       && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26350     return (GET_MODE_SIZE (mode1) == 8
26351     && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26356 /* Compute a (partial) cost for rtx X. Return true if the complete
26357 cost has been computed, and false if subexpressions should be
26358 scanned. In either case, *TOTAL contains the cost result. */
26361 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26363   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26364   enum machine_mode mode = GET_MODE (x);
/* When optimizing for size use the byte-cost table, otherwise the
   per-processor cycle-cost table.  */
26365   const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26373       if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26375       else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26377       else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): "!GET_CODE (x) != LABEL_REF" negates the rtx code
   before comparing it to the LABEL_REF enumerator, so this subterm is
   effectively always true.  It almost certainly should read
   "GET_CODE (x) != LABEL_REF" — confirm against upstream history
   (this was later fixed in GCC) before changing.  */
26379        || (!GET_CODE (x) != LABEL_REF
26380    && (GET_CODE (x) != SYMBOL_REF
26381        || !SYMBOL_REF_LOCAL_P (x)))))
26388       if (mode == VOIDmode)
26391       switch (standard_80387_constant_p (x))
26396 default: /* Other constants */
26401   /* Start with (MEM (SYMBOL_REF)), since that's where
26402 it'll probably end up. Add a penalty for size. */
26403   *total = (COSTS_N_INSNS (1)
26404     + (flag_pic != 0 && !TARGET_64BIT)
26405     + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26411       /* The zero extensions is often completely free on x86_64, so make
26412 it as cheap as possible. */
26413       if (TARGET_64BIT && mode == DImode
26414   && GET_MODE (XEXP (x, 0)) == SImode)
26416       else if (TARGET_ZERO_EXTEND_WITH_AND)
26417 *total = cost->add;
26419 *total = cost->movzx;
26423       *total = cost->movsx;
26427       if (CONST_INT_P (XEXP (x, 1))
26428   && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26430   HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26433       *total = cost->add;
/* Shift-by-2/3 can be done with an lea when that is cheaper.  */
26436   if ((value == 2 || value == 3)
26437       && cost->lea <= cost->shift_const)
26439       *total = cost->lea;
/* 32-bit target shifting a DImode value: done in register pairs.  */
26449       if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26451   if (CONST_INT_P (XEXP (x, 1)))
26453       if (INTVAL (XEXP (x, 1)) > 32)
26454 *total = cost->shift_const + COSTS_N_INSNS (2);
26456 *total = cost->shift_const * 2;
26460       if (GET_CODE (XEXP (x, 1)) == AND)
26461 *total = cost->shift_var * 2;
26463 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26468   if (CONST_INT_P (XEXP (x, 1)))
26469     *total = cost->shift_const;
26471     *total = cost->shift_var;
26476       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26478   /* ??? SSE scalar cost should be used here. */
26479   *total = cost->fmul;
26482       else if (X87_FLOAT_MODE_P (mode))
26484   *total = cost->fmul;
26487       else if (FLOAT_MODE_P (mode))
26489   /* ??? SSE vector cost should be used here. */
26490   *total = cost->fmul;
26495   rtx op0 = XEXP (x, 0);
26496   rtx op1 = XEXP (x, 1);
26498   if (CONST_INT_P (XEXP (x, 1)))
26500       unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
/* Count set bits of the multiplier; each contributes mult_bit.  */
26501       for (nbits = 0; value != 0; value &= value - 1)
26505     /* This is arbitrary. */
26508   /* Compute costs correctly for widening multiplication. */
26509   if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26510       && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26511          == GET_MODE_SIZE (mode))
26513       int is_mulwiden = 0;
26514       enum machine_mode inner_mode = GET_MODE (op0);
26516       if (GET_CODE (op0) == GET_CODE (op1))
26517 is_mulwiden = 1, op1 = XEXP (op1, 0);
26518       else if (CONST_INT_P (op1))
/* A constant second operand widens iff it fits in the inner mode
   (sign- or zero-extended to match op0's extension).  */
26520   if (GET_CODE (op0) == SIGN_EXTEND)
26521     is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26524     is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26528 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26531   *total = (cost->mult_init[MODE_INDEX (mode)]
26532     + nbits * cost->mult_bit
26533             + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26542       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26543 /* ??? SSE cost should be used here. */
26544 *total = cost->fdiv;
26545       else if (X87_FLOAT_MODE_P (mode))
26546 *total = cost->fdiv;
26547       else if (FLOAT_MODE_P (mode))
26548 /* ??? SSE vector cost should be used here. */
26549 *total = cost->fdiv;
26551 *total = cost->divide[MODE_INDEX (mode)];
26555       if (GET_MODE_CLASS (mode) == MODE_INT
26556   && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
/* (plus (plus (mult X const) Y) Z) — an lea with scaled index.  */
26558   if (GET_CODE (XEXP (x, 0)) == PLUS
26559       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26560       && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26561       && CONSTANT_P (XEXP (x, 1)))
26563       HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26564       if (val == 2 || val == 4 || val == 8)
26566   *total = cost->lea;
26567   *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26568   *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26569       outer_code, speed);
26570   *total += rtx_cost (XEXP (x, 1), outer_code, speed);
/* (plus (mult X const) Y) — lea with scale, no displacement.  */
26574   else if (GET_CODE (XEXP (x, 0)) == MULT
26575    && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26577       HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26578       if (val == 2 || val == 4 || val == 8)
26580   *total = cost->lea;
26581   *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26582   *total += rtx_cost (XEXP (x, 1), outer_code, speed);
/* Three-operand add — also representable as a single lea.  */
26586   else if (GET_CODE (XEXP (x, 0)) == PLUS)
26588       *total = cost->lea;
26589       *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26590       *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26591       *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26598       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26600   /* ??? SSE cost should be used here. */
26601   *total = cost->fadd;
26604       else if (X87_FLOAT_MODE_P (mode))
26606   *total = cost->fadd;
26609       else if (FLOAT_MODE_P (mode))
26611   /* ??? SSE vector cost should be used here. */
26612   *total = cost->fadd;
/* DImode logical ops on 32-bit targets take two word-sized insns.  */
26620       if (!TARGET_64BIT && mode == DImode)
26622   *total = (cost->add * 2
26623     + (rtx_cost (XEXP (x, 0), outer_code, speed)
26624        << (GET_MODE (XEXP (x, 0)) != DImode))
26625     + (rtx_cost (XEXP (x, 1), outer_code, speed)
26626        << (GET_MODE (XEXP (x, 1)) != DImode)));
26632       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26634   /* ??? SSE cost should be used here. */
26635   *total = cost->fchs;
26638       else if (X87_FLOAT_MODE_P (mode))
26640   *total = cost->fchs;
26643       else if (FLOAT_MODE_P (mode))
26645   /* ??? SSE vector cost should be used here. */
26646   *total = cost->fchs;
26652       if (!TARGET_64BIT && mode == DImode)
26653 *total = cost->add * 2;
26655 *total = cost->add;
/* (compare (zero_extract X 1 pos) 0) — a single-bit test.  */
26659       if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26660   && XEXP (XEXP (x, 0), 1) == const1_rtx
26661   && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26662   && XEXP (x, 1) == const0_rtx)
26664   /* This kind of construct is implemented using test[bwl].
26665 Treat it as if we had an AND. */
26666   *total = (cost->add
26667     + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26668     + rtx_cost (const1_rtx, outer_code, speed));
26674       if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26679       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26680 /* ??? SSE cost should be used here. */
26681 *total = cost->fabs;
26682       else if (X87_FLOAT_MODE_P (mode))
26683 *total = cost->fabs;
26684       else if (FLOAT_MODE_P (mode))
26685 /* ??? SSE vector cost should be used here. */
26686 *total = cost->fabs;
26690       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26691 /* ??? SSE cost should be used here. */
26692 *total = cost->fsqrt;
26693       else if (X87_FLOAT_MODE_P (mode))
26694 *total = cost->fsqrt;
26695       else if (FLOAT_MODE_P (mode))
26696 /* ??? SSE vector cost should be used here. */
26697 *total = cost->fsqrt;
/* Thread-pointer reads are effectively free address arithmetic.  */
26701       if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique local labels for Mach-O stubs.  */
26712 static int current_machopic_label_num;
26714 /* Given a symbol name and its associated stub, write out the
26715 definition of the stub. */
26718 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26720   unsigned int length;
26721   char *binder_name, *symbol_name, lazy_ptr_name[32];
26722   int label = ++current_machopic_label_num;
26724   /* For 64-bit we shouldn't get here. */
26725   gcc_assert (!TARGET_64BIT);
26727   /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26728   symb = (*targetm.strip_name_encoding) (symb);
26730   length = strlen (stub);
26731   binder_name = XALLOCAVEC (char, length + 32);
26732   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26734   length = strlen (symb);
26735   symbol_name = XALLOCAVEC (char, length + 32);
26736   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26738   sprintf (lazy_ptr_name, "L%d$lz", label);
26741     switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26743     switch_to_section (darwin_sections[machopic_symbol_stub_section]);
/* Emit the stub body: load the lazy pointer and jump through it.
   The PIC variant materializes the PC in %eax first.  */
26745   fprintf (file, "%s:\n", stub);
26746   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26750       fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26751       fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26752       fprintf (file, "\tjmp\t*%%edx\n");
26755     fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Binder: pushes the lazy pointer's address and enters dyld.  */
26757   fprintf (file, "%s:\n", binder_name);
26761       fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26762       fprintf (file, "\tpushl\t%%eax\n");
26765     fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26767   fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer: initially points at the binder; dyld rewrites it
   to the resolved symbol on first use.  */
26769   switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26770   fprintf (file, "%s:\n", lazy_ptr_name);
26771   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26772   fprintf (file, "\t.long %s\n", binder_name);
/* Target hook: end-of-file processing for Darwin/x86 — delegates to
   the generic Darwin handler.  */
26776 darwin_x86_file_end (void)
26778   darwin_file_end ();
26781 #endif /* TARGET_MACHO */
26783 /* Order the registers for register allocator. */
26786 x86_order_regs_for_local_alloc (void)
26791    /* First allocate the local general purpose registers. */
26792    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26793      if (GENERAL_REGNO_P (i) && call_used_regs[i])
26794 reg_alloc_order [pos++] = i;
26796    /* Global general purpose registers. */
26797    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26798      if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26799 reg_alloc_order [pos++] = i;
26801    /* x87 registers come first in case we are doing FP math
26803    if (!TARGET_SSE_MATH)
26804      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26805        reg_alloc_order [pos++] = i;
26807    /* SSE registers. */
26808    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26809      reg_alloc_order [pos++] = i;
26810    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26811      reg_alloc_order [pos++] = i;
/* When SSE does the FP math, x87 registers are a last resort.  */
26813    /* x87 registers. */
26814    if (TARGET_SSE_MATH)
26815      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26816        reg_alloc_order [pos++] = i;
26818    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26819      reg_alloc_order [pos++] = i;
26821    /* Initialize the rest of array as we do not allocate some registers
26823    while (pos < FIRST_PSEUDO_REGISTER)
26824      reg_alloc_order [pos++] = 0;
26827 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26828 struct attribute_spec.handler. */
26830 ix86_handle_abi_attribute (tree *node, tree name,
26831    tree args ATTRIBUTE_UNUSED,
26832    int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* Only function-like declarations may carry an ABI attribute.  */
26834   if (TREE_CODE (*node) != FUNCTION_TYPE
26835       && TREE_CODE (*node) != METHOD_TYPE
26836       && TREE_CODE (*node) != FIELD_DECL
26837       && TREE_CODE (*node) != TYPE_DECL)
26839       warning (OPT_Wattributes, "%qE attribute only applies to functions",
26841       *no_add_attrs = true;
/* ABI switching is a 64-bit-only feature.  */
26846       warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
26848       *no_add_attrs = true;
26852   /* Can combine regparm with all attributes but fastcall. */
/* The two ABI attributes are mutually exclusive; reject mixing.  */
26853   if (is_attribute_p ("ms_abi", name))
26855       if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26857   error ("ms_abi and sysv_abi attributes are not compatible");
26862   else if (is_attribute_p ("sysv_abi", name))
26864       if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26866   error ("ms_abi and sysv_abi attributes are not compatible");
26875 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26876 struct attribute_spec.handler. */
26878 ix86_handle_struct_attribute (tree *node, tree name,
26879       tree args ATTRIBUTE_UNUSED,
26880       int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* For a declaration, the attribute really targets its type.  */
26883   if (DECL_P (*node))
26885       if (TREE_CODE (*node) == TYPE_DECL)
26886 type = &TREE_TYPE (*node);
/* Only struct/union types can take a layout attribute.  */
26891   if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26892  || TREE_CODE (*type) == UNION_TYPE)))
26894       warning (OPT_Wattributes, "%qE attribute ignored",
26896       *no_add_attrs = true;
/* ms_struct and gcc_struct are mutually exclusive on one type.  */
26899   else if ((is_attribute_p ("ms_struct", name)
26900     && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26901    || ((is_attribute_p ("gcc_struct", name)
26902 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26904       warning (OPT_Wattributes, "%qE incompatible attribute ignored",
26906       *no_add_attrs = true;
/* Return whether RECORD_TYPE should use the MS bit-field layout:
   either the target default requests it (and "gcc_struct" does not
   override), or the type itself carries "ms_struct".  */
26913 ix86_ms_bitfield_layout_p (const_tree record_type)
26915   return (TARGET_MS_BITFIELD_LAYOUT &&
26916   !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26917     || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
26920 /* Returns an expression indicating where the this parameter is
26921 located on entry to the FUNCTION. */
26924 x86_this_parameter (tree function)
26926   tree type = TREE_TYPE (function);
/* AGGR is 1 when the return value is passed via a hidden pointer,
   which shifts "this" to the second argument slot.  */
26927   bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26932       const int *parm_regs;
26934       if (ix86_function_type_abi (type) == MS_ABI)
26935         parm_regs = x86_64_ms_abi_int_parameter_registers;
26937         parm_regs = x86_64_int_parameter_registers;
26938       return gen_rtx_REG (DImode, parm_regs[aggr]);
/* 32-bit: "this" may be in a register under regparm/fastcall.  */
26941   nregs = ix86_function_regparm (type, function);
26943   if (nregs > 0 && !stdarg_p (type))
26947       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26948 regno = aggr ? DX_REG : CX_REG;
26956     return gen_rtx_MEM (SImode,
26957 plus_constant (stack_pointer_rtx, 4));
26960       return gen_rtx_REG (SImode, regno);
/* Otherwise "this" is on the stack, past the return address (and the
   hidden aggregate-return pointer, when present).  */
26963   return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26966 /* Determine whether x86_output_mi_thunk can succeed. */
26969 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26970  HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26971  HOST_WIDE_INT vcall_offset, const_tree function)
26973   /* 64-bit can handle anything. */
26977   /* For 32-bit, everything's fine if we have one free register. */
26978   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26981   /* Need a free register for vcall_offset. */
26985   /* Need a free register for GOT references. */
26986   if (flag_pic && !(*targetm.binds_local_p) (function))
26989   /* Otherwise ok. */
26993 /* Output the assembler code for a thunk function. THUNK_DECL is the
26994 declaration for the thunk function itself, FUNCTION is the decl for
26995 the target function. DELTA is an immediate constant offset to be
26996 added to THIS. If VCALL_OFFSET is nonzero, the word at
26997 *(*this + vcall_offset) should be added to THIS. */
27000 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
27001      tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27002      HOST_WIDE_INT vcall_offset, tree function)
27005   rtx this_param = x86_this_parameter (function);
27008   /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27009 pull it in now and let DELTA benefit. */
27010   if (REG_P (this_param))
27011     this_reg = this_param;
27012   else if (vcall_offset)
27014       /* Put the this parameter into %eax. */
27015       xops[0] = this_param;
27016       xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27017       output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27020     this_reg = NULL_RTX;
27022   /* Adjust the this parameter by a fixed constant. */
27025       xops[0] = GEN_INT (delta);
27026       xops[1] = this_reg ? this_reg : this_param;
/* On 64-bit, a delta too wide for an immediate goes through %r10.  */
27029   if (!x86_64_general_operand (xops[0], DImode))
27031       tmp = gen_rtx_REG (DImode, R10_REG);
27033       output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27035       xops[1] = this_param;
27037   output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27040 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27043   /* Adjust the this parameter by a value stored in the vtable. */
27047 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit: pick a scratch register that is not an argument register
   of the callee (fastcall uses %ecx, so fall back to %eax).  */
27050   int tmp_regno = CX_REG;
27051   if (lookup_attribute ("fastcall",
27052 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27053     tmp_regno = AX_REG;
27054   tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer (*this) into the scratch register.  */
27057       xops[0] = gen_rtx_MEM (Pmode, this_reg);
27059       output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27061       /* Adjust the this parameter. */
27062       xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* 64-bit: a vcall offset too large for a displacement needs %r11.  */
27063       if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27065   rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27066   xops[0] = GEN_INT (vcall_offset);
27068   output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27069   xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27071       xops[1] = this_reg;
27072       output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27075   /* If necessary, drop THIS back to its stack slot. */
27076   if (this_reg && this_reg != this_param)
27078       xops[0] = this_reg;
27079       xops[1] = this_param;
27080       output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
/* Finally, tail-jump to the real target function.  */
27083   xops[0] = XEXP (DECL_RTL (function), 0);
27086       if (!flag_pic || (*targetm.binds_local_p) (function))
27087 output_asm_insn ("jmp\t%P0", xops);
27088       /* All thunks should be in the same object as their target,
27089 and thus binds_local_p should be true. */
27090       else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27091 gcc_unreachable ();
/* PIC, non-local target: jump through the GOT entry.  */
27094   tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27095   tmp = gen_rtx_CONST (Pmode, tmp);
27096   tmp = gen_rtx_MEM (QImode, tmp);
27098   output_asm_insn ("jmp\t%A0", xops);
27103       if (!flag_pic || (*targetm.binds_local_p) (function))
27104 output_asm_insn ("jmp\t%P0", xops);
/* Darwin PIC: jump through the Mach-O symbol stub.  */
27109   rtx sym_ref = XEXP (DECL_RTL (function), 0);
27110   tmp = (gen_rtx_SYMBOL_REF
27112   machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27113   tmp = gen_rtx_MEM (QImode, tmp);
27115   output_asm_insn ("jmp\t%0", xops);
27118 #endif /* TARGET_MACHO */
/* Generic 32-bit PIC: set up the GOT pointer in %ecx and jump
   through the target's GOT slot.  */
27120   tmp = gen_rtx_REG (SImode, CX_REG);
27121   output_set_got (tmp, NULL_RTX);
27124   output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27125   output_asm_insn ("jmp\t{*}%1", xops);
/* Target hook: emit the x86-specific file prologue directives.  */
27131 x86_file_start (void)
27133   default_file_start ();
27135   darwin_file_start ();
27137   if (X86_FILE_START_VERSION_DIRECTIVE)
27138     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27139   if (X86_FILE_START_FLTUSED)
27140     fputs ("\t.global\t__fltused\n", asm_out_file);
/* Switch the assembler into Intel syntax when requested.  */
27141   if (ix86_asm_dialect == ASM_INTEL)
27142     fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* Target hook: adjust the alignment COMPUTED for FIELD.  On 32-bit
   non-ALIGN_DOUBLE targets, scalar and complex integer/double fields
   are capped at 32-bit alignment (the traditional i386 ABI).  */
27146 x86_field_alignment (tree field, int computed)
27148   enum machine_mode mode;
27149   tree type = TREE_TYPE (field);
27151   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27153   mode = TYPE_MODE (strip_array_types (type));
27154   if (mode == DFmode || mode == DCmode
27155       || GET_MODE_CLASS (mode) == MODE_INT
27156       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27157     return MIN (32, computed);
27161 /* Output assembler code to FILE to increment profiler label # LABELNO
27162 for profiling a function entry. */
27164 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
/* 64-bit: counter address in %r11, then call mcount (via GOT when
   PIC under the SysV ABI).  */
27168 #ifndef NO_PROFILE_COUNTERS
27169       fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27172       if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27173 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27175 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC: address the counter and mcount through %ebx/GOT.  */
27179 #ifndef NO_PROFILE_COUNTERS
27180       fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27181        LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27183       fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* 32-bit non-PIC: absolute addresses.  */
27187 #ifndef NO_PROFILE_COUNTERS
27188       fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27189        PROFILE_COUNT_REGISTER);
27191       fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27195 /* We don't have exact information about the insn sizes, but we may assume
27196 quite safely that we are informed about all 1 byte insns and memory
27197 address sizes. This is enough to eliminate unnecessary padding in
27201 min_insn_size (rtx insn)
27205   if (!INSN_P (insn) || !active_insn_p (insn))
27208   /* Discard alignments we've emit and jump instructions. */
27209   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27210       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27213       && (GET_CODE (PATTERN (insn)) == ADDR_VEC
27214   || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
27217   /* Important case - calls are always 5 bytes.
27218 It is common to have many calls in the row. */
27220       && symbolic_reference_mentioned_p (PATTERN (insn))
27221       && !SIBLING_CALL_P (insn))
27223   if (get_attr_length (insn) <= 1)
27226   /* For normal instructions we may rely on the sizes of addresses
27227 and the presence of symbol to require 4 bytes of encoding.
27228 This is not the case for jumps where references are PC relative. */
27229   if (!JUMP_P (insn))
27231       l = get_attr_length_address (insn);
27232       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27241 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
27245 ix86_avoid_jump_misspredicts (void)
27247   rtx insn, start = get_insns ();
27248   int nbytes = 0, njumps = 0;
27251   /* Look for all minimal intervals of instructions containing 4 jumps.
27252 The intervals are bounded by START and INSN. NBYTES is the total
27253 size of instructions in the interval including INSN and not including
27254 START. When the NBYTES is smaller than 16 bytes, it is possible
27255 that the end of START and INSN ends up in the same 16byte page.
27257 The smallest offset in the page INSN can start is the case where START
27258 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
27259 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
27261   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
27264       nbytes += min_insn_size (insn);
27266 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
27267 INSN_UID (insn), min_insn_size (insn));
/* Count real jumps; jump tables (ADDR_VEC) do not execute.  */
27269   && GET_CODE (PATTERN (insn)) != ADDR_VEC
27270   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Shrink the window from the front while it holds 4+ jumps.  */
27278   start = NEXT_INSN (start);
27279   if ((JUMP_P (start)
27280        && GET_CODE (PATTERN (start)) != ADDR_VEC
27281        && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27283     njumps--, isjump = 1;
27286   nbytes -= min_insn_size (start);
27288       gcc_assert (njumps >= 0);
27290 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27291 INSN_UID (start), INSN_UID (insn), nbytes);
/* Four jumps would fit in one 16-byte window: pad before INSN so
   its window holds at most three.  */
27293       if (njumps == 3 && isjump && nbytes < 16)
27295   int padsize = 15 - nbytes + min_insn_size (insn);
27298     fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27299      INSN_UID (insn), padsize);
27300           emit_insn_before (gen_align (GEN_INT (padsize)), insn);
27305 /* AMD Athlon works faster
27306 when RET is not destination of conditional jump or directly preceded
27307 by other jump instruction. We avoid the penalty by inserting NOP just
27308 before the RET instructions in such cases. */
27310 ix86_pad_returns (void)
/* Examine every block that exits the function via a return.  */
27315   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27317       basic_block bb = e->src;
27318       rtx ret = BB_END (bb);
27320       bool replace = false;
27322       if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27323   || optimize_bb_for_size_p (bb))
/* Find the nearest preceding active insn or label.  */
27325       for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27326 if (active_insn_p (prev) || LABEL_P (prev))
/* A label right before the return means it may be a jump target.  */
27328       if (prev && LABEL_P (prev))
27333   FOR_EACH_EDGE (e, ei, bb->preds)
27334     if (EDGE_FREQUENCY (e) && e->src->index >= 0
27335 && !(e->flags & EDGE_FALLTHRU))
27340   prev = prev_active_insn (ret);
27342       && ((JUMP_P (prev) && any_condjump_p (prev))
27345   /* Empty functions get branch mispredict even when the jump destination
27346 is not visible to us. */
27347   if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
/* Replace the plain RET with the padded (long) form.  */
27352     emit_insn_before (gen_return_internal_long (), ret);
27358 /* Implement machine specific optimizations. We implement padding of returns
27359 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* Both passes are speed optimizations only, so they are skipped when
   not optimizing or when the function is optimized for size.  */
27363   if (TARGET_PAD_RETURNS && optimize
27364       && optimize_function_for_speed_p (cfun))
27365     ix86_pad_returns ();
27366   if (TARGET_FOUR_JUMP_LIMIT && optimize
27367       && optimize_function_for_speed_p (cfun))
27368     ix86_avoid_jump_misspredicts ();
27371 /* Return nonzero when QImode register that must be represented via REX prefix
27374 x86_extended_QIreg_mentioned_p (rtx insn)
27377   extract_insn_cached (insn);
/* Registers above %ebx have no legacy 8-bit encoding and therefore
   need a REX prefix when accessed as QImode.  */
27378   for (i = 0; i < recog_data.n_operands; i++)
27379     if (REG_P (recog_data.operand[i])
27380 && REGNO (recog_data.operand[i]) > BX_REG)
27385 /* Return nonzero when P points to register encoded via REX prefix.
27386 Called via for_each_rtx. */
27388 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27390   unsigned int regno;
27393   regno = REGNO (*p);
/* r8-r15 and xmm8-xmm15 are only reachable with a REX prefix.  */
27394   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27397 /* Return true when INSN mentions register that must be encoded using REX
27400 x86_extended_reg_mentioned_p (rtx insn)
/* Walk the pattern (or the bare rtx for non-insns) and test each
   register via extended_reg_mentioned_1.  */
27402   return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27403        extended_reg_mentioned_1, NULL);
27406 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27407 optabs would emit if we didn't have TFmode patterns. */
27410 x86_emit_floatuns (rtx operands[2])
27412   rtx neglab, donelab, i0, i1, f0, in, out;
27413   enum machine_mode mode, inmode;
27415   inmode = GET_MODE (operands[1]);
27416   gcc_assert (inmode == SImode || inmode == DImode);
27419   in = force_reg (inmode, operands[1]);
27420   mode = GET_MODE (out);
27421   neglab = gen_label_rtx ();
27422   donelab = gen_label_rtx ();
27423   f0 = gen_reg_rtx (mode);
/* If the value is non-negative (top bit clear), a plain signed
   conversion is already correct.  */
27425   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27427   expand_float (out, in, 0);
27429   emit_jump_insn (gen_jump (donelab));
27432   emit_label (neglab);
/* Top bit set: halve the value, rounding to odd ((in >> 1) | (in & 1))
   so the final doubling rounds correctly, then convert and double.  */
27434   i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27436   i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27438   i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27440   expand_float (f0, i0, 0);
27442   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27444   emit_label (donelab);
27447 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27448 with all elements equal to VAR. Return true if successful. */
27451 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27452    rtx target, rtx val)
27454   enum machine_mode hmode, smode, wsmode, wvmode;
/* Simple case: a direct vec_duplicate pattern exists.  */
27469       val = force_reg (GET_MODE_INNER (mode), val);
27470       x = gen_rtx_VEC_DUPLICATE (mode, val);
27471       emit_insn (gen_rtx_SET (VOIDmode, target, x));
27477       if (TARGET_SSE || TARGET_3DNOW_A)
27479   val = gen_lowpart (SImode, val);
27480   x = gen_rtx_TRUNCATE (HImode, val);
27481   x = gen_rtx_VEC_DUPLICATE (mode, x);
27482   emit_insn (gen_rtx_SET (VOIDmode, target, x));
27504 /* Extend HImode to SImode using a paradoxical SUBREG. */
27505 tmp1 = gen_reg_rtx (SImode);
27506 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27507 /* Insert the SImode value as low element of V4SImode vector. */
27508 tmp2 = gen_reg_rtx (V4SImode);
27509 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27510   gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27511   CONST0_RTX (V4SImode),
27513 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27514 /* Cast the V4SImode vector back to a V8HImode vector. */
27515 tmp1 = gen_reg_rtx (V8HImode);
27516 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27517 /* Duplicate the low short through the whole low SImode word. */
27518 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27519 /* Cast the V8HImode vector back to a V4SImode vector. */
27520 tmp2 = gen_reg_rtx (V4SImode);
27521 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27522 /* Replicate the low element of the V4SImode vector. */
27523 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27524 /* Cast the V2SImode back to V8HImode, and store in target. */
27525 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27536 /* Extend QImode to SImode using a paradoxical SUBREG. */
27537 tmp1 = gen_reg_rtx (SImode);
27538 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27539 /* Insert the SImode value as low element of V4SImode vector. */
27540 tmp2 = gen_reg_rtx (V4SImode);
27541 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27542   gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27543   CONST0_RTX (V4SImode),
27545 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27546 /* Cast the V4SImode vector back to a V16QImode vector. */
27547 tmp1 = gen_reg_rtx (V16QImode);
27548 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27549 /* Duplicate the low byte through the whole low SImode word. */
27550 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27551 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27552 /* Cast the V16QImode vector back to a V4SImode vector. */
27553 tmp2 = gen_reg_rtx (V4SImode);
27554 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27555 /* Replicate the low element of the V4SImode vector. */
27556 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27557 /* Cast the V2SImode back to V16QImode, and store in target. */
27558 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27566   /* Replicate the value once into the next wider mode and recurse. */
27567   val = convert_modes (wsmode, smode, val, true);
27568   x = expand_simple_binop (wsmode, ASHIFT, val,
27569    GEN_INT (GET_MODE_BITSIZE (smode)),
27570    NULL_RTX, 1, OPTAB_LIB_WIDEN);
27571   val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27573   x = gen_reg_rtx (wvmode);
27574   if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27575     gcc_unreachable ();
27576   emit_move_insn (target, gen_lowpart (mode, x));
/* AVX 256-bit case: duplicate into a half-width vector, then
   concatenate it with itself.  */
27599 rtx tmp = gen_reg_rtx (hmode);
27600 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27601 emit_insn (gen_rtx_SET (VOIDmode, target,
27602 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27611 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27612    whose ONE_VAR element is VAR, and other elements are zero.  Return true
/* NOTE(review): this excerpt is elided; several original lines (return type,
   braces, some switch labels) are missing between the statements below.  */
27616 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27617 rtx target, rtx var, int one_var)
27619 enum machine_mode vsimode;
27622 bool use_vector_set = false;
/* Decide per-mode whether ix86_expand_vector_set on a zeroed TARGET is the
   cheapest strategy; the guarding mode switch is partly elided here.  */
27627 /* For SSE4.1, we normally use vector set.  But if the second
27628 element is zero and inter-unit moves are OK, we use movq
27630 use_vector_set = (TARGET_64BIT
27632 && !(TARGET_INTER_UNIT_MOVES
27638 use_vector_set = TARGET_SSE4_1;
27641 use_vector_set = TARGET_SSE2;
27644 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27651 use_vector_set = TARGET_AVX;
27654 /* Use ix86_expand_vector_set in 64bit mode only.  */
27655 use_vector_set = TARGET_AVX && TARGET_64BIT;
27661 if (use_vector_set)
/* Zero the whole vector first, then overwrite element ONE_VAR with VAR.  */
27663 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27664 var = force_reg (GET_MODE_INNER (mode), var);
27665 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element case: build (vec_concat VAR 0) directly.  */
27681 var = force_reg (GET_MODE_INNER (mode), var);
27682 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27683 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Four-element case: place VAR in element 0 via vec_merge, using a fresh
   pseudo when TARGET is a hard register (shuffle insns want a pseudo).  */
27688 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27689 new_target = gen_reg_rtx (mode);
27691 new_target = target;
27692 var = force_reg (GET_MODE_INNER (mode), var);
27693 x = gen_rtx_VEC_DUPLICATE (mode, var);
27694 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27695 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27698 /* We need to shuffle the value to the correct position, so
27699 create a new pseudo to store the intermediate result.  */
27701 /* With SSE2, we can use the integer shuffle insns.  */
27702 if (mode != V4SFmode && TARGET_SSE2)
/* pshufd moves element 0 (holding VAR) to position ONE_VAR; the other
   lanes read the zeroed elements.  */
27704 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27706 GEN_INT (one_var == 1 ? 0 : 1),
27707 GEN_INT (one_var == 2 ? 0 : 1),
27708 GEN_INT (one_var == 3 ? 0 : 1)));
27709 if (target != new_target)
27710 emit_move_insn (target, new_target);
27714 /* Otherwise convert the intermediate result to V4SFmode and
27715 use the SSE1 shuffle instructions.  */
27716 if (mode != V4SFmode)
27718 tmp = gen_reg_rtx (V4SFmode);
27719 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
/* shufps: low two selectors index the first operand, high two (the +4
   forms) index the second operand.  */
27724 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27726 GEN_INT (one_var == 1 ? 0 : 1),
27727 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27728 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27730 if (mode != V4SFmode)
27731 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27732 else if (tmp != target)
27733 emit_move_insn (target, tmp);
27735 else if (target != new_target)
27736 emit_move_insn (target, new_target);
/* Narrow-element modes: widen to a same-size vector of SImode and recurse.  */
27741 vsimode = V4SImode;
27747 vsimode = V2SImode;
27753 /* Zero extend the variable element to SImode and recurse.  */
27754 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27756 x = gen_reg_rtx (vsimode);
27757 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27759 gcc_unreachable ();
27761 emit_move_insn (target, gen_lowpart (mode, x));
27769 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27770    consisting of the values in VALS.  It is known that all elements
27771    except ONE_VAR are constants.  Return true if successful. */
/* NOTE(review): excerpt is elided; braces and some case labels are missing.  */
27774 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27775 rtx target, rtx vals, int one_var)
27777 rtx var = XVECEXP (vals, 0, one_var);
27778 enum machine_mode wmode;
/* Build CONST_VEC: a copy of VALS with the variable slot replaced by zero,
   suitable for loading from the constant pool.  */
27781 const_vec = copy_rtx (vals);
27782 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27783 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27791 /* For the two element vectors, it's just as easy to use
27792 the general case. */
27796 /* Use ix86_expand_vector_set in 64bit mode only. */
27819 /* There's no way to set one QImode entry easily. Combine
27820 the variable value with its adjacent constant value, and
27821 promote to an HImode set. */
/* one_var ^ 1 is the partner byte sharing the same HImode half-word.  */
27822 x = XVECEXP (vals, 0, one_var ^ 1)
27825 var = convert_modes (HImode, QImode, var, true);
27826 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27827 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27828 x = GEN_INT (INTVAL (x) & 0xff);
27832 var = convert_modes (HImode, QImode, var, true);
27833 x = gen_int_mode (INTVAL (x) << 8, HImode);
/* OR the constant partner byte into the combined HImode value.  */
27835 if (x != const0_rtx)
27836 var = expand_simple_binop (HImode, IOR, var, x, var,
27837 1, OPTAB_LIB_WIDEN);
/* Load the constant vector viewed in the wider mode, then set the combined
   HImode element (one_var >> 1 indexes the half-word, not the byte).  */
27839 x = gen_reg_rtx (wmode);
27840 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27841 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27843 emit_move_insn (target, gen_lowpart (mode, x));
/* General path: load the constant-pool vector and overwrite ONE_VAR.  */
27850 emit_move_insn (target, const_vec);
27851 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27855 /* A subroutine of ix86_expand_vector_init_general.  Use vector
27856    concatenate to handle the most general case: all values variable,
27857    and none identical.  */
/* NOTE(review): excerpt is elided; the switches selecting CMODE/HMODE by
   element count are mostly missing here.  */
27860 ix86_expand_vector_init_concat (enum machine_mode mode,
27861 rtx target, rtx *ops, int n)
27863 enum machine_mode cmode, hmode = VOIDmode;
27864 rtx first[8], second[4];
27904 gcc_unreachable ();
/* n == 2: concatenate the two operands directly.  */
27907 if (!register_operand (ops[1], cmode))
27908 ops[1] = force_reg (cmode, ops[1]);
27909 if (!register_operand (ops[0], cmode))
27910 ops[0] = force_reg (cmode, ops[0]);
27911 emit_insn (gen_rtx_SET (VOIDmode, target,
27912 gen_rtx_VEC_CONCAT (mode, ops[0],
27932 gcc_unreachable ();
27948 gcc_unreachable ();
27953 /* FIXME: We process inputs backward to help RA.  PR 36222.  */
/* Pair adjacent operands into half-width vectors (FIRST[]), then recurse
   until a single concat produces TARGET.  */
27956 for (; i > 0; i -= 2, j--)
27958 first[j] = gen_reg_rtx (cmode);
27959 v = gen_rtvec (2, ops[i - 1], ops[i]);
27960 ix86_expand_vector_init (false, first[j],
27961 gen_rtx_PARALLEL (cmode, v));
/* When two levels of halving are needed, build SECOND[] from FIRST[]
   and recurse on the half mode before the final concat.  */
27967 gcc_assert (hmode != VOIDmode);
27968 for (i = j = 0; i < n; i += 2, j++)
27970 second[j] = gen_reg_rtx (hmode);
27971 ix86_expand_vector_init_concat (hmode, second [j],
27975 ix86_expand_vector_init_concat (mode, target, second, n);
27978 ix86_expand_vector_init_concat (mode, target, first, n);
27982 gcc_unreachable ();
27986 /* A subroutine of ix86_expand_vector_init_general.  Use vector
27987    interleave to handle the most general case: all values variable,
27988    and none identical.  */
/* NOTE(review): excerpt is elided; braces/case labels are missing between
   some statements below.  */
27991 ix86_expand_vector_init_interleave (enum machine_mode mode,
27992 rtx target, rtx *ops, int n)
27994 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
/* Per-mode insn generators: load an even element, and interleave the low
   halves at each widening step.  */
27997 rtx (*gen_load_even) (rtx, rtx, rtx);
27998 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27999 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* V8HImode configuration.  */
28004 gen_load_even = gen_vec_setv8hi;
28005 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28006 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28007 inner_mode = HImode;
28008 first_imode = V4SImode;
28009 second_imode = V2DImode;
28010 third_imode = VOIDmode;
/* V16QImode configuration: needs one more interleave level.  */
28013 gen_load_even = gen_vec_setv16qi;
28014 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28015 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28016 inner_mode = QImode;
28017 first_imode = V8HImode;
28018 second_imode = V4SImode;
28019 third_imode = V2DImode;
28022 gcc_unreachable ();
/* Combine each pair (ops[2i], ops[2i+1]) into one FIRST_IMODE vector.  */
28025 for (i = 0; i < n; i++)
28027 /* Extend the odd element to SImode using a paradoxical SUBREG.  */
28028 op0 = gen_reg_rtx (SImode);
28029 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28031 /* Insert the SImode value as low element of V4SImode vector. */
28032 op1 = gen_reg_rtx (V4SImode);
28033 op0 = gen_rtx_VEC_MERGE (V4SImode,
28034 gen_rtx_VEC_DUPLICATE (V4SImode,
28036 CONST0_RTX (V4SImode),
28038 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28040 /* Cast the V4SImode vector back to a vector in original mode.  */
28041 op0 = gen_reg_rtx (mode);
28042 emit_move_insn (op0, gen_lowpart (mode, op1));
28044 /* Load even elements into the second position.  */
28045 emit_insn ((*gen_load_even) (op0,
28046 force_reg (inner_mode,
28050 /* Cast vector to FIRST_IMODE vector. */
28051 ops[i] = gen_reg_rtx (first_imode);
28052 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28055 /* Interleave low FIRST_IMODE vectors. */
/* ops[] is reused in place as the working array at each level.  */
28056 for (i = j = 0; i < n; i += 2, j++)
28058 op0 = gen_reg_rtx (first_imode);
28059 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28061 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28062 ops[j] = gen_reg_rtx (second_imode);
28063 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28066 /* Interleave low SECOND_IMODE vectors. */
28067 switch (second_imode)
/* V4SImode case: one extra interleave level down to V2DImode.  */
28070 for (i = j = 0; i < n / 2; i += 2, j++)
28072 op0 = gen_reg_rtx (second_imode);
28073 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28076 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28078 ops[j] = gen_reg_rtx (third_imode);
28079 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* Fall through to the V2DImode final interleave.  */
28081 second_imode = V2DImode;
28082 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28086 op0 = gen_reg_rtx (second_imode);
28087 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28090 /* Cast the SECOND_IMODE vector back to a vector on original
28092 emit_insn (gen_rtx_SET (VOIDmode, target,
28093 gen_lowpart (mode, op0)));
28097 gcc_unreachable ();
28101 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
28102    all values variable, and none identical.  */
/* NOTE(review): excerpt is elided; the dispatching mode switch and braces
   are partly missing below.  */
28105 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28106 rtx target, rtx vals)
28108 rtx ops[32], op0, op1;
28109 enum machine_mode half_mode = VOIDmode;
28116 if (!mmx_ok && !TARGET_SSE)
/* Wide-element modes: build by recursive concatenation.  */
28128 n = GET_MODE_NUNITS (mode);
28129 for (i = 0; i < n; i++)
28130 ops[i] = XVECEXP (vals, 0, i);
28131 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit integer modes: build two 128-bit halves by interleave,
   then concatenate.  */
28135 half_mode = V16QImode;
28139 half_mode = V8HImode;
28143 n = GET_MODE_NUNITS (mode);
28144 for (i = 0; i < n; i++)
28145 ops[i] = XVECEXP (vals, 0, i);
28146 op0 = gen_reg_rtx (half_mode);
28147 op1 = gen_reg_rtx (half_mode);
28148 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28150 ix86_expand_vector_init_interleave (half_mode, op1,
28151 &ops [n >> 1], n >> 2);
28152 emit_insn (gen_rtx_SET (VOIDmode, target,
28153 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28157 if (!TARGET_SSE4_1)
28165 /* Don't use ix86_expand_vector_init_interleave if we can't
28166 move from GPR to SSE register directly. */
28167 if (!TARGET_INTER_UNIT_MOVES)
28170 n = GET_MODE_NUNITS (mode);
28171 for (i = 0; i < n; i++)
28172 ops[i] = XVECEXP (vals, 0, i);
28173 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28181 gcc_unreachable ();
/* Fallback: pack elements into word_mode integers with shift/IOR, then
   assemble the vector from those words.  */
28185 int i, j, n_elts, n_words, n_elt_per_word;
28186 enum machine_mode inner_mode;
28187 rtx words[4], shift;
28189 inner_mode = GET_MODE_INNER (mode);
28190 n_elts = GET_MODE_NUNITS (mode);
28191 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28192 n_elt_per_word = n_elts / n_words;
28193 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28195 for (i = 0; i < n_words; ++i)
28197 rtx word = NULL_RTX;
/* Elements are folded in from most-significant to least within a word.  */
28199 for (j = 0; j < n_elt_per_word; ++j)
28201 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28202 elt = convert_modes (word_mode, inner_mode, elt, true);
28208 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28209 word, 1, OPTAB_LIB_WIDEN);
28210 word = expand_simple_binop (word_mode, IOR, word, elt,
28211 word, 1, OPTAB_LIB_WIDEN);
28219 emit_move_insn (target, gen_lowpart (mode, words[0]));
28220 else if (n_words == 2)
/* Clobber TMP first so the two partial word stores don't leave a
   spurious use of the uninitialized register.  */
28222 rtx tmp = gen_reg_rtx (mode);
28223 emit_clobber (tmp);
28224 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28225 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28226 emit_move_insn (target, tmp);
28228 else if (n_words == 4)
28230 rtx tmp = gen_reg_rtx (V4SImode);
28231 gcc_assert (word_mode == SImode);
28232 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28233 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28234 emit_move_insn (target, gen_lowpart (mode, tmp));
28237 gcc_unreachable ();
28241 /* Initialize vector TARGET via VALS.  Suppress the use of MMX
28242    instructions unless MMX_OK is true.  */
/* Top-level vector-initialization dispatcher: classifies VALS and picks the
   cheapest expansion strategy.  NOTE(review): excerpt is elided.  */
28245 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28247 enum machine_mode mode = GET_MODE (target);
28248 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28249 int n_elts = GET_MODE_NUNITS (mode);
28250 int n_var = 0, one_var = -1;
28251 bool all_same = true, all_const_zero = true;
/* Scan VALS: count variable elements (N_VAR, last at ONE_VAR), and track
   whether all elements are zero and/or identical.  */
28255 for (i = 0; i < n_elts; ++i)
28257 x = XVECEXP (vals, 0, i);
28258 if (!(CONST_INT_P (x)
28259 || GET_CODE (x) == CONST_DOUBLE
28260 || GET_CODE (x) == CONST_FIXED))
28261 n_var++, one_var = i;
28262 else if (x != CONST0_RTX (inner_mode))
28263 all_const_zero = false;
28264 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28268 /* Constants are best loaded from the constant pool. */
28271 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28275 /* If all values are identical, broadcast the value. */
28277 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28278 XVECEXP (vals, 0, 0)))
28281 /* Values where only one field is non-constant are best loaded from
28282 the pool and overwritten via move later. */
28286 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28287 XVECEXP (vals, 0, one_var),
28291 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Last resort: fully general expansion.  */
28295 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET.  NOTE(review):
   excerpt is elided; the mode switch and braces are partly missing.  */
28299 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28301 enum machine_mode mode = GET_MODE (target);
28302 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28303 enum machine_mode half_mode;
28304 bool use_vec_merge = false;
/* AVX 256-bit handling: extract the 128-bit half containing ELT, set the
   element there, then re-insert.  Tables are indexed [mode][hi/lo half].  */
28306 static rtx (*gen_extract[6][2]) (rtx, rtx)
28308 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28309 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28310 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28311 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28312 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28313 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28315 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28317 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28318 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28319 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28320 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28321 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28322 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element case: extract the other element, concat with VAL in the
   right order.  */
28332 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28333 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28335 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28337 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28338 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28344 use_vec_merge = TARGET_SSE4_1;
28352 /* For the two element vectors, we implement a VEC_CONCAT with
28353 the extraction of the other element. */
28355 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28356 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28359 op0 = val, op1 = tmp;
28361 op0 = tmp, op1 = val;
28363 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28364 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28369 use_vec_merge = TARGET_SSE4_1;
28376 use_vec_merge = true;
/* V4SFmode without SSE4.1: per-element shufps sequences.  Comments show
   the lane contents (X = VAL) after each step.  */
28380 /* tmp = target = A B C D */
28381 tmp = copy_to_reg (target);
28382 /* target = A A B B */
28383 emit_insn (gen_sse_unpcklps (target, target, target));
28384 /* target = X A B B */
28385 ix86_expand_vector_set (false, target, val, 0);
28386 /* target = A X C D */
28387 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28388 GEN_INT (1), GEN_INT (0),
28389 GEN_INT (2+4), GEN_INT (3+4)));
28393 /* tmp = target = A B C D */
28394 tmp = copy_to_reg (target);
28395 /* tmp = X B C D */
28396 ix86_expand_vector_set (false, tmp, val, 0);
28397 /* target = A B X D */
28398 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28399 GEN_INT (0), GEN_INT (1),
28400 GEN_INT (0+4), GEN_INT (3+4)));
28404 /* tmp = target = A B C D */
28405 tmp = copy_to_reg (target);
28406 /* tmp = X B C D */
28407 ix86_expand_vector_set (false, tmp, val, 0);
28408 /* target = A B X D */
28409 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28410 GEN_INT (0), GEN_INT (1),
28411 GEN_INT (2+4), GEN_INT (0+4)));
28415 gcc_unreachable ();
28420 use_vec_merge = TARGET_SSE4_1;
28424 /* Element 0 handled by vec_merge below. */
28427 use_vec_merge = true;
28433 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28434 store into element 0, then shuffle them back. */
/* ORDER is the identity permutation with 0 and ELT exchanged; applying the
   same pshufd twice restores the original order.  */
28438 order[0] = GEN_INT (elt);
28439 order[1] = const1_rtx;
28440 order[2] = const2_rtx;
28441 order[3] = GEN_INT (3);
28442 order[elt] = const0_rtx;
28444 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28445 order[1], order[2], order[3]));
28447 ix86_expand_vector_set (false, target, val, 0);
28449 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28450 order[1], order[2], order[3]));
28454 /* For SSE1, we have to reuse the V4SF code. */
28455 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28456 gen_lowpart (SFmode, val), elt);
28461 use_vec_merge = TARGET_SSE2;
28464 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28468 use_vec_merge = TARGET_SSE4_1;
/* 256-bit AVX modes: select the [mode][half] generator-table row.  */
28475 half_mode = V16QImode;
28481 half_mode = V8HImode;
28487 half_mode = V4SImode;
28493 half_mode = V2DImode;
28499 half_mode = V4SFmode;
28505 half_mode = V2DFmode;
28511 /* Compute offset. */
28515 gcc_assert (i <= 1);
28517 /* Extract the half. */
28518 tmp = gen_reg_rtx (half_mode);
28519 emit_insn ((*gen_extract[j][i]) (tmp, target));
28521 /* Put val in tmp at elt. */
28522 ix86_expand_vector_set (false, tmp, val, elt);
28525 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
/* vec_merge path: duplicate VAL and merge lane ELT into TARGET.  */
28534 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28535 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28536 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: bounce through a stack temporary.  */
28540 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28542 emit_move_insn (mem, target);
28544 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28545 emit_move_insn (tmp, val);
28547 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  NOTE(review):
   excerpt is elided; the mode switch is partly missing.  */
28552 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28554 enum machine_mode mode = GET_MODE (vec);
28555 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28556 bool use_vec_extr = false;
28569 use_vec_extr = true;
28573 use_vec_extr = TARGET_SSE4_1;
/* V4SFmode: move the wanted lane to position 0 by shuffle, then fall
   through to the scalar extract of element 0.  */
28585 tmp = gen_reg_rtx (mode);
28586 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28587 GEN_INT (elt), GEN_INT (elt),
28588 GEN_INT (elt+4), GEN_INT (elt+4)));
28592 tmp = gen_reg_rtx (mode);
28593 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28597 gcc_unreachable ();
28600 use_vec_extr = true;
28605 use_vec_extr = TARGET_SSE4_1;
/* V4SImode with SSE2: pshufd broadcasts lane ELT, then extract lane 0.  */
28619 tmp = gen_reg_rtx (mode);
28620 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28621 GEN_INT (elt), GEN_INT (elt),
28622 GEN_INT (elt), GEN_INT (elt)));
28626 tmp = gen_reg_rtx (mode);
28627 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28631 gcc_unreachable ();
28634 use_vec_extr = true;
28639 /* For SSE1, we have to reuse the V4SF code. */
28640 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28641 gen_lowpart (V4SFmode, vec), elt);
28647 use_vec_extr = TARGET_SSE2;
28650 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28654 use_vec_extr = TARGET_SSE4_1;
28658 /* ??? Could extract the appropriate HImode element and shift. */
/* vec_select path: emit the extraction as RTL and let patterns match.  */
28665 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28666 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28668 /* Let the rtl optimizers know about the zero extension performed. */
28669 if (inner_mode == QImode || inner_mode == HImode)
28671 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28672 target = gen_lowpart (SImode, target);
28675 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill VEC to a stack temporary and load the element.  */
28679 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28681 emit_move_insn (mem, vec);
28683 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28684 emit_move_insn (target, tmp);
28688 /* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
28689    pattern to reduce; DEST is the destination; IN is the input vector. */
28692 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28694 rtx tmp1, tmp2, tmp3;
28696 tmp1 = gen_reg_rtx (V4SFmode);
28697 tmp2 = gen_reg_rtx (V4SFmode);
28698 tmp3 = gen_reg_rtx (V4SFmode);
/* Fold the high pair onto the low pair: tmp1 = { C, D, C, D },
   tmp2 = fn (tmp1, in) combines lanes {0,2} and {1,3}.  */
28700 emit_insn (gen_sse_movhlps (tmp1, in, in));
28701 emit_insn (fn (tmp2, tmp1, in));
/* Broadcast lane 1 of tmp2 and combine with lane 0 for the final value.  */
28703 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28704 GEN_INT (1), GEN_INT (1),
28705 GEN_INT (1+4), GEN_INT (1+4)));
28706 emit_insn (fn (dest, tmp2, tmp3));
28709 /* Target hook for scalar_mode_supported_p.  */
/* Accepts decimal float modes and TFmode specially (conditions partly
   elided here), otherwise defers to the generic hook.  */
28711 ix86_scalar_mode_supported_p (enum machine_mode mode)
28713 if (DECIMAL_FLOAT_MODE_P (mode))
28715 else if (mode == TFmode)
28718 return default_scalar_mode_supported_p (mode);
28721 /* Implements target hook vector_mode_supported_p.  */
/* A vector mode is supported when some enabled ISA extension (SSE, SSE2,
   AVX, MMX, 3DNow!) can hold it in a register.  */
28723 ix86_vector_mode_supported_p (enum machine_mode mode)
28725 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28727 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28729 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28731 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28733 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28738 /* Target hook for c_mode_for_suffix.  */
/* Maps a constant-suffix character to a machine mode; the body of the
   mapping is elided from this excerpt.  */
28739 static enum machine_mode
28740 ix86_c_mode_for_suffix (char suffix)
28750 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28752    We do this in the new i386 backend to maintain source compatibility
28753    with the old cc0-based compiler.  */
/* Implicitly adds "flags" and "fpsr" to every asm's clobber list, since
   cc0-era code never declared them.  */
28756 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28757 tree inputs ATTRIBUTE_UNUSED,
28760 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28762 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28767 /* Implements target vector targetm.asm.encode_section_info.  This
28768    is not used by netware.  */
28770 static void ATTRIBUTE_UNUSED
28771 ix86_encode_section_info (tree decl, rtx rtl, int first)
28773 default_encode_section_info (decl, rtl, first);
/* Variables placed in large data sections need far (64-bit) addressing;
   record that on the symbol so address generation can see it.  */
28775 if (TREE_CODE (decl) == VAR_DECL
28776 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28777 && ix86_in_large_data_p (decl))
28778 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28781 /* Worker function for REVERSE_CONDITION.  */
/* FP compares may be unordered, so their reversal must preserve NaN
   behavior; integer CC modes use the plain reversal.  */
28784 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28786 return (mode != CCFPmode && mode != CCFPUmode
28787 ? reverse_condition (code)
28788 : reverse_condition_maybe_unordered (code));
28791 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template; chooses popping (fstp) forms when the
   source register dies in this insn.  NOTE(review): excerpt is elided.  */
28795 output_387_reg_move (rtx insn, rtx *operands)
28797 if (REG_P (operands[0]))
28799 if (REG_P (operands[1])
28800 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
/* Source dies: popping the stack top into %st(0) needs ffreep instead.  */
28802 if (REGNO (operands[0]) == FIRST_STACK_REG)
28803 return output_387_ffreep (operands, 0);
28804 return "fstp\t%y0";
28806 if (STACK_TOP_P (operands[0]))
28807 return "fld%Z1\t%y1";
28810 else if (MEM_P (operands[0]))
28812 gcc_assert (REG_P (operands[1]));
28813 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28814 return "fstp%Z0\t%y0";
28817 /* There is no non-popping store to memory for XFmode.
28818 So if we need one, follow the store with a load. */
28819 if (GET_MODE (operands[0]) == XFmode)
28820 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
28822 return "fst%Z0\t%y0";
28829 /* Output code to perform a conditional jump to LABEL, if C2 flag in
28830    FP status register is set.  */
28833 ix86_emit_fp_unordered_jump (rtx label)
28835 rtx reg = gen_reg_rtx (HImode);
/* Read the x87 status word into REG.  */
28838 emit_insn (gen_x86_fnstsw_1 (reg));
28840 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
/* sahf transfers the status word to EFLAGS; test "unordered".  */
28842 emit_insn (gen_x86_sahf_1 (reg));
28844 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28845 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* Without sahf: test the C2 bit (0x04 in the high byte) directly.  */
28849 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28851 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28852 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28855 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28856 gen_rtx_LABEL_REF (VOIDmode, label),
28858 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28860 emit_jump_insn (temp);
/* The unordered case is expected to be rare.  */
28861 predict_jump (REG_BR_PROB_BASE * 10 / 100);
28864 /* Output code to perform a log1p XFmode calculation.  */
/* Computes OP0 = log1p (OP1).  fyl2xp1 is only accurate for small |x|
   (the threshold below is sqrt(2)/2 - 1); otherwise compute log (1 + x)
   via fyl2x.  */
28866 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28868 rtx label1 = gen_label_rtx ();
28869 rtx label2 = gen_label_rtx ();
28871 rtx tmp = gen_reg_rtx (XFmode);
28872 rtx tmp2 = gen_reg_rtx (XFmode);
/* if (|op1| >= threshold) goto label1 (the fyl2x path).  */
28874 emit_insn (gen_absxf2 (tmp, op1));
28875 emit_insn (gen_cmpxf (tmp,
28876 CONST_DOUBLE_FROM_REAL_VALUE (
28877 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28879 emit_jump_insn (gen_bge (label1));
28881 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28882 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28883 emit_jump (label2);
28885 emit_label (label1);
/* Large argument: op0 = ln2 * log2 (1 + op1) via explicit add + fyl2x.  */
28886 emit_move_insn (tmp, CONST1_RTX (XFmode));
28887 emit_insn (gen_addxf3 (tmp, op1, tmp));
28888 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28889 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28891 emit_label (label2);
28894 /* Output code to perform a Newton-Raphson approximation of a single precision
28895    floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
28897 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28899 rtx x0, x1, e0, e1, two;
28901 x0 = gen_reg_rtx (mode);
28902 e0 = gen_reg_rtx (mode);
28903 e1 = gen_reg_rtx (mode);
28904 x1 = gen_reg_rtx (mode);
28906 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
/* For vector modes, splat the constant 2.0 across all lanes.  */
28908 if (VECTOR_MODE_P (mode))
28909 two = ix86_build_const_vector (SFmode, true, two);
28911 two = force_reg (mode, two);
28913 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28915 /* x0 = rcp(b) estimate */
28916 emit_insn (gen_rtx_SET (VOIDmode, x0,
28917 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0 */
28920 emit_insn (gen_rtx_SET (VOIDmode, e0,
28921 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 */
28923 emit_insn (gen_rtx_SET (VOIDmode, e1,
28924 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1  (one refined Newton iteration of 1/b) */
28926 emit_insn (gen_rtx_SET (VOIDmode, x1,
28927 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 */
28929 emit_insn (gen_rtx_SET (VOIDmode, res,
28930 gen_rtx_MULT (mode, a, x1)));
28933 /* Output code to perform a Newton-Raphson approximation of a
28934    single precision floating point [reciprocal] square root.  */
/* NOTE(review): excerpt is elided; the RECIP flag parameter and some
   conditionals selecting sqrt vs. rsqrt are missing from this view.  */
28936 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28939 rtx x0, e0, e1, e2, e3, mthree, mhalf;
28942 x0 = gen_reg_rtx (mode);
28943 e0 = gen_reg_rtx (mode);
28944 e1 = gen_reg_rtx (mode);
28945 e2 = gen_reg_rtx (mode);
28946 e3 = gen_reg_rtx (mode);
/* mthree = -3.0, mhalf = -0.5, as SFmode constants.  */
28948 real_from_integer (&r, VOIDmode, -3, -1, 0);
28949 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28951 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28952 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28954 if (VECTOR_MODE_P (mode))
28956 mthree = ix86_build_const_vector (SFmode, true, mthree);
28957 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28960 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28961 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
28963 /* x0 = rsqrt(a) estimate */
28964 emit_insn (gen_rtx_SET (VOIDmode, x0,
28965 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28968 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
/* Build a mask of lanes where a != 0 and AND it into x0, zeroing the
   rsqrt(0) = inf lanes.  */
28973 zero = gen_reg_rtx (mode);
28974 mask = gen_reg_rtx (mode);
28976 zero = force_reg (mode, CONST0_RTX(mode));
28977 emit_insn (gen_rtx_SET (VOIDmode, mask,
28978 gen_rtx_NE (mode, zero, a)));
28980 emit_insn (gen_rtx_SET (VOIDmode, x0,
28981 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a */
28985 emit_insn (gen_rtx_SET (VOIDmode, e0,
28986 gen_rtx_MULT (mode, x0, a)));
/* e1 = e0 * x0 */
28988 emit_insn (gen_rtx_SET (VOIDmode, e1,
28989 gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3.0 (added as -3.0) */
28992 mthree = force_reg (mode, mthree);
28993 emit_insn (gen_rtx_SET (VOIDmode, e2,
28994 gen_rtx_PLUS (mode, e1, mthree)));
28996 mhalf = force_reg (mode, mhalf);
28998 /* e3 = -.5 * x0 */
28999 emit_insn (gen_rtx_SET (VOIDmode, e3,
29000 gen_rtx_MULT (mode, x0, mhalf)));
29002 /* e3 = -.5 * e0 */
29003 emit_insn (gen_rtx_SET (VOIDmode, e3,
29004 gen_rtx_MULT (mode, e0, mhalf)));
29005 /* ret = e2 * e3 */
29006 emit_insn (gen_rtx_SET (VOIDmode, res,
29007 gen_rtx_MULT (mode, e2, e3)));
29010 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
29012 static void ATTRIBUTE_UNUSED
29013 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29016 /* With Binutils 2.15, the "@unwind" marker must be specified on
29017 every occurrence of the ".eh_frame" section, not just the first
/* Emit the .eh_frame section directive with the @unwind type marker;
   everything else goes through the generic ELF handler.  */
29020 && strcmp (name, ".eh_frame") == 0)
29022 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29023 flags & SECTION_WRITE ? "aw" : "a");
29026 default_elf_asm_named_section (name, flags, decl);
29029 /* Return the mangling of TYPE if it is an extended fundamental type.  */
29031 static const char *
29032 ix86_mangle_type (const_tree type)
29034 type = TYPE_MAIN_VARIANT (type);
/* Only fundamental scalar types can have extended manglings.  */
29036 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29037 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29040 switch (TYPE_MODE (type))
29043 /* __float128 is "g". */
29046 /* "long double" or __float80 is "e". */
29053 /* For 32-bit code we can save PIC register setup by using
29054    __stack_chk_fail_local hidden function instead of calling
29055    __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
29056    register, so it is better to call __stack_chk_fail directly.  */
29059 ix86_stack_protect_fail (void)
29061 return TARGET_64BIT
29062 ? default_external_stack_protect_fail ()
29063 : default_hidden_stack_protect_fail ();
29066 /* Select a format to encode pointers in exception handling data.  CODE
29067    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
29068    true if the symbol may be affected by dynamic relocations.
29070    ??? All x86 object file formats are capable of representing this.
29071    After all, the relocation needed is the same as for the call insn.
29072    Whether or not a particular assembler allows us to enter such, I
29073    guess we'll have to see.  */
29075 asm_preferred_eh_data_format (int code, int global)
/* PIC path (guard elided here): pc-relative sdata, 4-byte when the code
   model guarantees the offset fits, indirect for global symbols.  */
29079 int type = DW_EH_PE_sdata8;
29081 || ix86_cmodel == CM_SMALL_PIC
29082 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))
29083 type = DW_EH_PE_sdata4;
29084 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: absolute pointers, shrunk to udata4 when the model allows.  */
29086 if (ix86_cmodel == CM_SMALL
29087 || (ix86_cmodel == CM_MEDIUM && code))
29088 return DW_EH_PE_udata4;
29089 return DW_EH_PE_absptr;
29092 /* Expand copysign from SIGN to the positive value ABS_VALUE
29093    storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
/* RESULT = ABS_VALUE | (SIGN & ~MASK); with the default sign-bit mask this
   copies only the sign bit of SIGN onto ABS_VALUE.  */
29096 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29098 enum machine_mode mode = GET_MODE (sign);
29099 rtx sgn = gen_reg_rtx (mode);
29100 if (mask == NULL_RTX)
29102 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29103 if (!VECTOR_MODE_P (mode))
29105 /* We need to generate a scalar mode mask in this case. */
/* Select element 0 of the vector mask to get a scalar-mode mask.  */
29106 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29107 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29108 mask = gen_reg_rtx (mode);
29109 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* Invert the mask (andn form) before extracting the sign bit.  */
29113 mask = gen_rtx_NOT (mode, mask);
29114 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29115 gen_rtx_AND (mode, mask, sign)));
29116 emit_insn (gen_rtx_SET (VOIDmode, result,
29117 gen_rtx_IOR (mode, abs_value, sgn)));
29120 /* Expand fabs (OP0) and return a new rtx that holds the result.  The
29121    mask for masking out the sign-bit is stored in *SMASK, if that is
/* fabs via bitwise AND with the inverted sign-bit mask.  */
29124 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29126 enum machine_mode mode = GET_MODE (op0);
29129 xa = gen_reg_rtx (mode);
29130 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29131 if (!VECTOR_MODE_P (mode))
29133 /* We need to generate a scalar mode mask in this case. */
29134 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29135 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29136 mask = gen_reg_rtx (mode);
29137 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29139 emit_insn (gen_rtx_SET (VOIDmode, xa,
29140 gen_rtx_AND (mode, op0, mask)));
29148 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29149    swapping the operands if SWAP_OPERANDS is true.  The expanded
29150    code is a forward jump to a newly created label in case the
29151    comparison is true.  The generated label rtx is returned.  */
29153 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29154 bool swap_operands)
/* CCFPUmode: the unordered FP compare (ucomiss/ucomisd family).  */
29165 label = gen_label_rtx ();
29166 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29167 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29168 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29169 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29170 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29171 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29172 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Record the jump target so later passes can track the CFG edge.  */
29173 JUMP_LABEL (tmp) = label;
29178 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29179 using comparison code CODE. Operands are swapped for the comparison if
29180 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29182 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29183 bool swap_operands)
29185 enum machine_mode mode = GET_MODE (op0);
29186 rtx mask = gen_reg_rtx (mode);
/* Pick the DF or SF maskcmp pattern based on the operand mode.
   NOTE(review): the swap of op0/op1 for SWAP_OPERANDS happens on elided
   lines above — confirm against full source.  */
29195 if (mode == DFmode)
29196 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29197 gen_rtx_fmt_ee (code, mode, op0, op1)));
29199 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29200 gen_rtx_fmt_ee (code, mode, op0, op1)));
29205 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29206 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29208 ix86_gen_TWO52 (enum machine_mode mode)
29210 REAL_VALUE_TYPE TWO52r;
/* 2**52 for DFmode, 2**23 for SFmode — adding then subtracting this
   constant rounds a value to integer in the FP rounding mode.  */
29213 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29214 TWO52 = const_double_from_real_value (TWO52r, mode);
29215 TWO52 = force_reg (mode, TWO52);
29220 /* Expand SSE sequence for computing lround from OP1 storing
29223 ix86_expand_lround (rtx op0, rtx op1)
29225 /* C code for the stuff we're doing below:
29226 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29229 enum machine_mode mode = GET_MODE (op1);
29230 const struct real_format *fmt;
29231 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
/* pred_half = 0.5 - 2**(-p-1), the largest representable value < 0.5;
   using it instead of 0.5 avoids rounding exactly-halfway inputs up twice.  */
29234 /* load nextafter (0.5, 0.0) */
29235 fmt = REAL_MODE_FORMAT (mode);
29236 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29237 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29239 /* adj = copysign (0.5, op1) */
29240 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29241 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29243 /* adj = op1 + adj */
29244 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29246 /* op0 = (imode)adj */
29247 expand_fix (op0, adj, 0);
29250 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
29253 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29255 /* C code for the stuff we're doing below (for do_floor):
29257 xi -= (double)xi > op1 ? 1 : 0;
29260 enum machine_mode fmode = GET_MODE (op1);
29261 enum machine_mode imode = GET_MODE (op0);
29262 rtx ireg, freg, label, tmp;
29264 /* reg = (long)op1 */
29265 ireg = gen_reg_rtx (imode);
29266 expand_fix (ireg, op1, 0);
29268 /* freg = (double)reg */
29269 freg = gen_reg_rtx (fmode);
29270 expand_float (freg, ireg, 0);
/* Truncation rounded toward zero; compensate by +/-1 when the round-trip
   value is on the wrong side of op1.  The compare operands are swapped for
   ceil (!do_floor), and the adjustment direction flips accordingly.  */
29272 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29273 label = ix86_expand_sse_compare_and_jump (UNLE,
29274 freg, op1, !do_floor);
29275 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29276 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29277 emit_move_insn (ireg, tmp);
29279 emit_label (label);
29280 LABEL_NUSES (label) = 1;
29282 emit_move_insn (op0, ireg);
29285 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29286 result in OPERAND0. */
29288 ix86_expand_rint (rtx operand0, rtx operand1)
29290 /* C code for the stuff we're doing below:
29291 xa = fabs (operand1);
29292 if (!isless (xa, 2**52))
29294 xa = xa + 2**52 - 2**52;
29295 return copysign (xa, operand1);
29297 enum machine_mode mode = GET_MODE (operand0);
29298 rtx res, xa, label, TWO52, mask;
29300 res = gen_reg_rtx (mode);
29301 emit_move_insn (res, operand1);
29303 /* xa = abs (operand1) */
29304 xa = ix86_expand_sse_fabs (res, &mask);
/* Values >= 2**52 (2**23 for SF) are already integral; skip the rounding.  */
29306 /* if (!isless (xa, TWO52)) goto label; */
29307 TWO52 = ix86_gen_TWO52 (mode);
29308 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29310 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29311 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign so rint (-0.0) stays -0.0.  */
29313 ix86_sse_copysign_to_positive (res, xa, res, mask);
29315 emit_label (label);
29316 LABEL_NUSES (label) = 1;
29318 emit_move_insn (operand0, res);
29321 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29324 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29326 /* C code for the stuff we expand below.
29327 double xa = fabs (x), x2;
29328 if (!isless (xa, TWO52))
29330 xa = xa + TWO52 - TWO52;
29331 x2 = copysign (xa, x);
29340 enum machine_mode mode = GET_MODE (operand0);
29341 rtx xa, TWO52, tmp, label, one, res, mask;
29343 TWO52 = ix86_gen_TWO52 (mode);
29345 /* Temporary for holding the result, initialized to the input
29346 operand to ease control flow. */
29347 res = gen_reg_rtx (mode);
29348 emit_move_insn (res, operand1);
29350 /* xa = abs (operand1) */
29351 xa = ix86_expand_sse_fabs (res, &mask);
29353 /* if (!isless (xa, TWO52)) goto label; */
29354 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29356 /* xa = xa + TWO52 - TWO52; */
29357 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29358 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29360 /* xa = copysign (xa, operand1) */
29361 ix86_sse_copysign_to_positive (xa, xa, res, mask);
/* The round-to-nearest result may be off by one; the masked 1.0 (for floor)
   or -1.0 (for ceil) below corrects it while preserving signed zero.  */
29363 /* generate 1.0 or -1.0 */
29364 one = force_reg (mode,
29365 const_double_from_real_value (do_floor
29366 ? dconst1 : dconstm1, mode));
29368 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29369 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29370 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29371 gen_rtx_AND (mode, one, tmp)));
29372 /* We always need to subtract here to preserve signed zero. */
29373 tmp = expand_simple_binop (mode, MINUS,
29374 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29375 emit_move_insn (res, tmp);
29377 emit_label (label);
29378 LABEL_NUSES (label) = 1;
29380 emit_move_insn (operand0, res);
29383 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29386 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29388 /* C code for the stuff we expand below.
29389 double xa = fabs (x), x2;
29390 if (!isless (xa, TWO52))
29392 x2 = (double)(long)x;
29399 if (HONOR_SIGNED_ZEROS (mode))
29400 return copysign (x2, x);
29403 enum machine_mode mode = GET_MODE (operand0);
29404 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29406 TWO52 = ix86_gen_TWO52 (mode);
29408 /* Temporary for holding the result, initialized to the input
29409 operand to ease control flow. */
29410 res = gen_reg_rtx (mode);
29411 emit_move_insn (res, operand1);
29413 /* xa = abs (operand1) */
29414 xa = ix86_expand_sse_fabs (res, &mask);
29416 /* if (!isless (xa, TWO52)) goto label; */
29417 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Round via integer conversion — truncates toward zero; fixed up below.
   Uses DImode for double, so this variant needs 64-bit cvttsd2siq.  */
29419 /* xa = (double)(long)x */
29420 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29421 expand_fix (xi, res, 0);
29422 expand_float (xa, xi, 0);
29425 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29427 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29428 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29429 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29430 gen_rtx_AND (mode, one, tmp)));
29431 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29432 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29433 emit_move_insn (res, tmp);
/* Integer round-trip loses -0.0; restore the sign when it matters.  */
29435 if (HONOR_SIGNED_ZEROS (mode))
29436 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29438 emit_label (label);
29439 LABEL_NUSES (label) = 1;
29441 emit_move_insn (operand0, res);
29444 /* Expand SSE sequence for computing round from OPERAND1 storing
29445 into OPERAND0. Sequence that works without relying on DImode truncation
29446 via cvttsd2siq that is only available on 64bit targets. */
29448 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29450 /* C code for the stuff we expand below.
29451 double xa = fabs (x), xa2, x2;
29452 if (!isless (xa, TWO52))
29454 Using the absolute value and copying back sign makes
29455 -0.0 -> -0.0 correct.
29456 xa2 = xa + TWO52 - TWO52;
29461 else if (dxa > 0.5)
29463 x2 = copysign (xa2, x);
29466 enum machine_mode mode = GET_MODE (operand0);
29467 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29469 TWO52 = ix86_gen_TWO52 (mode);
29471 /* Temporary for holding the result, initialized to the input
29472 operand to ease control flow. */
29473 res = gen_reg_rtx (mode);
29474 emit_move_insn (res, operand1);
29476 /* xa = abs (operand1) */
29477 xa = ix86_expand_sse_fabs (res, &mask);
29479 /* if (!isless (xa, TWO52)) goto label; */
29480 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* xa2 is xa rounded to nearest even; dxa measures how far rounding moved
   it, which tells us whether to correct by +/-1 for round-half-away.  */
29482 /* xa2 = xa + TWO52 - TWO52; */
29483 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29484 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29486 /* dxa = xa2 - xa; */
29487 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29489 /* generate 0.5, 1.0 and -0.5 */
29490 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29491 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29492 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
/* NOTE(review): tmp from gen_reg_rtx below is immediately overwritten by
   the compare-mask result — the first assignment looks dead.  */
29496 tmp = gen_reg_rtx (mode);
29497 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29498 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29499 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29500 gen_rtx_AND (mode, one, tmp)));
29501 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29502 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29503 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29504 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29505 gen_rtx_AND (mode, one, tmp)));
29506 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29508 /* res = copysign (xa2, operand1) */
29509 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29511 emit_label (label);
29512 LABEL_NUSES (label) = 1;
29514 emit_move_insn (operand0, res);
29517 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29520 ix86_expand_trunc (rtx operand0, rtx operand1)
29522 /* C code for SSE variant we expand below.
29523 double xa = fabs (x), x2;
29524 if (!isless (xa, TWO52))
29526 x2 = (double)(long)x;
29527 if (HONOR_SIGNED_ZEROS (mode))
29528 return copysign (x2, x);
29531 enum machine_mode mode = GET_MODE (operand0);
29532 rtx xa, xi, TWO52, label, res, mask;
29534 TWO52 = ix86_gen_TWO52 (mode);
29536 /* Temporary for holding the result, initialized to the input
29537 operand to ease control flow. */
29538 res = gen_reg_rtx (mode);
29539 emit_move_insn (res, operand1);
29541 /* xa = abs (operand1) */
29542 xa = ix86_expand_sse_fabs (res, &mask);
29544 /* if (!isless (xa, TWO52)) goto label; */
29545 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Integer round-trip truncates toward zero, which IS trunc — no
   compensation step is needed, unlike floor/ceil.  */
29547 /* x = (double)(long)x */
29548 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29549 expand_fix (xi, res, 0);
29550 expand_float (res, xi, 0);
29552 if (HONOR_SIGNED_ZEROS (mode))
29553 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29555 emit_label (label);
29556 LABEL_NUSES (label) = 1;
29558 emit_move_insn (operand0, res);
29561 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29564 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29566 enum machine_mode mode = GET_MODE (operand0);
29567 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29569 /* C code for SSE variant we expand below.
29570 double xa = fabs (x), x2;
29571 if (!isless (xa, TWO52))
29573 xa2 = xa + TWO52 - TWO52;
29577 x2 = copysign (xa2, x);
29581 TWO52 = ix86_gen_TWO52 (mode);
29583 /* Temporary for holding the result, initialized to the input
29584 operand to ease control flow. */
29585 res = gen_reg_rtx (mode);
29586 emit_move_insn (res, operand1);
29588 /* xa = abs (operand1) */
29589 xa = ix86_expand_sse_fabs (res, &smask);
29591 /* if (!isless (xa, TWO52)) goto label; */
29592 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Round-to-nearest on |x| may overshoot; subtracting the masked 1.0 when
   res > xa turns it into truncation (floor of the absolute value).  */
29594 /* res = xa + TWO52 - TWO52; */
29595 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29596 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29597 emit_move_insn (res, tmp);
29600 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29602 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29603 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29604 emit_insn (gen_rtx_SET (VOIDmode, mask,
29605 gen_rtx_AND (mode, mask, one)));
29606 tmp = expand_simple_binop (mode, MINUS,
29607 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29608 emit_move_insn (res, tmp);
29610 /* res = copysign (res, operand1) */
29611 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29613 emit_label (label);
29614 LABEL_NUSES (label) = 1;
29616 emit_move_insn (operand0, res);
29619 /* Expand SSE sequence for computing round from OPERAND1 storing
29622 ix86_expand_round (rtx operand0, rtx operand1)
29624 /* C code for the stuff we're doing below:
29625 double xa = fabs (x);
29626 if (!isless (xa, TWO52))
29628 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29629 return copysign (xa, x);
29631 enum machine_mode mode = GET_MODE (operand0);
29632 rtx res, TWO52, xa, label, xi, half, mask;
29633 const struct real_format *fmt;
29634 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29636 /* Temporary for holding the result, initialized to the input
29637 operand to ease control flow. */
29638 res = gen_reg_rtx (mode);
29639 emit_move_insn (res, operand1);
29641 TWO52 = ix86_gen_TWO52 (mode);
29642 xa = ix86_expand_sse_fabs (res, &mask);
29643 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* pred_half = 0.5 - 2**(-p-1): the value just below 0.5, so exact
   halfway cases round away from zero exactly once.  */
29645 /* load nextafter (0.5, 0.0) */
29646 fmt = REAL_MODE_FORMAT (mode);
29647 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29648 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29650 /* xa = xa + 0.5 */
29651 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29652 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29654 /* xa = (double)(int64_t)xa */
29655 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29656 expand_fix (xi, xa, 0);
29657 expand_float (xa, xi, 0);
29659 /* res = copysign (xa, operand1) */
29660 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29662 emit_label (label);
29663 LABEL_NUSES (label) = 1;
29665 emit_move_insn (operand0, res);
29669 /* Validate whether a SSE5 instruction is valid or not.
29670 OPERANDS is the array of operands.
29671 NUM is the number of operands.
29672 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29673 NUM_MEMORY is the maximum number of memory operands to accept.
29674 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
29677 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29678 bool uses_oc0, int num_memory, bool commutative)
29684 /* Count the number of memory arguments */
29687 for (i = 0; i < num; i++)
29689 enum machine_mode mode = GET_MODE (operands[i]);
29690 if (register_operand (operands[i], mode))
29693 else if (memory_operand (operands[i], mode))
29695 mem_mask |= (1 << i);
29701 rtx pattern = PATTERN (insn);
29703 /* allow 0 for pcmov */
29704 if (GET_CODE (pattern) != SET
29705 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29707 || operands[i] != CONST0_RTX (mode))
29712 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29713 a memory operation. */
29714 if (num_memory < 0)
29716 num_memory = -num_memory;
29717 if ((mem_mask & (1 << (num-1))) != 0)
29719 mem_mask &= ~(1 << (num-1));
29724 /* If there were no memory operations, allow the insn */
29728 /* Do not allow the destination register to be a memory operand. */
29729 else if (mem_mask & (1 << 0))
29732 /* If there are too many memory operations, disallow the instruction. While
29733 the hardware only allows 1 memory reference, before register allocation
29734 for some insns, we allow two memory operations sometimes in order to allow
29735 code like the following to be optimized:
29737 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29739 or similar cases that are vectorized into using the fmaddss
29741 else if (mem_count > num_memory)
29744 /* Don't allow more than one memory operation if not optimizing. */
29745 else if (mem_count > 1 && !optimize)
29748 else if (num == 4 && mem_count == 1)
29750 /* formats (destination is the first argument), example fmaddss:
29751 xmm1, xmm1, xmm2, xmm3/mem
29752 xmm1, xmm1, xmm2/mem, xmm3
29753 xmm1, xmm2, xmm3/mem, xmm1
29754 xmm1, xmm2/mem, xmm3, xmm1 */
29756 return ((mem_mask == (1 << 1))
29757 || (mem_mask == (1 << 2))
29758 || (mem_mask == (1 << 3)));
29760 /* format, example pmacsdd:
29761 xmm1, xmm2, xmm3/mem, xmm1 */
29763 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29765 return (mem_mask == (1 << 2));
29768 else if (num == 4 && num_memory == 2)
29770 /* If there are two memory operations, we can load one of the memory ops
29771 into the destination register. This is for optimizing the
29772 multiply/add ops, which the combiner has optimized both the multiply
29773 and the add insns to have a memory operation. We have to be careful
29774 that the destination doesn't overlap with the inputs. */
29775 rtx op0 = operands[0];
29777 if (reg_mentioned_p (op0, operands[1])
29778 || reg_mentioned_p (op0, operands[2])
29779 || reg_mentioned_p (op0, operands[3]))
29782 /* formats (destination is the first argument), example fmaddss:
29783 xmm1, xmm1, xmm2, xmm3/mem
29784 xmm1, xmm1, xmm2/mem, xmm3
29785 xmm1, xmm2, xmm3/mem, xmm1
29786 xmm1, xmm2/mem, xmm3, xmm1
29788 For the oc0 case, we will load either operands[1] or operands[3] into
29789 operands[0], so any combination of 2 memory operands is ok. */
29793 /* format, example pmacsdd:
29794 xmm1, xmm2, xmm3/mem, xmm1
29796 For the integer multiply/add instructions be more restrictive and
29797 require operands[2] and operands[3] to be the memory operands. */
/* Fixed: the second disjunct previously read "|| ((1 << 2) | (1 << 3))",
   a nonzero constant, making this branch unconditionally true and
   defeating the restriction described in the comment above.  */
29799 return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
29801 return (mem_mask == ((1 << 2) | (1 << 3)));
29804 else if (num == 3 && num_memory == 1)
29806 /* formats, example protb:
29807 xmm1, xmm2, xmm3/mem
29808 xmm1, xmm2/mem, xmm3 */
29810 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29812 /* format, example comeq:
29813 xmm1, xmm2, xmm3/mem */
29815 return (mem_mask == (1 << 2));
29819 gcc_unreachable ();
29825 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
29826 hardware will allow by using the destination register to load one of the
29827 memory operations. Presently this is used by the multiply/add routines to
29828 allow 2 memory references. */
29831 ix86_expand_sse5_multiple_memory (rtx operands[],
29833 enum machine_mode mode)
29835 rtx op0 = operands[0];
/* Preconditions (validated by ix86_sse5_valid_op_p): op0 must be a
   register not mentioned in any input operand.  */
29837 || memory_operand (op0, mode)
29838 || reg_mentioned_p (op0, operands[1])
29839 || reg_mentioned_p (op0, operands[2])
29840 || reg_mentioned_p (op0, operands[3]))
29841 gcc_unreachable ();
29843 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29844 the destination register. */
29845 if (memory_operand (operands[1], mode))
29847 emit_move_insn (op0, operands[1]);
29850 else if (memory_operand (operands[3], mode))
29852 emit_move_insn (op0, operands[3]);
29856 gcc_unreachable ();
29862 /* Table of valid machine attributes. */
29863 static const struct attribute_spec ix86_attribute_table[] =
29865 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29866 /* Stdcall attribute says callee is responsible for popping arguments
29867 if they are not variable. */
29868 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29869 /* Fastcall attribute says callee is responsible for popping arguments
29870 if they are not variable. */
29871 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29872 /* Cdecl attribute says the callee is a normal C declaration */
29873 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29874 /* Regparm attribute specifies how many integer arguments are to be
29875 passed in registers. */
29876 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29877 /* Sseregparm attribute says we are using x86_64 calling conventions
29878 for FP arguments. */
29879 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29880 /* force_align_arg_pointer says this function realigns the stack at entry. */
29881 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29882 false, true, true, ix86_handle_cconv_attribute },
29883 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29884 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29885 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29886 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29888 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29889 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29890 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29891 SUBTARGET_ATTRIBUTE_TABLE,
29893 /* ms_abi and sysv_abi calling convention function attributes. */
29894 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29895 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
/* Sentinel entry — the attribute machinery scans until the NULL name.  */
29897 { NULL, 0, 0, false, false, false, NULL }
29900 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29902 x86_builtin_vectorization_cost (bool runtime_test)
29904 /* If the branch of the runtime test is taken - i.e. - the vectorized
29905 version is skipped - this incurs a misprediction cost (because the
29906 vectorized version is expected to be the fall-through). So we subtract
29907 the latency of a mispredicted branch from the costs that are incured
29908 when the vectorized version is executed.
29910 TODO: The values in individual target tables have to be tuned or new
29911 fields may be needed. For eg. on K8, the default branch path is the
29912 not-taken path. If the taken path is predicted correctly, the minimum
29913 penalty of going down the taken-path is 1 cycle. If the taken-path is
29914 not predicted correctly, then the minimum penalty is 10 cycles. */
/* NOTE(review): the guard on runtime_test and the non-runtime-test return
   are on elided lines — this return presumably applies only when
   runtime_test is true; confirm against full source.  */
29918 return (-(ix86_cost->cond_taken_branch_cost));
29924 /* This function returns the calling abi specific va_list type node.
29925 It returns the FNDECL specific va_list type. */
29928 ix86_fn_abi_va_list (tree fndecl)
/* NOTE(review): the condition guarding this early return is on an elided
   line (presumably a !TARGET_64BIT check) — confirm against full source.  */
29931 return va_list_type_node;
29932 gcc_assert (fndecl != NULL_TREE);
/* 64-bit: pick the MS or System V va_list depending on FNDECL's ABI.  */
29934 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
29935 return ms_va_list_type_node;
29937 return sysv_va_list_type_node;
29940 /* Returns the canonical va_list type specified by TYPE. If there
29941 is no valid TYPE provided, it return NULL_TREE. */
29944 ix86_canonical_va_list_type (tree type)
29948 /* Resolve references and pointers to va_list type. */
29949 if (INDIRECT_REF_P (type))
29950 type = TREE_TYPE (type);
29951 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
29952 type = TREE_TYPE (type);
/* Try each candidate va_list node in turn: generic, then System V, then
   MS.  NOTE(review): the three unwrap-and-compare stanzas below are
   identical; a helper would remove the duplication, but the elided lines
   here make a safe refactor impossible from this extract.  */
29956 wtype = va_list_type_node;
29957 gcc_assert (wtype != NULL_TREE);
29959 if (TREE_CODE (wtype) == ARRAY_TYPE)
29961 /* If va_list is an array type, the argument may have decayed
29962 to a pointer type, e.g. by being passed to another function.
29963 In that case, unwrap both types so that we can compare the
29964 underlying records. */
29965 if (TREE_CODE (htype) == ARRAY_TYPE
29966 || POINTER_TYPE_P (htype))
29968 wtype = TREE_TYPE (wtype);
29969 htype = TREE_TYPE (htype);
29972 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29973 return va_list_type_node;
29974 wtype = sysv_va_list_type_node;
29975 gcc_assert (wtype != NULL_TREE);
29977 if (TREE_CODE (wtype) == ARRAY_TYPE)
29979 /* If va_list is an array type, the argument may have decayed
29980 to a pointer type, e.g. by being passed to another function.
29981 In that case, unwrap both types so that we can compare the
29982 underlying records. */
29983 if (TREE_CODE (htype) == ARRAY_TYPE
29984 || POINTER_TYPE_P (htype))
29986 wtype = TREE_TYPE (wtype);
29987 htype = TREE_TYPE (htype);
29990 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29991 return sysv_va_list_type_node;
29992 wtype = ms_va_list_type_node;
29993 gcc_assert (wtype != NULL_TREE);
29995 if (TREE_CODE (wtype) == ARRAY_TYPE)
29997 /* If va_list is an array type, the argument may have decayed
29998 to a pointer type, e.g. by being passed to another function.
29999 In that case, unwrap both types so that we can compare the
30000 underlying records. */
30001 if (TREE_CODE (htype) == ARRAY_TYPE
30002 || POINTER_TYPE_P (htype))
30004 wtype = TREE_TYPE (wtype);
30005 htype = TREE_TYPE (htype);
30008 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30009 return ms_va_list_type_node;
/* No target-specific match — fall back to the generic handling.  */
30012 return std_canonical_va_list_type (type);
30015 /* Iterate through the target-specific builtin types for va_list.
30016 IDX denotes the iterator, *PTREE is set to the result type of
30017 the va_list builtin, and *PNAME to its internal type.
30018 Returns zero if there is no element for this index, otherwise
30019 IDX should be increased upon the next call.
30020 Note, do not iterate a base builtin's name like __builtin_va_list.
30021 Used from c_common_nodes_and_builtins. */
30024 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* NOTE(review): the dispatch on IDX (and the TARGET_64BIT guard, if any)
   is on elided lines; the two cases below are idx 0 = MS, idx 1 = SysV.  */
30030 *ptree = ms_va_list_type_node;
30031 *pname = "__builtin_ms_va_list";
30034 *ptree = sysv_va_list_type_node;
30035 *pname = "__builtin_sysv_va_list";
30043 /* Initialize the GCC target structure. */
30044 #undef TARGET_RETURN_IN_MEMORY
30045 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30047 #undef TARGET_LEGITIMIZE_ADDRESS
30048 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
30050 #undef TARGET_ATTRIBUTE_TABLE
30051 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30052 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30053 # undef TARGET_MERGE_DECL_ATTRIBUTES
30054 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30057 #undef TARGET_COMP_TYPE_ATTRIBUTES
30058 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
30060 #undef TARGET_INIT_BUILTINS
30061 #define TARGET_INIT_BUILTINS ix86_init_builtins
30062 #undef TARGET_EXPAND_BUILTIN
30063 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30065 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30066 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30067 ix86_builtin_vectorized_function
30069 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30070 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30072 #undef TARGET_BUILTIN_RECIPROCAL
30073 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30075 #undef TARGET_ASM_FUNCTION_EPILOGUE
30076 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30078 #undef TARGET_ENCODE_SECTION_INFO
30079 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30080 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30082 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30085 #undef TARGET_ASM_OPEN_PAREN
30086 #define TARGET_ASM_OPEN_PAREN ""
30087 #undef TARGET_ASM_CLOSE_PAREN
30088 #define TARGET_ASM_CLOSE_PAREN ""
30090 #undef TARGET_ASM_ALIGNED_HI_OP
30091 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30092 #undef TARGET_ASM_ALIGNED_SI_OP
30093 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30095 #undef TARGET_ASM_ALIGNED_DI_OP
30096 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
30099 #undef TARGET_ASM_UNALIGNED_HI_OP
30100 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30101 #undef TARGET_ASM_UNALIGNED_SI_OP
30102 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30103 #undef TARGET_ASM_UNALIGNED_DI_OP
30104 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
30106 #undef TARGET_SCHED_ADJUST_COST
30107 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30108 #undef TARGET_SCHED_ISSUE_RATE
30109 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30110 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30111 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30112 ia32_multipass_dfa_lookahead
30114 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30115 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30118 #undef TARGET_HAVE_TLS
30119 #define TARGET_HAVE_TLS true
30121 #undef TARGET_CANNOT_FORCE_CONST_MEM
30122 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30123 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30124 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30126 #undef TARGET_DELEGITIMIZE_ADDRESS
30127 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30129 #undef TARGET_MS_BITFIELD_LAYOUT_P
30130 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
30133 #undef TARGET_BINDS_LOCAL_P
30134 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30136 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30137 #undef TARGET_BINDS_LOCAL_P
30138 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30141 #undef TARGET_ASM_OUTPUT_MI_THUNK
30142 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30143 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30144 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30146 #undef TARGET_ASM_FILE_START
30147 #define TARGET_ASM_FILE_START x86_file_start
30149 #undef TARGET_DEFAULT_TARGET_FLAGS
30150 #define TARGET_DEFAULT_TARGET_FLAGS \
30152 | TARGET_SUBTARGET_DEFAULT \
30153 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
30155 #undef TARGET_HANDLE_OPTION
30156 #define TARGET_HANDLE_OPTION ix86_handle_option
30158 #undef TARGET_RTX_COSTS
30159 #define TARGET_RTX_COSTS ix86_rtx_costs
/* Target hook overrides, part 1: each #undef/#define pair below replaces
   the default definition of a TARGET_* hook macro with the i386-specific
   implementation (the ix86_* / x86_* functions defined earlier in this
   file), so that the TARGET_INITIALIZER expansion at the end of the file
   picks up these versions when initializing `targetm'.  */

/* RTL cost estimation hooks.  */
30160 #undef TARGET_ADDRESS_COST
30161 #define TARGET_ADDRESS_COST ix86_address_cost
/* Condition-code register handling.  */
30163 #undef TARGET_FIXED_CONDITION_CODE_REGS
30164 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30165 #undef TARGET_CC_MODES_COMPATIBLE
30166 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
/* Machine-dependent reorganization pass.  */
30168 #undef TARGET_MACHINE_DEPENDENT_REORG
30169 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30171 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30172 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
/* Varargs / va_list support.  */
30174 #undef TARGET_BUILD_BUILTIN_VA_LIST
30175 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30177 #undef TARGET_FN_ABI_VA_LIST
30178 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30180 #undef TARGET_CANONICAL_VA_LIST_TYPE
30181 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30183 #undef TARGET_EXPAND_BUILTIN_VA_START
30184 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
/* Inline asm clobber handling.  */
30186 #undef TARGET_MD_ASM_CLOBBERS
30187 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
/* Calling-convention and argument-passing hooks.  */
30189 #undef TARGET_PROMOTE_PROTOTYPES
30190 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30191 #undef TARGET_STRUCT_VALUE_RTX
30192 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30193 #undef TARGET_SETUP_INCOMING_VARARGS
30194 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30195 #undef TARGET_MUST_PASS_IN_STACK
30196 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30197 #undef TARGET_PASS_BY_REFERENCE
30198 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30199 #undef TARGET_INTERNAL_ARG_POINTER
30200 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
/* Stack-realignment / dynamic-realign-argument-pointer support.  */
30201 #undef TARGET_UPDATE_STACK_BOUNDARY
30202 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30203 #undef TARGET_GET_DRAP_RTX
30204 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
/* DWARF frame unwind information.  */
30205 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
30206 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
30207 #undef TARGET_STRICT_ARGUMENT_NAMING
30208 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
/* va_arg gimplification.  */
30210 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
30211 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
/* Mode-support queries.  */
30213 #undef TARGET_SCALAR_MODE_SUPPORTED_P
30214 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30216 #undef TARGET_VECTOR_MODE_SUPPORTED_P
30217 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30219 #undef TARGET_C_MODE_FOR_SUFFIX
30220 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
/* Thread-local storage DWARF output.  */
30223 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30224 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
30227 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30228 #undef TARGET_INSERT_ATTRIBUTES
30229 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
/* Target hook overrides, part 2: remaining i386-specific hook
   definitions, followed by the definition of `targetm' itself.  */

/* C++ ABI name mangling for target-specific types.  */
30232 #undef TARGET_MANGLE_TYPE
30233 #define TARGET_MANGLE_TYPE ix86_mangle_type
/* Stack-smashing-protector failure handling.  */
30235 #undef TARGET_STACK_PROTECT_FAIL
30236 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
/* Function return-value location.  */
30238 #undef TARGET_FUNCTION_VALUE
30239 #define TARGET_FUNCTION_VALUE ix86_function_value
/* Secondary reload register selection.  */
30241 #undef TARGET_SECONDARY_RELOAD
30242 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
/* Auto-vectorizer cost model.  */
30244 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30245 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
/* Per-function target-option (attribute/pragma) support.  */
30247 #undef TARGET_SET_CURRENT_FUNCTION
30248 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30250 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30251 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30253 #undef TARGET_OPTION_SAVE
30254 #define TARGET_OPTION_SAVE ix86_function_specific_save
30256 #undef TARGET_OPTION_RESTORE
30257 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30259 #undef TARGET_OPTION_PRINT
30260 #define TARGET_OPTION_PRINT ix86_function_specific_print
30262 #undef TARGET_OPTION_CAN_INLINE_P
30263 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
/* Hook run when expanding a function to RTL (ABI switching).  */
30265 #undef TARGET_EXPAND_TO_RTL_HOOK
30266 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
/* The target vtable: TARGET_INITIALIZER (from target-def.h) expands to a
   struct initializer that references all the TARGET_* macros defined
   above, producing the i386 backend's hook table.  */
30268 struct gcc_target targetm = TARGET_INITIALIZER;
/* Garbage-collector roots generated by gengtype for this file.  */
30270 #include "gt-i386.h"